Add new action to run agent eval (#29158)
The old one wasn't linking, and https://github.com/zed-industries/zed/pull/29081 has a bunch of merge conflicts. Wanted to start simple/small.

## Todo

* [x] Remove low-signal examples
* [x] Make the eval run on a cron, on main, and on any PR with the `run-eval` label
* [x] Noise in logs about failure to write settings
  ```
  [2025-04-21T20:45:04Z ERROR settings] Failed to write settings to file "/home/runner/.config/zed/settings.json"
  Caused by: No such file or directory (os error 2) at path "/home/runner/.config/zed/.tmpLewFEs"
  ```
* [x] `Agentic loop stalled` (https://github.com/zed-industries/zed/actions/runs/14581044243/job/40897622894)
* [x] Make sure that events are recorded in Snowflake
* [ ] Change judge criteria to be more explicit about meanings of scores

Release Notes:

- N/A

---------

Co-authored-by: Antonio Scandurra <me@as-cii.com>
Co-authored-by: Agus Zubiaga <hi@aguz.me>
Co-authored-by: Max Brunsfeld <maxbrunsfeld@gmail.com>
Co-authored-by: Thomas Mickley-Doyle <tmickleydoyle@gmail.com>
This commit is contained in:
parent
b14356d1d3
commit
458ffaa134
58 changed files with 291 additions and 385 deletions
|
@ -1010,6 +1010,7 @@ impl ActiveThread {
|
|||
}
|
||||
}
|
||||
ThreadEvent::CheckpointChanged => cx.notify(),
|
||||
ThreadEvent::ReceivedTextChunk => {}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1231,6 +1231,7 @@ impl Thread {
|
|||
current_token_usage = token_usage;
|
||||
}
|
||||
LanguageModelCompletionEvent::Text(chunk) => {
|
||||
cx.emit(ThreadEvent::ReceivedTextChunk);
|
||||
if let Some(last_message) = thread.messages.last_mut() {
|
||||
if last_message.role == Role::Assistant {
|
||||
last_message.push_text(&chunk);
|
||||
|
@ -1780,7 +1781,7 @@ impl Thread {
|
|||
thread_data,
|
||||
final_project_snapshot
|
||||
);
|
||||
client.telemetry().flush_events();
|
||||
client.telemetry().flush_events().await;
|
||||
|
||||
Ok(())
|
||||
})
|
||||
|
@ -1825,7 +1826,7 @@ impl Thread {
|
|||
thread_data,
|
||||
final_project_snapshot
|
||||
);
|
||||
client.telemetry().flush_events();
|
||||
client.telemetry().flush_events().await;
|
||||
|
||||
Ok(())
|
||||
})
|
||||
|
@ -2081,7 +2082,7 @@ impl Thread {
|
|||
github_login = github_login
|
||||
);
|
||||
|
||||
client.telemetry().flush_events();
|
||||
client.telemetry().flush_events().await;
|
||||
}
|
||||
}
|
||||
})
|
||||
|
@ -2199,6 +2200,7 @@ pub enum ThreadEvent {
|
|||
ShowError(ThreadError),
|
||||
UsageUpdated(RequestUsage),
|
||||
StreamedCompletion,
|
||||
ReceivedTextChunk,
|
||||
StreamedAssistantText(MessageId, String),
|
||||
StreamedAssistantThinking(MessageId, String),
|
||||
StreamedToolUse {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue