Add new action to run agent eval (#29158)

The old one wasn't linking, and
https://github.com/zed-industries/zed/pull/29081 has a bunch of merge
conflicts. I wanted to start simple and small.

## Todo

* [x] Remove low-signal examples
* [x] Make the eval run on a cron, on main, and on any PR with the
`run-eval` label
* [x] Noise in logs about failure to write settings
    ```
    [2025-04-21T20:45:04Z ERROR settings] Failed to write settings to file "/home/runner/.config/zed/settings.json"

    Caused by:
        No such file or directory (os error 2) at path "/home/runner/.config/zed/.tmpLewFEs"
    ```
* [x] `Agentic loop stalled`
(https://github.com/zed-industries/zed/actions/runs/14581044243/job/40897622894)
* [x] Make sure that events are recorded in Snowflake
* [ ] Change judge criteria to be more explicit about meanings of scores

Release Notes:

- N/A

---------

Co-authored-by: Antonio Scandurra <me@as-cii.com>
Co-authored-by: Agus Zubiaga <hi@aguz.me>
Co-authored-by: Max Brunsfeld <maxbrunsfeld@gmail.com>
Co-authored-by: Thomas Mickley-Doyle <tmickleydoyle@gmail.com>
This commit is contained in:
Nathan Sobo 2025-04-21 22:30:21 -06:00 committed by GitHub
parent b14356d1d3
commit 458ffaa134
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
58 changed files with 291 additions and 385 deletions

View file

@ -516,6 +516,7 @@ pub async fn post_events(
if let Some(kinesis_client) = app.kinesis_client.clone() {
if let Some(stream) = app.config.kinesis_stream.clone() {
let mut request = kinesis_client.put_records().stream_name(stream);
let mut has_records = false;
for row in for_snowflake(
request_body.clone(),
first_event_at,
@ -530,9 +531,12 @@ pub async fn post_events(
.build()
.unwrap(),
);
has_records = true;
}
}
request.send().await.log_err();
if has_records {
request.send().await.log_err();
}
}
};
@ -555,7 +559,7 @@ fn for_snowflake(
country_code: Option<String>,
checksum_matched: bool,
) -> impl Iterator<Item = SnowflakeRow> {
body.events.into_iter().flat_map(move |event| {
body.events.into_iter().filter_map(move |event| {
let timestamp =
first_event_at + Duration::milliseconds(event.milliseconds_since_first_event);
// We will need to double check, but I believe all of the events that
@ -744,9 +748,11 @@ fn for_snowflake(
// NOTE: most amplitude user properties are read out of our event_properties
// dictionary. See https://app.amplitude.com/data/zed/Zed/sources/detail/production/falcon%3A159998
// for how that is configured.
let user_properties = Some(serde_json::json!({
"is_staff": body.is_staff,
}));
let user_properties = body.is_staff.map(|is_staff| {
serde_json::json!({
"is_staff": is_staff,
})
});
Some(SnowflakeRow {
time: timestamp,