Agent Eval: bounded concurrency (#28843)

Release Notes:

- N/A
This commit is contained in:
Michael Sloan 2025-04-16 00:05:46 -06:00 committed by GitHub
parent da2d8bd845
commit 609895d95f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -10,6 +10,7 @@ use anyhow::{Result, anyhow};
use clap::Parser; use clap::Parser;
use extension::ExtensionHostProxy; use extension::ExtensionHostProxy;
use futures::future; use futures::future;
use futures::stream::StreamExt;
use gpui::http_client::{Uri, read_proxy_from_env}; use gpui::http_client::{Uri, read_proxy_from_env};
use gpui::{App, AppContext, Application, AsyncApp, Entity, SemanticVersion, Task, UpdateGlobal}; use gpui::{App, AppContext, Application, AsyncApp, Entity, SemanticVersion, Task, UpdateGlobal};
use gpui_tokio::Tokio; use gpui_tokio::Tokio;
@ -46,6 +47,9 @@ struct Args {
/// How many times to run the judge on each example run. /// How many times to run the judge on each example run.
#[arg(long, default_value = "3")] #[arg(long, default_value = "3")]
judge_repetitions: u32, judge_repetitions: u32,
/// Maximum number of examples to run concurrently.
#[arg(long, default_value = "10")]
concurrency: usize,
} }
fn main() { fn main() {
@ -217,22 +221,25 @@ fn main() {
} }
let judge_repetitions = args.judge_repetitions; let judge_repetitions = args.judge_repetitions;
let concurrency = args.concurrency;
let tasks = examples let tasks = examples
.into_iter() .into_iter()
.map(|example| { .map(|example| {
let app_state = app_state.clone(); let app_state = app_state.clone();
let model = model.clone(); let model = model.clone();
cx.spawn(async move |cx| { cx.spawn(async move |cx| {
( let result =
run_example(&example, model, app_state, judge_repetitions, cx).await, run_example(&example, model, app_state, judge_repetitions, cx).await;
example, (result, example)
)
}) })
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let results: Vec<(Result<Vec<Result<JudgeOutput>>>, Example)> = let results = futures::stream::iter(tasks)
future::join_all(tasks).await; .buffer_unordered(concurrency)
.collect::<Vec<(Result<Vec<Result<JudgeOutput>>>, Example)>>()
.await;
println!("\n\n"); println!("\n\n");
println!("========================================"); println!("========================================");