Agent Eval: bounded concurrency (#28843)

Release Notes:

- N/A
This commit is contained in:
Michael Sloan 2025-04-16 00:05:46 -06:00 committed by GitHub
parent da2d8bd845
commit 609895d95f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -10,6 +10,7 @@ use anyhow::{Result, anyhow};
use clap::Parser;
use extension::ExtensionHostProxy;
use futures::future;
use futures::stream::StreamExt;
use gpui::http_client::{Uri, read_proxy_from_env};
use gpui::{App, AppContext, Application, AsyncApp, Entity, SemanticVersion, Task, UpdateGlobal};
use gpui_tokio::Tokio;
@ -46,6 +47,9 @@ struct Args {
/// How many times to run the judge on each example run.
#[arg(long, default_value = "3")]
judge_repetitions: u32,
/// Maximum number of examples to run concurrently.
#[arg(long, default_value = "10")]
concurrency: usize,
}
fn main() {
@ -217,22 +221,25 @@ fn main() {
}
let judge_repetitions = args.judge_repetitions;
let concurrency = args.concurrency;
let tasks = examples
.into_iter()
.map(|example| {
let app_state = app_state.clone();
let model = model.clone();
cx.spawn(async move |cx| {
(
run_example(&example, model, app_state, judge_repetitions, cx).await,
example,
)
let result =
run_example(&example, model, app_state, judge_repetitions, cx).await;
(result, example)
})
})
.collect::<Vec<_>>();
let results: Vec<(Result<Vec<Result<JudgeOutput>>>, Example)> =
future::join_all(tasks).await;
let results = futures::stream::iter(tasks)
.buffer_unordered(concurrency)
.collect::<Vec<(Result<Vec<Result<JudgeOutput>>>, Example)>>()
.await;
println!("\n\n");
println!("========================================");