Add more eval examples + filtering examples by language + fix git concurrent usage (#28719)

Release Notes:

- N/A

---------

Co-authored-by: michael <michael@zed.dev>
Co-authored-by: agus <agus@zed.dev>
This commit is contained in:
Thomas Mickley-Doyle 2025-04-14 17:05:46 -05:00 committed by GitHub
parent a8b1ef3531
commit d74f0735c2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
76 changed files with 365 additions and 8 deletions

View file

@ -39,6 +39,9 @@ struct Args {
/// Model to use (default: "claude-3-7-sonnet-latest")
#[arg(long, default_value = "claude-3-7-sonnet-latest")]
model: String,
/// Languages to run (comma-separated, e.g. "js,ts,py"). If unspecified, only Rust examples are run.
#[arg(long, value_delimiter = ',')]
languages: Option<Vec<String>>,
}
fn main() {
@ -46,6 +49,8 @@ fn main() {
let args = Args::parse();
let all_available_examples = list_all_examples().unwrap();
let languages = args.languages.unwrap_or_else(|| vec!["rs".to_string()]);
let example_paths = all_available_examples
.iter()
.filter_map(|example_path| {
@ -94,6 +99,17 @@ fn main() {
let mut examples = Vec::new();
for example_path in example_paths {
let example = Example::load_from_directory(&example_path, &run_dir)?;
if !example
.base
.language_extension
.as_ref()
.map_or(false, |lang| languages.contains(lang))
{
println!("Skipping {}", example.name);
continue;
}
examples.push((example_path, example));
}
let mut repo_urls = HashSet::new();
@ -133,6 +149,10 @@ fn main() {
future::join_all(clone_tasks).await;
for (_, example) in examples.iter() {
example.setup().await?;
}
let tasks = examples
.into_iter()
.map(|(example_path, example)| {
@ -197,7 +217,6 @@ async fn run_example(
app_state: Arc<AgentAppState>,
cx: &mut AsyncApp,
) -> Result<JudgeOutput> {
example.setup().await?;
cx.update(|cx| example.run(model.clone(), app_state, cx))?
.await?;
let diff = example.repository_diff().await?;