Agent Eval: Initial support for running examples repeatedly (#28844)
Not ideal, as it creates a separate worktree for each repetition. Release Notes: - N/A
This commit is contained in:
parent
609895d95f
commit
9a9f2e71ca
2 changed files with 25 additions and 10 deletions
|
@ -44,6 +44,10 @@ struct Args {
|
|||
model: String,
|
||||
#[arg(long, value_delimiter = ',')]
|
||||
languages: Option<Vec<String>>,
|
||||
/// How many times to run each example. Note that this is currently not very efficient as N
|
||||
/// worktrees will be created for the examples.
|
||||
#[arg(long, default_value = "1")]
|
||||
repetitions: u32,
|
||||
/// How many times to run the judge on each example run.
|
||||
#[arg(long, default_value = "3")]
|
||||
judge_repetitions: u32,
|
||||
|
@ -146,12 +150,20 @@ fn main() {
|
|||
continue;
|
||||
}
|
||||
|
||||
let name_len = example.name.len();
|
||||
if name_len > max_name_width {
|
||||
max_name_width = example.name.len();
|
||||
}
|
||||
// TODO: This creates a worktree per repetition. Ideally these examples should
|
||||
// either be run sequentially on the same worktree, or reuse worktrees when there
|
||||
// are more examples to run than the concurrency limit.
|
||||
for repetition_number in 0..args.repetitions {
|
||||
let mut example = example.clone();
|
||||
example.set_repetition_number(repetition_number);
|
||||
|
||||
examples.push(example);
|
||||
let name_len = example.name.len();
|
||||
if name_len > max_name_width {
|
||||
max_name_width = example.name.len();
|
||||
}
|
||||
|
||||
examples.push(example);
|
||||
}
|
||||
}
|
||||
|
||||
println!("Skipped examples: {}\n", skipped.join(", "));
|
||||
|
|
|
@ -94,11 +94,7 @@ impl Example {
|
|||
let base_path = dir_path.join("base.toml");
|
||||
let prompt_path = dir_path.join("prompt.md");
|
||||
let criteria_path = dir_path.join("criteria.md");
|
||||
|
||||
let output_file_path = run_dir.join(format!(
|
||||
"{}.md",
|
||||
dir_path.file_name().unwrap().to_str().unwrap()
|
||||
));
|
||||
let output_file_path = run_dir.join(format!("{}.md", name));
|
||||
|
||||
Ok(Example {
|
||||
name: name.clone(),
|
||||
|
@ -112,6 +108,13 @@ impl Example {
|
|||
})
|
||||
}
|
||||
|
||||
/// Tags this example with a repetition index so repeated runs get distinct names and output files.
///
/// For a `repetition_number` of 0 nothing changes. For any larger value, `-{repetition_number}`
/// is appended to `self.name`, and `self.output_file_path` is recomputed as
/// `{run_dir}/{name}.md` using the new name.
///
/// The suffix is appended to whatever `self.name` currently holds, so calling this more than
/// once on the same value with non-zero numbers stacks suffixes.
pub fn set_repetition_number(&mut self, repetition_number: u32) {
|
||||
// Repetition 0 keeps the original name and output path.
if repetition_number > 0 {
|
||||
self.name = format!("{}-{}", self.name, repetition_number);
|
||||
// Rebuild the output path from the renamed example so it matches the new name.
self.output_file_path = self.run_dir.join(format!("{}.md", self.name));
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_log_prefix_style(&mut self, color: &str, name_width: usize) {
|
||||
self.log_prefix = format!(
|
||||
"{}{:<width$}\x1b[0m | ",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue