Agent Eval: Initial support for running examples repeatedly (#28844)
Not ideal, as it creates a separate worktree for each repetition.
Release Notes:
- N/A
This commit is contained in:
parent
609895d95f
commit
9a9f2e71ca
2 changed files with 25 additions and 10 deletions
|
@ -44,6 +44,10 @@ struct Args {
|
||||||
model: String,
|
model: String,
|
||||||
#[arg(long, value_delimiter = ',')]
|
#[arg(long, value_delimiter = ',')]
|
||||||
languages: Option<Vec<String>>,
|
languages: Option<Vec<String>>,
|
||||||
|
/// How many times to run each example. Note that this is currently not very efficient as N
|
||||||
|
/// worktrees will be created for the examples.
|
||||||
|
#[arg(long, default_value = "1")]
|
||||||
|
repetitions: u32,
|
||||||
/// How many times to run the judge on each example run.
|
/// How many times to run the judge on each example run.
|
||||||
#[arg(long, default_value = "3")]
|
#[arg(long, default_value = "3")]
|
||||||
judge_repetitions: u32,
|
judge_repetitions: u32,
|
||||||
|
@ -146,6 +150,13 @@ fn main() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: This creates a worktree per repetition. Ideally these examples should
|
||||||
|
// either be run sequentially on the same worktree, or reuse worktrees when there
|
||||||
|
// are more examples to run than the concurrency limit.
|
||||||
|
for repetition_number in 0..args.repetitions {
|
||||||
|
let mut example = example.clone();
|
||||||
|
example.set_repetition_number(repetition_number);
|
||||||
|
|
||||||
let name_len = example.name.len();
|
let name_len = example.name.len();
|
||||||
if name_len > max_name_width {
|
if name_len > max_name_width {
|
||||||
max_name_width = example.name.len();
|
max_name_width = example.name.len();
|
||||||
|
@ -153,6 +164,7 @@ fn main() {
|
||||||
|
|
||||||
examples.push(example);
|
examples.push(example);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
println!("Skipped examples: {}\n", skipped.join(", "));
|
println!("Skipped examples: {}\n", skipped.join(", "));
|
||||||
|
|
||||||
|
|
|
@ -94,11 +94,7 @@ impl Example {
|
||||||
let base_path = dir_path.join("base.toml");
|
let base_path = dir_path.join("base.toml");
|
||||||
let prompt_path = dir_path.join("prompt.md");
|
let prompt_path = dir_path.join("prompt.md");
|
||||||
let criteria_path = dir_path.join("criteria.md");
|
let criteria_path = dir_path.join("criteria.md");
|
||||||
|
let output_file_path = run_dir.join(format!("{}.md", name));
|
||||||
let output_file_path = run_dir.join(format!(
|
|
||||||
"{}.md",
|
|
||||||
dir_path.file_name().unwrap().to_str().unwrap()
|
|
||||||
));
|
|
||||||
|
|
||||||
Ok(Example {
|
Ok(Example {
|
||||||
name: name.clone(),
|
name: name.clone(),
|
||||||
|
@ -112,6 +108,13 @@ impl Example {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn set_repetition_number(&mut self, repetition_number: u32) {
|
||||||
|
if repetition_number > 0 {
|
||||||
|
self.name = format!("{}-{}", self.name, repetition_number);
|
||||||
|
self.output_file_path = self.run_dir.join(format!("{}.md", self.name));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn set_log_prefix_style(&mut self, color: &str, name_width: usize) {
|
pub fn set_log_prefix_style(&mut self, color: &str, name_width: usize) {
|
||||||
self.log_prefix = format!(
|
self.log_prefix = format!(
|
||||||
"{}{:<width$}\x1b[0m | ",
|
"{}{:<width$}\x1b[0m | ",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue