diff --git a/crates/eval/src/eval.rs b/crates/eval/src/eval.rs index 1e1da15464..ca6b11be0a 100644 --- a/crates/eval/src/eval.rs +++ b/crates/eval/src/eval.rs @@ -44,6 +44,10 @@ struct Args { model: String, #[arg(long, value_delimiter = ',')] languages: Option<Vec<String>>, + /// How many times to run each example. Note that this is currently not very efficient as N + /// worktrees will be created for the examples. + #[arg(long, default_value = "1")] + repetitions: u32, /// How many times to run the judge on each example run. #[arg(long, default_value = "3")] judge_repetitions: u32, @@ -146,12 +150,20 @@ fn main() { continue; } - let name_len = example.name.len(); - if name_len > max_name_width { - max_name_width = example.name.len(); - } + // TODO: This creates a worktree per repetition. Ideally these examples should + // either be run sequentially on the same worktree, or reuse worktrees when there + // are more examples to run than the concurrency limit. + for repetition_number in 0..args.repetitions { + let mut example = example.clone(); + example.set_repetition_number(repetition_number); - examples.push(example); + let name_len = example.name.len(); + if name_len > max_name_width { + max_name_width = example.name.len(); + } + + examples.push(example); + } } println!("Skipped examples: {}\n", skipped.join(", ")); diff --git a/crates/eval/src/example.rs b/crates/eval/src/example.rs index e69b520fb4..8e7f6fc006 100644 --- a/crates/eval/src/example.rs +++ b/crates/eval/src/example.rs @@ -94,11 +94,7 @@ impl Example { let base_path = dir_path.join("base.toml"); let prompt_path = dir_path.join("prompt.md"); let criteria_path = dir_path.join("criteria.md"); - - let output_file_path = run_dir.join(format!( - "{}.md", - dir_path.file_name().unwrap().to_str().unwrap() - )); + let output_file_path = run_dir.join(format!("{}.md", name)); Ok(Example { name: name.clone(), @@ -112,6 +108,13 @@ impl Example { }) } + pub fn set_repetition_number(&mut self, repetition_number: u32) { + if 
repetition_number > 0 { + self.name = format!("{}-{}", self.name, repetition_number); + self.output_file_path = self.run_dir.join(format!("{}.md", self.name)); + } + } + pub fn set_log_prefix_style(&mut self, color: &str, name_width: usize) { self.log_prefix = format!( "{}{: