diff --git a/crates/eval/src/examples/overwrite_file.rs b/crates/eval/src/examples/overwrite_file.rs index 368ebd5cea..4438f37a06 100644 --- a/crates/eval/src/examples/overwrite_file.rs +++ b/crates/eval/src/examples/overwrite_file.rs @@ -12,8 +12,10 @@ This eval tests a fix for a destructive behavior of the `edit_file` tool. Previously, it would rewrite existing files too aggressively, which often resulted in content loss. -Pass rate before the fix: 10% -Pass rate after the fix: 100% +Model | Pass rate +----------------|---------- +Sonnet 3.7 | 100% +Gemini 2.5 Pro | 80% */ #[async_trait(?Send)] @@ -38,7 +40,9 @@ impl Example for FileOverwriteExample { let input = tool_use.parse_input::()?; match input.mode { EditFileMode::Edit => false, - EditFileMode::Create | EditFileMode::Overwrite => true, + EditFileMode::Create | EditFileMode::Overwrite => { + input.path.ends_with("src/language_model_selector.rs") + } } } else { false diff --git a/crates/eval/src/explorer.rs b/crates/eval/src/explorer.rs index a89b556ab4..ee1dfa95c3 100644 --- a/crates/eval/src/explorer.rs +++ b/crates/eval/src/explorer.rs @@ -2,22 +2,65 @@ use anyhow::{Context as _, Result}; use clap::Parser; use serde_json::{Value, json}; use std::fs; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; #[derive(Parser, Debug)] #[clap(about = "Generate HTML explorer from JSON thread files")] struct Args { - /// Paths to JSON files containing thread data + /// Paths to JSON files or directories. If a directory is provided, + /// it will be searched for 'last.messages.json' files up to 2 levels deep. #[clap(long, required = true, num_args = 1..)] input: Vec, - /// Path where the HTML explorer file will be written + /// Path where the output HTML file will be written #[clap(long)] output: PathBuf, } -pub fn generate_explorer_html(inputs: &[PathBuf], output: &PathBuf) -> Result { - if let Some(parent) = output.parent() { +/// Recursively finds files with `target_filename` in `dir_path` up to `max_depth`. +#[allow(dead_code)] +fn find_target_files_recursive( + dir_path: &Path, + target_filename: &str, + current_depth: u8, + max_depth: u8, + found_files: &mut Vec, +) -> Result<()> { + if current_depth > max_depth { + return Ok(()); + } + + for entry_result in fs::read_dir(dir_path) + .with_context(|| format!("Failed to read directory: {}", dir_path.display()))? + { + let entry = entry_result.with_context(|| { + format!("Failed to read directory entry in: {}", dir_path.display()) + })?; + let path = entry.path(); + + if path.is_dir() { + find_target_files_recursive( + &path, + target_filename, + current_depth + 1, + max_depth, + found_files, + )?; + } else if path.is_file() { + if let Some(filename_osstr) = path.file_name() { + if let Some(filename_str) = filename_osstr.to_str() { + if filename_str == target_filename { + found_files.push(path); + } + } + } + } + } + Ok(()) +} + +pub fn generate_explorer_html(input_paths: &[PathBuf], output_path: &PathBuf) -> Result { + if let Some(parent) = output_path.parent() { if !parent.exists() { fs::create_dir_all(parent).context(format!( "Failed to create output directory: {}", @@ -27,41 +70,67 @@ pub fn generate_explorer_html(inputs: &[PathBuf], output: &PathBuf) -> Result() - .context(format!("Failed to parse JSON: {}", input_path.display()))?; - thread_data["filename"] = json!(input_path); // This will be shown in a thread heading + .context(format!("Failed to parse JSON from file: {}", input_path.display()))?; + + if let Some(obj) = thread_data.as_object_mut() { + obj.insert("filename".to_string(), json!(input_path.display().to_string())); + } else { + eprintln!("Warning: JSON data in {} is not a root object. Wrapping it to include filename.", input_path.display()); + thread_data = json!({ + "original_data": thread_data, + "filename": input_path.display().to_string() + }); + } Ok(thread_data) }) .collect::>>()?; - let all_threads = json!({ "threads": threads }); - let html_content = inject_thread_data(template, all_threads)?; - fs::write(&output, &html_content) - .context(format!("Failed to write output: {}", output.display()))?; + let all_threads_data = json!({ "threads": threads }); + let html_content = inject_thread_data(template_content, all_threads_data)?; + fs::write(&output_path, &html_content) + .context(format!("Failed to write output: {}", output_path.display()))?; - println!("Saved {} thread(s) to {}", threads.len(), output.display()); + println!( + "Saved data from {} resolved file(s) ({} threads) to {}", + input_paths.len(), + threads.len(), + output_path.display() + ); Ok(html_content) } fn inject_thread_data(template: String, threads_data: Value) -> Result { let injection_marker = "let threadsData = window.threadsData || { threads: [dummyThread] };"; - template - .find(injection_marker) - .context("Could not find the thread injection point in the template")?; + if !template.contains(injection_marker) { + anyhow::bail!( + "Could not find the thread injection point in the template. Expected: '{}'", + injection_marker + ); + } - let threads_json = serde_json::to_string_pretty(&threads_data) - .context("Failed to serialize threads data to JSON")?; - let script_injection = format!("let threadsData = {};", threads_json); + let threads_json_string = serde_json::to_string_pretty(&threads_data) + .context("Failed to serialize threads data to JSON")? + .replace("", r"<\/script>"); + + let script_injection = format!("let threadsData = {};", threads_json_string); let final_html = template.replacen(injection_marker, &script_injection, 1); Ok(final_html) @@ -71,5 +140,45 @@ fn inject_thread_data(template: String, threads_data: Value) -> Result { #[allow(dead_code)] fn main() -> Result<()> { let args = Args::parse(); - generate_explorer_html(&args.input, &args.output).map(|_| ()) + + const DEFAULT_FILENAME: &str = "last.messages.json"; + const MAX_SEARCH_DEPTH: u8 = 2; + + let mut resolved_input_files: Vec = Vec::new(); + + for input_path_arg in &args.input { + if !input_path_arg.exists() { + eprintln!( + "Warning: Input path {} does not exist. Skipping.", + input_path_arg.display() + ); + continue; + } + + if input_path_arg.is_dir() { + find_target_files_recursive( + input_path_arg, + DEFAULT_FILENAME, + 0, // starting depth + MAX_SEARCH_DEPTH, + &mut resolved_input_files, + ) + .with_context(|| { + format!( + "Error searching for '{}' files in directory: {}", + DEFAULT_FILENAME, + input_path_arg.display() + ) + })?; + } else if input_path_arg.is_file() { + resolved_input_files.push(input_path_arg.clone()); + } + } + + resolved_input_files.sort_unstable(); + resolved_input_files.dedup(); + + println!("No input paths provided/found."); + + generate_explorer_html(&resolved_input_files, &args.output).map(|_| ()) }