evals: Allow threads explorer to search for JSON files recursively (#31509)

It's just more convenient to call it from the CLI this way.

+ minor fixes in evals

Release Notes:

- N/A
This commit is contained in:
Oleksiy Syvokon 2025-05-27 17:18:47 +03:00 committed by GitHub
parent 239ffa49e1
commit 61a40e293d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 139 additions and 26 deletions

View file

@ -12,8 +12,10 @@ This eval tests a fix for a destructive behavior of the `edit_file` tool.
Previously, it would rewrite existing files too aggressively, which often Previously, it would rewrite existing files too aggressively, which often
resulted in content loss. resulted in content loss.
Pass rate before the fix: 10% Model | Pass rate
Pass rate after the fix: 100% ----------------|----------
Sonnet 3.7 | 100%
Gemini 2.5 Pro | 80%
*/ */
#[async_trait(?Send)] #[async_trait(?Send)]
@ -38,7 +40,9 @@ impl Example for FileOverwriteExample {
let input = tool_use.parse_input::<EditFileToolInput>()?; let input = tool_use.parse_input::<EditFileToolInput>()?;
match input.mode { match input.mode {
EditFileMode::Edit => false, EditFileMode::Edit => false,
EditFileMode::Create | EditFileMode::Overwrite => true, EditFileMode::Create | EditFileMode::Overwrite => {
input.path.ends_with("src/language_model_selector.rs")
}
} }
} else { } else {
false false

View file

@ -2,22 +2,65 @@ use anyhow::{Context as _, Result};
use clap::Parser; use clap::Parser;
use serde_json::{Value, json}; use serde_json::{Value, json};
use std::fs; use std::fs;
use std::path::PathBuf; use std::path::{Path, PathBuf};
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
#[clap(about = "Generate HTML explorer from JSON thread files")] #[clap(about = "Generate HTML explorer from JSON thread files")]
struct Args { struct Args {
/// Paths to JSON files containing thread data /// Paths to JSON files or directories. If a directory is provided,
/// it will be searched for 'last.messages.json' files up to 2 levels deep.
#[clap(long, required = true, num_args = 1..)] #[clap(long, required = true, num_args = 1..)]
input: Vec<PathBuf>, input: Vec<PathBuf>,
/// Path where the HTML explorer file will be written /// Path where the output HTML file will be written
#[clap(long)] #[clap(long)]
output: PathBuf, output: PathBuf,
} }
pub fn generate_explorer_html(inputs: &[PathBuf], output: &PathBuf) -> Result<String> { /// Recursively finds files with `target_filename` in `dir_path` up to `max_depth`.
if let Some(parent) = output.parent() { #[allow(dead_code)]
/// Walks `dir_path` and appends every regular file named exactly
/// `target_filename` to `found_files`.
///
/// `current_depth` is the depth of `dir_path` itself; each subdirectory is
/// visited with `current_depth + 1`. A directory whose depth exceeds
/// `max_depth` is not read at all and the call returns `Ok(())`.
///
/// # Errors
///
/// Fails if a directory (or one of its entries) cannot be read; the error
/// context names the directory that was being listed.
fn find_target_files_recursive(
    dir_path: &Path,
    target_filename: &str,
    current_depth: u8,
    max_depth: u8,
    found_files: &mut Vec<PathBuf>,
) -> Result<()> {
    // Only list directories that are still within the allowed depth.
    if current_depth <= max_depth {
        let listing = fs::read_dir(dir_path)
            .with_context(|| format!("Failed to read directory: {}", dir_path.display()))?;

        for item in listing {
            let candidate = item
                .with_context(|| {
                    format!("Failed to read directory entry in: {}", dir_path.display())
                })?
                .path();

            if candidate.is_dir() {
                // Descend one level; the callee performs its own depth check.
                find_target_files_recursive(
                    &candidate,
                    target_filename,
                    current_depth + 1,
                    max_depth,
                    found_files,
                )?;
                continue;
            }

            // Names that are not valid UTF-8 can never equal `target_filename`.
            let name_matches =
                candidate.file_name().and_then(|name| name.to_str()) == Some(target_filename);
            if candidate.is_file() && name_matches {
                found_files.push(candidate);
            }
        }
    }

    Ok(())
}
pub fn generate_explorer_html(input_paths: &[PathBuf], output_path: &PathBuf) -> Result<String> {
if let Some(parent) = output_path.parent() {
if !parent.exists() { if !parent.exists() {
fs::create_dir_all(parent).context(format!( fs::create_dir_all(parent).context(format!(
"Failed to create output directory: {}", "Failed to create output directory: {}",
@ -27,41 +70,67 @@ pub fn generate_explorer_html(inputs: &[PathBuf], output: &PathBuf) -> Result<St
} }
let template_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("src/explorer.html"); let template_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("src/explorer.html");
let template = fs::read_to_string(&template_path).context(format!( let template_content = fs::read_to_string(&template_path).context(format!(
"Template file not found or couldn't be read: {}", "Template file not found or couldn't be read: {}",
template_path.display() template_path.display()
))?; ))?;
let threads = inputs if input_paths.is_empty() {
println!(
"No input JSON files found to process. Explorer will be generated with template defaults or empty data."
);
}
let threads = input_paths
.iter() .iter()
.map(|input_path| { .map(|input_path| {
let mut thread_data: Value = fs::read_to_string(input_path) let file_content = fs::read_to_string(input_path)
.context(format!("Failed to read file: {}", input_path.display()))? .context(format!("Failed to read file: {}", input_path.display()))?;
let mut thread_data: Value = file_content
.parse::<Value>() .parse::<Value>()
.context(format!("Failed to parse JSON: {}", input_path.display()))?; .context(format!("Failed to parse JSON from file: {}", input_path.display()))?;
thread_data["filename"] = json!(input_path); // This will be shown in a thread heading
if let Some(obj) = thread_data.as_object_mut() {
obj.insert("filename".to_string(), json!(input_path.display().to_string()));
} else {
eprintln!("Warning: JSON data in {} is not a root object. Wrapping it to include filename.", input_path.display());
thread_data = json!({
"original_data": thread_data,
"filename": input_path.display().to_string()
});
}
Ok(thread_data) Ok(thread_data)
}) })
.collect::<Result<Vec<_>>>()?; .collect::<Result<Vec<_>>>()?;
let all_threads = json!({ "threads": threads }); let all_threads_data = json!({ "threads": threads });
let html_content = inject_thread_data(template, all_threads)?; let html_content = inject_thread_data(template_content, all_threads_data)?;
fs::write(&output, &html_content) fs::write(&output_path, &html_content)
.context(format!("Failed to write output: {}", output.display()))?; .context(format!("Failed to write output: {}", output_path.display()))?;
println!("Saved {} thread(s) to {}", threads.len(), output.display()); println!(
"Saved data from {} resolved file(s) ({} threads) to {}",
input_paths.len(),
threads.len(),
output_path.display()
);
Ok(html_content) Ok(html_content)
} }
fn inject_thread_data(template: String, threads_data: Value) -> Result<String> { fn inject_thread_data(template: String, threads_data: Value) -> Result<String> {
let injection_marker = "let threadsData = window.threadsData || { threads: [dummyThread] };"; let injection_marker = "let threadsData = window.threadsData || { threads: [dummyThread] };";
template if !template.contains(injection_marker) {
.find(injection_marker) anyhow::bail!(
.context("Could not find the thread injection point in the template")?; "Could not find the thread injection point in the template. Expected: '{}'",
injection_marker
);
}
let threads_json = serde_json::to_string_pretty(&threads_data) let threads_json_string = serde_json::to_string_pretty(&threads_data)
.context("Failed to serialize threads data to JSON")?; .context("Failed to serialize threads data to JSON")?
let script_injection = format!("let threadsData = {};", threads_json); .replace("</script>", r"<\/script>");
let script_injection = format!("let threadsData = {};", threads_json_string);
let final_html = template.replacen(injection_marker, &script_injection, 1); let final_html = template.replacen(injection_marker, &script_injection, 1);
Ok(final_html) Ok(final_html)
@ -71,5 +140,45 @@ fn inject_thread_data(template: String, threads_data: Value) -> Result<String> {
#[allow(dead_code)] #[allow(dead_code)]
/// CLI entry point: resolves the `--input` arguments to a list of JSON files
/// and generates the HTML explorer at `--output`.
///
/// Each input that is a directory is searched for files named
/// `last.messages.json` (starting at depth 0, limited by `MAX_SEARCH_DEPTH`);
/// each input that is a regular file is used as-is. Inputs that do not exist
/// are skipped with a warning on stderr. The resolved list is sorted and
/// de-duplicated before being passed to `generate_explorer_html`.
///
/// # Errors
///
/// Fails if a directory search fails or if `generate_explorer_html` fails.
fn main() -> Result<()> {
    let args = Args::parse();

    const DEFAULT_FILENAME: &str = "last.messages.json";
    const MAX_SEARCH_DEPTH: u8 = 2;

    let mut resolved_input_files: Vec<PathBuf> = Vec::new();

    for input_path_arg in &args.input {
        if !input_path_arg.exists() {
            eprintln!(
                "Warning: Input path {} does not exist. Skipping.",
                input_path_arg.display()
            );
            continue;
        }

        if input_path_arg.is_dir() {
            find_target_files_recursive(
                input_path_arg,
                DEFAULT_FILENAME,
                0, // starting depth
                MAX_SEARCH_DEPTH,
                &mut resolved_input_files,
            )
            .with_context(|| {
                format!(
                    "Error searching for '{}' files in directory: {}",
                    DEFAULT_FILENAME,
                    input_path_arg.display()
                )
            })?;
        } else if input_path_arg.is_file() {
            resolved_input_files.push(input_path_arg.clone());
        }
    }

    // The same file can be reached through several inputs (e.g. a directory
    // and a file inside it), so sort first to make `dedup` remove all repeats.
    resolved_input_files.sort_unstable();
    resolved_input_files.dedup();

    // Only report missing inputs when nothing was actually resolved; this
    // message used to be printed on every run, including successful ones.
    if resolved_input_files.is_empty() {
        println!("No input paths provided/found.");
    }

    generate_explorer_html(&resolved_input_files, &args.output).map(|_| ())
}