evals: Allow threads explorer to search for JSON files recursively (#31509)
It's just more convenient to call it from CLI this way. + minor fixes in evals Release Notes: - N/A
This commit is contained in:
parent
239ffa49e1
commit
61a40e293d
2 changed files with 139 additions and 26 deletions
|
@ -12,8 +12,10 @@ This eval tests a fix for a destructive behavior of the `edit_file` tool.
|
|||
Previously, it would rewrite existing files too aggressively, which often
|
||||
resulted in content loss.
|
||||
|
||||
Pass rate before the fix: 10%
|
||||
Pass rate after the fix: 100%
|
||||
Model | Pass rate
|
||||
----------------|----------
|
||||
Sonnet 3.7 | 100%
|
||||
Gemini 2.5 Pro | 80%
|
||||
*/
|
||||
|
||||
#[async_trait(?Send)]
|
||||
|
@ -38,7 +40,9 @@ impl Example for FileOverwriteExample {
|
|||
let input = tool_use.parse_input::<EditFileToolInput>()?;
|
||||
match input.mode {
|
||||
EditFileMode::Edit => false,
|
||||
EditFileMode::Create | EditFileMode::Overwrite => true,
|
||||
EditFileMode::Create | EditFileMode::Overwrite => {
|
||||
input.path.ends_with("src/language_model_selector.rs")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
false
|
||||
|
|
|
@ -2,22 +2,65 @@ use anyhow::{Context as _, Result};
|
|||
use clap::Parser;
|
||||
use serde_json::{Value, json};
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[clap(about = "Generate HTML explorer from JSON thread files")]
|
||||
struct Args {
|
||||
/// Paths to JSON files containing thread data
|
||||
/// Paths to JSON files or directories. If a directory is provided,
|
||||
/// it will be searched for 'last.messages.json' files up to 2 levels deep.
|
||||
#[clap(long, required = true, num_args = 1..)]
|
||||
input: Vec<PathBuf>,
|
||||
|
||||
/// Path where the HTML explorer file will be written
|
||||
/// Path where the output HTML file will be written
|
||||
#[clap(long)]
|
||||
output: PathBuf,
|
||||
}
|
||||
|
||||
pub fn generate_explorer_html(inputs: &[PathBuf], output: &PathBuf) -> Result<String> {
|
||||
if let Some(parent) = output.parent() {
|
||||
/// Recursively finds files with `target_filename` in `dir_path` up to `max_depth`.
|
||||
#[allow(dead_code)]
|
||||
fn find_target_files_recursive(
|
||||
dir_path: &Path,
|
||||
target_filename: &str,
|
||||
current_depth: u8,
|
||||
max_depth: u8,
|
||||
found_files: &mut Vec<PathBuf>,
|
||||
) -> Result<()> {
|
||||
if current_depth > max_depth {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
for entry_result in fs::read_dir(dir_path)
|
||||
.with_context(|| format!("Failed to read directory: {}", dir_path.display()))?
|
||||
{
|
||||
let entry = entry_result.with_context(|| {
|
||||
format!("Failed to read directory entry in: {}", dir_path.display())
|
||||
})?;
|
||||
let path = entry.path();
|
||||
|
||||
if path.is_dir() {
|
||||
find_target_files_recursive(
|
||||
&path,
|
||||
target_filename,
|
||||
current_depth + 1,
|
||||
max_depth,
|
||||
found_files,
|
||||
)?;
|
||||
} else if path.is_file() {
|
||||
if let Some(filename_osstr) = path.file_name() {
|
||||
if let Some(filename_str) = filename_osstr.to_str() {
|
||||
if filename_str == target_filename {
|
||||
found_files.push(path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn generate_explorer_html(input_paths: &[PathBuf], output_path: &PathBuf) -> Result<String> {
|
||||
if let Some(parent) = output_path.parent() {
|
||||
if !parent.exists() {
|
||||
fs::create_dir_all(parent).context(format!(
|
||||
"Failed to create output directory: {}",
|
||||
|
@ -27,41 +70,67 @@ pub fn generate_explorer_html(inputs: &[PathBuf], output: &PathBuf) -> Result<St
|
|||
}
|
||||
|
||||
let template_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("src/explorer.html");
|
||||
let template = fs::read_to_string(&template_path).context(format!(
|
||||
let template_content = fs::read_to_string(&template_path).context(format!(
|
||||
"Template file not found or couldn't be read: {}",
|
||||
template_path.display()
|
||||
))?;
|
||||
|
||||
let threads = inputs
|
||||
if input_paths.is_empty() {
|
||||
println!(
|
||||
"No input JSON files found to process. Explorer will be generated with template defaults or empty data."
|
||||
);
|
||||
}
|
||||
|
||||
let threads = input_paths
|
||||
.iter()
|
||||
.map(|input_path| {
|
||||
let mut thread_data: Value = fs::read_to_string(input_path)
|
||||
.context(format!("Failed to read file: {}", input_path.display()))?
|
||||
let file_content = fs::read_to_string(input_path)
|
||||
.context(format!("Failed to read file: {}", input_path.display()))?;
|
||||
let mut thread_data: Value = file_content
|
||||
.parse::<Value>()
|
||||
.context(format!("Failed to parse JSON: {}", input_path.display()))?;
|
||||
thread_data["filename"] = json!(input_path); // This will be shown in a thread heading
|
||||
.context(format!("Failed to parse JSON from file: {}", input_path.display()))?;
|
||||
|
||||
if let Some(obj) = thread_data.as_object_mut() {
|
||||
obj.insert("filename".to_string(), json!(input_path.display().to_string()));
|
||||
} else {
|
||||
eprintln!("Warning: JSON data in {} is not a root object. Wrapping it to include filename.", input_path.display());
|
||||
thread_data = json!({
|
||||
"original_data": thread_data,
|
||||
"filename": input_path.display().to_string()
|
||||
});
|
||||
}
|
||||
Ok(thread_data)
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
let all_threads = json!({ "threads": threads });
|
||||
let html_content = inject_thread_data(template, all_threads)?;
|
||||
fs::write(&output, &html_content)
|
||||
.context(format!("Failed to write output: {}", output.display()))?;
|
||||
let all_threads_data = json!({ "threads": threads });
|
||||
let html_content = inject_thread_data(template_content, all_threads_data)?;
|
||||
fs::write(&output_path, &html_content)
|
||||
.context(format!("Failed to write output: {}", output_path.display()))?;
|
||||
|
||||
println!("Saved {} thread(s) to {}", threads.len(), output.display());
|
||||
println!(
|
||||
"Saved data from {} resolved file(s) ({} threads) to {}",
|
||||
input_paths.len(),
|
||||
threads.len(),
|
||||
output_path.display()
|
||||
);
|
||||
Ok(html_content)
|
||||
}
|
||||
|
||||
fn inject_thread_data(template: String, threads_data: Value) -> Result<String> {
|
||||
let injection_marker = "let threadsData = window.threadsData || { threads: [dummyThread] };";
|
||||
template
|
||||
.find(injection_marker)
|
||||
.context("Could not find the thread injection point in the template")?;
|
||||
if !template.contains(injection_marker) {
|
||||
anyhow::bail!(
|
||||
"Could not find the thread injection point in the template. Expected: '{}'",
|
||||
injection_marker
|
||||
);
|
||||
}
|
||||
|
||||
let threads_json = serde_json::to_string_pretty(&threads_data)
|
||||
.context("Failed to serialize threads data to JSON")?;
|
||||
let script_injection = format!("let threadsData = {};", threads_json);
|
||||
let threads_json_string = serde_json::to_string_pretty(&threads_data)
|
||||
.context("Failed to serialize threads data to JSON")?
|
||||
.replace("</script>", r"<\/script>");
|
||||
|
||||
let script_injection = format!("let threadsData = {};", threads_json_string);
|
||||
let final_html = template.replacen(injection_marker, &script_injection, 1);
|
||||
|
||||
Ok(final_html)
|
||||
|
@ -71,5 +140,45 @@ fn inject_thread_data(template: String, threads_data: Value) -> Result<String> {
|
|||
#[allow(dead_code)]
|
||||
fn main() -> Result<()> {
|
||||
let args = Args::parse();
|
||||
generate_explorer_html(&args.input, &args.output).map(|_| ())
|
||||
|
||||
const DEFAULT_FILENAME: &str = "last.messages.json";
|
||||
const MAX_SEARCH_DEPTH: u8 = 2;
|
||||
|
||||
let mut resolved_input_files: Vec<PathBuf> = Vec::new();
|
||||
|
||||
for input_path_arg in &args.input {
|
||||
if !input_path_arg.exists() {
|
||||
eprintln!(
|
||||
"Warning: Input path {} does not exist. Skipping.",
|
||||
input_path_arg.display()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
if input_path_arg.is_dir() {
|
||||
find_target_files_recursive(
|
||||
input_path_arg,
|
||||
DEFAULT_FILENAME,
|
||||
0, // starting depth
|
||||
MAX_SEARCH_DEPTH,
|
||||
&mut resolved_input_files,
|
||||
)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Error searching for '{}' files in directory: {}",
|
||||
DEFAULT_FILENAME,
|
||||
input_path_arg.display()
|
||||
)
|
||||
})?;
|
||||
} else if input_path_arg.is_file() {
|
||||
resolved_input_files.push(input_path_arg.clone());
|
||||
}
|
||||
}
|
||||
|
||||
resolved_input_files.sort_unstable();
|
||||
resolved_input_files.dedup();
|
||||
|
||||
println!("No input paths provided/found.");
|
||||
|
||||
generate_explorer_html(&resolved_input_files, &args.output).map(|_| ())
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue