evals: Allow threads explorer to search for JSON files recursively (#31509)
This makes the threads explorer more convenient to call from the CLI. Also includes minor fixes in evals. Release Notes: - N/A
This commit is contained in:
parent
239ffa49e1
commit
61a40e293d
2 changed files with 139 additions and 26 deletions
|
@ -12,8 +12,10 @@ This eval tests a fix for a destructive behavior of the `edit_file` tool.
|
||||||
Previously, it would rewrite existing files too aggressively, which often
|
Previously, it would rewrite existing files too aggressively, which often
|
||||||
resulted in content loss.
|
resulted in content loss.
|
||||||
|
|
||||||
Pass rate before the fix: 10%
|
Model | Pass rate
|
||||||
Pass rate after the fix: 100%
|
----------------|----------
|
||||||
|
Sonnet 3.7 | 100%
|
||||||
|
Gemini 2.5 Pro | 80%
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#[async_trait(?Send)]
|
#[async_trait(?Send)]
|
||||||
|
@ -38,7 +40,9 @@ impl Example for FileOverwriteExample {
|
||||||
let input = tool_use.parse_input::<EditFileToolInput>()?;
|
let input = tool_use.parse_input::<EditFileToolInput>()?;
|
||||||
match input.mode {
|
match input.mode {
|
||||||
EditFileMode::Edit => false,
|
EditFileMode::Edit => false,
|
||||||
EditFileMode::Create | EditFileMode::Overwrite => true,
|
EditFileMode::Create | EditFileMode::Overwrite => {
|
||||||
|
input.path.ends_with("src/language_model_selector.rs")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
false
|
false
|
||||||
|
|
|
@ -2,22 +2,65 @@ use anyhow::{Context as _, Result};
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use serde_json::{Value, json};
|
use serde_json::{Value, json};
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::path::PathBuf;
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
#[clap(about = "Generate HTML explorer from JSON thread files")]
|
#[clap(about = "Generate HTML explorer from JSON thread files")]
|
||||||
struct Args {
|
struct Args {
|
||||||
/// Paths to JSON files containing thread data
|
/// Paths to JSON files or directories. If a directory is provided,
|
||||||
|
/// it will be searched for 'last.messages.json' files up to 2 levels deep.
|
||||||
#[clap(long, required = true, num_args = 1..)]
|
#[clap(long, required = true, num_args = 1..)]
|
||||||
input: Vec<PathBuf>,
|
input: Vec<PathBuf>,
|
||||||
|
|
||||||
/// Path where the HTML explorer file will be written
|
/// Path where the output HTML file will be written
|
||||||
#[clap(long)]
|
#[clap(long)]
|
||||||
output: PathBuf,
|
output: PathBuf,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn generate_explorer_html(inputs: &[PathBuf], output: &PathBuf) -> Result<String> {
|
/// Recursively finds files with `target_filename` in `dir_path` up to `max_depth`.
|
||||||
if let Some(parent) = output.parent() {
|
#[allow(dead_code)]
|
||||||
|
fn find_target_files_recursive(
|
||||||
|
dir_path: &Path,
|
||||||
|
target_filename: &str,
|
||||||
|
current_depth: u8,
|
||||||
|
max_depth: u8,
|
||||||
|
found_files: &mut Vec<PathBuf>,
|
||||||
|
) -> Result<()> {
|
||||||
|
if current_depth > max_depth {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
for entry_result in fs::read_dir(dir_path)
|
||||||
|
.with_context(|| format!("Failed to read directory: {}", dir_path.display()))?
|
||||||
|
{
|
||||||
|
let entry = entry_result.with_context(|| {
|
||||||
|
format!("Failed to read directory entry in: {}", dir_path.display())
|
||||||
|
})?;
|
||||||
|
let path = entry.path();
|
||||||
|
|
||||||
|
if path.is_dir() {
|
||||||
|
find_target_files_recursive(
|
||||||
|
&path,
|
||||||
|
target_filename,
|
||||||
|
current_depth + 1,
|
||||||
|
max_depth,
|
||||||
|
found_files,
|
||||||
|
)?;
|
||||||
|
} else if path.is_file() {
|
||||||
|
if let Some(filename_osstr) = path.file_name() {
|
||||||
|
if let Some(filename_str) = filename_osstr.to_str() {
|
||||||
|
if filename_str == target_filename {
|
||||||
|
found_files.push(path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn generate_explorer_html(input_paths: &[PathBuf], output_path: &PathBuf) -> Result<String> {
|
||||||
|
if let Some(parent) = output_path.parent() {
|
||||||
if !parent.exists() {
|
if !parent.exists() {
|
||||||
fs::create_dir_all(parent).context(format!(
|
fs::create_dir_all(parent).context(format!(
|
||||||
"Failed to create output directory: {}",
|
"Failed to create output directory: {}",
|
||||||
|
@ -27,41 +70,67 @@ pub fn generate_explorer_html(inputs: &[PathBuf], output: &PathBuf) -> Result<St
|
||||||
}
|
}
|
||||||
|
|
||||||
let template_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("src/explorer.html");
|
let template_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("src/explorer.html");
|
||||||
let template = fs::read_to_string(&template_path).context(format!(
|
let template_content = fs::read_to_string(&template_path).context(format!(
|
||||||
"Template file not found or couldn't be read: {}",
|
"Template file not found or couldn't be read: {}",
|
||||||
template_path.display()
|
template_path.display()
|
||||||
))?;
|
))?;
|
||||||
|
|
||||||
let threads = inputs
|
if input_paths.is_empty() {
|
||||||
|
println!(
|
||||||
|
"No input JSON files found to process. Explorer will be generated with template defaults or empty data."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let threads = input_paths
|
||||||
.iter()
|
.iter()
|
||||||
.map(|input_path| {
|
.map(|input_path| {
|
||||||
let mut thread_data: Value = fs::read_to_string(input_path)
|
let file_content = fs::read_to_string(input_path)
|
||||||
.context(format!("Failed to read file: {}", input_path.display()))?
|
.context(format!("Failed to read file: {}", input_path.display()))?;
|
||||||
|
let mut thread_data: Value = file_content
|
||||||
.parse::<Value>()
|
.parse::<Value>()
|
||||||
.context(format!("Failed to parse JSON: {}", input_path.display()))?;
|
.context(format!("Failed to parse JSON from file: {}", input_path.display()))?;
|
||||||
thread_data["filename"] = json!(input_path); // This will be shown in a thread heading
|
|
||||||
|
if let Some(obj) = thread_data.as_object_mut() {
|
||||||
|
obj.insert("filename".to_string(), json!(input_path.display().to_string()));
|
||||||
|
} else {
|
||||||
|
eprintln!("Warning: JSON data in {} is not a root object. Wrapping it to include filename.", input_path.display());
|
||||||
|
thread_data = json!({
|
||||||
|
"original_data": thread_data,
|
||||||
|
"filename": input_path.display().to_string()
|
||||||
|
});
|
||||||
|
}
|
||||||
Ok(thread_data)
|
Ok(thread_data)
|
||||||
})
|
})
|
||||||
.collect::<Result<Vec<_>>>()?;
|
.collect::<Result<Vec<_>>>()?;
|
||||||
|
|
||||||
let all_threads = json!({ "threads": threads });
|
let all_threads_data = json!({ "threads": threads });
|
||||||
let html_content = inject_thread_data(template, all_threads)?;
|
let html_content = inject_thread_data(template_content, all_threads_data)?;
|
||||||
fs::write(&output, &html_content)
|
fs::write(&output_path, &html_content)
|
||||||
.context(format!("Failed to write output: {}", output.display()))?;
|
.context(format!("Failed to write output: {}", output_path.display()))?;
|
||||||
|
|
||||||
println!("Saved {} thread(s) to {}", threads.len(), output.display());
|
println!(
|
||||||
|
"Saved data from {} resolved file(s) ({} threads) to {}",
|
||||||
|
input_paths.len(),
|
||||||
|
threads.len(),
|
||||||
|
output_path.display()
|
||||||
|
);
|
||||||
Ok(html_content)
|
Ok(html_content)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn inject_thread_data(template: String, threads_data: Value) -> Result<String> {
|
fn inject_thread_data(template: String, threads_data: Value) -> Result<String> {
|
||||||
let injection_marker = "let threadsData = window.threadsData || { threads: [dummyThread] };";
|
let injection_marker = "let threadsData = window.threadsData || { threads: [dummyThread] };";
|
||||||
template
|
if !template.contains(injection_marker) {
|
||||||
.find(injection_marker)
|
anyhow::bail!(
|
||||||
.context("Could not find the thread injection point in the template")?;
|
"Could not find the thread injection point in the template. Expected: '{}'",
|
||||||
|
injection_marker
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
let threads_json = serde_json::to_string_pretty(&threads_data)
|
let threads_json_string = serde_json::to_string_pretty(&threads_data)
|
||||||
.context("Failed to serialize threads data to JSON")?;
|
.context("Failed to serialize threads data to JSON")?
|
||||||
let script_injection = format!("let threadsData = {};", threads_json);
|
.replace("</script>", r"<\/script>");
|
||||||
|
|
||||||
|
let script_injection = format!("let threadsData = {};", threads_json_string);
|
||||||
let final_html = template.replacen(injection_marker, &script_injection, 1);
|
let final_html = template.replacen(injection_marker, &script_injection, 1);
|
||||||
|
|
||||||
Ok(final_html)
|
Ok(final_html)
|
||||||
|
@ -71,5 +140,45 @@ fn inject_thread_data(template: String, threads_data: Value) -> Result<String> {
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
let args = Args::parse();
|
let args = Args::parse();
|
||||||
generate_explorer_html(&args.input, &args.output).map(|_| ())
|
|
||||||
|
const DEFAULT_FILENAME: &str = "last.messages.json";
|
||||||
|
const MAX_SEARCH_DEPTH: u8 = 2;
|
||||||
|
|
||||||
|
let mut resolved_input_files: Vec<PathBuf> = Vec::new();
|
||||||
|
|
||||||
|
for input_path_arg in &args.input {
|
||||||
|
if !input_path_arg.exists() {
|
||||||
|
eprintln!(
|
||||||
|
"Warning: Input path {} does not exist. Skipping.",
|
||||||
|
input_path_arg.display()
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if input_path_arg.is_dir() {
|
||||||
|
find_target_files_recursive(
|
||||||
|
input_path_arg,
|
||||||
|
DEFAULT_FILENAME,
|
||||||
|
0, // starting depth
|
||||||
|
MAX_SEARCH_DEPTH,
|
||||||
|
&mut resolved_input_files,
|
||||||
|
)
|
||||||
|
.with_context(|| {
|
||||||
|
format!(
|
||||||
|
"Error searching for '{}' files in directory: {}",
|
||||||
|
DEFAULT_FILENAME,
|
||||||
|
input_path_arg.display()
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
} else if input_path_arg.is_file() {
|
||||||
|
resolved_input_files.push(input_path_arg.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resolved_input_files.sort_unstable();
|
||||||
|
resolved_input_files.dedup();
|
||||||
|
|
||||||
|
println!("No input paths provided/found.");
|
||||||
|
|
||||||
|
generate_explorer_html(&resolved_input_files, &args.output).map(|_| ())
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue