Code block evals (#29619)

Add a targeted eval for code block formatting, and revise the system
prompt accordingly.

### Eval before, n=8

<img width="728" alt="eval before"
src="https://github.com/user-attachments/assets/552b6146-3d26-4eaa-86f9-9fc36c0cadf2"
/>

### Eval after prompt change, n=8 (excluding the new evals, so just testing the prompt change)

<img width="717" alt="eval after"
src="https://github.com/user-attachments/assets/c78c7a54-4c65-470c-b135-8691584cd73e"
/>

Release Notes:

- N/A
This commit is contained in:
Richard Feldman 2025-04-29 18:52:09 -04:00 committed by GitHub
parent 2508e491d5
commit d7004030b3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 536 additions and 134 deletions

View file

@ -174,6 +174,7 @@ impl Tool for EditFileTool {
"The `old_string` and `new_string` are identical, so no changes would be made."
));
}
let old_string = input.old_string.clone();
let result = cx
.background_spawn(async move {
@ -213,6 +214,21 @@ impl Tool for EditFileTool {
input.path.display()
)
} else {
let old_string_with_buffer = format!(
"old_string:\n\n{}\n\n-------file-------\n\n{}",
&old_string,
buffer.text()
);
let path = {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
old_string_with_buffer.hash(&mut hasher);
PathBuf::from(format!("failed_tool_{}.txt", hasher.finish()))
};
std::fs::write(path, old_string_with_buffer).unwrap();
anyhow!("Failed to match the provided `old_string`")
}
})?;