Code block evals (#29619)
Add a targeted eval for code block formatting, and revise the system prompt accordingly.

### Eval before, n=8

<img width="728" alt="eval before" src="https://github.com/user-attachments/assets/552b6146-3d26-4eaa-86f9-9fc36c0cadf2" />

### Eval after prompt change, n=8 (excluding the new evals, so just testing the prompt change)

<img width="717" alt="eval after" src="https://github.com/user-attachments/assets/c78c7a54-4c65-470c-b135-8691584cd73e" />

Release Notes:

- N/A
This commit is contained in:
parent
2508e491d5
commit
d7004030b3
10 changed files with 536 additions and 134 deletions
|
@@ -174,6 +174,7 @@ impl Tool for EditFileTool {
|
|||
"The `old_string` and `new_string` are identical, so no changes would be made."
|
||||
));
|
||||
}
|
||||
let old_string = input.old_string.clone();
|
||||
|
||||
let result = cx
|
||||
.background_spawn(async move {
|
||||
|
@@ -213,6 +214,21 @@ impl Tool for EditFileTool {
|
|||
input.path.display()
|
||||
)
|
||||
} else {
|
||||
let old_string_with_buffer = format!(
|
||||
"old_string:\n\n{}\n\n-------file-------\n\n{}",
|
||||
&old_string,
|
||||
buffer.text()
|
||||
);
|
||||
let path = {
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
let mut hasher = DefaultHasher::new();
|
||||
old_string_with_buffer.hash(&mut hasher);
|
||||
|
||||
PathBuf::from(format!("failed_tool_{}.txt", hasher.finish()))
|
||||
};
|
||||
std::fs::write(path, old_string_with_buffer).unwrap();
|
||||
anyhow!("Failed to match the provided `old_string`")
|
||||
}
|
||||
})?;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue