agent: Overwrite files more cautiously (#30649)
1. The `edit_file` tool tended to use `create_or_overwrite` a bit too often, leading to corruption of long files. This change replaces the boolean flag with an `EditFileMode` enum, which helps the Agent make a more deliberate choice when overwriting files. With this change, the pass rate of the new eval increased from 10% to 100%. 2. eval: Added the ability to run an eval on top of an existing thread. Threads can now be loaded from JSON files in the `SerializedThread` format, which makes it easy to use real threads as starting points for tests/evals. 3. Don't try to restore tool cards when running in headless or eval mode -- we don't have a window to do this properly. Release Notes: - N/A
This commit is contained in:
parent
22f76ac1a7
commit
255d8f7cf8
18 changed files with 425 additions and 37 deletions
|
@ -42,7 +42,7 @@ use crate::list_directory_tool::ListDirectoryTool;
|
|||
use crate::now_tool::NowTool;
|
||||
use crate::thinking_tool::ThinkingTool;
|
||||
|
||||
pub use edit_file_tool::EditFileToolInput;
|
||||
pub use edit_file_tool::{EditFileMode, EditFileToolInput};
|
||||
pub use find_path_tool::FindPathToolInput;
|
||||
pub use open_tool::OpenTool;
|
||||
pub use read_file_tool::{ReadFileTool, ReadFileToolInput};
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
use super::*;
|
||||
use crate::{ReadFileToolInput, edit_file_tool::EditFileToolInput, grep_tool::GrepToolInput};
|
||||
use crate::{
|
||||
ReadFileToolInput,
|
||||
edit_file_tool::{EditFileMode, EditFileToolInput},
|
||||
grep_tool::GrepToolInput,
|
||||
};
|
||||
use Role::*;
|
||||
use anyhow::anyhow;
|
||||
use assistant_tool::ToolRegistry;
|
||||
|
@ -71,7 +75,7 @@ fn eval_extract_handle_command_output() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: false,
|
||||
mode: EditFileMode::Edit,
|
||||
},
|
||||
)],
|
||||
),
|
||||
|
@ -127,7 +131,7 @@ fn eval_delete_run_git_blame() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: false,
|
||||
mode: EditFileMode::Edit,
|
||||
},
|
||||
)],
|
||||
),
|
||||
|
@ -182,7 +186,7 @@ fn eval_translate_doc_comments() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: false,
|
||||
mode: EditFileMode::Edit,
|
||||
},
|
||||
)],
|
||||
),
|
||||
|
@ -297,7 +301,7 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: false,
|
||||
mode: EditFileMode::Edit,
|
||||
},
|
||||
)],
|
||||
),
|
||||
|
@ -372,7 +376,7 @@ fn eval_disable_cursor_blinking() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: false,
|
||||
mode: EditFileMode::Edit,
|
||||
},
|
||||
)],
|
||||
),
|
||||
|
@ -566,7 +570,7 @@ fn eval_from_pixels_constructor() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: false,
|
||||
mode: EditFileMode::Edit,
|
||||
},
|
||||
)],
|
||||
),
|
||||
|
@ -643,7 +647,7 @@ fn eval_zode() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: true,
|
||||
mode: EditFileMode::Create,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -888,7 +892,7 @@ fn eval_add_overwrite_test() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: false,
|
||||
mode: EditFileMode::Edit,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
|
@ -76,12 +76,22 @@ pub struct EditFileToolInput {
|
|||
/// </example>
|
||||
pub path: PathBuf,
|
||||
|
||||
/// If true, this tool will recreate the file from scratch.
|
||||
/// If false, this tool will produce granular edits to an existing file.
|
||||
/// The mode of operation on the file. Possible values:
|
||||
/// - 'edit': Make granular edits to an existing file.
|
||||
/// - 'create': Create a new file if it doesn't exist.
|
||||
/// - 'overwrite': Replace the entire contents of an existing file.
|
||||
///
|
||||
/// When a file already exists or you just created it, always prefer editing
|
||||
/// When a file already exists or you just created it, prefer editing
|
||||
/// it as opposed to recreating it from scratch.
|
||||
pub create_or_overwrite: bool,
|
||||
pub mode: EditFileMode,
|
||||
}
|
||||
|
||||
// Mode of operation selected through the `mode` field of `EditFileToolInput`.
// With `rename_all = "lowercase"` the variants are (de)serialized as
// "edit", "create" and "overwrite".
// NOTE(review): written as `//` rather than `///` on purpose — doc comments
// may be picked up by the `JsonSchema` derive; confirm before converting.
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum EditFileMode {
|
||||
// Make granular edits to an existing file; the tool returns a
// "not found" error when the file does not exist in this mode.
Edit,
|
||||
// Create a new file if it doesn't exist. The tool handles this the same
// way as `Overwrite` (both take the create-or-overwrite path).
Create,
|
||||
// Replace the entire contents of an existing file.
Overwrite,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
|
||||
|
@ -195,7 +205,11 @@ impl Tool for EditFileTool {
|
|||
.as_ref()
|
||||
.map_or(false, |file| file.disk_state().exists())
|
||||
})?;
|
||||
if !input.create_or_overwrite && !exists {
|
||||
let create_or_overwrite = match input.mode {
|
||||
EditFileMode::Create | EditFileMode::Overwrite => true,
|
||||
_ => false,
|
||||
};
|
||||
if !create_or_overwrite && !exists {
|
||||
return Err(anyhow!("{} not found", input.path.display()));
|
||||
}
|
||||
|
||||
|
@ -207,7 +221,7 @@ impl Tool for EditFileTool {
|
|||
})
|
||||
.await;
|
||||
|
||||
let (output, mut events) = if input.create_or_overwrite {
|
||||
let (output, mut events) = if create_or_overwrite {
|
||||
edit_agent.overwrite(
|
||||
buffer.clone(),
|
||||
input.display_description.clone(),
|
||||
|
@ -876,7 +890,7 @@ mod tests {
|
|||
let input = serde_json::to_value(EditFileToolInput {
|
||||
display_description: "Some edit".into(),
|
||||
path: "root/nonexistent_file.txt".into(),
|
||||
create_or_overwrite: false,
|
||||
mode: EditFileMode::Edit,
|
||||
})
|
||||
.unwrap();
|
||||
Arc::new(EditFileTool)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue