agent: Overwrite files more cautiously (#30649)
1. The `edit_file` tool tended to use `create_or_overwrite` a bit too often, leading to corruption of long files. This change replaces the boolean flag with an `EditFileMode` enum, which helps Agent make a more deliberate choice when overwriting files. With this change, the pass rate of the new eval increased from 10% to 100%. 2. eval: Added ability to run eval on top of an existing thread. Threads can now be loaded from JSON files in the `SerializedThread` format, which makes it easy to use real threads as starting points for tests/evals. 3. Don't try to restore tool cards when running in headless or eval mode -- we don't have a window to properly do this. Release Notes: - N/A
This commit is contained in:
parent
22f76ac1a7
commit
255d8f7cf8
18 changed files with 425 additions and 37 deletions
|
@ -1,5 +1,9 @@
|
|||
use super::*;
|
||||
use crate::{ReadFileToolInput, edit_file_tool::EditFileToolInput, grep_tool::GrepToolInput};
|
||||
use crate::{
|
||||
ReadFileToolInput,
|
||||
edit_file_tool::{EditFileMode, EditFileToolInput},
|
||||
grep_tool::GrepToolInput,
|
||||
};
|
||||
use Role::*;
|
||||
use anyhow::anyhow;
|
||||
use assistant_tool::ToolRegistry;
|
||||
|
@ -71,7 +75,7 @@ fn eval_extract_handle_command_output() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: false,
|
||||
mode: EditFileMode::Edit,
|
||||
},
|
||||
)],
|
||||
),
|
||||
|
@ -127,7 +131,7 @@ fn eval_delete_run_git_blame() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: false,
|
||||
mode: EditFileMode::Edit,
|
||||
},
|
||||
)],
|
||||
),
|
||||
|
@ -182,7 +186,7 @@ fn eval_translate_doc_comments() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: false,
|
||||
mode: EditFileMode::Edit,
|
||||
},
|
||||
)],
|
||||
),
|
||||
|
@ -297,7 +301,7 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: false,
|
||||
mode: EditFileMode::Edit,
|
||||
},
|
||||
)],
|
||||
),
|
||||
|
@ -372,7 +376,7 @@ fn eval_disable_cursor_blinking() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: false,
|
||||
mode: EditFileMode::Edit,
|
||||
},
|
||||
)],
|
||||
),
|
||||
|
@ -566,7 +570,7 @@ fn eval_from_pixels_constructor() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: false,
|
||||
mode: EditFileMode::Edit,
|
||||
},
|
||||
)],
|
||||
),
|
||||
|
@ -643,7 +647,7 @@ fn eval_zode() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: true,
|
||||
mode: EditFileMode::Create,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
@ -888,7 +892,7 @@ fn eval_add_overwrite_test() {
|
|||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: false,
|
||||
mode: EditFileMode::Edit,
|
||||
},
|
||||
),
|
||||
],
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue