Encourage editing over re-creating a file from scratch (#29870)

I also introduced a new eval to prove the encouragement actually makes a
difference.

Release Notes:

- Improved agent behavior when streaming edits, encouraging it to
edit files as opposed to creating them from scratch
This commit is contained in:
Antonio Scandurra 2025-05-04 15:18:28 +02:00 committed by GitHub
parent ca1dc821cf
commit 4d51602e7b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 67 additions and 0 deletions

View file

@ -54,6 +54,7 @@ pub use edit_file_tool::{EditFileTool, EditFileToolInput};
pub use find_path_tool::FindPathToolInput;
pub use open_tool::OpenTool;
pub use read_file_tool::ReadFileToolInput;
pub use streaming_edit_file_tool::StreamingEditFileToolInput;
pub use terminal_tool::TerminalTool;
pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {

View file

@ -61,6 +61,9 @@ pub struct StreamingEditFileToolInput {
/// If true, this tool will recreate the file from scratch.
/// If false, this tool will produce granular edits to an existing file.
///
/// When a file already exists or you just created it, always prefer editing
/// it as opposed to recreating it from scratch.
pub create_or_overwrite: bool,
}

View file

@ -0,0 +1,61 @@
use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
use anyhow::Result;
use assistant_tools::StreamingEditFileToolInput;
use async_trait::async_trait;
/// Eval that asks the agent to translate the comments of three existing
/// files and asserts that no `edit_file` tool call set `create_or_overwrite`.
pub struct CommentTranslation;

#[async_trait(?Send)]
impl Example for CommentTranslation {
    /// Metadata for this eval: the `servo/font-kit` repository at a pinned
    /// revision, no language server, and `max_assertions` set to one.
    fn meta(&self) -> ExampleMetadata {
        let name = String::from("comment_translation");
        let url = String::from("https://github.com/servo/font-kit.git");
        let revision = String::from("504d084e29bce4f60614bc702e91af7f7d9e60ad");

        ExampleMetadata {
            name,
            url,
            revision,
            language_server: None,
            max_assertions: Some(1),
        }
    }

    /// Sends the translation request, runs the conversation to completion,
    /// then counts the `edit_file` tool uses whose input has
    /// `create_or_overwrite` set and asserts that the count is zero.
    ///
    /// Fails if the run itself fails, if reading the thread fails, or if an
    /// `edit_file` input cannot be deserialized into
    /// `StreamingEditFileToolInput`.
    async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
        // The prompt is a runtime string; its content is kept verbatim.
        let prompt = r#"
Edit the following files and translate all their comments to italian, in this exact order:
- font-kit/src/family.rs
- font-kit/src/canvas.rs
- font-kit/src/error.rs
"#;
        cx.push_user_message(prompt);
        cx.run_to_end().await?;

        // Outer `?` unwraps the `read_with` result, inner `?` the closure's own result.
        let overwrite_count = cx.agent_thread().read_with(cx, |thread, cx| {
            let mut overwrites = 0;
            for message in thread.messages() {
                for tool_use in thread.tool_uses_for_message(message.id, cx) {
                    // Only `edit_file` invocations carry the flag under test.
                    if tool_use.name != "edit_file" {
                        continue;
                    }
                    let parsed: StreamingEditFileToolInput =
                        serde_json::from_value(tool_use.input)?;
                    if parsed.create_or_overwrite {
                        overwrites += 1;
                    }
                }
            }
            anyhow::Ok(overwrites)
        })??;

        cx.assert_eq(overwrite_count, 0, "no_creation_or_overwrite")?;
        Ok(())
    }

    /// Judge assertion applied to the resulting diff: only the three
    /// requested files changed and their doc comments are in Italian.
    fn diff_assertions(&self) -> Vec<JudgeAssertion> {
        let comments_translated = JudgeAssertion {
            id: String::from("comments_translated"),
            description: "- Only `family.rs`, `canvas.rs` and `error.rs` should have changed.\n- Their doc comments should have been all translated to Italian."
                .into(),
        };
        vec![comments_translated]
    }
}

View file

@ -13,6 +13,7 @@ use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
mod add_arg_to_trait_method;
mod code_block_citations;
mod comment_translation;
mod file_search;
mod planets;
@ -22,6 +23,7 @@ pub fn all(examples_dir: &Path) -> Vec<Rc<dyn Example>> {
Rc::new(add_arg_to_trait_method::AddArgToTraitMethod),
Rc::new(code_block_citations::CodeBlockCitations),
Rc::new(planets::Planets),
Rc::new(comment_translation::CommentTranslation),
];
for example_path in list_declarative_examples(examples_dir).unwrap() {