Encourage editing over re-creating a file from scratch (#29870)

I also introduced a new eval to prove the encouragement actually makes a
difference.

Release Notes:

- Improved agent behavior when streaming edits, encouraging it to
edit files as opposed to creating them from scratch
This commit is contained in:
Antonio Scandurra 2025-05-04 15:18:28 +02:00 committed by GitHub
parent ca1dc821cf
commit 4d51602e7b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 67 additions and 0 deletions

View file

@ -0,0 +1,61 @@
use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
use anyhow::Result;
use assistant_tools::StreamingEditFileToolInput;
use async_trait::async_trait;
/// Eval example that asks the agent to translate the comments of three
/// `font-kit` source files to Italian and then checks that none of its
/// `edit_file` tool calls set `create_or_overwrite`.
pub struct CommentTranslation;
#[async_trait(?Send)]
impl Example for CommentTranslation {
    /// Describes the example: its name, the repository and revision it runs
    /// against, and the number of assertions it is allowed to make.
    fn meta(&self) -> ExampleMetadata {
        let name = String::from("comment_translation");
        let url = String::from("https://github.com/servo/font-kit.git");
        let revision = String::from("504d084e29bce4f60614bc702e91af7f7d9e60ad");
        ExampleMetadata {
            name,
            url,
            revision,
            language_server: None,
            max_assertions: Some(1),
        }
    }

    /// Sends the translation request, runs the agent to completion, and
    /// asserts that no `edit_file` tool use had `create_or_overwrite` set.
    ///
    /// # Errors
    ///
    /// Fails if the run fails, if the thread cannot be read, if an
    /// `edit_file` input does not deserialize into
    /// `StreamingEditFileToolInput`, or if the assertion call returns an error.
    async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
        // The prompt text is kept verbatim, including its line layout.
        cx.push_user_message(r#"
Edit the following files and translate all their comments to italian, in this exact order:
- font-kit/src/family.rs
- font-kit/src/canvas.rs
- font-kit/src/error.rs
"#);
        cx.run_to_end().await?;

        // Walk every message's tool uses and tally the `edit_file` calls that
        // asked to create or overwrite the file.
        let overwrite_total = cx.agent_thread().read_with(cx, |thread, cx| {
            let mut overwrites = 0;
            for message in thread.messages() {
                let edit_calls = thread
                    .tool_uses_for_message(message.id, cx)
                    .into_iter()
                    .filter(|tool_use| tool_use.name == "edit_file");
                for call in edit_calls {
                    let parsed: StreamingEditFileToolInput = serde_json::from_value(call.input)?;
                    if parsed.create_or_overwrite {
                        overwrites += 1;
                    }
                }
            }
            anyhow::Ok(overwrites)
        })??;

        cx.assert_eq(overwrite_total, 0, "no_creation_or_overwrite")?;
        Ok(())
    }

    /// Returns the single judge assertion applied to the resulting diff.
    fn diff_assertions(&self) -> Vec<JudgeAssertion> {
        let comments_translated = JudgeAssertion {
            id: String::from("comments_translated"),
            // The trailing `\` continues the literal and skips the next
            // line's leading whitespace, so the text is two lines joined by
            // a single `\n`.
            description: "- Only `family.rs`, `canvas.rs` and `error.rs` should have changed.\n\
                          - Their doc comments should have been all translated to Italian."
                .into(),
        };
        vec![comments_translated]
    }
}

View file

@ -13,6 +13,7 @@ use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
mod add_arg_to_trait_method;
mod code_block_citations;
mod comment_translation;
mod file_search;
mod planets;
@ -22,6 +23,7 @@ pub fn all(examples_dir: &Path) -> Vec<Rc<dyn Example>> {
Rc::new(add_arg_to_trait_method::AddArgToTraitMethod),
Rc::new(code_block_citations::CodeBlockCitations),
Rc::new(planets::Planets),
Rc::new(comment_translation::CommentTranslation),
];
for example_path in list_declarative_examples(examples_dir).unwrap() {