From ab017129d8646b09c29f1fa9ef3d07b318f68005 Mon Sep 17 00:00:00 2001 From: Oleksiy Syvokon Date: Thu, 22 May 2025 12:01:43 +0300 Subject: [PATCH] agent: Improve Gemini support in the edit_file tool (#31116) This change improves `eval_extract_handle_command_output` results for all models: Model | Pass rate before | Pass rate after ----------------------------|------------------|---------------- claude-3.7-sonnet | 0.96 | 0.98 gemini-2.5-pro | 0.35 | 0.86 gpt-4.1 | 0.81 | 1.00 Part of this improvement comes from more robust evaluation, which now accepts multiple possible outcomes. Another part is from the prompt adaptation: addressing common Gemini failure modes, adding a few-shot example, and, in the final commit, auto-rewriting instructions for clarity and conciseness. This change still needs validation from larger end-to-end evals. Release Notes: - N/A --- Cargo.lock | 21 +- .../assistant_tools/src/edit_agent/evals.rs | 53 ++- .../extract_handle_command_output/after.rs | 375 ------------------ .../possible-01.diff | 11 + .../possible-02.diff | 26 ++ .../possible-03.diff | 11 + .../possible-04.diff | 24 ++ .../possible-05.diff | 26 ++ .../possible-06.diff | 23 ++ .../possible-07.diff | 26 ++ .../possible-08.diff | 26 ++ .../src/templates/edit_file_prompt.hbs | 63 ++- crates/language/Cargo.toml | 1 + crates/language/src/language.rs | 4 +- crates/language/src/text_diff.rs | 15 + 15 files changed, 307 insertions(+), 398 deletions(-) delete mode 100644 crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/after.rs create mode 100644 crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-01.diff create mode 100644 crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-02.diff create mode 100644 crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-03.diff create mode 100644 crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-04.diff create mode 100644 crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-05.diff create mode 100644 crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-06.diff create mode 100644 crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-07.diff create mode 100644 crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-08.diff diff --git a/Cargo.lock b/Cargo.lock index dcfded036a..0056d506ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4402,6 +4402,15 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" +[[package]] +name = "diffy" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b545b8c50194bdd008283985ab0b31dba153cfd5b3066a92770634fbc0d7d291" +dependencies = [ + "nu-ansi-term 0.50.1", +] + [[package]] name = "digest" version = "0.10.7" @@ -8677,6 +8686,7 @@ dependencies = [ "clock", "collections", "ctor", + "diffy", "ec4rs", "env_logger 0.11.8", "fs", @@ -10191,6 +10201,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "num" version = "0.4.3" @@ -16389,7 +16408,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ "matchers", - "nu-ansi-term", + "nu-ansi-term 0.46.0", "once_cell", "regex", "serde", diff --git a/crates/assistant_tools/src/edit_agent/evals.rs b/crates/assistant_tools/src/edit_agent/evals.rs index 19e72dd2ee..bfae6afddc 100644 --- a/crates/assistant_tools/src/edit_agent/evals.rs +++ b/crates/assistant_tools/src/edit_agent/evals.rs @@ -34,13 +34,30 @@ use util::path; #[test] #[cfg_attr(not(feature = "eval"), ignore)] fn eval_extract_handle_command_output() { + // Test how well agent generates multiple edit hunks. + // + // Model | Pass rate + // ----------------------------|---------- + // claude-3.7-sonnet | 0.98 + // gemini-2.5-pro | 0.86 + // gemini-2.5-flash | 0.11 + // gpt-4.1 | 1.00 + let input_file_path = "root/blame.rs"; let input_file_content = include_str!("evals/fixtures/extract_handle_command_output/before.rs"); - let output_file_content = include_str!("evals/fixtures/extract_handle_command_output/after.rs"); + let possible_diffs = vec![ + include_str!("evals/fixtures/extract_handle_command_output/possible-01.diff"), + include_str!("evals/fixtures/extract_handle_command_output/possible-02.diff"), + include_str!("evals/fixtures/extract_handle_command_output/possible-03.diff"), + include_str!("evals/fixtures/extract_handle_command_output/possible-04.diff"), + include_str!("evals/fixtures/extract_handle_command_output/possible-05.diff"), + include_str!("evals/fixtures/extract_handle_command_output/possible-06.diff"), + include_str!("evals/fixtures/extract_handle_command_output/possible-07.diff"), + ]; let edit_description = "Extract `handle_command_output` method from `run_git_blame`."; eval( 100, - 0.95, + 0.7, // Taking the lower bar for Gemini EvalInput::from_conversation( vec![ message( @@ -49,6 +66,7 @@ fn eval_extract_handle_command_output() { Read the `{input_file_path}` file and extract a method in the final stanza of `run_git_blame` to deal with command failures, call it `handle_command_output` and take the std::process::Output as the only parameter. + Do not document the method and do not add any comments. Add it right next to `run_git_blame` and copy it verbatim from `run_git_blame`. "})], @@ -83,7 +101,7 @@ fn eval_extract_handle_command_output() { ), ], Some(input_file_content.into()), - EvalAssertion::assert_eq(output_file_content), + EvalAssertion::assert_diff_any(possible_diffs), ), ); } @@ -649,7 +667,7 @@ fn eval_zode() { let invalid_starts = [' ', '`', '\n']; let mut message = String::new(); for start in invalid_starts { - if sample.text.starts_with(start) { + if sample.text_after.starts_with(start) { message.push_str(&format!("The sample starts with a {:?}\n", start)); break; } @@ -1074,7 +1092,8 @@ impl EvalInput { #[derive(Clone)] struct EvalSample { - text: String, + text_before: String, + text_after: String, edit_output: EditAgentOutput, diff: String, } @@ -1131,7 +1150,7 @@ impl EvalAssertion { let expected = expected.into(); Self::new(async move |sample, _judge, _cx| { Ok(EvalAssertionOutcome { - score: if strip_empty_lines(&sample.text) == strip_empty_lines(&expected) { + score: if strip_empty_lines(&sample.text_after) == strip_empty_lines(&expected) { 100 } else { 0 @@ -1141,6 +1160,22 @@ impl EvalAssertion { }) } + fn assert_diff_any(expected_diffs: Vec>) -> Self { + let expected_diffs: Vec = expected_diffs.into_iter().map(Into::into).collect(); + Self::new(async move |sample, _judge, _cx| { + let matches = expected_diffs.iter().any(|possible_diff| { + let expected = + language::apply_diff_patch(&sample.text_before, possible_diff).unwrap(); + strip_empty_lines(&expected) == strip_empty_lines(&sample.text_after) + }); + + Ok(EvalAssertionOutcome { + score: if matches { 100 } else { 0 }, + message: None, + }) + }) + } + fn judge_diff(assertions: &'static str) -> Self { Self::new(async move |sample, judge, cx| { let prompt = DiffJudgeTemplate { @@ -1225,7 +1260,7 @@ fn eval(iterations: usize, expected_pass_ratio: f32, mut eval: EvalInput) { if output.assertion.score < 80 { failed_count += 1; failed_evals - .entry(output.sample.text.clone()) + .entry(output.sample.text_after.clone()) .or_insert(Vec::new()) .push(output); } @@ -1470,6 +1505,7 @@ impl EditAgentTest { tools, ..Default::default() }; + let edit_output = if matches!(eval.edit_file_input.mode, EditFileMode::Edit) { if let Some(input_content) = eval.input_content.as_deref() { buffer.update(cx, |buffer, cx| buffer.set_text(input_content, cx)); @@ -1498,7 +1534,8 @@ impl EditAgentTest { eval.input_content.as_deref().unwrap_or_default(), &buffer_text, ), - text: buffer_text, + text_before: eval.input_content.unwrap_or_default(), + text_after: buffer_text, }; let assertion = eval .assertion diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/after.rs b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/after.rs deleted file mode 100644 index 715aff57cb..0000000000 --- a/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/after.rs +++ /dev/null @@ -1,375 +0,0 @@ -use crate::commit::get_messages; -use crate::{GitRemote, Oid}; -use anyhow::{Context as _, Result, anyhow}; -use collections::{HashMap, HashSet}; -use futures::AsyncWriteExt; -use gpui::SharedString; -use serde::{Deserialize, Serialize}; -use std::process::Stdio; -use std::{ops::Range, path::Path}; -use text::Rope; -use time::OffsetDateTime; -use time::UtcOffset; -use time::macros::format_description; - -pub use git2 as libgit; - -#[derive(Debug, Clone, Default)] -pub struct Blame { - pub entries: Vec, - pub messages: HashMap, - pub remote_url: Option, -} - -#[derive(Clone, Debug, Default)] -pub struct ParsedCommitMessage { - pub message: SharedString, - pub permalink: Option, - pub pull_request: Option, - pub remote: Option, -} - -impl Blame { - pub async fn for_path( - git_binary: &Path, - working_directory: &Path, - path: &Path, - content: &Rope, - remote_url: Option, - ) -> Result { - let output = run_git_blame(git_binary, working_directory, path, content).await?; - let mut entries = parse_git_blame(&output)?; - entries.sort_unstable_by(|a, b| a.range.start.cmp(&b.range.start)); - - let mut unique_shas = HashSet::default(); - - for entry in entries.iter_mut() { - unique_shas.insert(entry.sha); - } - - let shas = unique_shas.into_iter().collect::>(); - let messages = get_messages(working_directory, &shas) - .await - .context("failed to get commit messages")?; - - Ok(Self { - entries, - messages, - remote_url, - }) - } -} - -const GIT_BLAME_NO_COMMIT_ERROR: &str = "fatal: no such ref: HEAD"; -const GIT_BLAME_NO_PATH: &str = "fatal: no such path"; - -async fn run_git_blame( - git_binary: &Path, - working_directory: &Path, - path: &Path, - contents: &Rope, -) -> Result { - let mut child = util::command::new_smol_command(git_binary) - .current_dir(working_directory) - .arg("blame") - .arg("--incremental") - .arg("--contents") - .arg("-") - .arg(path.as_os_str()) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .context("starting git blame process")?; - - let stdin = child - .stdin - .as_mut() - .context("failed to get pipe to stdin of git blame command")?; - - for chunk in contents.chunks() { - stdin.write_all(chunk.as_bytes()).await?; - } - stdin.flush().await?; - - let output = child.output().await.context("reading git blame output")?; - - handle_command_output(output) -} - -fn handle_command_output(output: std::process::Output) -> Result { - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - let trimmed = stderr.trim(); - if trimmed == GIT_BLAME_NO_COMMIT_ERROR || trimmed.contains(GIT_BLAME_NO_PATH) { - return Ok(String::new()); - } - anyhow::bail!("git blame process failed: {stderr}"); - } - - Ok(String::from_utf8(output.stdout)?) -} - -#[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq, Eq)] -pub struct BlameEntry { - pub sha: Oid, - - pub range: Range, - - pub original_line_number: u32, - - pub author: Option, - pub author_mail: Option, - pub author_time: Option, - pub author_tz: Option, - - pub committer_name: Option, - pub committer_email: Option, - pub committer_time: Option, - pub committer_tz: Option, - - pub summary: Option, - - pub previous: Option, - pub filename: String, -} - -impl BlameEntry { - // Returns a BlameEntry by parsing the first line of a `git blame --incremental` - // entry. The line MUST have this format: - // - // <40-byte-hex-sha1> - fn new_from_blame_line(line: &str) -> Result { - let mut parts = line.split_whitespace(); - - let sha = parts - .next() - .and_then(|line| line.parse::().ok()) - .with_context(|| format!("parsing sha from {line}"))?; - - let original_line_number = parts - .next() - .and_then(|line| line.parse::().ok()) - .with_context(|| format!("parsing original line number from {line}"))?; - let final_line_number = parts - .next() - .and_then(|line| line.parse::().ok()) - .with_context(|| format!("parsing final line number from {line}"))?; - - let line_count = parts - .next() - .and_then(|line| line.parse::().ok()) - .with_context(|| format!("parsing line count from {line}"))?; - - let start_line = final_line_number.saturating_sub(1); - let end_line = start_line + line_count; - let range = start_line..end_line; - - Ok(Self { - sha, - range, - original_line_number, - ..Default::default() - }) - } - - pub fn author_offset_date_time(&self) -> Result { - if let (Some(author_time), Some(author_tz)) = (self.author_time, &self.author_tz) { - let format = format_description!("[offset_hour][offset_minute]"); - let offset = UtcOffset::parse(author_tz, &format)?; - let date_time_utc = OffsetDateTime::from_unix_timestamp(author_time)?; - - Ok(date_time_utc.to_offset(offset)) - } else { - // Directly return current time in UTC if there's no committer time or timezone - Ok(time::OffsetDateTime::now_utc()) - } - } -} - -// parse_git_blame parses the output of `git blame --incremental`, which returns -// all the blame-entries for a given path incrementally, as it finds them. -// -// Each entry *always* starts with: -// -// <40-byte-hex-sha1> -// -// Each entry *always* ends with: -// -// filename -// -// Line numbers are 1-indexed. -// -// A `git blame --incremental` entry looks like this: -// -// 6ad46b5257ba16d12c5ca9f0d4900320959df7f4 2 2 1 -// author Joe Schmoe -// author-mail -// author-time 1709741400 -// author-tz +0100 -// committer Joe Schmoe -// committer-mail -// committer-time 1709741400 -// committer-tz +0100 -// summary Joe's cool commit -// previous 486c2409237a2c627230589e567024a96751d475 index.js -// filename index.js -// -// If the entry has the same SHA as an entry that was already printed then no -// signature information is printed: -// -// 6ad46b5257ba16d12c5ca9f0d4900320959df7f4 3 4 1 -// previous 486c2409237a2c627230589e567024a96751d475 index.js -// filename index.js -// -// More about `--incremental` output: https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-blame.html -fn parse_git_blame(output: &str) -> Result> { - let mut entries: Vec = Vec::new(); - let mut index: HashMap = HashMap::default(); - - let mut current_entry: Option = None; - - for line in output.lines() { - let mut done = false; - - match &mut current_entry { - None => { - let mut new_entry = BlameEntry::new_from_blame_line(line)?; - - if let Some(existing_entry) = index - .get(&new_entry.sha) - .and_then(|slot| entries.get(*slot)) - { - new_entry.author.clone_from(&existing_entry.author); - new_entry - .author_mail - .clone_from(&existing_entry.author_mail); - new_entry.author_time = existing_entry.author_time; - new_entry.author_tz.clone_from(&existing_entry.author_tz); - new_entry - .committer_name - .clone_from(&existing_entry.committer_name); - new_entry - .committer_email - .clone_from(&existing_entry.committer_email); - new_entry.committer_time = existing_entry.committer_time; - new_entry - .committer_tz - .clone_from(&existing_entry.committer_tz); - new_entry.summary.clone_from(&existing_entry.summary); - } - - current_entry.replace(new_entry); - } - Some(entry) => { - let Some((key, value)) = line.split_once(' ') else { - continue; - }; - let is_committed = !entry.sha.is_zero(); - match key { - "filename" => { - entry.filename = value.into(); - done = true; - } - "previous" => entry.previous = Some(value.into()), - - "summary" if is_committed => entry.summary = Some(value.into()), - "author" if is_committed => entry.author = Some(value.into()), - "author-mail" if is_committed => entry.author_mail = Some(value.into()), - "author-time" if is_committed => { - entry.author_time = Some(value.parse::()?) - } - "author-tz" if is_committed => entry.author_tz = Some(value.into()), - - "committer" if is_committed => entry.committer_name = Some(value.into()), - "committer-mail" if is_committed => entry.committer_email = Some(value.into()), - "committer-time" if is_committed => { - entry.committer_time = Some(value.parse::()?) - } - "committer-tz" if is_committed => entry.committer_tz = Some(value.into()), - _ => {} - } - } - }; - - if done { - if let Some(entry) = current_entry.take() { - index.insert(entry.sha, entries.len()); - - // We only want annotations that have a commit. - if !entry.sha.is_zero() { - entries.push(entry); - } - } - } - } - - Ok(entries) -} - -#[cfg(test)] -mod tests { - use std::path::PathBuf; - - use super::BlameEntry; - use super::parse_git_blame; - - fn read_test_data(filename: &str) -> String { - let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - path.push("test_data"); - path.push(filename); - - std::fs::read_to_string(&path) - .unwrap_or_else(|_| panic!("Could not read test data at {:?}. Is it generated?", path)) - } - - fn assert_eq_golden(entries: &Vec, golden_filename: &str) { - let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - path.push("test_data"); - path.push("golden"); - path.push(format!("{}.json", golden_filename)); - - let mut have_json = - serde_json::to_string_pretty(&entries).expect("could not serialize entries to JSON"); - // We always want to save with a trailing newline. - have_json.push('\n'); - - let update = std::env::var("UPDATE_GOLDEN") - .map(|val| val.eq_ignore_ascii_case("true")) - .unwrap_or(false); - - if update { - std::fs::create_dir_all(path.parent().unwrap()) - .expect("could not create golden test data directory"); - std::fs::write(&path, have_json).expect("could not write out golden data"); - } else { - let want_json = - std::fs::read_to_string(&path).unwrap_or_else(|_| { - panic!("could not read golden test data file at {:?}. Did you run the test with UPDATE_GOLDEN=true before?", path); - }).replace("\r\n", "\n"); - - pretty_assertions::assert_eq!(have_json, want_json, "wrong blame entries"); - } - } - - #[test] - fn test_parse_git_blame_not_committed() { - let output = read_test_data("blame_incremental_not_committed"); - let entries = parse_git_blame(&output).unwrap(); - assert_eq_golden(&entries, "blame_incremental_not_committed"); - } - - #[test] - fn test_parse_git_blame_simple() { - let output = read_test_data("blame_incremental_simple"); - let entries = parse_git_blame(&output).unwrap(); - assert_eq_golden(&entries, "blame_incremental_simple"); - } - - #[test] - fn test_parse_git_blame_complex() { - let output = read_test_data("blame_incremental_complex"); - let entries = parse_git_blame(&output).unwrap(); - assert_eq_golden(&entries, "blame_incremental_complex"); - } -} diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-01.diff b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-01.diff new file mode 100644 index 0000000000..c13a223c63 --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-01.diff @@ -0,0 +1,11 @@ +@@ -94,6 +94,10 @@ + + let output = child.output().await.context("reading git blame output")?; + ++ handle_command_output(output) ++} ++ ++fn handle_command_output(output: std::process::Output) -> Result { + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + let trimmed = stderr.trim(); diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-02.diff b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-02.diff new file mode 100644 index 0000000000..aa36a9241e --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-02.diff @@ -0,0 +1,26 @@ +@@ -95,15 +95,19 @@ + let output = child.output().await.context("reading git blame output")?; + + if !output.status.success() { +- let stderr = String::from_utf8_lossy(&output.stderr); +- let trimmed = stderr.trim(); +- if trimmed == GIT_BLAME_NO_COMMIT_ERROR || trimmed.contains(GIT_BLAME_NO_PATH) { +- return Ok(String::new()); +- } +- anyhow::bail!("git blame process failed: {stderr}"); ++ return handle_command_output(output); + } + + Ok(String::from_utf8(output.stdout)?) ++} ++ ++fn handle_command_output(output: std::process::Output) -> Result { ++ let stderr = String::from_utf8_lossy(&output.stderr); ++ let trimmed = stderr.trim(); ++ if trimmed == GIT_BLAME_NO_COMMIT_ERROR || trimmed.contains(GIT_BLAME_NO_PATH) { ++ return Ok(String::new()); ++ } ++ anyhow::bail!("git blame process failed: {stderr}"); + } + + #[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq, Eq)] diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-03.diff b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-03.diff new file mode 100644 index 0000000000..d3c19b4380 --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-03.diff @@ -0,0 +1,11 @@ +@@ -93,7 +93,10 @@ + stdin.flush().await?; + + let output = child.output().await.context("reading git blame output")?; ++ handle_command_output(output) ++} + ++fn handle_command_output(output: std::process::Output) -> Result { + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + let trimmed = stderr.trim(); diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-04.diff b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-04.diff new file mode 100644 index 0000000000..1f87e4352c --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-04.diff @@ -0,0 +1,24 @@ +@@ -93,17 +93,20 @@ + stdin.flush().await?; + + let output = child.output().await.context("reading git blame output")?; ++ handle_command_output(&output)?; ++ Ok(String::from_utf8(output.stdout)?) ++} + ++fn handle_command_output(output: &std::process::Output) -> Result<()> { + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + let trimmed = stderr.trim(); + if trimmed == GIT_BLAME_NO_COMMIT_ERROR || trimmed.contains(GIT_BLAME_NO_PATH) { +- return Ok(String::new()); ++ return Ok(()); + } + anyhow::bail!("git blame process failed: {stderr}"); + } +- +- Ok(String::from_utf8(output.stdout)?) ++ Ok(()) + } + + #[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq, Eq)] diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-05.diff b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-05.diff new file mode 100644 index 0000000000..8f4b745b9a --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-05.diff @@ -0,0 +1,26 @@ +@@ -95,15 +95,19 @@ + let output = child.output().await.context("reading git blame output")?; + + if !output.status.success() { +- let stderr = String::from_utf8_lossy(&output.stderr); +- let trimmed = stderr.trim(); +- if trimmed == GIT_BLAME_NO_COMMIT_ERROR || trimmed.contains(GIT_BLAME_NO_PATH) { +- return Ok(String::new()); +- } +- anyhow::bail!("git blame process failed: {stderr}"); ++ return handle_command_output(&output); + } + + Ok(String::from_utf8(output.stdout)?) ++} ++ ++fn handle_command_output(output: &std::process::Output) -> Result { ++ let stderr = String::from_utf8_lossy(&output.stderr); ++ let trimmed = stderr.trim(); ++ if trimmed == GIT_BLAME_NO_COMMIT_ERROR || trimmed.contains(GIT_BLAME_NO_PATH) { ++ return Ok(String::new()); ++ } ++ anyhow::bail!("git blame process failed: {stderr}"); + } + + #[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq, Eq)] diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-06.diff b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-06.diff new file mode 100644 index 0000000000..3514d9c8e2 --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-06.diff @@ -0,0 +1,23 @@ +@@ -93,7 +93,12 @@ + stdin.flush().await?; + + let output = child.output().await.context("reading git blame output")?; ++ handle_command_output(&output)?; + ++ Ok(String::from_utf8(output.stdout)?) ++} ++ ++fn handle_command_output(output: &std::process::Output) -> Result { + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + let trimmed = stderr.trim(); +@@ -102,8 +107,7 @@ + } + anyhow::bail!("git blame process failed: {stderr}"); + } +- +- Ok(String::from_utf8(output.stdout)?) ++ Ok(String::from_utf8_lossy(&output.stdout).into_owned()) + } + + #[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq, Eq)] diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-07.diff b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-07.diff new file mode 100644 index 0000000000..9691479e29 --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-07.diff @@ -0,0 +1,26 @@ +@@ -95,15 +95,19 @@ + let output = child.output().await.context("reading git blame output")?; + + if !output.status.success() { +- let stderr = String::from_utf8_lossy(&output.stderr); +- let trimmed = stderr.trim(); +- if trimmed == GIT_BLAME_NO_COMMIT_ERROR || trimmed.contains(GIT_BLAME_NO_PATH) { +- return Ok(String::new()); +- } +- anyhow::bail!("git blame process failed: {stderr}"); ++ return handle_command_output(output); + } + + Ok(String::from_utf8(output.stdout)?) ++} ++ ++fn handle_command_output(output: std::process::Output) -> Result { ++ let stderr = String::from_utf8_lossy(&output.stderr); ++ let trimmed = stderr.trim(); ++ if trimmed == GIT_BLAME_NO_COMMIT_ERROR || trimmed.contains(GIT_BLAME_NO_PATH) { ++ return Ok(String::new()); ++ } ++ anyhow::bail!("git blame process failed: {stderr}"); + } + + #[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq, Eq)] diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-08.diff b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-08.diff new file mode 100644 index 0000000000..f5da859005 --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/evals/fixtures/extract_handle_command_output/possible-08.diff @@ -0,0 +1,26 @@ +@@ -95,15 +95,19 @@ + let output = child.output().await.context("reading git blame output")?; + + if !output.status.success() { +- let stderr = String::from_utf8_lossy(&output.stderr); +- let trimmed = stderr.trim(); +- if trimmed == GIT_BLAME_NO_COMMIT_ERROR || trimmed.contains(GIT_BLAME_NO_PATH) { +- return Ok(String::new()); +- } +- anyhow::bail!("git blame process failed: {stderr}"); ++ return handle_command_output(output); + } + + Ok(String::from_utf8(output.stdout)?) ++} ++ ++fn handle_command_output(output: std::process::Output) -> Result { ++ let stderr = String::from_utf8_lossy(&output.stderr); ++ let trimmed = stderr.trim(); ++ if trimmed == GIT_BLAME_NO_COMMIT_ERROR || trimmed.contains(GIT_BLAME_NO_PATH) { ++ return Ok(String::new()); ++ } ++ anyhow::bail!("git blame process failed: {stderr}") + } + + #[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq, Eq)] diff --git a/crates/assistant_tools/src/templates/edit_file_prompt.hbs b/crates/assistant_tools/src/templates/edit_file_prompt.hbs index 2e18fcc4b0..3308c9e4f8 100644 --- a/crates/assistant_tools/src/templates/edit_file_prompt.hbs +++ b/crates/assistant_tools/src/templates/edit_file_prompt.hbs @@ -27,20 +27,57 @@ NEW TEXT 3 HERE ``` -Rules for editing: +# File Editing Instructions + +- Use `` and `` tags to replace content +- `` must exactly match existing file content, including indentation +- `` must come from the actual file, not an outline +- `` cannot be empty +- Be minimal with replacements: + - For unique lines, include only those lines + - For non-unique lines, include enough context to identify them +- Do not escape quotes, newlines, or other characters within tags +- For multiple occurrences, repeat the same tag pair for each instance +- Edits are sequential - each assumes previous edits are already applied +- Only edit the specified file +- Always close all tags properly + + +{{!-- This example is important for Gemini 2.5 --}} + + + + +struct User { + name: String, + email: String, +} + + +struct User { + name: String, + email: String, + active: bool, +} + + + + let user = User { + name: String::from("John"), + email: String::from("john@example.com"), + }; + + + let user = User { + name: String::from("John"), + email: String::from("john@example.com"), + active: true, + }; + + + + -- `old_text` represents lines in the input file that will be replaced with `new_text`. -- `old_text` MUST exactly match the existing file content, character for character, including indentation. -- `old_text` MUST NEVER come from the outline, but from actual lines in the file. -- Strive to be minimal in the lines you replace in `old_text`: - - If the lines you want to replace are unique, you MUST include just those in the `old_text`. - - If the lines you want to replace are NOT unique, you MUST include enough context around them in `old_text` to distinguish them from other lines. -- If you want to replace many occurrences of the same text, repeat the same `old_text`/`new_text` pair multiple times and I will apply them sequentially, one occurrence at a time. -- When reporting multiple edits, each edit assumes the previous one has already been applied! Therefore, you must ensure `old_text` doesn't reference text that has already been modified by a previous edit. -- Don't explain the edits, just report them. -- Only edit the file specified in `` and NEVER include edits to other files! -- If you open an tag, you MUST close it using -- If you open an tag, you MUST close it using {{path}} diff --git a/crates/language/Cargo.toml b/crates/language/Cargo.toml index 407573513d..8750480bb9 100644 --- a/crates/language/Cargo.toml +++ b/crates/language/Cargo.toml @@ -66,6 +66,7 @@ tree-sitter.workspace = true unicase = "2.6" util.workspace = true workspace-hack.workspace = true +diffy = "0.4.2" [dev-dependencies] collections = { workspace = true, features = ["test-support"] } diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 75cdca1b0f..77884634fc 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -65,7 +65,9 @@ use std::{num::NonZeroU32, sync::OnceLock}; use syntax_map::{QueryCursorHandle, SyntaxSnapshot}; use task::RunnableTag; pub use task_context::{ContextProvider, RunnableRange}; -pub use text_diff::{DiffOptions, line_diff, text_diff, text_diff_with_options, unified_diff}; +pub use text_diff::{ + DiffOptions, apply_diff_patch, line_diff, text_diff, text_diff_with_options, unified_diff, +}; use theme::SyntaxTheme; pub use toolchain::{LanguageToolchainStore, Toolchain, ToolchainList, ToolchainLister}; use tree_sitter::{self, Query, QueryCursor, WasmStore, wasmtime}; diff --git a/crates/language/src/text_diff.rs b/crates/language/src/text_diff.rs index fd95830d66..f9221f571a 100644 --- a/crates/language/src/text_diff.rs +++ b/crates/language/src/text_diff.rs @@ -1,4 +1,5 @@ use crate::{CharClassifier, CharKind, LanguageScope}; +use anyhow::{Context, anyhow}; use imara_diff::{ Algorithm, UnifiedDiffBuilder, diff, intern::{InternedInput, Token}, @@ -119,6 +120,12 @@ pub fn text_diff_with_options( edits } +pub fn apply_diff_patch(base_text: &str, patch: &str) -> Result { + let patch = diffy::Patch::from_str(patch).context("Failed to parse patch")?; + let result = diffy::apply(base_text, &patch); + result.map_err(|err| anyhow!(err)) +} + fn should_perform_word_diff_within_hunk( old_row_range: &Range, old_byte_range: &Range, @@ -270,4 +277,12 @@ mod tests { ] ); } + + #[test] + fn test_apply_diff_patch() { + let old_text = "one two\nthree four five\nsix seven eight nine\nten\n"; + let new_text = "one two\nthree FOUR five\nsix SEVEN eight nine\nten\nELEVEN\n"; + let patch = unified_diff(old_text, new_text); + assert_eq!(apply_diff_patch(old_text, &patch).unwrap(), new_text); + } }