diff --git a/crates/assistant_tools/src/edit_agent.rs b/crates/assistant_tools/src/edit_agent.rs index a247d5f4de..f891a769c5 100644 --- a/crates/assistant_tools/src/edit_agent.rs +++ b/crates/assistant_tools/src/edit_agent.rs @@ -286,7 +286,13 @@ impl EditAgent { _ => { let ranges = resolved_old_text .into_iter() - .map(|text| text.range) + .map(|text| { + let start_line = + (snapshot.offset_to_point(text.range.start).row + 1) as usize; + let end_line = + (snapshot.offset_to_point(text.range.end).row + 1) as usize; + start_line..end_line + }) .collect(); output_events .unbounded_send(EditAgentOutputEvent::AmbiguousEditRange(ranges)) @@ -429,25 +435,25 @@ impl EditAgent { let task = cx.background_spawn(async move { let mut matcher = StreamingFuzzyMatcher::new(snapshot); while let Some(edit_event) = edit_events.next().await { - let EditParserEvent::OldTextChunk { chunk, done } = edit_event? else { + let EditParserEvent::OldTextChunk { + chunk, + done, + line_hint, + } = edit_event? + else { break; }; - old_range_tx.send(matcher.push(&chunk))?; + old_range_tx.send(matcher.push(&chunk, line_hint))?; if done { break; } } let matches = matcher.finish(); + let best_match = matcher.select_best_match(); - let old_range = if matches.len() == 1 { - matches.first() - } else { - // No matches or multiple ambiguous matches - None - }; - old_range_tx.send(old_range.cloned())?; + old_range_tx.send(best_match.clone())?; let indent = LineIndent::from_iter( matcher @@ -456,10 +462,18 @@ impl EditAgent { .unwrap_or(&String::new()) .chars(), ); - let resolved_old_texts = matches - .into_iter() - .map(|range| ResolvedOldText { range, indent }) - .collect::>(); + + let resolved_old_texts = if let Some(best_match) = best_match { + vec![ResolvedOldText { + range: best_match, + indent, + }] + } else { + matches + .into_iter() + .map(|range| ResolvedOldText { range, indent }) + .collect::>() + }; Ok((edit_events, resolved_old_texts)) }); @@ -1374,10 +1388,12 @@ mod tests { &agent, indoc! {" - return 42; + return 42; + } - return 100; + return 100; + } "}, &mut rng, @@ -1407,7 +1423,7 @@ mod tests { // And AmbiguousEditRange even should be emitted let events = drain_events(&mut events); - let ambiguous_ranges = vec![17..31, 52..66, 87..101]; + let ambiguous_ranges = vec![2..3, 6..7, 10..11]; assert!( events.contains(&EditAgentOutputEvent::AmbiguousEditRange(ambiguous_ranges)), "Should emit AmbiguousEditRange for non-unique text" diff --git a/crates/assistant_tools/src/edit_agent/edit_parser.rs b/crates/assistant_tools/src/edit_agent/edit_parser.rs index 3829491560..f477443a5b 100644 --- a/crates/assistant_tools/src/edit_agent/edit_parser.rs +++ b/crates/assistant_tools/src/edit_agent/edit_parser.rs @@ -1,4 +1,5 @@ use derive_more::{Add, AddAssign}; +use regex::Regex; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use smallvec::SmallVec; @@ -11,8 +12,15 @@ const END_TAGS: [&str; 3] = [OLD_TEXT_END_TAG, NEW_TEXT_END_TAG, EDITS_END_TAG]; #[derive(Debug)] pub enum EditParserEvent { - OldTextChunk { chunk: String, done: bool }, - NewTextChunk { chunk: String, done: bool }, + OldTextChunk { + chunk: String, + done: bool, + line_hint: Option, + }, + NewTextChunk { + chunk: String, + done: bool, + }, } #[derive( @@ -33,7 +41,7 @@ pub struct EditParser { #[derive(Debug, PartialEq)] enum EditParserState { Pending, - WithinOldText { start: bool }, + WithinOldText { start: bool, line_hint: Option }, AfterOldText, WithinNewText { start: bool }, } @@ -54,14 +62,24 @@ impl EditParser { loop { match &mut self.state { EditParserState::Pending => { - if let Some(start) = self.buffer.find("") { - self.buffer.drain(..start + "".len()); - self.state = EditParserState::WithinOldText { start: true }; + if let Some(start) = self.buffer.find("') { + let tag_end = start + tag_end + 1; + let tag = &self.buffer[start..tag_end]; + let line_hint = self.parse_line_hint(tag); + self.buffer.drain(..tag_end); + self.state = EditParserState::WithinOldText { + start: true, + line_hint, + }; + } else { + break; + } } else { break; } } - EditParserState::WithinOldText { start } => { + EditParserState::WithinOldText { start, line_hint } => { if !self.buffer.is_empty() { if *start && self.buffer.starts_with('\n') { self.buffer.remove(0); @@ -69,6 +87,7 @@ impl EditParser { *start = false; } + let line_hint = *line_hint; if let Some(tag_range) = self.find_end_tag() { let mut chunk = self.buffer[..tag_range.start].to_string(); if chunk.ends_with('\n') { @@ -82,12 +101,17 @@ impl EditParser { self.buffer.drain(..tag_range.end); self.state = EditParserState::AfterOldText; - edit_events.push(EditParserEvent::OldTextChunk { chunk, done: true }); + edit_events.push(EditParserEvent::OldTextChunk { + chunk, + done: true, + line_hint, + }); } else { if !self.ends_with_tag_prefix() { edit_events.push(EditParserEvent::OldTextChunk { chunk: mem::take(&mut self.buffer), done: false, + line_hint, }); } break; @@ -154,6 +178,16 @@ impl EditParser { end_prefixes.any(|prefix| self.buffer.ends_with(&prefix)) } + fn parse_line_hint(&self, tag: &str) -> Option { + static LINE_HINT_REGEX: std::sync::LazyLock = + std::sync::LazyLock::new(|| Regex::new(r#"line=(?:"?)(\d+)"#).unwrap()); + + LINE_HINT_REGEX + .captures(tag) + .and_then(|caps| caps.get(1)) + .and_then(|m| m.as_str().parse::().ok()) + } + pub fn finish(self) -> EditParserMetrics { self.metrics } @@ -178,6 +212,7 @@ mod tests { vec![Edit { old_text: "original".to_string(), new_text: "updated".to_string(), + line_hint: None, }] ); assert_eq!( @@ -209,10 +244,12 @@ mod tests { Edit { old_text: "first old".to_string(), new_text: "first new".to_string(), + line_hint: None, }, Edit { old_text: "second old".to_string(), new_text: "second new".to_string(), + line_hint: None, }, ] ); @@ -244,14 +281,17 @@ mod tests { Edit { old_text: "content".to_string(), new_text: "updated content".to_string(), + line_hint: None, }, Edit { old_text: "second item".to_string(), new_text: "modified second item".to_string(), + line_hint: None, }, Edit { old_text: "third case".to_string(), new_text: "improved third case".to_string(), + line_hint: None, }, ] ); @@ -276,6 +316,7 @@ mod tests { vec![Edit { old_text: "code with nested elements".to_string(), new_text: "new content".to_string(), + line_hint: None, }] ); assert_eq!( @@ -299,6 +340,7 @@ mod tests { vec![Edit { old_text: "".to_string(), new_text: "".to_string(), + line_hint: None, }] ); assert_eq!( @@ -322,6 +364,7 @@ mod tests { vec![Edit { old_text: "line1\nline2\nline3".to_string(), new_text: "line1\nmodified line2\nline3".to_string(), + line_hint: None, }] ); assert_eq!( @@ -368,10 +411,12 @@ mod tests { Edit { old_text: "a\nb\nc".to_string(), new_text: "a\nB\nc".to_string(), + line_hint: None, }, Edit { old_text: "d\ne\nf".to_string(), new_text: "D\ne\nF".to_string(), + line_hint: None, } ] ); @@ -402,6 +447,7 @@ mod tests { vec![Edit { old_text: "Lorem".to_string(), new_text: "LOREM".to_string(), + line_hint: None, },] ); assert_eq!( @@ -413,10 +459,77 @@ mod tests { ); } + #[gpui::test(iterations = 100)] + fn test_line_hints(mut rng: StdRng) { + // Line hint is a single quoted line number + let mut parser = EditParser::new(); + + let edits = parse_random_chunks( + r#" + original code + updated code"#, + &mut parser, + &mut rng, + ); + + assert_eq!(edits.len(), 1); + assert_eq!(edits[0].old_text, "original code"); + assert_eq!(edits[0].line_hint, Some(23)); + assert_eq!(edits[0].new_text, "updated code"); + + // Line hint is a single unquoted line number + let mut parser = EditParser::new(); + + let edits = parse_random_chunks( + r#" + original code + updated code"#, + &mut parser, + &mut rng, + ); + + assert_eq!(edits.len(), 1); + assert_eq!(edits[0].old_text, "original code"); + assert_eq!(edits[0].line_hint, Some(45)); + assert_eq!(edits[0].new_text, "updated code"); + + // Line hint is a range + let mut parser = EditParser::new(); + + let edits = parse_random_chunks( + r#" + original code + updated code"#, + &mut parser, + &mut rng, + ); + + assert_eq!(edits.len(), 1); + assert_eq!(edits[0].old_text, "original code"); + assert_eq!(edits[0].line_hint, Some(23)); + assert_eq!(edits[0].new_text, "updated code"); + + // No line hint + let mut parser = EditParser::new(); + let edits = parse_random_chunks( + r#" + old + new"#, + &mut parser, + &mut rng, + ); + + assert_eq!(edits.len(), 1); + assert_eq!(edits[0].old_text, "old"); + assert_eq!(edits[0].line_hint, None); + assert_eq!(edits[0].new_text, "new"); + } + #[derive(Default, Debug, PartialEq, Eq)] struct Edit { old_text: String, new_text: String, + line_hint: Option, } fn parse_random_chunks(input: &str, parser: &mut EditParser, rng: &mut StdRng) -> Vec { @@ -433,10 +546,15 @@ mod tests { for chunk_ix in chunk_indices { for event in parser.push(&input[last_ix..chunk_ix]) { match event { - EditParserEvent::OldTextChunk { chunk, done } => { + EditParserEvent::OldTextChunk { + chunk, + done, + line_hint, + } => { old_text.as_mut().unwrap().push_str(&chunk); if done { pending_edit.old_text = old_text.take().unwrap(); + pending_edit.line_hint = line_hint; new_text = Some(String::new()); } } diff --git a/crates/assistant_tools/src/edit_agent/evals.rs b/crates/assistant_tools/src/edit_agent/evals.rs index d19cf922fc..567bfecc0a 100644 --- a/crates/assistant_tools/src/edit_agent/evals.rs +++ b/crates/assistant_tools/src/edit_agent/evals.rs @@ -438,14 +438,21 @@ fn eval_disable_cursor_blinking() { #[test] #[cfg_attr(not(feature = "eval"), ignore)] fn eval_from_pixels_constructor() { - // Results for 2025-05-22 + // Results for 2025-06-13 + // + // The outcome of this evaluation depends heavily on the LINE_HINT_TOLERANCE + // value. Higher values improve the pass rate but may sometimes cause + // edits to be misapplied. In the context of this eval, this means + // the agent might add from_pixels tests in incorrect locations + // (e.g., at the beginning of the file), yet the evaluation may still + // rate it highly. // // Model | Pass rate // ============================================ // - // claude-3.7-sonnet | - // gemini-2.5-pro-preview-03-25 | 0.94 - // gemini-2.5-flash-preview-04-17 | + // claude-4.0-sonnet | 0.99 + // claude-3.7-sonnet | 0.88 + // gemini-2.5-pro-preview-03-25 | 0.96 // gpt-4.1 | let input_file_path = "root/canvas.rs"; let input_file_content = include_str!("evals/fixtures/from_pixels_constructor/before.rs"); diff --git a/crates/assistant_tools/src/edit_agent/streaming_fuzzy_matcher.rs b/crates/assistant_tools/src/edit_agent/streaming_fuzzy_matcher.rs index 33fe687618..b2fc4beaeb 100644 --- a/crates/assistant_tools/src/edit_agent/streaming_fuzzy_matcher.rs +++ b/crates/assistant_tools/src/edit_agent/streaming_fuzzy_matcher.rs @@ -10,8 +10,9 @@ const DELETION_COST: u32 = 10; pub struct StreamingFuzzyMatcher { snapshot: TextBufferSnapshot, query_lines: Vec, + line_hint: Option, incomplete_line: String, - best_matches: Vec>, + matches: Vec>, matrix: SearchMatrix, } @@ -21,8 +22,9 @@ impl StreamingFuzzyMatcher { Self { snapshot, query_lines: Vec::new(), + line_hint: None, incomplete_line: String::new(), - best_matches: Vec::new(), + matches: Vec::new(), matrix: SearchMatrix::new(buffer_line_count + 1), } } @@ -41,9 +43,10 @@ impl StreamingFuzzyMatcher { /// /// Returns `Some(range)` if a match has been found with the accumulated /// query so far, or `None` if no suitable match exists yet. - pub fn push(&mut self, chunk: &str) -> Option> { + pub fn push(&mut self, chunk: &str, line_hint: Option) -> Option> { // Add the chunk to our incomplete line buffer self.incomplete_line.push_str(chunk); + self.line_hint = line_hint; if let Some((last_pos, _)) = self.incomplete_line.match_indices('\n').next_back() { let complete_part = &self.incomplete_line[..=last_pos]; @@ -55,20 +58,11 @@ impl StreamingFuzzyMatcher { self.incomplete_line.replace_range(..last_pos + 1, ""); - self.best_matches = self.resolve_location_fuzzy(); - - if let Some(first_match) = self.best_matches.first() { - Some(first_match.clone()) - } else { - None - } - } else { - if let Some(first_match) = self.best_matches.first() { - Some(first_match.clone()) - } else { - None - } + self.matches = self.resolve_location_fuzzy(); } + + let best_match = self.select_best_match(); + best_match.or_else(|| self.matches.first().cloned()) } /// Finish processing and return the final best match(es). @@ -80,9 +74,9 @@ impl StreamingFuzzyMatcher { if !self.incomplete_line.is_empty() { self.query_lines.push(self.incomplete_line.clone()); self.incomplete_line.clear(); - self.best_matches = self.resolve_location_fuzzy(); + self.matches = self.resolve_location_fuzzy(); } - self.best_matches.clone() + self.matches.clone() } fn resolve_location_fuzzy(&mut self) -> Vec> { @@ -198,6 +192,43 @@ impl StreamingFuzzyMatcher { valid_matches.into_iter().map(|(_, range)| range).collect() } + + /// Return the best match with starting position close enough to line_hint. + pub fn select_best_match(&self) -> Option> { + // Allow line hint to be off by that many lines. + // Higher values increase probability of applying edits to a wrong place, + // Lower values increase edits failures and overall conversation length. + const LINE_HINT_TOLERANCE: u32 = 200; + + if self.matches.is_empty() { + return None; + } + + if self.matches.len() == 1 { + return self.matches.first().cloned(); + } + + let Some(line_hint) = self.line_hint else { + // Multiple ambiguous matches + return None; + }; + + let mut best_match = None; + let mut best_distance = u32::MAX; + + for range in &self.matches { + let start_point = self.snapshot.offset_to_point(range.start); + let start_line = start_point.row; + let distance = start_line.abs_diff(line_hint); + + if distance <= LINE_HINT_TOLERANCE && distance < best_distance { + best_distance = distance; + best_match = Some(range.clone()); + } + } + + best_match + } } fn fuzzy_eq(left: &str, right: &str) -> bool { @@ -640,6 +671,52 @@ mod tests { ); } + #[gpui::test] + fn test_line_hint_selection() { + let text = indoc! {r#" + fn first_function() { + return 42; + } + + fn second_function() { + return 42; + } + + fn third_function() { + return 42; + } + "#}; + + let buffer = TextBuffer::new(0, BufferId::new(1).unwrap(), text.to_string()); + let snapshot = buffer.snapshot(); + let mut matcher = StreamingFuzzyMatcher::new(snapshot.clone()); + + // Given a query that matches all three functions + let query = "return 42;\n"; + + // Test with line hint pointing to second function (around line 5) + let best_match = matcher.push(query, Some(5)).expect("Failed to match query"); + + let matched_text = snapshot + .text_for_range(best_match.clone()) + .collect::(); + assert!(matched_text.contains("return 42;")); + assert_eq!( + best_match, + 63..77, + "Expected to match `second_function` based on the line hint" + ); + + let mut matcher = StreamingFuzzyMatcher::new(snapshot.clone()); + matcher.push(query, None); + matcher.finish(); + let best_match = matcher.select_best_match(); + assert!( + best_match.is_none(), + "Best match should be None when query cannot be uniquely resolved" + ); + } + #[track_caller] fn assert_location_resolution(text_with_expected_range: &str, query: &str, rng: &mut StdRng) { let (text, expected_ranges) = marked_text_ranges(text_with_expected_range, false); @@ -653,7 +730,7 @@ mod tests { // Push chunks incrementally for chunk in &chunks { - matcher.push(chunk); + matcher.push(chunk, None); } let actual_ranges = matcher.finish(); @@ -706,7 +783,7 @@ mod tests { fn push(finder: &mut StreamingFuzzyMatcher, chunk: &str) -> Option { finder - .push(chunk) + .push(chunk, None) .map(|range| finder.snapshot.text_for_range(range).collect::()) } diff --git a/crates/assistant_tools/src/edit_file_tool.rs b/crates/assistant_tools/src/edit_file_tool.rs index a2a66e2b26..7f21843b71 100644 --- a/crates/assistant_tools/src/edit_file_tool.rs +++ b/crates/assistant_tools/src/edit_file_tool.rs @@ -333,14 +333,18 @@ impl Tool for EditFileTool { ); anyhow::ensure!( ambiguous_ranges.is_empty(), - // TODO: Include ambiguous_ranges, converted to line numbers. - // This would work best if we add `line_hint` parameter - // to edit_file_tool - formatdoc! {" - matches more than one position in the file. Read the - relevant sections of {input_path} again and extend so - that I can perform the requested edits. - "} + { + let line_numbers = ambiguous_ranges + .iter() + .map(|range| range.start.to_string()) + .collect::>() + .join(", "); + formatdoc! {" + matches more than one position in the file (lines: {line_numbers}). Read the + relevant sections of {input_path} again and extend so + that I can perform the requested edits. + "} + } ); Ok(ToolResultOutput { content: ToolResultContent::Text("No edits were made.".into()), diff --git a/crates/assistant_tools/src/templates/edit_file_prompt.hbs b/crates/assistant_tools/src/templates/edit_file_prompt.hbs index 21d8419343..db17c527aa 100644 --- a/crates/assistant_tools/src/templates/edit_file_prompt.hbs +++ b/crates/assistant_tools/src/templates/edit_file_prompt.hbs @@ -3,21 +3,21 @@ You MUST respond with a series of edits to a file, using the following format: ``` - + OLD TEXT 1 HERE NEW TEXT 1 HERE - + OLD TEXT 2 HERE NEW TEXT 2 HERE - + OLD TEXT 3 HERE @@ -33,6 +33,7 @@ NEW TEXT 3 HERE - `` must exactly match existing file content, including indentation - `` must come from the actual file, not an outline - `` cannot be empty +- `line` should be a starting line number for the text to be replaced - Be minimal with replacements: - For unique lines, include only those lines - For non-unique lines, include enough context to identify them @@ -48,7 +49,7 @@ Claude and gpt-4.1 don't really need it. --}} - + struct User { name: String, email: String, @@ -62,7 +63,7 @@ struct User { } - + let user = User { name: String::from("John"), email: String::from("john@example.com"),