edit_file: Let agent specify locations of edit chunks (#32628)

These changes help the agent edit files when `<old_text>` matches more
than one location.

First, the agent can specify an optional `<old_text line=XX>` parameter.
When this is provided and multiple matches exist, we use this hint to
identify the best match.

Second, when there is ambiguity in matches, we now return the agent a
more helpful message listing the line numbers of all possible matches.

Together, these changes should reduce the number of misplaced edits and
agent confusion.

I have ensured the LLM Worker works with these prompt changes.


Release Notes:

- Agent: Improved locating edits
This commit is contained in:
Oleksiy Syvokon 2025-06-14 09:59:30 +03:00 committed by GitHub
parent e8d495806f
commit 5d293ae8ac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 286 additions and 63 deletions

View file

@ -10,8 +10,9 @@ const DELETION_COST: u32 = 10;
pub struct StreamingFuzzyMatcher {
snapshot: TextBufferSnapshot,
query_lines: Vec<String>,
line_hint: Option<u32>,
incomplete_line: String,
best_matches: Vec<Range<usize>>,
matches: Vec<Range<usize>>,
matrix: SearchMatrix,
}
@ -21,8 +22,9 @@ impl StreamingFuzzyMatcher {
Self {
snapshot,
query_lines: Vec::new(),
line_hint: None,
incomplete_line: String::new(),
best_matches: Vec::new(),
matches: Vec::new(),
matrix: SearchMatrix::new(buffer_line_count + 1),
}
}
@ -41,9 +43,10 @@ impl StreamingFuzzyMatcher {
///
/// Returns `Some(range)` if a match has been found with the accumulated
/// query so far, or `None` if no suitable match exists yet.
pub fn push(&mut self, chunk: &str) -> Option<Range<usize>> {
pub fn push(&mut self, chunk: &str, line_hint: Option<u32>) -> Option<Range<usize>> {
// Add the chunk to our incomplete line buffer
self.incomplete_line.push_str(chunk);
self.line_hint = line_hint;
if let Some((last_pos, _)) = self.incomplete_line.match_indices('\n').next_back() {
let complete_part = &self.incomplete_line[..=last_pos];
@ -55,20 +58,11 @@ impl StreamingFuzzyMatcher {
self.incomplete_line.replace_range(..last_pos + 1, "");
self.best_matches = self.resolve_location_fuzzy();
if let Some(first_match) = self.best_matches.first() {
Some(first_match.clone())
} else {
None
}
} else {
if let Some(first_match) = self.best_matches.first() {
Some(first_match.clone())
} else {
None
}
self.matches = self.resolve_location_fuzzy();
}
let best_match = self.select_best_match();
best_match.or_else(|| self.matches.first().cloned())
}
/// Finish processing and return the final best match(es).
@ -80,9 +74,9 @@ impl StreamingFuzzyMatcher {
if !self.incomplete_line.is_empty() {
self.query_lines.push(self.incomplete_line.clone());
self.incomplete_line.clear();
self.best_matches = self.resolve_location_fuzzy();
self.matches = self.resolve_location_fuzzy();
}
self.best_matches.clone()
self.matches.clone()
}
fn resolve_location_fuzzy(&mut self) -> Vec<Range<usize>> {
@ -198,6 +192,43 @@ impl StreamingFuzzyMatcher {
valid_matches.into_iter().map(|(_, range)| range).collect()
}
/// Return the best match with starting position close enough to line_hint.
pub fn select_best_match(&self) -> Option<Range<usize>> {
// Allow line hint to be off by that many lines.
// Higher values increase probability of applying edits to a wrong place,
// Lower values increase edits failures and overall conversation length.
const LINE_HINT_TOLERANCE: u32 = 200;
if self.matches.is_empty() {
return None;
}
if self.matches.len() == 1 {
return self.matches.first().cloned();
}
let Some(line_hint) = self.line_hint else {
// Multiple ambiguous matches
return None;
};
let mut best_match = None;
let mut best_distance = u32::MAX;
for range in &self.matches {
let start_point = self.snapshot.offset_to_point(range.start);
let start_line = start_point.row;
let distance = start_line.abs_diff(line_hint);
if distance <= LINE_HINT_TOLERANCE && distance < best_distance {
best_distance = distance;
best_match = Some(range.clone());
}
}
best_match
}
}
fn fuzzy_eq(left: &str, right: &str) -> bool {
@ -640,6 +671,52 @@ mod tests {
);
}
#[gpui::test]
fn test_line_hint_selection() {
let text = indoc! {r#"
fn first_function() {
return 42;
}
fn second_function() {
return 42;
}
fn third_function() {
return 42;
}
"#};
let buffer = TextBuffer::new(0, BufferId::new(1).unwrap(), text.to_string());
let snapshot = buffer.snapshot();
let mut matcher = StreamingFuzzyMatcher::new(snapshot.clone());
// Given a query that matches all three functions
let query = "return 42;\n";
// Test with line hint pointing to second function (around line 5)
let best_match = matcher.push(query, Some(5)).expect("Failed to match query");
let matched_text = snapshot
.text_for_range(best_match.clone())
.collect::<String>();
assert!(matched_text.contains("return 42;"));
assert_eq!(
best_match,
63..77,
"Expected to match `second_function` based on the line hint"
);
let mut matcher = StreamingFuzzyMatcher::new(snapshot.clone());
matcher.push(query, None);
matcher.finish();
let best_match = matcher.select_best_match();
assert!(
best_match.is_none(),
"Best match should be None when query cannot be uniquely resolved"
);
}
#[track_caller]
fn assert_location_resolution(text_with_expected_range: &str, query: &str, rng: &mut StdRng) {
let (text, expected_ranges) = marked_text_ranges(text_with_expected_range, false);
@ -653,7 +730,7 @@ mod tests {
// Push chunks incrementally
for chunk in &chunks {
matcher.push(chunk);
matcher.push(chunk, None);
}
let actual_ranges = matcher.finish();
@ -706,7 +783,7 @@ mod tests {
fn push(finder: &mut StreamingFuzzyMatcher, chunk: &str) -> Option<String> {
finder
.push(chunk)
.push(chunk, None)
.map(|range| finder.snapshot.text_for_range(range).collect::<String>())
}