edit_file: Let agent specify locations of edit chunks (#32628)

These changes help the agent edit files when `<old_text>` matches more
than one location.

First, the agent can attach an optional line hint to the opening tag, as
in `<old_text line=XX>`. When a hint is provided and multiple matches
exist, we use it to identify the best match.

Second, when matches are ambiguous, we now return a more helpful message
to the agent, listing the line numbers of all possible matches.

Together, these changes should reduce the number of misplaced edits and
agent confusion.

I have ensured the LLM Worker works with these prompt changes.


Release Notes:

- Agent: Improved locating edits
This commit is contained in:
Oleksiy Syvokon 2025-06-14 09:59:30 +03:00 committed by GitHub
parent e8d495806f
commit 5d293ae8ac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 286 additions and 63 deletions

View file

@ -1,4 +1,5 @@
use derive_more::{Add, AddAssign};
use regex::Regex;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
@ -11,8 +12,15 @@ const END_TAGS: [&str; 3] = [OLD_TEXT_END_TAG, NEW_TEXT_END_TAG, EDITS_END_TAG];
#[derive(Debug)]
pub enum EditParserEvent {
OldTextChunk { chunk: String, done: bool },
NewTextChunk { chunk: String, done: bool },
OldTextChunk {
chunk: String,
done: bool,
line_hint: Option<u32>,
},
NewTextChunk {
chunk: String,
done: bool,
},
}
#[derive(
@ -33,7 +41,7 @@ pub struct EditParser {
#[derive(Debug, PartialEq)]
enum EditParserState {
Pending,
WithinOldText { start: bool },
WithinOldText { start: bool, line_hint: Option<u32> },
AfterOldText,
WithinNewText { start: bool },
}
@ -54,14 +62,24 @@ impl EditParser {
loop {
match &mut self.state {
EditParserState::Pending => {
if let Some(start) = self.buffer.find("<old_text>") {
self.buffer.drain(..start + "<old_text>".len());
self.state = EditParserState::WithinOldText { start: true };
if let Some(start) = self.buffer.find("<old_text") {
if let Some(tag_end) = self.buffer[start..].find('>') {
let tag_end = start + tag_end + 1;
let tag = &self.buffer[start..tag_end];
let line_hint = self.parse_line_hint(tag);
self.buffer.drain(..tag_end);
self.state = EditParserState::WithinOldText {
start: true,
line_hint,
};
} else {
break;
}
} else {
break;
}
}
EditParserState::WithinOldText { start } => {
EditParserState::WithinOldText { start, line_hint } => {
if !self.buffer.is_empty() {
if *start && self.buffer.starts_with('\n') {
self.buffer.remove(0);
@ -69,6 +87,7 @@ impl EditParser {
*start = false;
}
let line_hint = *line_hint;
if let Some(tag_range) = self.find_end_tag() {
let mut chunk = self.buffer[..tag_range.start].to_string();
if chunk.ends_with('\n') {
@ -82,12 +101,17 @@ impl EditParser {
self.buffer.drain(..tag_range.end);
self.state = EditParserState::AfterOldText;
edit_events.push(EditParserEvent::OldTextChunk { chunk, done: true });
edit_events.push(EditParserEvent::OldTextChunk {
chunk,
done: true,
line_hint,
});
} else {
if !self.ends_with_tag_prefix() {
edit_events.push(EditParserEvent::OldTextChunk {
chunk: mem::take(&mut self.buffer),
done: false,
line_hint,
});
}
break;
@ -154,6 +178,16 @@ impl EditParser {
end_prefixes.any(|prefix| self.buffer.ends_with(&prefix))
}
/// Extracts the numeric line hint from an opening `<old_text ...>` tag.
///
/// Searches `tag` for `line=` followed by an optional double quote and a run
/// of digits, and returns those digits as a `u32`. Only the leading digits
/// are captured, so a range such as `line="23:50"` yields `Some(23)`.
///
/// Returns `None` when the tag has no such attribute or when the digits do
/// not fit in a `u32`.
fn parse_line_hint(&self, tag: &str) -> Option<u32> {
    // Compiled once on first use and shared by every subsequent call.
    static HINT_PATTERN: std::sync::LazyLock<Regex> =
        std::sync::LazyLock::new(|| Regex::new(r#"line=(?:"?)(\d+)"#).unwrap());

    let captures = HINT_PATTERN.captures(tag)?;
    let digits = captures.get(1)?.as_str();
    digits.parse::<u32>().ok()
}
pub fn finish(self) -> EditParserMetrics {
self.metrics
}
@ -178,6 +212,7 @@ mod tests {
vec![Edit {
old_text: "original".to_string(),
new_text: "updated".to_string(),
line_hint: None,
}]
);
assert_eq!(
@ -209,10 +244,12 @@ mod tests {
Edit {
old_text: "first old".to_string(),
new_text: "first new".to_string(),
line_hint: None,
},
Edit {
old_text: "second old".to_string(),
new_text: "second new".to_string(),
line_hint: None,
},
]
);
@ -244,14 +281,17 @@ mod tests {
Edit {
old_text: "content".to_string(),
new_text: "updated content".to_string(),
line_hint: None,
},
Edit {
old_text: "second item".to_string(),
new_text: "modified second item".to_string(),
line_hint: None,
},
Edit {
old_text: "third case".to_string(),
new_text: "improved third case".to_string(),
line_hint: None,
},
]
);
@ -276,6 +316,7 @@ mod tests {
vec![Edit {
old_text: "code with <tag>nested</tag> elements".to_string(),
new_text: "new <code>content</code>".to_string(),
line_hint: None,
}]
);
assert_eq!(
@ -299,6 +340,7 @@ mod tests {
vec![Edit {
old_text: "".to_string(),
new_text: "".to_string(),
line_hint: None,
}]
);
assert_eq!(
@ -322,6 +364,7 @@ mod tests {
vec![Edit {
old_text: "line1\nline2\nline3".to_string(),
new_text: "line1\nmodified line2\nline3".to_string(),
line_hint: None,
}]
);
assert_eq!(
@ -368,10 +411,12 @@ mod tests {
Edit {
old_text: "a\nb\nc".to_string(),
new_text: "a\nB\nc".to_string(),
line_hint: None,
},
Edit {
old_text: "d\ne\nf".to_string(),
new_text: "D\ne\nF".to_string(),
line_hint: None,
}
]
);
@ -402,6 +447,7 @@ mod tests {
vec![Edit {
old_text: "Lorem".to_string(),
new_text: "LOREM".to_string(),
line_hint: None,
},]
);
assert_eq!(
@ -413,10 +459,77 @@ mod tests {
);
}
/// Checks that the `line` attribute on `<old_text>` is surfaced as the
/// edit's line hint, regardless of how the input is split into chunks.
#[gpui::test(iterations = 100)]
fn test_line_hints(mut rng: StdRng) {
    // Each case: (model output, expected old_text, expected line_hint, expected new_text).
    let cases: [(&str, &str, Option<u32>, &str); 4] = [
        // Line hint is a single quoted line number
        (
            r#"
<old_text line="23">original code</old_text>
<new_text>updated code</new_text>"#,
            "original code",
            Some(23),
            "updated code",
        ),
        // Line hint is a single unquoted line number
        (
            r#"
<old_text line=45>original code</old_text>
<new_text>updated code</new_text>"#,
            "original code",
            Some(45),
            "updated code",
        ),
        // Line hint is a range; only the first number is kept
        (
            r#"
<old_text line="23:50">original code</old_text>
<new_text>updated code</new_text>"#,
            "original code",
            Some(23),
            "updated code",
        ),
        // No line hint
        (
            r#"
<old_text>old</old_text>
<new_text>new</new_text>"#,
            "old",
            None,
            "new",
        ),
    ];

    // A fresh parser per case; the shared rng drives the random chunking.
    for (input, expected_old, expected_hint, expected_new) in cases {
        let mut parser = EditParser::new();
        let edits = parse_random_chunks(input, &mut parser, &mut rng);

        assert_eq!(edits.len(), 1);
        let edit = &edits[0];
        assert_eq!(edit.old_text, expected_old);
        assert_eq!(edit.line_hint, expected_hint);
        assert_eq!(edit.new_text, expected_new);
    }
}
/// Test-side record of a single parsed edit, used to compare parser output
/// against expected values.
#[derive(Default, Debug, PartialEq, Eq)]
struct Edit {
/// Text collected from the `<old_text>` section of the edit.
old_text: String,
/// Text from the `<new_text>` section of the edit.
new_text: String,
/// Line number taken from the `<old_text>` tag's `line` attribute, or
/// `None` when the tag carries no hint.
line_hint: Option<u32>,
}
fn parse_random_chunks(input: &str, parser: &mut EditParser, rng: &mut StdRng) -> Vec<Edit> {
@ -433,10 +546,15 @@ mod tests {
for chunk_ix in chunk_indices {
for event in parser.push(&input[last_ix..chunk_ix]) {
match event {
EditParserEvent::OldTextChunk { chunk, done } => {
EditParserEvent::OldTextChunk {
chunk,
done,
line_hint,
} => {
old_text.as_mut().unwrap().push_str(&chunk);
if done {
pending_edit.old_text = old_text.take().unwrap();
pending_edit.line_hint = line_hint;
new_text = Some(String::new());
}
}