Tolerate edits ending with </edits> instead of </new_text> (#31453)

Release Notes:

- Improve reliability of the agent when a model outputs malformed edits.
This commit is contained in:
Antonio Scandurra 2025-05-26 21:36:58 +02:00 committed by GitHub
parent 4567360fd9
commit 4acb4730a5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -2,12 +2,12 @@ use derive_more::{Add, AddAssign};
use schemars::JsonSchema; use schemars::JsonSchema;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use smallvec::SmallVec; use smallvec::SmallVec;
use std::{cmp, mem, ops::Range}; use std::{mem, ops::Range};
const OLD_TEXT_END_TAG: &str = "</old_text>"; const OLD_TEXT_END_TAG: &str = "</old_text>";
const NEW_TEXT_END_TAG: &str = "</new_text>"; const NEW_TEXT_END_TAG: &str = "</new_text>";
const END_TAG_LEN: usize = OLD_TEXT_END_TAG.len(); const EDITS_END_TAG: &str = "</edits>";
const _: () = debug_assert!(OLD_TEXT_END_TAG.len() == NEW_TEXT_END_TAG.len()); const END_TAGS: [&str; 3] = [OLD_TEXT_END_TAG, NEW_TEXT_END_TAG, EDITS_END_TAG];
#[derive(Debug)] #[derive(Debug)]
pub enum EditParserEvent { pub enum EditParserEvent {
@ -115,8 +115,9 @@ impl EditParser {
self.state = EditParserState::Pending; self.state = EditParserState::Pending;
edit_events.push(EditParserEvent::NewTextChunk { chunk, done: true }); edit_events.push(EditParserEvent::NewTextChunk { chunk, done: true });
} else { } else {
let mut end_prefixes = (1..END_TAG_LEN) let mut end_prefixes = END_TAGS
.flat_map(|i| [&NEW_TEXT_END_TAG[..i], &OLD_TEXT_END_TAG[..i]]) .iter()
.flat_map(|tag| (1..tag.len()).map(move |i| &tag[..i]))
.chain(["\n"]); .chain(["\n"]);
if end_prefixes.all(|prefix| !self.buffer.ends_with(&prefix)) { if end_prefixes.all(|prefix| !self.buffer.ends_with(&prefix)) {
edit_events.push(EditParserEvent::NewTextChunk { edit_events.push(EditParserEvent::NewTextChunk {
@ -133,16 +134,11 @@ impl EditParser {
} }
fn find_end_tag(&self) -> Option<Range<usize>> { fn find_end_tag(&self) -> Option<Range<usize>> {
let old_text_end_tag_ix = self.buffer.find(OLD_TEXT_END_TAG); let (tag, start_ix) = END_TAGS
let new_text_end_tag_ix = self.buffer.find(NEW_TEXT_END_TAG); .iter()
let start_ix = if let Some((old_text_ix, new_text_ix)) = .flat_map(|tag| Some((tag, self.buffer.find(tag)?)))
old_text_end_tag_ix.zip(new_text_end_tag_ix) .min_by_key(|(_, ix)| *ix)?;
{ Some(start_ix..start_ix + tag.len())
cmp::min(old_text_ix, new_text_ix)
} else {
old_text_end_tag_ix.or(new_text_end_tag_ix)?
};
Some(start_ix..start_ix + END_TAG_LEN)
} }
pub fn finish(self) -> EditParserMetrics { pub fn finish(self) -> EditParserMetrics {
@ -373,6 +369,35 @@ mod tests {
mismatched_tags: 4 mismatched_tags: 4
} }
); );
let mut parser = EditParser::new();
assert_eq!(
parse_random_chunks(
// Reduced from an actual Opus 4 output
indoc! {"
<edits>
<old_text>
Lorem
</old_text>
<new_text>
LOREM
</edits>
"},
&mut parser,
&mut rng
),
vec![Edit {
old_text: "Lorem".to_string(),
new_text: "LOREM".to_string(),
},]
);
assert_eq!(
parser.finish(),
EditParserMetrics {
tags: 2,
mismatched_tags: 1
}
);
} }
#[derive(Default, Debug, PartialEq, Eq)] #[derive(Default, Debug, PartialEq, Eq)]
@ -407,6 +432,9 @@ mod tests {
} }
last_ix = chunk_ix; last_ix = chunk_ix;
} }
assert_eq!(pending_edit, Edit::default(), "unfinished edit");
edits edits
} }
} }