From 4acb4730a5a75f90bd701630e05788c3c76b9f6c Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Mon, 26 May 2025 21:36:58 +0200 Subject: [PATCH] Tolerate edits ending with `` instead of `` (#31453) Release Notes: - Improve reliability of the agent when a model outputs malformed edits. --- .../src/edit_agent/edit_parser.rs | 58 ++++++++++++++----- 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/crates/assistant_tools/src/edit_agent/edit_parser.rs b/crates/assistant_tools/src/edit_agent/edit_parser.rs index d3f6d15514..ac6a40f9c9 100644 --- a/crates/assistant_tools/src/edit_agent/edit_parser.rs +++ b/crates/assistant_tools/src/edit_agent/edit_parser.rs @@ -2,12 +2,12 @@ use derive_more::{Add, AddAssign}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use smallvec::SmallVec; -use std::{cmp, mem, ops::Range}; +use std::{mem, ops::Range}; const OLD_TEXT_END_TAG: &str = ""; const NEW_TEXT_END_TAG: &str = ""; -const END_TAG_LEN: usize = OLD_TEXT_END_TAG.len(); -const _: () = debug_assert!(OLD_TEXT_END_TAG.len() == NEW_TEXT_END_TAG.len()); +const EDITS_END_TAG: &str = ""; +const END_TAGS: [&str; 3] = [OLD_TEXT_END_TAG, NEW_TEXT_END_TAG, EDITS_END_TAG]; #[derive(Debug)] pub enum EditParserEvent { @@ -115,8 +115,9 @@ impl EditParser { self.state = EditParserState::Pending; edit_events.push(EditParserEvent::NewTextChunk { chunk, done: true }); } else { - let mut end_prefixes = (1..END_TAG_LEN) - .flat_map(|i| [&NEW_TEXT_END_TAG[..i], &OLD_TEXT_END_TAG[..i]]) + let mut end_prefixes = END_TAGS + .iter() + .flat_map(|tag| (1..tag.len()).map(move |i| &tag[..i])) .chain(["\n"]); if end_prefixes.all(|prefix| !self.buffer.ends_with(&prefix)) { edit_events.push(EditParserEvent::NewTextChunk { @@ -133,16 +134,11 @@ impl EditParser { } fn find_end_tag(&self) -> Option> { - let old_text_end_tag_ix = self.buffer.find(OLD_TEXT_END_TAG); - let new_text_end_tag_ix = self.buffer.find(NEW_TEXT_END_TAG); - let start_ix = if let Some((old_text_ix, new_text_ix)) = - old_text_end_tag_ix.zip(new_text_end_tag_ix) - { - cmp::min(old_text_ix, new_text_ix) - } else { - old_text_end_tag_ix.or(new_text_end_tag_ix)? - }; - Some(start_ix..start_ix + END_TAG_LEN) + let (tag, start_ix) = END_TAGS + .iter() + .flat_map(|tag| Some((tag, self.buffer.find(tag)?))) + .min_by_key(|(_, ix)| *ix)?; + Some(start_ix..start_ix + tag.len()) } pub fn finish(self) -> EditParserMetrics { @@ -373,6 +369,35 @@ mod tests { mismatched_tags: 4 } ); + + let mut parser = EditParser::new(); + assert_eq!( + parse_random_chunks( + // Reduced from an actual Opus 4 output + indoc! {" + + + Lorem + + + LOREM + + "}, + &mut parser, + &mut rng + ), + vec![Edit { + old_text: "Lorem".to_string(), + new_text: "LOREM".to_string(), + },] + ); + assert_eq!( + parser.finish(), + EditParserMetrics { + tags: 2, + mismatched_tags: 1 + } + ); } #[derive(Default, Debug, PartialEq, Eq)] @@ -407,6 +432,9 @@ mod tests { } last_ix = chunk_ix; } + + assert_eq!(pending_edit, Edit::default(), "unfinished edit"); + edits } }