From 6253b95f82da7bfacbebdb74701062d17367d094 Mon Sep 17 00:00:00 2001 From: Oleksiy Syvokon Date: Mon, 26 May 2025 19:36:21 +0300 Subject: [PATCH] agent: Fix creating files with Gemini (#31439) This change instructs models to wrap new file content in Markdown fences and introduces a parser for this format. The reasons are: 1. This is the format we put a lot of effort into explaining in the system prompt. 2. Gemini really prefers to do it. 3. It adds an option for a model to think before writing the content The `eval_zode` pass rate for GEmini models goes from 0% to 100%. Other models were already at 100%, this hasn't changed. Release Notes: - N/A --- Cargo.lock | 1 + crates/assistant_tools/Cargo.toml | 1 + crates/assistant_tools/src/edit_agent.rs | 109 ++++++--- .../src/edit_agent/create_file_parser.rs | 218 ++++++++++++++++++ .../assistant_tools/src/edit_agent/evals.rs | 63 ++++- .../src/templates/create_file_prompt.hbs | 6 +- .../src/templates/edit_file_prompt.hbs | 3 +- 7 files changed, 356 insertions(+), 45 deletions(-) create mode 100644 crates/assistant_tools/src/edit_agent/create_file_parser.rs diff --git a/Cargo.lock b/Cargo.lock index a786b25c08..21b0db58bc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -683,6 +683,7 @@ dependencies = [ "language_models", "log", "markdown", + "once_cell", "open", "paths", "portable-pty", diff --git a/crates/assistant_tools/Cargo.toml b/crates/assistant_tools/Cargo.toml index 6d6baf2d54..9ee664afd8 100644 --- a/crates/assistant_tools/Cargo.toml +++ b/crates/assistant_tools/Cargo.toml @@ -62,6 +62,7 @@ which.workspace = true workspace-hack.workspace = true workspace.workspace = true zed_llm_client.workspace = true +once_cell = "1.21.3" [dev-dependencies] client = { workspace = true, features = ["test-support"] } diff --git a/crates/assistant_tools/src/edit_agent.rs b/crates/assistant_tools/src/edit_agent.rs index 4925f2c02e..d8e0ddfd3d 100644 --- a/crates/assistant_tools/src/edit_agent.rs +++ b/crates/assistant_tools/src/edit_agent.rs @@ -1,3 +1,4 @@ +mod create_file_parser; mod edit_parser; #[cfg(test)] mod evals; @@ -6,6 +7,7 @@ use crate::{Template, Templates}; use aho_corasick::AhoCorasick; use anyhow::Result; use assistant_tool::ActionLog; +use create_file_parser::{CreateFileParser, CreateFileParserEvent}; use edit_parser::{EditParser, EditParserEvent, EditParserMetrics}; use futures::{ Stream, StreamExt, @@ -123,16 +125,16 @@ impl EditAgent { mpsc::UnboundedReceiver, ) { let (output_events_tx, output_events_rx) = mpsc::unbounded(); + let (parse_task, parse_rx) = Self::parse_create_file_chunks(edit_chunks, cx); let this = self.clone(); let task = cx.spawn(async move |cx| { this.action_log .update(cx, |log, cx| log.buffer_created(buffer.clone(), cx))?; - let output = this - .overwrite_with_chunks_internal(buffer, edit_chunks, output_events_tx, cx) - .await; + this.overwrite_with_chunks_internal(buffer, parse_rx, output_events_tx, cx) + .await?; this.project .update(cx, |project, cx| project.set_agent_location(None, cx))?; - output + parse_task.await }); (task, output_events_rx) } @@ -140,10 +142,10 @@ impl EditAgent { async fn overwrite_with_chunks_internal( &self, buffer: Entity, - edit_chunks: impl 'static + Send + Stream>, + mut parse_rx: UnboundedReceiver>, output_events_tx: mpsc::UnboundedSender, cx: &mut AsyncApp, - ) -> Result { + ) -> Result<()> { cx.update(|cx| { buffer.update(cx, |buffer, cx| buffer.set_text("", cx)); self.action_log.update(cx, |log, cx| { @@ -163,34 +165,31 @@ impl EditAgent { .ok(); })?; - let mut raw_edits = String::new(); - pin_mut!(edit_chunks); - while let Some(chunk) = edit_chunks.next().await { - let chunk = chunk?; - raw_edits.push_str(&chunk); - cx.update(|cx| { - buffer.update(cx, |buffer, cx| buffer.append(chunk, cx)); - self.action_log - .update(cx, |log, cx| log.buffer_edited(buffer.clone(), cx)); - self.project.update(cx, |project, cx| { - project.set_agent_location( - Some(AgentLocation { - buffer: buffer.downgrade(), - position: language::Anchor::MAX, - }), - cx, - ) - }); - })?; - output_events_tx - .unbounded_send(EditAgentOutputEvent::Edited) - .ok(); + while let Some(event) = parse_rx.next().await { + match event? { + CreateFileParserEvent::NewTextChunk { chunk } => { + cx.update(|cx| { + buffer.update(cx, |buffer, cx| buffer.append(chunk, cx)); + self.action_log + .update(cx, |log, cx| log.buffer_edited(buffer.clone(), cx)); + self.project.update(cx, |project, cx| { + project.set_agent_location( + Some(AgentLocation { + buffer: buffer.downgrade(), + position: language::Anchor::MAX, + }), + cx, + ) + }); + })?; + output_events_tx + .unbounded_send(EditAgentOutputEvent::Edited) + .ok(); + } + } } - Ok(EditAgentOutput { - raw_edits, - parser_metrics: EditParserMetrics::default(), - }) + Ok(()) } pub fn edit( @@ -435,6 +434,44 @@ impl EditAgent { (output, rx) } + fn parse_create_file_chunks( + chunks: impl 'static + Send + Stream>, + cx: &mut AsyncApp, + ) -> ( + Task>, + UnboundedReceiver>, + ) { + let (tx, rx) = mpsc::unbounded(); + let output = cx.background_spawn(async move { + pin_mut!(chunks); + + let mut parser = CreateFileParser::new(); + let mut raw_edits = String::new(); + while let Some(chunk) = chunks.next().await { + match chunk { + Ok(chunk) => { + raw_edits.push_str(&chunk); + for event in parser.push(Some(&chunk)) { + tx.unbounded_send(Ok(event))?; + } + } + Err(error) => { + tx.unbounded_send(Err(error.into()))?; + } + } + } + // Send final events with None to indicate completion + for event in parser.push(None) { + tx.unbounded_send(Ok(event))?; + } + Ok(EditAgentOutput { + raw_edits, + parser_metrics: EditParserMetrics::default(), + }) + }); + (output, rx) + } + fn reindent_new_text_chunks( delta: IndentDelta, mut stream: impl Unpin + Stream>, @@ -1138,7 +1175,7 @@ mod tests { }) ); - chunks_tx.unbounded_send("jkl\n").unwrap(); + chunks_tx.unbounded_send("```\njkl\n").unwrap(); cx.run_until_parked(); assert_eq!( drain_events(&mut events), @@ -1146,7 +1183,7 @@ mod tests { ); assert_eq!( buffer.read_with(cx, |buffer, _| buffer.snapshot().text()), - "jkl\n" + "jkl" ); assert_eq!( project.read_with(cx, |project, _| project.agent_location()), @@ -1164,7 +1201,7 @@ mod tests { ); assert_eq!( buffer.read_with(cx, |buffer, _| buffer.snapshot().text()), - "jkl\nmno\n" + "jkl\nmno" ); assert_eq!( project.read_with(cx, |project, _| project.agent_location()), @@ -1174,7 +1211,7 @@ mod tests { }) ); - chunks_tx.unbounded_send("pqr").unwrap(); + chunks_tx.unbounded_send("pqr\n```").unwrap(); cx.run_until_parked(); assert_eq!( drain_events(&mut events), diff --git a/crates/assistant_tools/src/edit_agent/create_file_parser.rs b/crates/assistant_tools/src/edit_agent/create_file_parser.rs new file mode 100644 index 0000000000..911746e922 --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/create_file_parser.rs @@ -0,0 +1,218 @@ +use once_cell::sync::Lazy; +use regex::Regex; +use smallvec::SmallVec; +use util::debug_panic; + +const START_MARKER: Lazy = Lazy::new(|| Regex::new(r"\n?```\S*\n").unwrap()); +const END_MARKER: Lazy = Lazy::new(|| Regex::new(r"\n```\s*$").unwrap()); + +#[derive(Debug)] +pub enum CreateFileParserEvent { + NewTextChunk { chunk: String }, +} + +#[derive(Debug)] +pub struct CreateFileParser { + state: ParserState, + buffer: String, +} + +#[derive(Debug, PartialEq)] +enum ParserState { + Pending, + WithinText, + Finishing, + Finished, +} + +impl CreateFileParser { + pub fn new() -> Self { + CreateFileParser { + state: ParserState::Pending, + buffer: String::new(), + } + } + + pub fn push(&mut self, chunk: Option<&str>) -> SmallVec<[CreateFileParserEvent; 1]> { + if chunk.is_none() { + self.state = ParserState::Finishing; + } + + let chunk = chunk.unwrap_or_default(); + + self.buffer.push_str(chunk); + + let mut edit_events = SmallVec::new(); + loop { + match &mut self.state { + ParserState::Pending => { + if let Some(m) = START_MARKER.find(&self.buffer) { + self.buffer.drain(..m.end()); + self.state = ParserState::WithinText; + } else { + break; + } + } + ParserState::WithinText => { + let text = self.buffer.trim_end_matches(&['`', '\n', ' ']); + let text_len = text.len(); + + if text_len > 0 { + edit_events.push(CreateFileParserEvent::NewTextChunk { + chunk: self.buffer.drain(..text_len).collect(), + }); + } + break; + } + ParserState::Finishing => { + if let Some(m) = END_MARKER.find(&self.buffer) { + self.buffer.drain(m.start()..); + } + if !self.buffer.is_empty() { + if !self.buffer.ends_with('\n') { + self.buffer.push('\n'); + } + edit_events.push(CreateFileParserEvent::NewTextChunk { + chunk: self.buffer.drain(..).collect(), + }); + } + self.state = ParserState::Finished; + break; + } + ParserState::Finished => debug_panic!("Can't call parser after finishing"), + } + } + edit_events + } +} + +#[cfg(test)] +mod tests { + use super::*; + use indoc::indoc; + use rand::prelude::*; + use std::cmp; + + #[gpui::test(iterations = 100)] + fn test_happy_path(mut rng: StdRng) { + let mut parser = CreateFileParser::new(); + assert_eq!( + parse_random_chunks("```\nHello world\n```", &mut parser, &mut rng), + "Hello world".to_string() + ); + } + + #[gpui::test(iterations = 100)] + fn test_cut_prefix(mut rng: StdRng) { + let mut parser = CreateFileParser::new(); + assert_eq!( + parse_random_chunks( + indoc! {" + Let me write this file for you: + + ``` + Hello world + ``` + + "}, + &mut parser, + &mut rng + ), + "Hello world".to_string() + ); + } + + #[gpui::test(iterations = 100)] + fn test_language_name_on_fences(mut rng: StdRng) { + let mut parser = CreateFileParser::new(); + assert_eq!( + parse_random_chunks( + indoc! {" + ```rust + Hello world + ``` + + "}, + &mut parser, + &mut rng + ), + "Hello world".to_string() + ); + } + + #[gpui::test(iterations = 100)] + fn test_leave_suffix(mut rng: StdRng) { + let mut parser = CreateFileParser::new(); + assert_eq!( + parse_random_chunks( + indoc! {" + Let me write this file for you: + + ``` + Hello world + ``` + + The end + "}, + &mut parser, + &mut rng + ), + // This output is marlformed, so we're doing our best effort + "Hello world\n```\n\nThe end\n".to_string() + ); + } + + #[gpui::test(iterations = 100)] + fn test_inner_fences(mut rng: StdRng) { + let mut parser = CreateFileParser::new(); + assert_eq!( + parse_random_chunks( + indoc! {" + Let me write this file for you: + + ``` + ``` + Hello world + ``` + ``` + "}, + &mut parser, + &mut rng + ), + // This output is marlformed, so we're doing our best effort + "```\nHello world\n```\n".to_string() + ); + } + + fn parse_random_chunks(input: &str, parser: &mut CreateFileParser, rng: &mut StdRng) -> String { + let chunk_count = rng.gen_range(1..=cmp::min(input.len(), 50)); + let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count); + chunk_indices.sort(); + chunk_indices.push(input.len()); + + let chunk_indices = chunk_indices + .into_iter() + .map(Some) + .chain(vec![None]) + .collect::>>(); + + let mut edit = String::default(); + let mut last_ix = 0; + for chunk_ix in chunk_indices { + let mut chunk = None; + if let Some(chunk_ix) = chunk_ix { + chunk = Some(&input[last_ix..chunk_ix]); + last_ix = chunk_ix; + } + + for event in parser.push(chunk) { + match event { + CreateFileParserEvent::NewTextChunk { chunk } => { + edit.push_str(&chunk); + } + } + } + } + edit + } +} diff --git a/crates/assistant_tools/src/edit_agent/evals.rs b/crates/assistant_tools/src/edit_agent/evals.rs index bfae6afddc..5856dd83db 100644 --- a/crates/assistant_tools/src/edit_agent/evals.rs +++ b/crates/assistant_tools/src/edit_agent/evals.rs @@ -163,6 +163,15 @@ fn eval_delete_run_git_blame() { #[test] #[cfg_attr(not(feature = "eval"), ignore)] fn eval_translate_doc_comments() { + // Results for 2025-05-22 + // + // Model | Pass rate + // ============================================ + // + // claude-3.7-sonnet | + // gemini-2.5-pro-preview-03-25 | 1.0 + // gemini-2.5-flash-preview-04-17 | + // gpt-4.1 | let input_file_path = "root/canvas.rs"; let input_file_content = include_str!("evals/fixtures/translate_doc_comments/before.rs"); let edit_description = "Translate all doc comments to Italian"; @@ -216,6 +225,15 @@ fn eval_translate_doc_comments() { #[test] #[cfg_attr(not(feature = "eval"), ignore)] fn eval_use_wasi_sdk_in_compile_parser_to_wasm() { + // Results for 2025-05-22 + // + // Model | Pass rate + // ============================================ + // + // claude-3.7-sonnet | 0.98 + // gemini-2.5-pro-preview-03-25 | 0.99 + // gemini-2.5-flash-preview-04-17 | + // gpt-4.1 | let input_file_path = "root/lib.rs"; let input_file_content = include_str!("evals/fixtures/use_wasi_sdk_in_compile_parser_to_wasm/before.rs"); @@ -332,6 +350,15 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() { #[test] #[cfg_attr(not(feature = "eval"), ignore)] fn eval_disable_cursor_blinking() { + // Results for 2025-05-22 + // + // Model | Pass rate + // ============================================ + // + // claude-3.7-sonnet | + // gemini-2.5-pro-preview-03-25 | 1.0 + // gemini-2.5-flash-preview-04-17 | + // gpt-4.1 | let input_file_path = "root/editor.rs"; let input_file_content = include_str!("evals/fixtures/disable_cursor_blinking/before.rs"); let edit_description = "Comment out the call to `BlinkManager::enable`"; @@ -406,6 +433,15 @@ fn eval_disable_cursor_blinking() { #[test] #[cfg_attr(not(feature = "eval"), ignore)] fn eval_from_pixels_constructor() { + // Results for 2025-05-22 + // + // Model | Pass rate + // ============================================ + // + // claude-3.7-sonnet | + // gemini-2.5-pro-preview-03-25 | 0.94 + // gemini-2.5-flash-preview-04-17 | + // gpt-4.1 | let input_file_path = "root/canvas.rs"; let input_file_content = include_str!("evals/fixtures/from_pixels_constructor/before.rs"); let edit_description = "Implement from_pixels constructor and add tests."; @@ -597,11 +633,20 @@ fn eval_from_pixels_constructor() { #[test] #[cfg_attr(not(feature = "eval"), ignore)] fn eval_zode() { + // Results for 2025-05-22 + // + // Model | Pass rate + // ============================================ + // + // claude-3.7-sonnet | 1.0 + // gemini-2.5-pro-preview-03-25 | 1.0 + // gemini-2.5-flash-preview-04-17 | 1.0 + // gpt-4.1 | 1.0 let input_file_path = "root/zode.py"; let input_content = None; let edit_description = "Create the main Zode CLI script"; eval( - 200, + 50, 1., EvalInput::from_conversation( vec![ @@ -694,6 +739,15 @@ fn eval_zode() { #[test] #[cfg_attr(not(feature = "eval"), ignore)] fn eval_add_overwrite_test() { + // Results for 2025-05-22 + // + // Model | Pass rate + // ============================================ + // + // claude-3.7-sonnet | 0.16 + // gemini-2.5-pro-preview-03-25 | 0.35 + // gemini-2.5-flash-preview-04-17 | + // gpt-4.1 | let input_file_path = "root/action_log.rs"; let input_file_content = include_str!("evals/fixtures/add_overwrite_test/before.rs"); let edit_description = "Add a new test for overwriting a file in action_log.rs"; @@ -920,14 +974,11 @@ fn eval_create_empty_file() { // thoughts into it. This issue is not specific to empty files, but // it's easier to reproduce with them. // + // Results for 2025-05-21: // // Model | Pass rate // ============================================ // - // -------------------------------------------- - // Prompt version: 2025-05-21 - // -------------------------------------------- - // // claude-3.7-sonnet | 1.00 // gemini-2.5-pro-preview-03-25 | 1.00 // gemini-2.5-flash-preview-04-17 | 1.00 @@ -1430,7 +1481,7 @@ impl EditAgentTest { model.provider_id() == selected_model.provider && model.id() == selected_model.model }) - .unwrap(); + .expect("Model not found"); let provider = models.provider(&model.provider_id()).unwrap(); (provider, model) })?; diff --git a/crates/assistant_tools/src/templates/create_file_prompt.hbs b/crates/assistant_tools/src/templates/create_file_prompt.hbs index ffefee04ea..39f83447fa 100644 --- a/crates/assistant_tools/src/templates/create_file_prompt.hbs +++ b/crates/assistant_tools/src/templates/create_file_prompt.hbs @@ -1,8 +1,10 @@ You are an expert engineer and your task is to write a new file from scratch. -You MUST respond directly with the file's content, without explanations, additional text or triple backticks. +You MUST respond with the file's content wrapped in triple backticks (```). +The backticks should be on their own line. The text you output will be saved verbatim as the content of the file. -Tool calls have been disabled. You MUST start your response directly with the file's new content. +Tool calls have been disabled. +Start your response with ```. {{path}} diff --git a/crates/assistant_tools/src/templates/edit_file_prompt.hbs b/crates/assistant_tools/src/templates/edit_file_prompt.hbs index 3308c9e4f8..21d8419343 100644 --- a/crates/assistant_tools/src/templates/edit_file_prompt.hbs +++ b/crates/assistant_tools/src/templates/edit_file_prompt.hbs @@ -43,7 +43,8 @@ NEW TEXT 3 HERE - Always close all tags properly -{{!-- This example is important for Gemini 2.5 --}} +{{!-- The following example adds almost 10% pass rate for Gemini 2.5. +Claude and gpt-4.1 don't really need it. --}}