agent: Fix creating files with Gemini (#31439)

This change instructs models to wrap new file content in Markdown fences
and introduces a parser for this format. The reasons are:

1. This is the format we put a lot of effort into explaining in the
system prompt.
2. Gemini really prefers to do it.
3. It adds an option for a model to think before writing the content

The `eval_zode` pass rate for GEmini models goes from 0% to 100%. Other
models were already at 100%, this hasn't changed.


Release Notes:

- N/A
This commit is contained in:
Oleksiy Syvokon 2025-05-26 19:36:21 +03:00 committed by GitHub
parent bffde7c6b4
commit 6253b95f82
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 356 additions and 45 deletions

1
Cargo.lock generated
View file

@ -683,6 +683,7 @@ dependencies = [
"language_models",
"log",
"markdown",
"once_cell",
"open",
"paths",
"portable-pty",

View file

@ -62,6 +62,7 @@ which.workspace = true
workspace-hack.workspace = true
workspace.workspace = true
zed_llm_client.workspace = true
once_cell = "1.21.3"
[dev-dependencies]
client = { workspace = true, features = ["test-support"] }

View file

@ -1,3 +1,4 @@
mod create_file_parser;
mod edit_parser;
#[cfg(test)]
mod evals;
@ -6,6 +7,7 @@ use crate::{Template, Templates};
use aho_corasick::AhoCorasick;
use anyhow::Result;
use assistant_tool::ActionLog;
use create_file_parser::{CreateFileParser, CreateFileParserEvent};
use edit_parser::{EditParser, EditParserEvent, EditParserMetrics};
use futures::{
Stream, StreamExt,
@ -123,16 +125,16 @@ impl EditAgent {
mpsc::UnboundedReceiver<EditAgentOutputEvent>,
) {
let (output_events_tx, output_events_rx) = mpsc::unbounded();
let (parse_task, parse_rx) = Self::parse_create_file_chunks(edit_chunks, cx);
let this = self.clone();
let task = cx.spawn(async move |cx| {
this.action_log
.update(cx, |log, cx| log.buffer_created(buffer.clone(), cx))?;
let output = this
.overwrite_with_chunks_internal(buffer, edit_chunks, output_events_tx, cx)
.await;
this.overwrite_with_chunks_internal(buffer, parse_rx, output_events_tx, cx)
.await?;
this.project
.update(cx, |project, cx| project.set_agent_location(None, cx))?;
output
parse_task.await
});
(task, output_events_rx)
}
@ -140,10 +142,10 @@ impl EditAgent {
async fn overwrite_with_chunks_internal(
&self,
buffer: Entity<Buffer>,
edit_chunks: impl 'static + Send + Stream<Item = Result<String, LanguageModelCompletionError>>,
mut parse_rx: UnboundedReceiver<Result<CreateFileParserEvent>>,
output_events_tx: mpsc::UnboundedSender<EditAgentOutputEvent>,
cx: &mut AsyncApp,
) -> Result<EditAgentOutput> {
) -> Result<()> {
cx.update(|cx| {
buffer.update(cx, |buffer, cx| buffer.set_text("", cx));
self.action_log.update(cx, |log, cx| {
@ -163,34 +165,31 @@ impl EditAgent {
.ok();
})?;
let mut raw_edits = String::new();
pin_mut!(edit_chunks);
while let Some(chunk) = edit_chunks.next().await {
let chunk = chunk?;
raw_edits.push_str(&chunk);
cx.update(|cx| {
buffer.update(cx, |buffer, cx| buffer.append(chunk, cx));
self.action_log
.update(cx, |log, cx| log.buffer_edited(buffer.clone(), cx));
self.project.update(cx, |project, cx| {
project.set_agent_location(
Some(AgentLocation {
buffer: buffer.downgrade(),
position: language::Anchor::MAX,
}),
cx,
)
});
})?;
output_events_tx
.unbounded_send(EditAgentOutputEvent::Edited)
.ok();
while let Some(event) = parse_rx.next().await {
match event? {
CreateFileParserEvent::NewTextChunk { chunk } => {
cx.update(|cx| {
buffer.update(cx, |buffer, cx| buffer.append(chunk, cx));
self.action_log
.update(cx, |log, cx| log.buffer_edited(buffer.clone(), cx));
self.project.update(cx, |project, cx| {
project.set_agent_location(
Some(AgentLocation {
buffer: buffer.downgrade(),
position: language::Anchor::MAX,
}),
cx,
)
});
})?;
output_events_tx
.unbounded_send(EditAgentOutputEvent::Edited)
.ok();
}
}
}
Ok(EditAgentOutput {
raw_edits,
parser_metrics: EditParserMetrics::default(),
})
Ok(())
}
pub fn edit(
@ -435,6 +434,44 @@ impl EditAgent {
(output, rx)
}
fn parse_create_file_chunks(
chunks: impl 'static + Send + Stream<Item = Result<String, LanguageModelCompletionError>>,
cx: &mut AsyncApp,
) -> (
Task<Result<EditAgentOutput>>,
UnboundedReceiver<Result<CreateFileParserEvent>>,
) {
let (tx, rx) = mpsc::unbounded();
let output = cx.background_spawn(async move {
pin_mut!(chunks);
let mut parser = CreateFileParser::new();
let mut raw_edits = String::new();
while let Some(chunk) = chunks.next().await {
match chunk {
Ok(chunk) => {
raw_edits.push_str(&chunk);
for event in parser.push(Some(&chunk)) {
tx.unbounded_send(Ok(event))?;
}
}
Err(error) => {
tx.unbounded_send(Err(error.into()))?;
}
}
}
// Send final events with None to indicate completion
for event in parser.push(None) {
tx.unbounded_send(Ok(event))?;
}
Ok(EditAgentOutput {
raw_edits,
parser_metrics: EditParserMetrics::default(),
})
});
(output, rx)
}
fn reindent_new_text_chunks(
delta: IndentDelta,
mut stream: impl Unpin + Stream<Item = Result<EditParserEvent>>,
@ -1138,7 +1175,7 @@ mod tests {
})
);
chunks_tx.unbounded_send("jkl\n").unwrap();
chunks_tx.unbounded_send("```\njkl\n").unwrap();
cx.run_until_parked();
assert_eq!(
drain_events(&mut events),
@ -1146,7 +1183,7 @@ mod tests {
);
assert_eq!(
buffer.read_with(cx, |buffer, _| buffer.snapshot().text()),
"jkl\n"
"jkl"
);
assert_eq!(
project.read_with(cx, |project, _| project.agent_location()),
@ -1164,7 +1201,7 @@ mod tests {
);
assert_eq!(
buffer.read_with(cx, |buffer, _| buffer.snapshot().text()),
"jkl\nmno\n"
"jkl\nmno"
);
assert_eq!(
project.read_with(cx, |project, _| project.agent_location()),
@ -1174,7 +1211,7 @@ mod tests {
})
);
chunks_tx.unbounded_send("pqr").unwrap();
chunks_tx.unbounded_send("pqr\n```").unwrap();
cx.run_until_parked();
assert_eq!(
drain_events(&mut events),

View file

@ -0,0 +1,218 @@
use once_cell::sync::Lazy;
use regex::Regex;
use smallvec::SmallVec;
use util::debug_panic;
const START_MARKER: Lazy<Regex> = Lazy::new(|| Regex::new(r"\n?```\S*\n").unwrap());
const END_MARKER: Lazy<Regex> = Lazy::new(|| Regex::new(r"\n```\s*$").unwrap());
#[derive(Debug)]
pub enum CreateFileParserEvent {
NewTextChunk { chunk: String },
}
#[derive(Debug)]
pub struct CreateFileParser {
state: ParserState,
buffer: String,
}
#[derive(Debug, PartialEq)]
enum ParserState {
Pending,
WithinText,
Finishing,
Finished,
}
impl CreateFileParser {
pub fn new() -> Self {
CreateFileParser {
state: ParserState::Pending,
buffer: String::new(),
}
}
pub fn push(&mut self, chunk: Option<&str>) -> SmallVec<[CreateFileParserEvent; 1]> {
if chunk.is_none() {
self.state = ParserState::Finishing;
}
let chunk = chunk.unwrap_or_default();
self.buffer.push_str(chunk);
let mut edit_events = SmallVec::new();
loop {
match &mut self.state {
ParserState::Pending => {
if let Some(m) = START_MARKER.find(&self.buffer) {
self.buffer.drain(..m.end());
self.state = ParserState::WithinText;
} else {
break;
}
}
ParserState::WithinText => {
let text = self.buffer.trim_end_matches(&['`', '\n', ' ']);
let text_len = text.len();
if text_len > 0 {
edit_events.push(CreateFileParserEvent::NewTextChunk {
chunk: self.buffer.drain(..text_len).collect(),
});
}
break;
}
ParserState::Finishing => {
if let Some(m) = END_MARKER.find(&self.buffer) {
self.buffer.drain(m.start()..);
}
if !self.buffer.is_empty() {
if !self.buffer.ends_with('\n') {
self.buffer.push('\n');
}
edit_events.push(CreateFileParserEvent::NewTextChunk {
chunk: self.buffer.drain(..).collect(),
});
}
self.state = ParserState::Finished;
break;
}
ParserState::Finished => debug_panic!("Can't call parser after finishing"),
}
}
edit_events
}
}
#[cfg(test)]
mod tests {
use super::*;
use indoc::indoc;
use rand::prelude::*;
use std::cmp;
#[gpui::test(iterations = 100)]
fn test_happy_path(mut rng: StdRng) {
let mut parser = CreateFileParser::new();
assert_eq!(
parse_random_chunks("```\nHello world\n```", &mut parser, &mut rng),
"Hello world".to_string()
);
}
#[gpui::test(iterations = 100)]
fn test_cut_prefix(mut rng: StdRng) {
let mut parser = CreateFileParser::new();
assert_eq!(
parse_random_chunks(
indoc! {"
Let me write this file for you:
```
Hello world
```
"},
&mut parser,
&mut rng
),
"Hello world".to_string()
);
}
#[gpui::test(iterations = 100)]
fn test_language_name_on_fences(mut rng: StdRng) {
let mut parser = CreateFileParser::new();
assert_eq!(
parse_random_chunks(
indoc! {"
```rust
Hello world
```
"},
&mut parser,
&mut rng
),
"Hello world".to_string()
);
}
#[gpui::test(iterations = 100)]
fn test_leave_suffix(mut rng: StdRng) {
let mut parser = CreateFileParser::new();
assert_eq!(
parse_random_chunks(
indoc! {"
Let me write this file for you:
```
Hello world
```
The end
"},
&mut parser,
&mut rng
),
// This output is marlformed, so we're doing our best effort
"Hello world\n```\n\nThe end\n".to_string()
);
}
#[gpui::test(iterations = 100)]
fn test_inner_fences(mut rng: StdRng) {
let mut parser = CreateFileParser::new();
assert_eq!(
parse_random_chunks(
indoc! {"
Let me write this file for you:
```
```
Hello world
```
```
"},
&mut parser,
&mut rng
),
// This output is marlformed, so we're doing our best effort
"```\nHello world\n```\n".to_string()
);
}
fn parse_random_chunks(input: &str, parser: &mut CreateFileParser, rng: &mut StdRng) -> String {
let chunk_count = rng.gen_range(1..=cmp::min(input.len(), 50));
let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
chunk_indices.sort();
chunk_indices.push(input.len());
let chunk_indices = chunk_indices
.into_iter()
.map(Some)
.chain(vec![None])
.collect::<Vec<Option<usize>>>();
let mut edit = String::default();
let mut last_ix = 0;
for chunk_ix in chunk_indices {
let mut chunk = None;
if let Some(chunk_ix) = chunk_ix {
chunk = Some(&input[last_ix..chunk_ix]);
last_ix = chunk_ix;
}
for event in parser.push(chunk) {
match event {
CreateFileParserEvent::NewTextChunk { chunk } => {
edit.push_str(&chunk);
}
}
}
}
edit
}
}

View file

@ -163,6 +163,15 @@ fn eval_delete_run_git_blame() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_translate_doc_comments() {
// Results for 2025-05-22
//
// Model | Pass rate
// ============================================
//
// claude-3.7-sonnet |
// gemini-2.5-pro-preview-03-25 | 1.0
// gemini-2.5-flash-preview-04-17 |
// gpt-4.1 |
let input_file_path = "root/canvas.rs";
let input_file_content = include_str!("evals/fixtures/translate_doc_comments/before.rs");
let edit_description = "Translate all doc comments to Italian";
@ -216,6 +225,15 @@ fn eval_translate_doc_comments() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
// Results for 2025-05-22
//
// Model | Pass rate
// ============================================
//
// claude-3.7-sonnet | 0.98
// gemini-2.5-pro-preview-03-25 | 0.99
// gemini-2.5-flash-preview-04-17 |
// gpt-4.1 |
let input_file_path = "root/lib.rs";
let input_file_content =
include_str!("evals/fixtures/use_wasi_sdk_in_compile_parser_to_wasm/before.rs");
@ -332,6 +350,15 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_disable_cursor_blinking() {
// Results for 2025-05-22
//
// Model | Pass rate
// ============================================
//
// claude-3.7-sonnet |
// gemini-2.5-pro-preview-03-25 | 1.0
// gemini-2.5-flash-preview-04-17 |
// gpt-4.1 |
let input_file_path = "root/editor.rs";
let input_file_content = include_str!("evals/fixtures/disable_cursor_blinking/before.rs");
let edit_description = "Comment out the call to `BlinkManager::enable`";
@ -406,6 +433,15 @@ fn eval_disable_cursor_blinking() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_from_pixels_constructor() {
// Results for 2025-05-22
//
// Model | Pass rate
// ============================================
//
// claude-3.7-sonnet |
// gemini-2.5-pro-preview-03-25 | 0.94
// gemini-2.5-flash-preview-04-17 |
// gpt-4.1 |
let input_file_path = "root/canvas.rs";
let input_file_content = include_str!("evals/fixtures/from_pixels_constructor/before.rs");
let edit_description = "Implement from_pixels constructor and add tests.";
@ -597,11 +633,20 @@ fn eval_from_pixels_constructor() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_zode() {
// Results for 2025-05-22
//
// Model | Pass rate
// ============================================
//
// claude-3.7-sonnet | 1.0
// gemini-2.5-pro-preview-03-25 | 1.0
// gemini-2.5-flash-preview-04-17 | 1.0
// gpt-4.1 | 1.0
let input_file_path = "root/zode.py";
let input_content = None;
let edit_description = "Create the main Zode CLI script";
eval(
200,
50,
1.,
EvalInput::from_conversation(
vec![
@ -694,6 +739,15 @@ fn eval_zode() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_add_overwrite_test() {
// Results for 2025-05-22
//
// Model | Pass rate
// ============================================
//
// claude-3.7-sonnet | 0.16
// gemini-2.5-pro-preview-03-25 | 0.35
// gemini-2.5-flash-preview-04-17 |
// gpt-4.1 |
let input_file_path = "root/action_log.rs";
let input_file_content = include_str!("evals/fixtures/add_overwrite_test/before.rs");
let edit_description = "Add a new test for overwriting a file in action_log.rs";
@ -920,14 +974,11 @@ fn eval_create_empty_file() {
// thoughts into it. This issue is not specific to empty files, but
// it's easier to reproduce with them.
//
// Results for 2025-05-21:
//
// Model | Pass rate
// ============================================
//
// --------------------------------------------
// Prompt version: 2025-05-21
// --------------------------------------------
//
// claude-3.7-sonnet | 1.00
// gemini-2.5-pro-preview-03-25 | 1.00
// gemini-2.5-flash-preview-04-17 | 1.00
@ -1430,7 +1481,7 @@ impl EditAgentTest {
model.provider_id() == selected_model.provider
&& model.id() == selected_model.model
})
.unwrap();
.expect("Model not found");
let provider = models.provider(&model.provider_id()).unwrap();
(provider, model)
})?;

View file

@ -1,8 +1,10 @@
You are an expert engineer and your task is to write a new file from scratch.
You MUST respond directly with the file's content, without explanations, additional text or triple backticks.
You MUST respond with the file's content wrapped in triple backticks (```).
The backticks should be on their own line.
The text you output will be saved verbatim as the content of the file.
Tool calls have been disabled. You MUST start your response directly with the file's new content.
Tool calls have been disabled.
Start your response with ```.
<file_path>
{{path}}

View file

@ -43,7 +43,8 @@ NEW TEXT 3 HERE
- Always close all tags properly
{{!-- This example is important for Gemini 2.5 --}}
{{!-- The following example adds almost 10% pass rate for Gemini 2.5.
Claude and gpt-4.1 don't really need it. --}}
<example>
<edits>