agent: Fix creating files with Gemini (#31439)
This change instructs models to wrap new file content in Markdown fences and introduces a parser for this format. The reasons are: 1. This is the format we put a lot of effort into explaining in the system prompt. 2. Gemini really prefers to do it. 3. It adds an option for a model to think before writing the content The `eval_zode` pass rate for GEmini models goes from 0% to 100%. Other models were already at 100%, this hasn't changed. Release Notes: - N/A
This commit is contained in:
parent
bffde7c6b4
commit
6253b95f82
7 changed files with 356 additions and 45 deletions
218
crates/assistant_tools/src/edit_agent/create_file_parser.rs
Normal file
218
crates/assistant_tools/src/edit_agent/create_file_parser.rs
Normal file
|
@ -0,0 +1,218 @@
|
|||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use smallvec::SmallVec;
|
||||
use util::debug_panic;
|
||||
|
||||
const START_MARKER: Lazy<Regex> = Lazy::new(|| Regex::new(r"\n?```\S*\n").unwrap());
|
||||
const END_MARKER: Lazy<Regex> = Lazy::new(|| Regex::new(r"\n```\s*$").unwrap());
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum CreateFileParserEvent {
|
||||
NewTextChunk { chunk: String },
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct CreateFileParser {
|
||||
state: ParserState,
|
||||
buffer: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
enum ParserState {
|
||||
Pending,
|
||||
WithinText,
|
||||
Finishing,
|
||||
Finished,
|
||||
}
|
||||
|
||||
impl CreateFileParser {
|
||||
pub fn new() -> Self {
|
||||
CreateFileParser {
|
||||
state: ParserState::Pending,
|
||||
buffer: String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push(&mut self, chunk: Option<&str>) -> SmallVec<[CreateFileParserEvent; 1]> {
|
||||
if chunk.is_none() {
|
||||
self.state = ParserState::Finishing;
|
||||
}
|
||||
|
||||
let chunk = chunk.unwrap_or_default();
|
||||
|
||||
self.buffer.push_str(chunk);
|
||||
|
||||
let mut edit_events = SmallVec::new();
|
||||
loop {
|
||||
match &mut self.state {
|
||||
ParserState::Pending => {
|
||||
if let Some(m) = START_MARKER.find(&self.buffer) {
|
||||
self.buffer.drain(..m.end());
|
||||
self.state = ParserState::WithinText;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
ParserState::WithinText => {
|
||||
let text = self.buffer.trim_end_matches(&['`', '\n', ' ']);
|
||||
let text_len = text.len();
|
||||
|
||||
if text_len > 0 {
|
||||
edit_events.push(CreateFileParserEvent::NewTextChunk {
|
||||
chunk: self.buffer.drain(..text_len).collect(),
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
ParserState::Finishing => {
|
||||
if let Some(m) = END_MARKER.find(&self.buffer) {
|
||||
self.buffer.drain(m.start()..);
|
||||
}
|
||||
if !self.buffer.is_empty() {
|
||||
if !self.buffer.ends_with('\n') {
|
||||
self.buffer.push('\n');
|
||||
}
|
||||
edit_events.push(CreateFileParserEvent::NewTextChunk {
|
||||
chunk: self.buffer.drain(..).collect(),
|
||||
});
|
||||
}
|
||||
self.state = ParserState::Finished;
|
||||
break;
|
||||
}
|
||||
ParserState::Finished => debug_panic!("Can't call parser after finishing"),
|
||||
}
|
||||
}
|
||||
edit_events
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use indoc::indoc;
|
||||
use rand::prelude::*;
|
||||
use std::cmp;
|
||||
|
||||
#[gpui::test(iterations = 100)]
|
||||
fn test_happy_path(mut rng: StdRng) {
|
||||
let mut parser = CreateFileParser::new();
|
||||
assert_eq!(
|
||||
parse_random_chunks("```\nHello world\n```", &mut parser, &mut rng),
|
||||
"Hello world".to_string()
|
||||
);
|
||||
}
|
||||
|
||||
#[gpui::test(iterations = 100)]
|
||||
fn test_cut_prefix(mut rng: StdRng) {
|
||||
let mut parser = CreateFileParser::new();
|
||||
assert_eq!(
|
||||
parse_random_chunks(
|
||||
indoc! {"
|
||||
Let me write this file for you:
|
||||
|
||||
```
|
||||
Hello world
|
||||
```
|
||||
|
||||
"},
|
||||
&mut parser,
|
||||
&mut rng
|
||||
),
|
||||
"Hello world".to_string()
|
||||
);
|
||||
}
|
||||
|
||||
#[gpui::test(iterations = 100)]
|
||||
fn test_language_name_on_fences(mut rng: StdRng) {
|
||||
let mut parser = CreateFileParser::new();
|
||||
assert_eq!(
|
||||
parse_random_chunks(
|
||||
indoc! {"
|
||||
```rust
|
||||
Hello world
|
||||
```
|
||||
|
||||
"},
|
||||
&mut parser,
|
||||
&mut rng
|
||||
),
|
||||
"Hello world".to_string()
|
||||
);
|
||||
}
|
||||
|
||||
#[gpui::test(iterations = 100)]
|
||||
fn test_leave_suffix(mut rng: StdRng) {
|
||||
let mut parser = CreateFileParser::new();
|
||||
assert_eq!(
|
||||
parse_random_chunks(
|
||||
indoc! {"
|
||||
Let me write this file for you:
|
||||
|
||||
```
|
||||
Hello world
|
||||
```
|
||||
|
||||
The end
|
||||
"},
|
||||
&mut parser,
|
||||
&mut rng
|
||||
),
|
||||
// This output is marlformed, so we're doing our best effort
|
||||
"Hello world\n```\n\nThe end\n".to_string()
|
||||
);
|
||||
}
|
||||
|
||||
#[gpui::test(iterations = 100)]
|
||||
fn test_inner_fences(mut rng: StdRng) {
|
||||
let mut parser = CreateFileParser::new();
|
||||
assert_eq!(
|
||||
parse_random_chunks(
|
||||
indoc! {"
|
||||
Let me write this file for you:
|
||||
|
||||
```
|
||||
```
|
||||
Hello world
|
||||
```
|
||||
```
|
||||
"},
|
||||
&mut parser,
|
||||
&mut rng
|
||||
),
|
||||
// This output is marlformed, so we're doing our best effort
|
||||
"```\nHello world\n```\n".to_string()
|
||||
);
|
||||
}
|
||||
|
||||
fn parse_random_chunks(input: &str, parser: &mut CreateFileParser, rng: &mut StdRng) -> String {
|
||||
let chunk_count = rng.gen_range(1..=cmp::min(input.len(), 50));
|
||||
let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
|
||||
chunk_indices.sort();
|
||||
chunk_indices.push(input.len());
|
||||
|
||||
let chunk_indices = chunk_indices
|
||||
.into_iter()
|
||||
.map(Some)
|
||||
.chain(vec![None])
|
||||
.collect::<Vec<Option<usize>>>();
|
||||
|
||||
let mut edit = String::default();
|
||||
let mut last_ix = 0;
|
||||
for chunk_ix in chunk_indices {
|
||||
let mut chunk = None;
|
||||
if let Some(chunk_ix) = chunk_ix {
|
||||
chunk = Some(&input[last_ix..chunk_ix]);
|
||||
last_ix = chunk_ix;
|
||||
}
|
||||
|
||||
for event in parser.push(chunk) {
|
||||
match event {
|
||||
CreateFileParserEvent::NewTextChunk { chunk } => {
|
||||
edit.push_str(&chunk);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
edit
|
||||
}
|
||||
}
|
|
@ -163,6 +163,15 @@ fn eval_delete_run_git_blame() {
|
|||
#[test]
|
||||
#[cfg_attr(not(feature = "eval"), ignore)]
|
||||
fn eval_translate_doc_comments() {
|
||||
// Results for 2025-05-22
|
||||
//
|
||||
// Model | Pass rate
|
||||
// ============================================
|
||||
//
|
||||
// claude-3.7-sonnet |
|
||||
// gemini-2.5-pro-preview-03-25 | 1.0
|
||||
// gemini-2.5-flash-preview-04-17 |
|
||||
// gpt-4.1 |
|
||||
let input_file_path = "root/canvas.rs";
|
||||
let input_file_content = include_str!("evals/fixtures/translate_doc_comments/before.rs");
|
||||
let edit_description = "Translate all doc comments to Italian";
|
||||
|
@ -216,6 +225,15 @@ fn eval_translate_doc_comments() {
|
|||
#[test]
|
||||
#[cfg_attr(not(feature = "eval"), ignore)]
|
||||
fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
|
||||
// Results for 2025-05-22
|
||||
//
|
||||
// Model | Pass rate
|
||||
// ============================================
|
||||
//
|
||||
// claude-3.7-sonnet | 0.98
|
||||
// gemini-2.5-pro-preview-03-25 | 0.99
|
||||
// gemini-2.5-flash-preview-04-17 |
|
||||
// gpt-4.1 |
|
||||
let input_file_path = "root/lib.rs";
|
||||
let input_file_content =
|
||||
include_str!("evals/fixtures/use_wasi_sdk_in_compile_parser_to_wasm/before.rs");
|
||||
|
@ -332,6 +350,15 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
|
|||
#[test]
|
||||
#[cfg_attr(not(feature = "eval"), ignore)]
|
||||
fn eval_disable_cursor_blinking() {
|
||||
// Results for 2025-05-22
|
||||
//
|
||||
// Model | Pass rate
|
||||
// ============================================
|
||||
//
|
||||
// claude-3.7-sonnet |
|
||||
// gemini-2.5-pro-preview-03-25 | 1.0
|
||||
// gemini-2.5-flash-preview-04-17 |
|
||||
// gpt-4.1 |
|
||||
let input_file_path = "root/editor.rs";
|
||||
let input_file_content = include_str!("evals/fixtures/disable_cursor_blinking/before.rs");
|
||||
let edit_description = "Comment out the call to `BlinkManager::enable`";
|
||||
|
@ -406,6 +433,15 @@ fn eval_disable_cursor_blinking() {
|
|||
#[test]
|
||||
#[cfg_attr(not(feature = "eval"), ignore)]
|
||||
fn eval_from_pixels_constructor() {
|
||||
// Results for 2025-05-22
|
||||
//
|
||||
// Model | Pass rate
|
||||
// ============================================
|
||||
//
|
||||
// claude-3.7-sonnet |
|
||||
// gemini-2.5-pro-preview-03-25 | 0.94
|
||||
// gemini-2.5-flash-preview-04-17 |
|
||||
// gpt-4.1 |
|
||||
let input_file_path = "root/canvas.rs";
|
||||
let input_file_content = include_str!("evals/fixtures/from_pixels_constructor/before.rs");
|
||||
let edit_description = "Implement from_pixels constructor and add tests.";
|
||||
|
@ -597,11 +633,20 @@ fn eval_from_pixels_constructor() {
|
|||
#[test]
|
||||
#[cfg_attr(not(feature = "eval"), ignore)]
|
||||
fn eval_zode() {
|
||||
// Results for 2025-05-22
|
||||
//
|
||||
// Model | Pass rate
|
||||
// ============================================
|
||||
//
|
||||
// claude-3.7-sonnet | 1.0
|
||||
// gemini-2.5-pro-preview-03-25 | 1.0
|
||||
// gemini-2.5-flash-preview-04-17 | 1.0
|
||||
// gpt-4.1 | 1.0
|
||||
let input_file_path = "root/zode.py";
|
||||
let input_content = None;
|
||||
let edit_description = "Create the main Zode CLI script";
|
||||
eval(
|
||||
200,
|
||||
50,
|
||||
1.,
|
||||
EvalInput::from_conversation(
|
||||
vec![
|
||||
|
@ -694,6 +739,15 @@ fn eval_zode() {
|
|||
#[test]
|
||||
#[cfg_attr(not(feature = "eval"), ignore)]
|
||||
fn eval_add_overwrite_test() {
|
||||
// Results for 2025-05-22
|
||||
//
|
||||
// Model | Pass rate
|
||||
// ============================================
|
||||
//
|
||||
// claude-3.7-sonnet | 0.16
|
||||
// gemini-2.5-pro-preview-03-25 | 0.35
|
||||
// gemini-2.5-flash-preview-04-17 |
|
||||
// gpt-4.1 |
|
||||
let input_file_path = "root/action_log.rs";
|
||||
let input_file_content = include_str!("evals/fixtures/add_overwrite_test/before.rs");
|
||||
let edit_description = "Add a new test for overwriting a file in action_log.rs";
|
||||
|
@ -920,14 +974,11 @@ fn eval_create_empty_file() {
|
|||
// thoughts into it. This issue is not specific to empty files, but
|
||||
// it's easier to reproduce with them.
|
||||
//
|
||||
// Results for 2025-05-21:
|
||||
//
|
||||
// Model | Pass rate
|
||||
// ============================================
|
||||
//
|
||||
// --------------------------------------------
|
||||
// Prompt version: 2025-05-21
|
||||
// --------------------------------------------
|
||||
//
|
||||
// claude-3.7-sonnet | 1.00
|
||||
// gemini-2.5-pro-preview-03-25 | 1.00
|
||||
// gemini-2.5-flash-preview-04-17 | 1.00
|
||||
|
@ -1430,7 +1481,7 @@ impl EditAgentTest {
|
|||
model.provider_id() == selected_model.provider
|
||||
&& model.id() == selected_model.model
|
||||
})
|
||||
.unwrap();
|
||||
.expect("Model not found");
|
||||
let provider = models.provider(&model.provider_id()).unwrap();
|
||||
(provider, model)
|
||||
})?;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue