Systematically optimize agentic editing performance (#28961)
Now that we've established a proper eval in-tree, this PR reboots our agent loop back to a set of minimal tools and simpler prompts. We should aim to get this branch feeling subjectively competitive with what's on main, then merge it and build from there. Let's invest in our eval and use it to drive better performance of the agent loop.

How you can help: pick an example, then make the outcome faster or better. It's fine to use your own subjective judgment, as our evaluation criteria likely need tuning as well at this point. Focus first on making the agent work better in your own subjective experience. Let's focus on simple, practical improvements that make this thing work better, then determine how to craft our judgment criteria to lock those improvements in.

Release Notes:

- N/A

---------

Co-authored-by: Max <max@zed.dev>
Co-authored-by: Antonio <antonio@zed.dev>
Co-authored-by: Agus <agus@zed.dev>
Co-authored-by: Richard <richard@zed.dev>
Co-authored-by: Max Brunsfeld <maxbrunsfeld@gmail.com>
Co-authored-by: Antonio Scandurra <me@as-cii.com>
Co-authored-by: Michael Sloan <mgsloan@gmail.com>
parent 8102a16747
commit bab28560ef
68 changed files with 1575 additions and 478 deletions
@@ -1,14 +1,14 @@
use std::sync::Arc;
use crate::{code_symbols_tool::file_outline, schema::json_schema_for};
use anyhow::{Result, anyhow};
use assistant_tool::{ActionLog, Tool, ToolResult};
use gpui::{App, Entity, Task};
use indoc::formatdoc;
use itertools::Itertools;
use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
use project::Project;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use ui::IconName;
use util::markdown::MarkdownString;
@@ -95,11 +95,24 @@ impl Tool for ReadFileTool
        };

        let Some(project_path) = project.read(cx).find_project_path(&input.path, cx) else {
            return Task::ready(Err(anyhow!("Path {} not found in project", &input.path,))).into();
            return Task::ready(Err(anyhow!("Path {} not found in project", &input.path))).into();
        };
        let Some(worktree) = project
            .read(cx)
            .worktree_for_id(project_path.worktree_id, cx)
        else {
            return Task::ready(Err(anyhow!("Worktree not found for project path"))).into();
        };
        let exists = worktree.update(cx, |worktree, cx| {
            worktree.file_exists(&project_path.path, cx)
        });

        let file_path = input.path.clone();
        cx.spawn(async move |cx| {
            if !exists.await? {
                return Err(anyhow!("{} not found", file_path))
            }

            let buffer = cx
                .update(|cx| {
                    project.update(cx, |project, cx| project.open_buffer(project_path, cx))
@@ -141,11 +154,231 @@ impl Tool for ReadFileTool
                } else {
                    // File is too big, so return an error with the outline
                    // and a suggestion to read again with line numbers.
                    let outline = file_outline(project, file_path, action_log, None, 0, cx).await?;
                    let outline = file_outline(project, file_path, action_log, None, cx).await?;
                    Ok(format!("This file was too big to read all at once. Here is an outline of its symbols:\n\n{outline}\n\nUsing the line numbers in this outline, you can call this tool again while specifying the start_line and end_line fields to see the implementations of symbols in the outline."))
                    Ok(formatdoc! {"
                        This file was too big to read all at once. Here is an outline of its symbols:

                        {outline}

                        Using the line numbers in this outline, you can call this tool again while specifying
                        the start_line and end_line fields to see the implementations of symbols in the outline."
                    })
                }
            }
        }).into()
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use gpui::{AppContext, TestAppContext};
    use language::{Language, LanguageConfig, LanguageMatcher};
    use project::{FakeFs, Project};
    use serde_json::json;
    use settings::SettingsStore;
    use util::path;

    #[gpui::test]
    async fn test_read_nonexistent_file(cx: &mut TestAppContext) {
        init_test(cx);

        let fs = FakeFs::new(cx.executor());
        fs.insert_tree("/root", json!({})).await;
        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
        let action_log = cx.new(|_| ActionLog::new(project.clone()));
        let result = cx
            .update(|cx| {
                let input = json!({
                    "path": "root/nonexistent_file.txt"
                });
                Arc::new(ReadFileTool)
                    .run(input, &[], project.clone(), action_log, cx)
                    .output
            })
            .await;
        assert_eq!(
            result.unwrap_err().to_string(),
            "root/nonexistent_file.txt not found"
        );
    }

    #[gpui::test]
    async fn test_read_small_file(cx: &mut TestAppContext) {
        init_test(cx);

        let fs = FakeFs::new(cx.executor());
        fs.insert_tree(
            "/root",
            json!({
                "small_file.txt": "This is a small file content"
            }),
        )
        .await;
        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
        let action_log = cx.new(|_| ActionLog::new(project.clone()));
        let result = cx
            .update(|cx| {
                let input = json!({
                    "path": "root/small_file.txt"
                });
                Arc::new(ReadFileTool)
                    .run(input, &[], project.clone(), action_log, cx)
                    .output
            })
            .await;
        assert_eq!(result.unwrap(), "This is a small file content");
    }

    #[gpui::test]
    async fn test_read_large_file(cx: &mut TestAppContext) {
        init_test(cx);

        let fs = FakeFs::new(cx.executor());
        fs.insert_tree(
            "/root",
            json!({
                "large_file.rs": (0..1000).map(|i| format!("struct Test{} {{\n a: u32,\n b: usize,\n}}", i)).collect::<Vec<_>>().join("\n")
            }),
        )
        .await;
        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
        let language_registry = project.read_with(cx, |project, _| project.languages().clone());
        language_registry.add(Arc::new(rust_lang()));
        let action_log = cx.new(|_| ActionLog::new(project.clone()));

        let result = cx
            .update(|cx| {
                let input = json!({
                    "path": "root/large_file.rs"
                });
                Arc::new(ReadFileTool)
                    .run(input, &[], project.clone(), action_log.clone(), cx)
                    .output
            })
            .await;
        let content = result.unwrap();
        assert_eq!(
            content.lines().skip(2).take(6).collect::<Vec<_>>(),
            vec![
                "struct Test0 [L1-4]",
                " a [L2]",
                " b [L3]",
                "struct Test1 [L5-8]",
                " a [L6]",
                " b [L7]",
            ]
        );

        let result = cx
            .update(|cx| {
                let input = json!({
                    "path": "root/large_file.rs",
                    "offset": 1
                });
                Arc::new(ReadFileTool)
                    .run(input, &[], project.clone(), action_log, cx)
                    .output
            })
            .await;
        let content = result.unwrap();
        let expected_content = (0..1000)
            .flat_map(|i| {
                vec![
                    format!("struct Test{} [L{}-{}]", i, i * 4 + 1, i * 4 + 4),
                    format!(" a [L{}]", i * 4 + 2),
                    format!(" b [L{}]", i * 4 + 3),
                ]
            })
            .collect::<Vec<_>>();
        pretty_assertions::assert_eq!(
            content
                .lines()
                .skip(2)
                .take(expected_content.len())
                .collect::<Vec<_>>(),
            expected_content
        );
    }

    #[gpui::test]
    async fn test_read_file_with_line_range(cx: &mut TestAppContext) {
        init_test(cx);

        let fs = FakeFs::new(cx.executor());
        fs.insert_tree(
            "/root",
            json!({
                "multiline.txt": "Line 1\nLine 2\nLine 3\nLine 4\nLine 5"
            }),
        )
        .await;
        let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
        let action_log = cx.new(|_| ActionLog::new(project.clone()));
        let result = cx
            .update(|cx| {
                let input = json!({
                    "path": "root/multiline.txt",
                    "start_line": 2,
                    "end_line": 4
                });
                Arc::new(ReadFileTool)
                    .run(input, &[], project.clone(), action_log, cx)
                    .output
            })
            .await;
        assert_eq!(result.unwrap(), "Line 2\nLine 3");
    }

    fn init_test(cx: &mut TestAppContext) {
        cx.update(|cx| {
            let settings_store = SettingsStore::test(cx);
            cx.set_global(settings_store);
            language::init(cx);
            Project::init_settings(cx);
        });
    }

    fn rust_lang() -> Language {
        Language::new(
            LanguageConfig {
                name: "Rust".into(),
                matcher: LanguageMatcher {
                    path_suffixes: vec!["rs".to_string()],
                    ..Default::default()
                },
                ..Default::default()
            },
            Some(tree_sitter_rust::LANGUAGE.into()),
        )
        .with_outline_query(
            r#"
            (line_comment) @annotation

            (struct_item
                "struct" @context
                name: (_) @name) @item
            (enum_item
                "enum" @context
                name: (_) @name) @item
            (enum_variant
                name: (_) @name) @item
            (field_declaration
                name: (_) @name) @item
            (impl_item
                "impl" @context
                trait: (_)? @name
                "for"? @context
                type: (_) @name
                body: (_ "{" (_)* "}")) @item
            (function_item
                "fn" @context
                name: (_) @name) @item
            (mod_item
                "mod" @context
                name: (_) @name) @item
            "#,
        )
        .unwrap()
    }
}
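For context on the two-step flow these tests exercise: when a file is too large, the tool answers with an outline whose entries carry [Lstart-end] ranges and tells the model to call the tool again with start_line and end_line. A minimal sketch of those two inputs, reusing the json! shape from the tests above (the path here is just the test fixture; a real call would use whatever project-relative path the agent is reading):

use serde_json::json;

// First call: only "path" is supplied. For a large file, the tool replies with
// an outline ("struct Test0 [L1-4]", ...) instead of the full file contents.
let first_input = json!({ "path": "root/large_file.rs" });

// Follow-up call: narrow the read to the region the outline pointed at, using
// the same start_line/end_line fields exercised in test_read_file_with_line_range.
let follow_up_input = json!({
    "path": "root/large_file.rs",
    "start_line": 1,
    "end_line": 4
});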