Reuse conversation cache when streaming edits (#30245)
Release Notes: - Improved latency when the agent applies edits.
This commit is contained in:
parent
032022e37b
commit
9f6809a28d
50 changed files with 847 additions and 21557 deletions
|
@ -2,14 +2,16 @@ use super::*;
|
|||
use crate::{ReadFileToolInput, edit_file_tool::EditFileToolInput, grep_tool::GrepToolInput};
|
||||
use Role::*;
|
||||
use anyhow::anyhow;
|
||||
use assistant_tool::ToolRegistry;
|
||||
use client::{Client, UserStore};
|
||||
use collections::HashMap;
|
||||
use fs::FakeFs;
|
||||
use futures::{FutureExt, future::LocalBoxFuture};
|
||||
use gpui::{AppContext, TestAppContext};
|
||||
use indoc::indoc;
|
||||
use indoc::{formatdoc, indoc};
|
||||
use language_model::{
|
||||
LanguageModelRegistry, LanguageModelToolResult, LanguageModelToolUse, LanguageModelToolUseId,
|
||||
LanguageModelRegistry, LanguageModelRequestTool, LanguageModelToolResult, LanguageModelToolUse,
|
||||
LanguageModelToolUseId,
|
||||
};
|
||||
use project::Project;
|
||||
use rand::prelude::*;
|
||||
|
@ -37,7 +39,7 @@ fn eval_extract_handle_command_output() {
|
|||
conversation: vec![
|
||||
message(
|
||||
User,
|
||||
[text(indoc! {"
|
||||
[text(formatdoc! {"
|
||||
Read the `{input_file_path}` file and extract a method in
|
||||
the final stanza of `run_git_blame` to deal with command failures,
|
||||
call it `handle_command_output` and take the std::process::Output as the only parameter.
|
||||
|
@ -96,7 +98,7 @@ fn eval_delete_run_git_blame() {
|
|||
conversation: vec![
|
||||
message(
|
||||
User,
|
||||
[text(indoc! {"
|
||||
[text(formatdoc! {"
|
||||
Read the `{input_file_path}` file and delete `run_git_blame`. Just that
|
||||
one function, not its usages.
|
||||
"})],
|
||||
|
@ -138,6 +140,61 @@ fn eval_delete_run_git_blame() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(not(feature = "eval"), ignore)]
|
||||
fn eval_translate_doc_comments() {
|
||||
let input_file_path = "root/canvas.rs";
|
||||
let input_file_content = include_str!("evals/fixtures/translate_doc_comments/before.rs");
|
||||
let edit_description = "Translate all doc comments to Italian";
|
||||
eval(
|
||||
200,
|
||||
1.,
|
||||
EvalInput {
|
||||
conversation: vec![
|
||||
message(
|
||||
User,
|
||||
[text(formatdoc! {"
|
||||
Read the {input_file_path} file and edit it (without overwriting it),
|
||||
translating all the doc comments to italian.
|
||||
"})],
|
||||
),
|
||||
message(
|
||||
Assistant,
|
||||
[tool_use(
|
||||
"tool_1",
|
||||
"read_file",
|
||||
ReadFileToolInput {
|
||||
path: input_file_path.into(),
|
||||
start_line: None,
|
||||
end_line: None,
|
||||
},
|
||||
)],
|
||||
),
|
||||
message(
|
||||
User,
|
||||
[tool_result("tool_1", "read_file", input_file_content)],
|
||||
),
|
||||
message(
|
||||
Assistant,
|
||||
[tool_use(
|
||||
"tool_2",
|
||||
"edit_file",
|
||||
EditFileToolInput {
|
||||
display_description: edit_description.into(),
|
||||
path: input_file_path.into(),
|
||||
create_or_overwrite: false,
|
||||
},
|
||||
)],
|
||||
),
|
||||
],
|
||||
input_path: input_file_path.into(),
|
||||
input_content: Some(input_file_content.into()),
|
||||
edit_description: edit_description.into(),
|
||||
assertion: EvalAssertion::judge_diff("Doc comments were translated to Italian"),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(not(feature = "eval"), ignore)]
|
||||
fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
|
||||
|
@ -152,7 +209,7 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
|
|||
conversation: vec![
|
||||
message(
|
||||
User,
|
||||
[text(indoc! {"
|
||||
[text(formatdoc! {"
|
||||
Read the `{input_file_path}` file and change `compile_parser_to_wasm` to use `wasi-sdk` instead of emscripten.
|
||||
Use `ureq` to download the SDK for the current platform and architecture.
|
||||
Extract the archive into a sibling of `lib` inside the `tree-sitter` directory in the cache_dir.
|
||||
|
@ -160,7 +217,7 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
|
|||
that's inside of the archive.
|
||||
Don't re-download the SDK if that executable already exists.
|
||||
|
||||
Use these clang flags: -fPIC -shared -Os -Wl,--export=tree_sitter_{language_name}
|
||||
Use these clang flags: -fPIC -shared -Os -Wl,--export=tree_sitter_{{language_name}}
|
||||
|
||||
Here are the available wasi-sdk assets:
|
||||
- wasi-sdk-25.0-x86_64-macos.tar.gz
|
||||
|
@ -261,11 +318,10 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
|
|||
fn eval_disable_cursor_blinking() {
|
||||
let input_file_path = "root/editor.rs";
|
||||
let input_file_content = include_str!("evals/fixtures/disable_cursor_blinking/before.rs");
|
||||
let output_file_content = include_str!("evals/fixtures/disable_cursor_blinking/after.rs");
|
||||
let edit_description = "Comment out the call to `BlinkManager::enable`";
|
||||
eval(
|
||||
200,
|
||||
0.6, // TODO: make this eval better
|
||||
0.95,
|
||||
EvalInput {
|
||||
conversation: vec![
|
||||
message(User, [text("Let's research how to cursor blinking works.")]),
|
||||
|
@ -324,7 +380,11 @@ fn eval_disable_cursor_blinking() {
|
|||
input_path: input_file_path.into(),
|
||||
input_content: Some(input_file_content.into()),
|
||||
edit_description: edit_description.into(),
|
||||
assertion: EvalAssertion::assert_eq(output_file_content),
|
||||
assertion: EvalAssertion::judge_diff(indoc! {"
|
||||
- Calls to BlinkManager in `observe_window_activation` were commented out
|
||||
- The call to `blink_manager.enable` above the call to show_cursor_names was commented out
|
||||
- All the edits have valid indentation
|
||||
"}),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
@ -1031,7 +1091,8 @@ impl EvalAssertion {
|
|||
|
||||
fn eval(iterations: usize, expected_pass_ratio: f32, mut eval: EvalInput) {
|
||||
let mut evaluated_count = 0;
|
||||
report_progress(evaluated_count, iterations);
|
||||
let mut failed_count = 0;
|
||||
report_progress(evaluated_count, failed_count, iterations);
|
||||
|
||||
let (tx, rx) = mpsc::channel();
|
||||
|
||||
|
@ -1048,7 +1109,6 @@ fn eval(iterations: usize, expected_pass_ratio: f32, mut eval: EvalInput) {
|
|||
}
|
||||
drop(tx);
|
||||
|
||||
let mut failed_count = 0;
|
||||
let mut failed_evals = HashMap::default();
|
||||
let mut errored_evals = HashMap::default();
|
||||
let mut eval_outputs = Vec::new();
|
||||
|
@ -1073,7 +1133,7 @@ fn eval(iterations: usize, expected_pass_ratio: f32, mut eval: EvalInput) {
|
|||
}
|
||||
|
||||
evaluated_count += 1;
|
||||
report_progress(evaluated_count, iterations);
|
||||
report_progress(evaluated_count, failed_count, iterations);
|
||||
}
|
||||
|
||||
let actual_pass_ratio = (iterations - failed_count) as f32 / iterations as f32;
|
||||
|
@ -1144,8 +1204,19 @@ impl Display for EvalOutput {
|
|||
}
|
||||
}
|
||||
|
||||
fn report_progress(evaluated_count: usize, iterations: usize) {
|
||||
print!("\r\x1b[KEvaluated {}/{}", evaluated_count, iterations);
|
||||
fn report_progress(evaluated_count: usize, failed_count: usize, iterations: usize) {
|
||||
let passed_count = evaluated_count - failed_count;
|
||||
let passed_ratio = if evaluated_count == 0 {
|
||||
0.0
|
||||
} else {
|
||||
passed_count as f64 / evaluated_count as f64
|
||||
};
|
||||
print!(
|
||||
"\r\x1b[KEvaluated {}/{} ({:.2}%)",
|
||||
evaluated_count,
|
||||
iterations,
|
||||
passed_ratio * 100.0
|
||||
);
|
||||
std::io::stdout().flush().unwrap();
|
||||
}
|
||||
|
||||
|
@ -1158,25 +1229,30 @@ struct EditAgentTest {
|
|||
impl EditAgentTest {
|
||||
async fn new(cx: &mut TestAppContext) -> Self {
|
||||
cx.executor().allow_parking();
|
||||
cx.update(settings::init);
|
||||
cx.update(Project::init_settings);
|
||||
cx.update(language::init);
|
||||
cx.update(gpui_tokio::init);
|
||||
cx.update(client::init_settings);
|
||||
|
||||
let fs = FakeFs::new(cx.executor().clone());
|
||||
cx.update(|cx| {
|
||||
settings::init(cx);
|
||||
gpui_tokio::init(cx);
|
||||
let http_client = Arc::new(ReqwestClient::user_agent("agent tests").unwrap());
|
||||
cx.set_http_client(http_client);
|
||||
|
||||
client::init_settings(cx);
|
||||
let client = Client::production(cx);
|
||||
let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
|
||||
|
||||
settings::init(cx);
|
||||
Project::init_settings(cx);
|
||||
language::init(cx);
|
||||
language_model::init(client.clone(), cx);
|
||||
language_models::init(user_store.clone(), client.clone(), fs.clone(), cx);
|
||||
crate::init(client.http_client(), cx);
|
||||
});
|
||||
|
||||
fs.insert_tree("/root", json!({})).await;
|
||||
let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
|
||||
let (agent_model, judge_model) = cx
|
||||
.update(|cx| {
|
||||
let http_client = ReqwestClient::user_agent("agent tests").unwrap();
|
||||
cx.set_http_client(Arc::new(http_client));
|
||||
|
||||
let client = Client::production(cx);
|
||||
let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
|
||||
language_model::init(client.clone(), cx);
|
||||
language_models::init(user_store.clone(), client.clone(), fs.clone(), cx);
|
||||
|
||||
cx.spawn(async move |cx| {
|
||||
let agent_model =
|
||||
Self::load_model("anthropic", "claude-3-7-sonnet-latest", cx).await;
|
||||
|
@ -1225,12 +1301,32 @@ impl EditAgentTest {
|
|||
.update(cx, |project, cx| project.open_buffer(path, cx))
|
||||
.await
|
||||
.unwrap();
|
||||
let conversation = LanguageModelRequest {
|
||||
messages: eval.conversation,
|
||||
tools: cx.update(|cx| {
|
||||
ToolRegistry::default_global(cx)
|
||||
.tools()
|
||||
.into_iter()
|
||||
.filter_map(|tool| {
|
||||
let input_schema = tool
|
||||
.input_schema(self.agent.model.tool_input_format())
|
||||
.ok()?;
|
||||
Some(LanguageModelRequestTool {
|
||||
name: tool.name(),
|
||||
description: tool.description(),
|
||||
input_schema,
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}),
|
||||
..Default::default()
|
||||
};
|
||||
let edit_output = if let Some(input_content) = eval.input_content.as_deref() {
|
||||
buffer.update(cx, |buffer, cx| buffer.set_text(input_content, cx));
|
||||
let (edit_output, _) = self.agent.edit(
|
||||
buffer.clone(),
|
||||
eval.edit_description,
|
||||
eval.conversation,
|
||||
&conversation,
|
||||
&mut cx.to_async(),
|
||||
);
|
||||
edit_output.await?
|
||||
|
@ -1238,7 +1334,7 @@ impl EditAgentTest {
|
|||
let (edit_output, _) = self.agent.overwrite(
|
||||
buffer.clone(),
|
||||
eval.edit_description,
|
||||
eval.conversation,
|
||||
&conversation,
|
||||
&mut cx.to_async(),
|
||||
);
|
||||
edit_output.await?
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue