Introduce a new StreamingEditFileTool (#29733)

This pull request introduces a new tool for streaming edits. The short-term goal is for this tool to replace the existing `EditFileTool`, but we want to get this out the door as soon as possible so that we can start testing it.

`StreamingEditFileTool` is mutually exclusive with `EditFileTool`. It will be enabled by default for anyone who has the `agent-stream-edits` feature flag, as well as for people who set `assistant.stream_edits` to `true` in their settings.

### Implementation

Streaming is achieved by requesting a completion while the `edit_file` tool gets called. We invoke the model by taking the existing conversation with the agent and appending a prompt specifically tailored for editing. In that prompt, we ask the model to produce a stream of `<old_text>`/`<new_text>` tags. As the model streams text in, we incrementally parse it and start editing as soon as we can.

### Evals

Note that, as part of this pull request, I also defined some new evals that I used to drive the behavior of the recursive LLM call. To run them, use this command:

```bash
cargo test --package=assistant_tools --features eval -- eval_extract_handle_command_output
```

Or comment out the `#[cfg_attr(not(feature = "eval"), ignore)]` attribute.

I recommend running them one at a time, because right now we don't really have a way of orchestrating all of these evals. I think we should invest in that effort once the new agent panel goes live.

Release Notes:

- N/A

---------

Co-authored-by: Nathan Sobo <nathan@zed.dev>
Co-authored-by: Bennet Bo Fenner <bennetbo@gmx.de>
Co-authored-by: Oleksiy Syvokon <oleksiy.syvokon@gmail.com>
2025-05-01 17:37:43 +02:00 · 2025-05-01 17:37:43 +02:00 · f891dfb358
commit f891dfb358
parent e3a2d52472
32 changed files with 49077 additions and 20 deletions
--- a/crates/eval/Cargo.toml
+++ b/crates/eval/Cargo.toml
@ -48,6 +48,7 @@ markdown.workspace = true
 node_runtime.workspace = true
 pathdiff.workspace = true
 paths.workspace = true
+pretty_assertions.workspace = true
 project.workspace = true
 prompt_store.workspace = true
 regex.workspace = true
--- a/crates/eval/runner_settings.json
+++ b/crates/eval/runner_settings.json
@ -1,6 +1,7 @@
 {
  "assistant": {
    "always_allow_tool_actions": true,
+    "stream_edits": true,
    "version": "2"
  }
 }
--- a/crates/eval/src/eval.rs
+++ b/crates/eval/src/eval.rs
@ -420,12 +420,12 @@ pub fn init(cx: &mut App) -> Arc<AgentAppState> {
    language_model::init(client.clone(), cx);
    language_models::init(user_store.clone(), client.clone(), fs.clone(), cx);
    languages::init(languages.clone(), node_runtime.clone(), cx);
-    assistant_tools::init(client.http_client(), cx);
    context_server::init(cx);
    prompt_store::init(cx);
    let stdout_is_a_pty = false;
    let prompt_builder = PromptBuilder::load(fs.clone(), stdout_is_a_pty, cx);
    agent::init(fs.clone(), client.clone(), prompt_builder.clone(), cx);
+    assistant_tools::init(client.http_client(), cx);

    SettingsStore::update_global(cx, |store, cx| {
        store.set_user_settings(include_str!("../runner_settings.json"), cx)
--- a/crates/eval/src/example.rs
+++ b/crates/eval/src/example.rs
@ -160,7 +160,11 @@ impl ExampleContext {
            if left == right {
                Ok(())
            } else {
-                println!("{}{:#?} != {:#?}", self.log_prefix, left, right);
+                println!(
+                    "{}{}",
+                    self.log_prefix,
+                    pretty_assertions::Comparison::new(&left, &right)
+                );
                Err(anyhow::Error::from(FailedAssertion(message.clone())))
            },
            message,
@ -334,8 +338,8 @@ impl ExampleContext {
    }

    pub fn edits(&self) -> HashMap<Arc<Path>, FileEdits> {
-        self.app
-            .read_entity(&self.agent_thread, |thread, cx| {
+        self.agent_thread
+            .read_with(&self.app, |thread, cx| {
                let action_log = thread.action_log().read(cx);
                HashMap::from_iter(action_log.changed_buffers(cx).into_iter().map(
                    |(buffer, diff)| {
@ -503,16 +507,16 @@ impl ToolUse {
    }
 }

-#[derive(Debug)]
+#[derive(Debug, Eq, PartialEq)]
 pub struct FileEdits {
-    hunks: Vec<FileEditHunk>,
+    pub hunks: Vec<FileEditHunk>,
 }

-#[derive(Debug)]
-struct FileEditHunk {
-    base_text: String,
-    text: String,
-    status: DiffHunkStatus,
+#[derive(Debug, Eq, PartialEq)]
+pub struct FileEditHunk {
+    pub base_text: String,
+    pub text: String,
+    pub status: DiffHunkStatus,
 }

 impl FileEdits {