WIP: Add agent2 crate from test-driven-agent branch

2025-08-01 22:32:36 -06:00 · 2025-08-01 22:32:36 -06:00 · afb5c4147a
commit afb5c4147a
parent 8890f590b1
14 changed files with 1075 additions and 0 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -148,6 +148,39 @@ dependencies = [
 "serde_json",
 ]

+[[package]]
+name = "agent2"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "assistant_tool",
+ "assistant_tools",
+ "chrono",
+ "client",
+ "collections",
+ "ctor",
+ "env_logger 0.11.8",
+ "fs",
+ "futures 0.3.31",
+ "gpui",
+ "gpui_tokio",
+ "handlebars 4.5.0",
+ "language_model",
+ "language_models",
+ "parking_lot",
+ "project",
+ "reqwest_client",
+ "rust-embed",
+ "schemars",
+ "serde",
+ "serde_json",
+ "settings",
+ "smol",
+ "thiserror 2.0.12",
+ "util",
+ "worktree",
+]
+
 [[package]]
 name = "agent_servers"
 version = "0.1.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -4,6 +4,7 @@ members = [
    "crates/acp_thread",
    "crates/activity_indicator",
    "crates/agent",
+    "crates/agent2",
    "crates/agent_servers",
    "crates/agent_settings",
    "crates/agent_ui",
@ -228,6 +229,7 @@ edition = "2024"

 acp_thread = { path = "crates/acp_thread" }
 agent = { path = "crates/agent" }
+agent2 = { path = "crates/agent2" }
 activity_indicator = { path = "crates/activity_indicator" }
 agent_ui = { path = "crates/agent_ui" }
 agent_settings = { path = "crates/agent_settings" }
--- a/crates/agent2/Cargo.toml
+++ b/crates/agent2/Cargo.toml
@ -0,0 +1,49 @@
+[package]
+name = "agent2"
+version = "0.1.0"
+edition.workspace = true # inherit the workspace-wide edition instead of pinning an older one
+license = "GPL-3.0-or-later"
+publish = false
+
+[lib]
+path = "src/agent2.rs"
+
+[lints]
+workspace = true
+
+[dependencies]
+anyhow.workspace = true
+assistant_tool.workspace = true
+assistant_tools.workspace = true
+chrono.workspace = true
+collections.workspace = true
+fs.workspace = true
+futures.workspace = true
+gpui.workspace = true
+handlebars = { workspace = true, features = ["rust-embed"] }
+language_model.workspace = true
+language_models.workspace = true
+parking_lot.workspace = true
+project.workspace = true
+rust-embed.workspace = true
+schemars.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+settings.workspace = true
+smol.workspace = true
+thiserror.workspace = true
+util.workspace = true
+worktree.workspace = true
+
+[dev-dependencies]
+ctor.workspace = true
+client = { workspace = true, "features" = ["test-support"] }
+env_logger.workspace = true
+fs = { workspace = true, "features" = ["test-support"] }
+gpui = { workspace = true, "features" = ["test-support"] }
+gpui_tokio.workspace = true
+language_model = { workspace = true, "features" = ["test-support"] }
+project = { workspace = true, "features" = ["test-support"] }
+reqwest_client.workspace = true
+settings = { workspace = true, "features" = ["test-support"] }
+worktree = { workspace = true, "features" = ["test-support"] }
--- a/crates/agent2/LICENSE-GPL
+++ b/crates/agent2/LICENSE-GPL
@ -0,0 +1 @@
+../../LICENSE-GPL
--- a/crates/agent2/src/agent2.rs
+++ b/crates/agent2/src/agent2.rs
@ -0,0 +1,6 @@
+mod prompts;
+mod templates;
+mod thread;
+mod tools;
+
+pub use thread::*;
--- a/crates/agent2/src/prompts.rs
+++ b/crates/agent2/src/prompts.rs
@ -0,0 +1,29 @@
+use crate::{
+    templates::{BaseTemplate, Template, Templates, WorktreeData},
+    thread::Prompt,
+};
+use anyhow::Result;
+use gpui::{App, Entity};
+use project::Project;
+
+struct BasePrompt {
+    project: Entity<Project>,
+}
+
+impl Prompt for BasePrompt {
+    fn render(&self, templates: &Templates, cx: &App) -> Result<String> {
+        BaseTemplate {
+            os: std::env::consts::OS.to_string(),
+            shell: util::get_system_shell(),
+            worktrees: self
+                .project
+                .read(cx)
+                .worktrees(cx)
+                .map(|worktree| WorktreeData {
+                    root_name: worktree.read(cx).root_name().to_string(),
+                })
+                .collect(),
+        }
+        .render(templates)
+    }
+}
--- a/crates/agent2/src/templates.rs
+++ b/crates/agent2/src/templates.rs
@ -0,0 +1,57 @@
+use std::sync::Arc;
+
+use anyhow::Result;
+use handlebars::Handlebars;
+use rust_embed::RustEmbed;
+use serde::Serialize;
+
+#[derive(RustEmbed)]
+#[folder = "src/templates"]
+#[include = "*.hbs"]
+struct Assets;
+
+pub struct Templates(Handlebars<'static>);
+
+impl Templates {
+    pub fn new() -> Arc<Self> {
+        let mut handlebars = Handlebars::new();
+        handlebars.register_embed_templates::<Assets>().unwrap();
+        Arc::new(Self(handlebars))
+    }
+}
+
+pub trait Template: Sized {
+    const TEMPLATE_NAME: &'static str;
+
+    fn render(&self, templates: &Templates) -> Result<String>
+    where
+        Self: Serialize + Sized,
+    {
+        Ok(templates.0.render(Self::TEMPLATE_NAME, self)?)
+    }
+}
+
+#[derive(Serialize)]
+pub struct BaseTemplate {
+    pub os: String,
+    pub shell: String,
+    pub worktrees: Vec<WorktreeData>,
+}
+
+impl Template for BaseTemplate {
+    const TEMPLATE_NAME: &'static str = "base.hbs";
+}
+
+#[derive(Serialize)]
+pub struct WorktreeData {
+    pub root_name: String,
+}
+
+#[derive(Serialize)]
+pub struct GlobTemplate {
+    pub project_roots: String,
+}
+
+impl Template for GlobTemplate {
+    const TEMPLATE_NAME: &'static str = "glob.hbs";
+}
--- a/crates/agent2/src/templates/base.hbs
+++ b/crates/agent2/src/templates/base.hbs
@ -0,0 +1,56 @@
+You are a highly skilled software engineer with extensive knowledge in many programming languages, frameworks, design patterns, and best practices.
+
+## Communication
+
+1. Be conversational but professional.
+2. Refer to the USER in the second person and yourself in the first person.
+3. Format your responses in markdown. Use backticks to format file, directory, function, and class names.
+4. NEVER lie or make things up.
+5. Refrain from apologizing all the time when results are unexpected. Instead, just try your best to proceed or explain the circumstances to the user without apologizing.
+
+## Tool Use
+
+1. Make sure to adhere to the tools schema.
+2. Provide every required argument.
+3. DO NOT use tools to access items that are already available in the context section.
+4. Use only the tools that are currently available.
+5. DO NOT use a tool that is not available just because it appears in the conversation. This means the user turned it off.
+
+## Searching and Reading
+
+If you are unsure how to fulfill the user's request, gather more information with tool calls and/or clarifying questions.
+
+If appropriate, use tool calls to explore the current project, which contains the following root directories:
+
+{{#each worktrees}}
+- `{{root_name}}`
+{{/each}}
+
+- When providing paths to tools, the path should always start with the name of a project root directory listed above.
+- When looking for symbols in the project, prefer the `grep` tool.
+- As you learn about the structure of the project, use that information to scope `grep` searches to targeted subtrees of the project.
+- Bias towards not asking the user for help if you can find the answer yourself.
+
+## Fixing Diagnostics
+
+1. Make 1-2 attempts at fixing diagnostics, then defer to the user.
+2. Never simplify code you've written just to solve diagnostics. Complete, mostly correct code is more valuable than perfect code that doesn't solve the problem.
+
+## Debugging
+
+When debugging, only make code changes if you are certain that you can solve the problem.
+Otherwise, follow debugging best practices:
+1. Address the root cause instead of the symptoms.
+2. Add descriptive logging statements and error messages to track variable and code state.
+3. Add test functions and statements to isolate the problem.
+
+## Calling External APIs
+
+1. Unless explicitly requested by the user, use the best suited external APIs and packages to solve the task. There is no need to ask the user for permission.
+2. When selecting which version of an API or package to use, choose one that is compatible with the user's dependency management file. If no such file exists or if the package is not present, use the latest version that is in your training data.
+3. If an external API requires an API Key, be sure to point this out to the user. Adhere to best security practices (e.g. DO NOT hardcode an API key in a place where it can be exposed).
+
+## System Information
+
+Operating System: {{os}}
+Default Shell: {{shell}}
--- a/crates/agent2/src/templates/glob.hbs
+++ b/crates/agent2/src/templates/glob.hbs
@ -0,0 +1,8 @@
+Find paths on disk with glob patterns.
+
+Assume that all glob patterns are matched in a project directory with the following entries.
+
+{{project_roots}}
+
+When searching with patterns that begin with literal path components, e.g. `foo/bar/**/*.rs`, be
+sure to anchor them with one of the directories listed above.
--- a/crates/agent2/src/thread.rs
+++ b/crates/agent2/src/thread.rs
@ -0,0 +1,420 @@
+use crate::templates::Templates;
+use anyhow::{anyhow, Result};
+use futures::{channel::mpsc, future};
+use gpui::{App, Context, SharedString, Task};
+use language_model::{
+    CompletionIntent, CompletionMode, LanguageModel, LanguageModelCompletionError,
+    LanguageModelCompletionEvent, LanguageModelRequest, LanguageModelRequestMessage,
+    LanguageModelRequestTool, LanguageModelToolResult, LanguageModelToolResultContent,
+    LanguageModelToolSchemaFormat, LanguageModelToolUse, MessageContent, Role, StopReason,
+};
+use schemars::{JsonSchema, Schema};
+use serde::Deserialize;
+use smol::stream::StreamExt;
+use std::{collections::BTreeMap, sync::Arc};
+use util::ResultExt;
+
+#[derive(Debug)]
+pub struct AgentMessage {
+    pub role: Role,
+    pub content: Vec<MessageContent>,
+}
+
+pub type AgentResponseEvent = LanguageModelCompletionEvent;
+
+pub trait Prompt {
+    fn render(&self, prompts: &Templates, cx: &App) -> Result<String>;
+}
+
+pub struct Thread {
+    messages: Vec<AgentMessage>,
+    completion_mode: CompletionMode,
+    /// Holds the task that handles agent interaction until the end of the turn.
+    /// Survives across multiple requests as the model performs tool calls and
+    /// we run the tools and report their results.
+    running_turn: Option<Task<()>>,
+    system_prompts: Vec<Arc<dyn Prompt>>,
+    tools: BTreeMap<SharedString, Arc<dyn AgentToolErased>>,
+    templates: Arc<Templates>,
+    // project: Entity<Project>,
+    // action_log: Entity<ActionLog>,
+}
+
+impl Thread {
+    pub fn new(templates: Arc<Templates>) -> Self {
+        Self {
+            messages: Vec::new(),
+            completion_mode: CompletionMode::Normal,
+            system_prompts: Vec::new(),
+            running_turn: None,
+            tools: BTreeMap::default(),
+            templates,
+        }
+    }
+
+    pub fn set_mode(&mut self, mode: CompletionMode) {
+        self.completion_mode = mode;
+    }
+
+    pub fn messages(&self) -> &[AgentMessage] {
+        &self.messages
+    }
+
+    pub fn add_tool(&mut self, tool: impl AgentTool) {
+        self.tools.insert(tool.name(), tool.erase());
+    }
+
+    pub fn remove_tool(&mut self, name: &str) -> bool {
+        self.tools.remove(name).is_some()
+    }
+
+    /// Sending a message results in the model streaming a response, which could include tool calls.
+    /// After calling tools, the model stops and waits for any outstanding tool calls to be completed and their results sent.
+    /// The returned channel will report all the occurrences in which the model stops before erroring or ending its turn.
+    pub fn send(
+        &mut self,
+        model: Arc<dyn LanguageModel>,
+        content: impl Into<MessageContent>,
+        cx: &mut Context<Self>,
+    ) -> mpsc::UnboundedReceiver<Result<AgentResponseEvent, LanguageModelCompletionError>> {
+        cx.notify();
+        let (events_tx, events_rx) =
+            mpsc::unbounded::<Result<AgentResponseEvent, LanguageModelCompletionError>>();
+
+        let system_message = self.build_system_message(cx);
+        self.messages.extend(system_message);
+
+        self.messages.push(AgentMessage {
+            role: Role::User,
+            content: vec![content.into()],
+        });
+        self.running_turn = Some(cx.spawn(async move |thread, cx| {
+            let turn_result = async {
+                // Perform one request, then keep looping if the model makes tool calls.
+                let mut completion_intent = CompletionIntent::UserPrompt;
+                loop {
+                    let request = thread.update(cx, |thread, cx| {
+                        thread.build_completion_request(completion_intent, cx)
+                    })?;
+
+                    // println!(
+                    //     "request: {}",
+                    //     serde_json::to_string_pretty(&request).unwrap()
+                    // );
+
+                    // Stream events, appending to messages and collecting up tool uses.
+                    let mut events = model.stream_completion(request, cx).await?;
+                    let mut tool_uses = Vec::new();
+                    while let Some(event) = events.next().await {
+                        match event {
+                            Ok(event) => {
+                                thread
+                                    .update(cx, |thread, cx| {
+                                        tool_uses.extend(thread.handle_streamed_completion_event(
+                                            event,
+                                            events_tx.clone(),
+                                            cx,
+                                        ));
+                                    })
+                                    .ok();
+                            }
+                            Err(error) => {
+                                events_tx.unbounded_send(Err(error)).ok();
+                                break;
+                            }
+                        }
+                    }
+
+                    // If there are no tool uses, the turn is done.
+                    if tool_uses.is_empty() {
+                        break;
+                    }
+
+                    // If there are tool uses, wait for their results to be
+                    // computed, then send them together in a single message on
+                    // the next loop iteration.
+                    let tool_results = future::join_all(tool_uses).await;
+                    thread
+                        .update(cx, |thread, _cx| {
+                            thread.messages.push(AgentMessage {
+                                role: Role::User,
+                                content: tool_results.into_iter().map(Into::into).collect(),
+                            });
+                        })
+                        .ok();
+                    completion_intent = CompletionIntent::ToolResults;
+                }
+
+                Ok(())
+            }
+            .await;
+
+            if let Err(error) = turn_result {
+                events_tx.unbounded_send(Err(error)).ok();
+            }
+        }));
+        events_rx
+    }
+
+    pub fn build_system_message(&mut self, cx: &App) -> Option<AgentMessage> {
+        let mut system_message = AgentMessage {
+            role: Role::System,
+            content: Vec::new(),
+        };
+
+        for prompt in &self.system_prompts {
+            if let Some(rendered_prompt) = prompt.render(&self.templates, cx).log_err() {
+                system_message
+                    .content
+                    .push(MessageContent::Text(rendered_prompt));
+            }
+        }
+
+        (!system_message.content.is_empty()).then_some(system_message)
+    }
+
+    /// A helper method that's called on every streamed completion event.
+    /// Returns an optional tool result task, which the main agentic loop in
+    /// send will send back to the model when it resolves.
+    fn handle_streamed_completion_event(
+        &mut self,
+        event: LanguageModelCompletionEvent,
+        events_tx: mpsc::UnboundedSender<Result<AgentResponseEvent, LanguageModelCompletionError>>,
+        cx: &mut Context<Self>,
+    ) -> Option<Task<LanguageModelToolResult>> {
+        use LanguageModelCompletionEvent::*;
+        events_tx.unbounded_send(Ok(event.clone())).ok();
+
+        match event {
+            Text(new_text) => self.handle_text_event(new_text, cx),
+            Thinking { text, signature } => {
+                todo!()
+            }
+            ToolUse(tool_use) => {
+                return self.handle_tool_use_event(tool_use, cx);
+            }
+            StartMessage { role, .. } => {
+                self.messages.push(AgentMessage {
+                    role,
+                    content: Vec::new(),
+                });
+            }
+            UsageUpdate(_) => {}
+            Stop(stop_reason) => self.handle_stop_event(stop_reason),
+            StatusUpdate(_completion_request_status) => {}
+            RedactedThinking { data } => todo!(),
+            ToolUseJsonParseError {
+                id,
+                tool_name,
+                raw_input,
+                json_parse_error,
+            } => todo!(),
+        }
+
+        None
+    }
+
+    fn handle_stop_event(&mut self, stop_reason: StopReason) {
+        match stop_reason {
+            StopReason::EndTurn | StopReason::ToolUse => {}
+            StopReason::MaxTokens => todo!(),
+            StopReason::Refusal => todo!(),
+        }
+    }
+
+    fn handle_text_event(&mut self, new_text: String, cx: &mut Context<Self>) {
+        let last_message = self.last_assistant_message();
+        if let Some(MessageContent::Text(text)) = last_message.content.last_mut() {
+            text.push_str(&new_text);
+        } else {
+            last_message.content.push(MessageContent::Text(new_text));
+        }
+
+        cx.notify();
+    }
+
+    fn handle_tool_use_event(
+        &mut self,
+        tool_use: LanguageModelToolUse,
+        cx: &mut Context<Self>,
+    ) -> Option<Task<LanguageModelToolResult>> {
+        cx.notify();
+
+        let last_message = self.last_assistant_message();
+
+        // Ensure the last message ends in the current tool use
+        let push_new_tool_use = last_message.content.last_mut().map_or(true, |content| {
+            if let MessageContent::ToolUse(last_tool_use) = content {
+                if last_tool_use.id == tool_use.id {
+                    *last_tool_use = tool_use.clone();
+                    false
+                } else {
+                    true
+                }
+            } else {
+                true
+            }
+        });
+        if push_new_tool_use {
+            last_message.content.push(tool_use.clone().into());
+        }
+
+        if !tool_use.is_input_complete {
+            return None;
+        }
+
+        if let Some(tool) = self.tools.get(tool_use.name.as_ref()) {
+            let pending_tool_result = tool.clone().run(tool_use.input, cx);
+
+            Some(cx.foreground_executor().spawn(async move {
+                match pending_tool_result.await {
+                    Ok(tool_output) => LanguageModelToolResult {
+                        tool_use_id: tool_use.id,
+                        tool_name: tool_use.name,
+                        is_error: false,
+                        content: LanguageModelToolResultContent::Text(Arc::from(tool_output)),
+                        output: None,
+                    },
+                    Err(error) => LanguageModelToolResult {
+                        tool_use_id: tool_use.id,
+                        tool_name: tool_use.name,
+                        is_error: true,
+                        content: LanguageModelToolResultContent::Text(Arc::from(error.to_string())),
+                        output: None,
+                    },
+                }
+            }))
+        } else {
+            Some(Task::ready(LanguageModelToolResult {
+                content: LanguageModelToolResultContent::Text(Arc::from(format!(
+                    "No tool named {} exists",
+                    tool_use.name
+                ))),
+                tool_use_id: tool_use.id,
+                tool_name: tool_use.name,
+                is_error: true,
+                output: None,
+            }))
+        }
+    }
+
+    /// Guarantees the last message is from the assistant and returns a mutable reference.
+    fn last_assistant_message(&mut self) -> &mut AgentMessage {
+        if self
+            .messages
+            .last()
+            .map_or(true, |m| m.role != Role::Assistant)
+        {
+            self.messages.push(AgentMessage {
+                role: Role::Assistant,
+                content: Vec::new(),
+            });
+        }
+        self.messages.last_mut().unwrap()
+    }
+
+    fn build_completion_request(
+        &self,
+        completion_intent: CompletionIntent,
+        cx: &mut App,
+    ) -> LanguageModelRequest {
+        LanguageModelRequest {
+            thread_id: None,
+            prompt_id: None,
+            intent: Some(completion_intent),
+            mode: Some(self.completion_mode),
+            messages: self.build_request_messages(),
+            tools: self
+                .tools
+                .values()
+                .filter_map(|tool| {
+                    Some(LanguageModelRequestTool {
+                        name: tool.name().to_string(),
+                        description: tool.description(cx).to_string(),
+                        input_schema: tool
+                            .input_schema(LanguageModelToolSchemaFormat::JsonSchema)
+                            .log_err()?,
+                    })
+                })
+                .collect(),
+            tool_choice: None,
+            stop: Vec::new(),
+            temperature: None,
+        }
+    }
+
+    fn build_request_messages(&self) -> Vec<LanguageModelRequestMessage> {
+        self.messages
+            .iter()
+            .map(|message| LanguageModelRequestMessage {
+                role: message.role,
+                content: message.content.clone(),
+                cache: false,
+            })
+            .collect()
+    }
+}
+
+pub trait AgentTool
+where
+    Self: 'static + Sized,
+{
+    type Input: for<'de> Deserialize<'de> + JsonSchema;
+
+    fn name(&self) -> SharedString;
+    fn description(&self, _cx: &mut App) -> SharedString {
+        let schema = schemars::schema_for!(Self::Input);
+        SharedString::new(
+            schema
+                .get("description")
+                .and_then(|description| description.as_str())
+                .unwrap_or_default(),
+        )
+    }
+
+    /// Returns the JSON schema that describes the tool's input.
+    fn input_schema(&self, format: LanguageModelToolSchemaFormat) -> Schema {
+        assistant_tools::root_schema_for::<Self::Input>(format)
+    }
+
+    /// Runs the tool with the provided input.
+    fn run(self: Arc<Self>, input: Self::Input, cx: &mut App) -> Task<Result<String>>;
+
+    fn erase(self) -> Arc<dyn AgentToolErased> {
+        Arc::new(Erased(Arc::new(self)))
+    }
+}
+
+pub struct Erased<T>(T);
+
+pub trait AgentToolErased {
+    fn name(&self) -> SharedString;
+    fn description(&self, cx: &mut App) -> SharedString;
+    fn input_schema(&self, format: LanguageModelToolSchemaFormat) -> Result<serde_json::Value>;
+    fn run(self: Arc<Self>, input: serde_json::Value, cx: &mut App) -> Task<Result<String>>;
+}
+
+impl<T> AgentToolErased for Erased<Arc<T>>
+where
+    T: AgentTool,
+{
+    fn name(&self) -> SharedString {
+        self.0.name()
+    }
+
+    fn description(&self, cx: &mut App) -> SharedString {
+        self.0.description(cx)
+    }
+
+    fn input_schema(&self, format: LanguageModelToolSchemaFormat) -> Result<serde_json::Value> {
+        Ok(serde_json::to_value(self.0.input_schema(format))?)
+    }
+
+    fn run(self: Arc<Self>, input: serde_json::Value, cx: &mut App) -> Task<Result<String>> {
+        let parsed_input: Result<T::Input> = serde_json::from_value(input).map_err(Into::into);
+        match parsed_input {
+            Ok(input) => self.0.clone().run(input, cx),
+            Err(error) => Task::ready(Err(anyhow!(error))),
+        }
+    }
+}
--- a/crates/agent2/src/thread/tests.rs
+++ b/crates/agent2/src/thread/tests.rs
@ -0,0 +1,254 @@
+use super::*;
+use client::{proto::language_server_prompt_request, Client, UserStore};
+use fs::FakeFs;
+use gpui::{AppContext, Entity, TestAppContext};
+use language_model::{
+    LanguageModel, LanguageModelCompletionError, LanguageModelCompletionEvent,
+    LanguageModelRegistry, MessageContent, StopReason,
+};
+use reqwest_client::ReqwestClient;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use smol::stream::StreamExt;
+use std::{sync::Arc, time::Duration};
+
+mod test_tools;
+use test_tools::*;
+
+#[gpui::test]
+async fn test_echo(cx: &mut TestAppContext) {
+    let AgentTest { model, agent, .. } = setup(cx).await;
+
+    let events = agent
+        .update(cx, |agent, cx| {
+            agent.send(model.clone(), "Testing: Reply with 'Hello'", cx)
+        })
+        .collect()
+        .await;
+    agent.update(cx, |agent, _cx| {
+        assert_eq!(
+            agent.messages.last().unwrap().content,
+            vec![MessageContent::Text("Hello".to_string())]
+        );
+    });
+    assert_eq!(stop_events(events), vec![StopReason::EndTurn]);
+}
+
+#[gpui::test]
+async fn test_basic_tool_calls(cx: &mut TestAppContext) {
+    let AgentTest { model, agent, .. } = setup(cx).await;
+
+    // Test a tool call that's likely to complete *before* streaming stops.
+    let events = agent
+        .update(cx, |agent, cx| {
+            agent.add_tool(EchoTool);
+            agent.send(
+                model.clone(),
+                "Now test the echo tool with 'Hello'. Does it work? Say 'Yes' or 'No'.",
+                cx,
+            )
+        })
+        .collect()
+        .await;
+    assert_eq!(
+        stop_events(events),
+        vec![StopReason::ToolUse, StopReason::EndTurn]
+    );
+
+    // Test a tool call that's likely to complete *after* streaming stops.
+    let events = agent
+        .update(cx, |agent, cx| {
+            agent.remove_tool(&AgentTool::name(&EchoTool));
+            agent.add_tool(DelayTool);
+            agent.send(
+                model.clone(),
+                "Now call the delay tool with 200ms. When the timer goes off, then you echo the output of the tool.",
+                cx,
+            )
+        })
+        .collect()
+        .await;
+    assert_eq!(
+        stop_events(events),
+        vec![StopReason::ToolUse, StopReason::EndTurn]
+    );
+    agent.update(cx, |agent, _cx| {
+        assert!(agent
+            .messages
+            .last()
+            .unwrap()
+            .content
+            .iter()
+            .any(|content| {
+                if let MessageContent::Text(text) = content {
+                    text.contains("Ding")
+                } else {
+                    false
+                }
+            }));
+    });
+}
+
+#[gpui::test]
+async fn test_streaming_tool_calls(cx: &mut TestAppContext) {
+    let AgentTest { model, agent, .. } = setup(cx).await;
+
+    // Test a tool call that's likely to complete *before* streaming stops.
+    let mut events = agent.update(cx, |agent, cx| {
+        agent.add_tool(WordListTool);
+        agent.send(model.clone(), "Test the word_list tool.", cx)
+    });
+
+    let mut saw_partial_tool_use = false;
+    while let Some(event) = events.next().await {
+        if let Ok(LanguageModelCompletionEvent::ToolUse(tool_use_event)) = event {
+            agent.update(cx, |agent, _cx| {
+                // Look for a tool use in the agent's last message
+                let last_content = agent.messages().last().unwrap().content.last().unwrap();
+                if let MessageContent::ToolUse(last_tool_use) = last_content {
+                    assert_eq!(last_tool_use.name.as_ref(), "word_list");
+                    if tool_use_event.is_input_complete {
+                        last_tool_use
+                            .input
+                            .get("a")
+                            .expect("'a' has streamed because input is now complete");
+                        last_tool_use
+                            .input
+                            .get("g")
+                            .expect("'g' has streamed because input is now complete");
+                    } else {
+                        if !last_tool_use.is_input_complete
+                            && last_tool_use.input.get("g").is_none()
+                        {
+                            saw_partial_tool_use = true;
+                        }
+                    }
+                } else {
+                    panic!("last content should be a tool use");
+                }
+            });
+        }
+    }
+
+    assert!(
+        saw_partial_tool_use,
+        "should see at least one partially streamed tool use in the history"
+    );
+}
+
+#[gpui::test]
+async fn test_concurrent_tool_calls(cx: &mut TestAppContext) {
+    let AgentTest { model, agent, .. } = setup(cx).await;
+
+    // Test concurrent tool calls with different delay times
+    let events = agent
+        .update(cx, |agent, cx| {
+            agent.add_tool(DelayTool);
+            agent.send(
+                model.clone(),
+                "Call the delay tool twice in the same message. Once with 100ms. Once with 300ms. When both timers are complete, describe the outputs.",
+                cx,
+            )
+        })
+        .collect()
+        .await;
+
+    let stop_reasons = stop_events(events);
+    if stop_reasons.len() == 2 {
+        assert_eq!(stop_reasons, vec![StopReason::ToolUse, StopReason::EndTurn]);
+    } else if stop_reasons.len() == 3 {
+        assert_eq!(
+            stop_reasons,
+            vec![
+                StopReason::ToolUse,
+                StopReason::ToolUse,
+                StopReason::EndTurn
+            ]
+        );
+    } else {
+        panic!("Expected either 1 or 2 tool uses followed by end turn");
+    }
+
+    agent.update(cx, |agent, _cx| {
+        let last_message = agent.messages.last().unwrap();
+        let text = last_message
+            .content
+            .iter()
+            .filter_map(|content| {
+                if let MessageContent::Text(text) = content {
+                    Some(text.as_str())
+                } else {
+                    None
+                }
+            })
+            .collect::<String>();
+
+        assert!(text.contains("Ding"));
+    });
+}
+
+/// Filters out the stop events for asserting against in tests
+fn stop_events(
+    result_events: Vec<Result<AgentResponseEvent, LanguageModelCompletionError>>,
+) -> Vec<StopReason> {
+    result_events
+        .into_iter()
+        .filter_map(|event| match event.unwrap() {
+            LanguageModelCompletionEvent::Stop(stop_reason) => Some(stop_reason),
+            _ => None,
+        })
+        .collect()
+}
+
+struct AgentTest {
+    model: Arc<dyn LanguageModel>,
+    agent: Entity<Thread>,
+}
+
+async fn setup(cx: &mut TestAppContext) -> AgentTest {
+    cx.executor().allow_parking();
+    cx.update(settings::init);
+    let fs = FakeFs::new(cx.executor().clone());
+    // let project = Project::test(fs.clone(), [], cx).await;
+    // let action_log = cx.new(|_| ActionLog::new(project.clone()));
+    let templates = Templates::new();
+    let agent = cx.new(|_| Thread::new(templates));
+
+    let model = cx
+        .update(|cx| {
+            gpui_tokio::init(cx);
+            let http_client = ReqwestClient::user_agent("agent tests").unwrap();
+            cx.set_http_client(Arc::new(http_client));
+
+            client::init_settings(cx);
+            let client = Client::production(cx);
+            let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
+            language_model::init(client.clone(), cx);
+            language_models::init(user_store.clone(), client.clone(), cx);
+
+            let models = LanguageModelRegistry::read_global(cx);
+            let model = models
+                .available_models(cx)
+                .find(|model| model.id().0 == "claude-3-7-sonnet-latest")
+                .unwrap();
+
+            let provider = models.provider(&model.provider_id()).unwrap();
+            let authenticated = provider.authenticate(cx);
+
+            cx.spawn(async move |cx| {
+                authenticated.await.unwrap();
+                model
+            })
+        })
+        .await;
+
+    AgentTest { model, agent }
+}
+
+#[cfg(test)]
+#[ctor::ctor]
+fn init_logger() {
+    if std::env::var("RUST_LOG").is_ok() {
+        env_logger::init();
+    }
+}
--- a/crates/agent2/src/thread/tests/test_tools.rs
+++ b/crates/agent2/src/thread/tests/test_tools.rs
@ -0,0 +1,83 @@
+use super::*;
+
+// Input accepted by `EchoTool`.
+// NOTE(review): the `///` comments below are consumed by `#[derive(JsonSchema)]`, and the
+// type-level one describes the tool rather than the input — presumably it is surfaced as the
+// tool description; confirm before rewording it.
+/// A tool that echoes its input
+#[derive(JsonSchema, Serialize, Deserialize)]
+pub struct EchoToolInput {
+    /// The text to echo.
+    text: String,
+}
+
+/// Test tool whose `AgentTool::run` resolves immediately with the input text.
+pub struct EchoTool;
+
+impl AgentTool for EchoTool {
+    type Input = EchoToolInput;
+
+    /// Returns the tool's name, `echo`.
+    fn name(&self) -> SharedString {
+        SharedString::from("echo")
+    }
+
+    /// Resolves immediately with the `text` field of `input`, unchanged.
+    fn run(self: Arc<Self>, input: Self::Input, _cx: &mut App) -> Task<Result<String>> {
+        let EchoToolInput { text } = input;
+        Task::ready(Ok(text))
+    }
+}
+
+// Input accepted by `DelayTool`.
+// NOTE(review): the `///` comments below are consumed by `#[derive(JsonSchema)]`, and the
+// type-level one describes the tool rather than the input — presumably it is surfaced as the
+// tool description; confirm before rewording it.
+/// A tool that waits for a specified delay
+#[derive(JsonSchema, Serialize, Deserialize)]
+pub struct DelayToolInput {
+    /// The delay in milliseconds.
+    ms: u64,
+}
+
+/// Test tool whose `AgentTool::run` waits for the requested delay and then yields `"Ding"`.
+pub struct DelayTool;
+
+impl AgentTool for DelayTool {
+    type Input = DelayToolInput;
+
+    /// Returns the tool's name, `delay`.
+    fn name(&self) -> SharedString {
+        SharedString::from("delay")
+    }
+
+    /// Spawns a foreground task that sleeps for `input.ms` milliseconds and then
+    /// resolves with the string `"Ding"`.
+    fn run(self: Arc<Self>, input: Self::Input, cx: &mut App) -> Task<Result<String>>
+    where
+        Self: Sized,
+    {
+        let delay = Duration::from_millis(input.ms);
+        cx.foreground_executor().spawn(async move {
+            smol::Timer::after(delay).await;
+            Ok(String::from("Ding"))
+        })
+    }
+}
+
+// Input accepted by `WordListTool`: one optional word per letter A through G.
+// NOTE(review): every field is `Option` even though the description tells the model that all
+// fields are required — presumably so partially-provided input still deserializes; confirm.
+// The `///` comments are consumed by `#[derive(JsonSchema)]`, so their wording is runtime text.
+/// A tool that takes an object with a map from letters to random words starting with that letter.
+/// All fields are required! Pass a word for every letter!
+#[derive(JsonSchema, Serialize, Deserialize)]
+pub struct WordListInput {
+    /// Provide a random word that starts with A.
+    a: Option<String>,
+    /// Provide a random word that starts with B.
+    b: Option<String>,
+    /// Provide a random word that starts with C.
+    c: Option<String>,
+    /// Provide a random word that starts with D.
+    d: Option<String>,
+    /// Provide a random word that starts with E.
+    e: Option<String>,
+    /// Provide a random word that starts with F.
+    f: Option<String>,
+    /// Provide a random word that starts with G.
+    g: Option<String>,
+}
+
+/// Test tool whose `AgentTool::run` ignores its input and always yields `"ok"`.
+pub struct WordListTool;
+
+impl AgentTool for WordListTool {
+    type Input = WordListInput;
+
+    /// Returns the tool's name, `word_list`.
+    fn name(&self) -> SharedString {
+        SharedString::from("word_list")
+    }
+
+    /// Ignores the supplied words and resolves immediately with `"ok"`.
+    fn run(self: Arc<Self>, _words: Self::Input, _cx: &mut App) -> Task<Result<String>> {
+        let reply = String::from("ok");
+        Task::ready(Ok(reply))
+    }
+}
--- a/crates/agent2/src/tools.rs
+++ b/crates/agent2/src/tools.rs
@ -0,0 +1 @@
+mod glob;
--- a/crates/agent2/src/tools/glob.rs
+++ b/crates/agent2/src/tools/glob.rs
@ -0,0 +1,76 @@
+use anyhow::{anyhow, Result};
+use gpui::{App, AppContext, Entity, SharedString, Task};
+use project::Project;
+use schemars::JsonSchema;
+use serde::Deserialize;
+use std::{path::PathBuf, sync::Arc};
+use util::paths::PathMatcher;
+use worktree::Snapshot as WorktreeSnapshot;
+
+use crate::{
+    templates::{GlobTemplate, Template, Templates},
+    thread::AgentTool,
+};
+
+// Input accepted by `GlobTool`.
+// Deliberately has no `///` doc on the type itself: the tool description is dynamic and is
+// produced by `fn description` in the `AgentTool` impl below (rendered from `GlobTemplate`).
+#[derive(Deserialize, JsonSchema)]
+struct GlobInput {
+    /// A POSIX glob pattern
+    glob: SharedString,
+}
+
+/// Tool that lists the worktree paths of a project matching a glob pattern.
+struct GlobTool {
+    /// Project whose worktrees are named in the description and searched by `run`.
+    project: Entity<Project>,
+    /// Templates used to render the tool description.
+    templates: Arc<Templates>,
+}
+
+impl AgentTool for GlobTool {
+    type Input = GlobInput;
+
+    /// Returns the tool's name, `glob`.
+    fn name(&self) -> SharedString {
+        SharedString::from("glob")
+    }
+
+    /// Renders `GlobTemplate` with the root names of the project's worktrees, one per line.
+    ///
+    /// Panics with "template failed to render" if rendering returns an error.
+    fn description(&self, cx: &mut App) -> SharedString {
+        let project = self.project.read(cx);
+
+        let mut root_names: Vec<String> = Vec::new();
+        for worktree in project.worktrees(cx) {
+            root_names.push(worktree.read(cx).root_name().into());
+        }
+        let project_roots = root_names.join("\n");
+
+        let template = GlobTemplate { project_roots };
+        let rendered = template
+            .render(&self.templates)
+            .expect("template failed to render");
+        rendered.into()
+    }
+
+    /// Matches `input.glob` against the entries of every worktree snapshot and returns the
+    /// matching paths (each prefixed with its worktree root name), one per line.
+    ///
+    /// An invalid pattern yields an already-resolved error task; otherwise the snapshots are
+    /// captured up front and the scan itself runs on the background executor.
+    fn run(self: Arc<Self>, input: Self::Input, cx: &mut App) -> Task<Result<String>> {
+        let path_matcher = match PathMatcher::new([&input.glob]) {
+            Err(error) => return Task::ready(Err(anyhow!(error))),
+            Ok(matcher) => matcher,
+        };
+
+        // Snapshot every worktree now so the background task needs no access to `cx`.
+        let project = self.project.read(cx);
+        let mut snapshots: Vec<WorktreeSnapshot> = Vec::new();
+        for worktree in project.worktrees(cx) {
+            snapshots.push(worktree.read(cx).snapshot());
+        }
+
+        cx.background_spawn(async move {
+            let mut output = String::new();
+            for snapshot in &snapshots {
+                let root_name = PathBuf::from(snapshot.root_name());
+                for entry in snapshot.entries(false, 0) {
+                    let candidate = root_name.join(&entry.path);
+                    if path_matcher.is_match(&candidate) {
+                        output.push_str(&format!("{}\n", candidate.display()));
+                    }
+                }
+            }
+            Ok(output)
+        })
+    }
+}