assistant: Stream tool uses as structured data (#17322)

This PR adjusts the approach we use to encoding tool uses in the completion response to use a structured format rather than simply injecting it into the response stream as text. In #17170 we would encode the tool uses as XML and insert them as text. This would require then re-parsing the tool uses out of the buffer in order to use them. The approach taken in this PR is to make `stream_completion` return a stream of `LanguageModelCompletionEvent`s. Each of these events can be either text, or a tool use. A new `stream_completion_text` method has been added to `LanguageModel` for scenarios where we only care about textual content (currently, everywhere that isn't the Assistant context editor). Release Notes: - N/A
2024-09-03 15:04:51 -04:00 · 2024-09-03 15:04:51 -04:00 · 452272e5df
commit 452272e5df
parent 132e8e8064
14 changed files with 235 additions and 83 deletions
--- a/crates/language_model/src/language_model.rs
+++ b/crates/language_model/src/language_model.rs
@ -8,7 +8,8 @@ pub mod settings;

 use anyhow::Result;
 use client::{Client, UserStore};
-use futures::{future::BoxFuture, stream::BoxStream, TryStreamExt as _};
+use futures::FutureExt;
+use futures::{future::BoxFuture, stream::BoxStream, StreamExt, TryStreamExt as _};
 use gpui::{
    AnyElement, AnyView, AppContext, AsyncAppContext, Model, SharedString, Task, WindowContext,
 };
@ -51,6 +52,20 @@ pub struct LanguageModelCacheConfiguration {
    pub min_total_token: usize,
 }

+/// A completion event from a language model.
+#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
+pub enum LanguageModelCompletionEvent {
+    Text(String),
+    ToolUse(LanguageModelToolUse),
+}
+
+#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
+pub struct LanguageModelToolUse {
+    pub id: String,
+    pub name: String,
+    pub input: serde_json::Value,
+}
+
 pub trait LanguageModel: Send + Sync {
    fn id(&self) -> LanguageModelId;
    fn name(&self) -> LanguageModelName;
@ -82,7 +97,29 @@ pub trait LanguageModel: Send + Sync {
        &self,
        request: LanguageModelRequest,
        cx: &AsyncAppContext,
-    ) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>>;
+    ) -> BoxFuture<'static, Result<BoxStream<'static, Result<LanguageModelCompletionEvent>>>>;
+
+    fn stream_completion_text(
+        &self,
+        request: LanguageModelRequest,
+        cx: &AsyncAppContext,
+    ) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
+        let events = self.stream_completion(request, cx);
+
+        async move {
+            Ok(events
+                .await?
+                .filter_map(|result| async move {
+                    match result {
+                        Ok(LanguageModelCompletionEvent::Text(text)) => Some(Ok(text)),
+                        Ok(LanguageModelCompletionEvent::ToolUse(_)) => None,
+                        Err(err) => Some(Err(err)),
+                    }
+                })
+                .boxed())
+        }
+        .boxed()
+    }

    fn use_any_tool(
        &self,