assistant: Stream tool uses as structured data (#17322)

This PR changes how we encode tool uses in the completion response:
they are now emitted in a structured format rather than simply being
injected into the response stream as text.

In #17170 we would encode the tool uses as XML and insert them as text.
This would then require re-parsing the tool uses out of the buffer in
order to use them.

The approach taken in this PR is to make `stream_completion` return a
stream of `LanguageModelCompletionEvent`s. Each of these events can be
either text, or a tool use.

A new `stream_completion_text` method has been added to `LanguageModel`
for scenarios where we only care about textual content (currently,
everywhere that isn't the Assistant context editor).

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2024-09-03 15:04:51 -04:00 committed by GitHub
parent 132e8e8064
commit 452272e5df
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 235 additions and 83 deletions

View file

@ -8,7 +8,8 @@ pub mod settings;
use anyhow::Result;
use client::{Client, UserStore};
use futures::{future::BoxFuture, stream::BoxStream, TryStreamExt as _};
use futures::FutureExt;
use futures::{future::BoxFuture, stream::BoxStream, StreamExt, TryStreamExt as _};
use gpui::{
AnyElement, AnyView, AppContext, AsyncAppContext, Model, SharedString, Task, WindowContext,
};
@ -51,6 +52,20 @@ pub struct LanguageModelCacheConfiguration {
pub min_total_token: usize,
}
/// A completion event from a language model.
///
/// A completion is delivered as a stream of these events; each event is
/// either a piece of text or a tool use.
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
pub enum LanguageModelCompletionEvent {
/// Textual content produced by the model.
Text(String),
/// A tool use, carried as structured data rather than encoded into the text.
ToolUse(LanguageModelToolUse),
}
/// A tool use emitted by a language model, as carried by
/// `LanguageModelCompletionEvent::ToolUse`.
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
pub struct LanguageModelToolUse {
/// Identifier of this tool use.
/// NOTE(review): presumably assigned by the model provider and unique per
/// tool use within a response — confirm against the provider integrations.
pub id: String,
/// Name of the tool.
/// NOTE(review): presumably matches a tool name supplied in the request — confirm.
pub name: String,
/// Input for the tool, held as an arbitrary JSON value.
/// NOTE(review): the expected shape is presumably defined by the tool's
/// input schema; nothing here validates it — confirm at the call sites.
pub input: serde_json::Value,
}
pub trait LanguageModel: Send + Sync {
fn id(&self) -> LanguageModelId;
fn name(&self) -> LanguageModelName;
@ -82,7 +97,29 @@ pub trait LanguageModel: Send + Sync {
&self,
request: LanguageModelRequest,
cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>>;
) -> BoxFuture<'static, Result<BoxStream<'static, Result<LanguageModelCompletionEvent>>>>;
/// Streams only the textual content of a completion.
///
/// Delegates to `stream_completion` and adapts the resulting event stream:
/// `Text` events are unwrapped to their `String`, `ToolUse` events are
/// dropped, and errors are forwarded unchanged. An error from awaiting
/// `stream_completion` itself is returned from the outer future.
fn stream_completion_text(
    &self,
    request: LanguageModelRequest,
    cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
    // Start the request before entering the async block: the returned future
    // is `'static`, so it cannot hold on to the `cx` borrow.
    let pending_events = self.stream_completion(request, cx);

    async move {
        let events = pending_events.await?;

        let text_chunks = events.filter_map(|event| async move {
            // Ok(Text(s)) -> Some(Ok(s)), Ok(ToolUse(_)) -> None, Err(e) -> Some(Err(e)).
            event
                .map(|event| match event {
                    LanguageModelCompletionEvent::Text(chunk) => Some(chunk),
                    LanguageModelCompletionEvent::ToolUse(_) => None,
                })
                .transpose()
        });

        Ok(text_chunks.boxed())
    }
    .boxed()
}
fn use_any_tool(
&self,