assistant: Stream tool uses as structured data (#17322)

This PR changes how we encode tool uses in the completion response,
using a structured format rather than simply injecting them into the
response stream as text.

In #17170 we encoded tool uses as XML and inserted them as text, which
then required re-parsing them out of the buffer before they could be
used.

The approach taken in this PR is to make `stream_completion` return a
stream of `LanguageModelCompletionEvent`s. Each event is either text or
a tool use.
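
Concretely, the event type looks something like the following sketch. Only the `Text` and `ToolUse` variants appear in the diff below; the fields on the tool-use payload are assumptions, since the diff shows only that it serializes to JSON via serde:

use serde::Serialize;

// Hypothetical shape of the tool-use payload; the real struct's fields
// are not shown in this diff, only that it is serde-serializable.
#[derive(Debug, Serialize)]
pub struct LanguageModelToolUse {
    pub id: String,               // assumed: provider-assigned tool-use ID
    pub name: String,             // assumed: name of the tool being invoked
    pub input: serde_json::Value, // assumed: JSON arguments for the tool
}

// The two event kinds described above: plain text chunks, or a tool use.
#[derive(Debug)]
pub enum LanguageModelCompletionEvent {
    Text(String),
    ToolUse(LanguageModelToolUse),
}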

A new `stream_completion_text` method has been added to `LanguageModel`
for scenarios where we only care about textual content (currently,
everywhere that isn't the Assistant context editor).
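
As a rough sketch, assuming the event enum above, a text-only stream can be derived from the event stream like this (`completion_text_stream` is a hypothetical free function; the real method lives on the `LanguageModel` trait and its exact signature is not shown in this diff):

use anyhow::Result;
use futures::{stream::BoxStream, StreamExt};

// Filter a stream of completion events down to just the text chunks,
// silently dropping tool uses (callers that need them use stream_completion).
fn completion_text_stream(
    events: BoxStream<'static, Result<LanguageModelCompletionEvent>>,
) -> BoxStream<'static, Result<String>> {
    events
        .filter_map(|event| async move {
            match event {
                Ok(LanguageModelCompletionEvent::Text(chunk)) => Some(Ok(chunk)),
                Ok(LanguageModelCompletionEvent::ToolUse(_)) => None,
                Err(err) => Some(Err(err)),
            }
        })
        .boxed()
}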

Release Notes:

- N/A
Marshall Bowers 2024-09-03 15:04:51 -04:00 committed by GitHub
parent 132e8e8064
commit 452272e5df
14 changed files with 235 additions and 83 deletions


@@ -25,8 +25,9 @@ use gpui::{
 use language::{AnchorRangeExt, Bias, Buffer, LanguageRegistry, OffsetRangeExt, Point, ToOffset};
 use language_model::{
-    LanguageModel, LanguageModelCacheConfiguration, LanguageModelImage, LanguageModelRegistry,
-    LanguageModelRequest, LanguageModelRequestMessage, MessageContent, Role,
+    LanguageModel, LanguageModelCacheConfiguration, LanguageModelCompletionEvent,
+    LanguageModelImage, LanguageModelRegistry, LanguageModelRequest, LanguageModelRequestMessage,
+    MessageContent, Role,
 };
 use open_ai::Model as OpenAiModel;
 use paths::{context_images_dir, contexts_dir};
@@ -1950,13 +1951,13 @@ impl Context {
         let mut response_latency = None;
         let stream_completion = async {
             let request_start = Instant::now();
-            let mut chunks = stream.await?;
-            while let Some(chunk) = chunks.next().await {
+            let mut events = stream.await?;
+            while let Some(event) = events.next().await {
                 if response_latency.is_none() {
                     response_latency = Some(request_start.elapsed());
                 }
-                let chunk = chunk?;
+                let event = event?;
                 this.update(&mut cx, |this, cx| {
                     let message_ix = this
@@ -1970,11 +1971,36 @@
                         .map_or(buffer.len(), |message| {
                             message.start.to_offset(buffer).saturating_sub(1)
                         });
-                    buffer.edit(
-                        [(message_old_end_offset..message_old_end_offset, chunk)],
-                        None,
-                        cx,
-                    );
+                    match event {
+                        LanguageModelCompletionEvent::Text(chunk) => {
+                            buffer.edit(
+                                [(
+                                    message_old_end_offset..message_old_end_offset,
+                                    chunk,
+                                )],
+                                None,
+                                cx,
+                            );
+                        }
+                        LanguageModelCompletionEvent::ToolUse(tool_use) => {
+                            let mut text = String::new();
+                            text.push('\n');
+                            text.push_str(
+                                &serde_json::to_string_pretty(&tool_use)
+                                    .expect("failed to serialize tool use to JSON"),
+                            );
+                            buffer.edit(
+                                [(
+                                    message_old_end_offset..message_old_end_offset,
+                                    text,
+                                )],
+                                None,
+                                cx,
+                            );
+                        }
+                    }
                 });
                 cx.emit(ContextEvent::StreamedCompletion);
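
Given the hypothetical payload shape sketched earlier, a streamed tool use would appear in the context editor as pretty-printed JSON along these lines (all values are illustrative):

{
  "id": "toolu_01",
  "name": "search",
  "input": {
    "query": "stream_completion"
  }
}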
@@ -2406,7 +2432,7 @@ impl Context {
         self.pending_summary = cx.spawn(|this, mut cx| {
             async move {
-                let stream = model.stream_completion(request, &cx);
+                let stream = model.stream_completion_text(request, &cx);
                 let mut messages = stream.await?;
                 let mut replaced = !replace_old;
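
As a usage sketch, the text-only stream obtained above can be drained like any chunk stream; `collect_summary` is a hypothetical helper, not code from this PR:

use anyhow::Result;
use futures::{stream::BoxStream, StreamExt};

// Hypothetical helper: collect a text-only completion stream into one String.
// `chunks` would come from model.stream_completion_text(request, &cx).await?.
async fn collect_summary(
    mut chunks: BoxStream<'static, Result<String>>,
) -> Result<String> {
    let mut summary = String::new();
    while let Some(chunk) = chunks.next().await {
        summary.push_str(&chunk?); // append each streamed text chunk
    }
    Ok(summary)
}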