assistant: Stream tool uses as structured data (#17322)
This PR changes how tool uses are encoded in the completion response, emitting them in a structured format rather than injecting them into the response stream as text. In #17170 we encoded tool uses as XML and inserted them as text, which then required re-parsing them out of the buffer in order to use them. The approach taken in this PR is to make `stream_completion` return a stream of `LanguageModelCompletionEvent`s, where each event is either text or a tool use. A new `stream_completion_text` method has been added to `LanguageModel` for scenarios where we only care about textual content (currently, everywhere that isn't the Assistant context editor).

Release Notes:

- N/A
parent 132e8e8064
commit 452272e5df
14 changed files with 235 additions and 83 deletions
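For context, here is a minimal, self-contained sketch of the event-based shape this PR introduces, and of how a text-only consumer in the spirit of `stream_completion_text` can be layered on top of the event stream. Only the two variant names are taken from the diff below; the tool-use payload, the stand-in event stream, and the filtering helper are illustrative assumptions, not the actual `language_model` crate API.

```rust
// Sketch only: variant names match the diff below; everything else is assumed.
use futures::{executor::block_on, stream, StreamExt};

#[derive(Debug)]
enum LanguageModelCompletionEvent {
    Text(String),
    // In the real crate the payload is a dedicated tool-use type;
    // a raw JSON value stands in for it here.
    ToolUse(serde_json::Value),
}

fn main() {
    block_on(async {
        // Stand-in for what `stream_completion` would yield.
        let events = stream::iter(vec![
            Ok(LanguageModelCompletionEvent::Text("Hello, ".into())),
            Ok(LanguageModelCompletionEvent::ToolUse(serde_json::json!({
                "name": "search",
                "input": { "query": "zed" },
            }))),
            Ok::<_, String>(LanguageModelCompletionEvent::Text("world!".into())),
        ]);

        // What `stream_completion_text` does conceptually: reduce the
        // event stream to its textual chunks, dropping tool uses.
        let mut text_chunks = events.filter_map(|event| async move {
            match event {
                Ok(LanguageModelCompletionEvent::Text(chunk)) => Some(Ok(chunk)),
                Ok(LanguageModelCompletionEvent::ToolUse(_)) => None,
                Err(error) => Some(Err(error)),
            }
        });

        while let Some(chunk) = text_chunks.next().await {
            print!("{}", chunk.expect("stream error"));
        }
        println!(); // prints "Hello, world!"
    });
}
```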
@@ -25,8 +25,9 @@ use gpui::{
 use language::{AnchorRangeExt, Bias, Buffer, LanguageRegistry, OffsetRangeExt, Point, ToOffset};
 use language_model::{
-    LanguageModel, LanguageModelCacheConfiguration, LanguageModelImage, LanguageModelRegistry,
-    LanguageModelRequest, LanguageModelRequestMessage, MessageContent, Role,
+    LanguageModel, LanguageModelCacheConfiguration, LanguageModelCompletionEvent,
+    LanguageModelImage, LanguageModelRegistry, LanguageModelRequest, LanguageModelRequestMessage,
+    MessageContent, Role,
 };
 use open_ai::Model as OpenAiModel;
 use paths::{context_images_dir, contexts_dir};
@@ -1950,13 +1951,13 @@ impl Context {
         let mut response_latency = None;
         let stream_completion = async {
             let request_start = Instant::now();
-            let mut chunks = stream.await?;
+            let mut events = stream.await?;
 
-            while let Some(chunk) = chunks.next().await {
+            while let Some(event) = events.next().await {
                 if response_latency.is_none() {
                     response_latency = Some(request_start.elapsed());
                 }
-                let chunk = chunk?;
+                let event = event?;
 
                 this.update(&mut cx, |this, cx| {
                     let message_ix = this
@@ -1970,11 +1971,36 @@ impl Context {
                         .map_or(buffer.len(), |message| {
                             message.start.to_offset(buffer).saturating_sub(1)
                         });
-                    buffer.edit(
-                        [(message_old_end_offset..message_old_end_offset, chunk)],
-                        None,
-                        cx,
-                    );
+
+                    match event {
+                        LanguageModelCompletionEvent::Text(chunk) => {
+                            buffer.edit(
+                                [(
+                                    message_old_end_offset..message_old_end_offset,
+                                    chunk,
+                                )],
+                                None,
+                                cx,
+                            );
+                        }
+                        LanguageModelCompletionEvent::ToolUse(tool_use) => {
+                            let mut text = String::new();
+                            text.push('\n');
+                            text.push_str(
+                                &serde_json::to_string_pretty(&tool_use)
+                                    .expect("failed to serialize tool use to JSON"),
+                            );
+
+                            buffer.edit(
+                                [(
+                                    message_old_end_offset..message_old_end_offset,
+                                    text,
+                                )],
+                                None,
+                                cx,
+                            );
+                        }
+                    }
                 });
 
                 cx.emit(ContextEvent::StreamedCompletion);
@@ -2406,7 +2432,7 @@ impl Context {
 
         self.pending_summary = cx.spawn(|this, mut cx| {
             async move {
-                let stream = model.stream_completion(request, &cx);
+                let stream = model.stream_completion_text(request, &cx);
                 let mut messages = stream.await?;
 
                 let mut replaced = !replace_old;