From 676cc109a35f87e332eba5ae9ed7fd81edd4cf71 Mon Sep 17 00:00:00 2001 From: Marshall Bowers Date: Thu, 17 Apr 2025 19:05:12 -0400 Subject: [PATCH] agent: Report usage from thread summarization requests (#29012) This PR makes it so the thread summarization also reports the model request usage, to prevent the case where the count would appear to jump by 2 the next time a message was sent after summarization. Release Notes: - N/A --- crates/agent/src/thread.rs | 11 ++++++-- crates/language_model/src/language_model.rs | 28 +++++++++++++++------ 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs index af704fdfe3..7adc78591e 100644 --- a/crates/agent/src/thread.rs +++ b/crates/agent/src/thread.rs @@ -1302,8 +1302,15 @@ impl Thread { self.pending_summary = cx.spawn(async move |this, cx| { async move { - let stream = model.model.stream_completion_text(request, &cx); - let mut messages = stream.await?; + let stream = model.model.stream_completion_text_with_usage(request, &cx); + let (mut messages, usage) = stream.await?; + + if let Some(usage) = usage { + this.update(cx, |_thread, cx| { + cx.emit(ThreadEvent::UsageUpdated(usage)); + }) + .ok(); + } let mut new_summary = String::new(); while let Some(message) = messages.stream.next().await { diff --git a/crates/language_model/src/language_model.rs b/crates/language_model/src/language_model.rs index 88115c43fb..aa08370edf 100644 --- a/crates/language_model/src/language_model.rs +++ b/crates/language_model/src/language_model.rs @@ -262,10 +262,21 @@ pub trait LanguageModel: Send + Sync { request: LanguageModelRequest, cx: &AsyncApp, ) -> BoxFuture<'static, Result> { - let events = self.stream_completion(request, cx); + self.stream_completion_text_with_usage(request, cx) + .map(|result| result.map(|(stream, _usage)| stream)) + .boxed() + } + + fn stream_completion_text_with_usage( + &self, + request: LanguageModelRequest, + cx: &AsyncApp, + ) -> BoxFuture<'static, Result<(LanguageModelTextStream, Option)>> { + let future = self.stream_completion_with_usage(request, cx); async move { - let mut events = events.await?.fuse(); + let (events, usage) = future.await?; + let mut events = events.fuse(); let mut message_id = None; let mut first_item_text = None; let last_token_usage = Arc::new(Mutex::new(TokenUsage::default())); @@ -305,11 +316,14 @@ pub trait LanguageModel: Send + Sync { })) .boxed(); - Ok(LanguageModelTextStream { - message_id, - stream, - last_token_usage, - }) + Ok(( + LanguageModelTextStream { + message_id, + stream, + last_token_usage, + }, + usage, + )) } .boxed() }