agent: Report usage from thread summarization requests (#29012)

This PR makes thread summarization also report its model request usage,
preventing the case where the usage count would appear to jump by 2 the
next time a message is sent after summarization.

Release Notes:

- N/A
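
For context, the intent of the fix can be sketched as below. This is a minimal, self-contained illustration only: `RequestUsage`, `report_usage`, and `summarize_thread` here are hypothetical stand-ins for Zed's real types and the actual summarization code path in the agent crate, which is asynchronous and stream-based rather than a plain function call.

// Minimal sketch of the fix's intent, using hypothetical stand-in types
// (not Zed's real API): the summarization request now hands its usage
// back to the caller so it can be reported instead of silently dropped.

#[derive(Debug, Clone, Copy)]
struct RequestUsage {
    amount: u32,
}

// Stand-in for whatever records usage against the user's displayed count.
fn report_usage(usage: RequestUsage) {
    println!("reporting {} request(s) of usage", usage.amount);
}

// Stand-in for a summarization call that, like the new
// `stream_completion_text_with_usage`, returns the text together with
// optional usage information.
fn summarize_thread() -> (String, Option<RequestUsage>) {
    (
        "A short summary of the thread.".to_string(),
        Some(RequestUsage { amount: 1 }),
    )
}

fn main() {
    let (summary, usage) = summarize_thread();

    // The fix: report the summarization request's usage right away, so the
    // displayed count no longer appears to jump by 2 the next time a
    // message is sent.
    if let Some(usage) = usage {
        report_usage(usage);
    }

    println!("summary: {summary}");
}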
Marshall Bowers 2025-04-17 19:05:12 -04:00 committed by GitHub
parent ba7f886c62
commit 676cc109a3
2 changed files with 30 additions and 9 deletions


@@ -262,10 +262,21 @@ pub trait LanguageModel: Send + Sync {
         request: LanguageModelRequest,
         cx: &AsyncApp,
     ) -> BoxFuture<'static, Result<LanguageModelTextStream>> {
-        let events = self.stream_completion(request, cx);
+        self.stream_completion_text_with_usage(request, cx)
+            .map(|result| result.map(|(stream, _usage)| stream))
+            .boxed()
+    }
+
+    fn stream_completion_text_with_usage(
+        &self,
+        request: LanguageModelRequest,
+        cx: &AsyncApp,
+    ) -> BoxFuture<'static, Result<(LanguageModelTextStream, Option<RequestUsage>)>> {
+        let future = self.stream_completion_with_usage(request, cx);

         async move {
-            let mut events = events.await?.fuse();
+            let (events, usage) = future.await?;
+            let mut events = events.fuse();
             let mut message_id = None;
             let mut first_item_text = None;
             let last_token_usage = Arc::new(Mutex::new(TokenUsage::default()));
@@ -305,11 +316,14 @@ pub trait LanguageModel: Send + Sync {
                 }))
                 .boxed();

-            Ok(LanguageModelTextStream {
-                message_id,
-                stream,
-                last_token_usage,
-            })
+            Ok((
+                LanguageModelTextStream {
+                    message_id,
+                    stream,
+                    last_token_usage,
+                },
+                usage,
+            ))
         }
         .boxed()
     }
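
Taken together, the two hunks keep `stream_completion_text` as a thin wrapper: it delegates to the new `stream_completion_text_with_usage` and discards the usage via `(stream, _usage)`, while callers that care about usage (such as the summarization path) can call the new method and receive the `Option<RequestUsage>` alongside the text stream. The sketch below shows that delegation pattern in a stripped-down form; `Model`, `TextOutput`, and `FakeModel` are simplified stand-ins, not Zed's actual `LanguageModel` trait, and the real methods return boxed futures of streams rather than plain values.

// Sketch of the delegation pattern from the diff, with simplified
// stand-in types (no futures or streams) to keep it self-contained.

#[derive(Debug, Clone, Copy)]
struct RequestUsage {
    amount: u32,
}

struct TextOutput {
    text: String,
}

trait Model {
    // Returns the output plus optional usage, mirroring the new
    // `stream_completion_text_with_usage` in this PR.
    fn completion_text_with_usage(&self, prompt: &str) -> (TextOutput, Option<RequestUsage>);

    // The pre-existing method becomes a thin wrapper that drops the
    // usage, mirroring how `stream_completion_text` now delegates and
    // maps `(stream, _usage)` down to just the stream.
    fn completion_text(&self, prompt: &str) -> TextOutput {
        let (output, _usage) = self.completion_text_with_usage(prompt);
        output
    }
}

struct FakeModel;

impl Model for FakeModel {
    fn completion_text_with_usage(&self, prompt: &str) -> (TextOutput, Option<RequestUsage>) {
        (
            TextOutput {
                text: format!("echo: {prompt}"),
            },
            Some(RequestUsage { amount: 1 }),
        )
    }
}

fn main() {
    let model = FakeModel;

    // Callers that don't need usage keep the old, simpler API.
    let output = model.completion_text("hello");
    println!("{}", output.text);

    // Callers that do (e.g. thread summarization) take the usage too.
    let (output, usage) = model.completion_text_with_usage("summarize this thread");
    println!("{} (usage: {:?})", output.text, usage);
}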