agent: Extract usage information from response headers (#29002)

This PR updates the Agent to extract the usage information from the
response headers, if they are present.

For now we just log the information, but we'll be using this soon to
populate some UI.

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2025-04-17 16:11:07 -04:00 committed by GitHub
parent b402007de6
commit d93141bded
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 141 additions and 22 deletions

View file

@@ -90,6 +90,7 @@ uuid.workspace = true
workspace-hack.workspace = true
workspace.workspace = true
zed_actions.workspace = true
zed_llm_client.workspace = true
[dev-dependencies]
buffer_diff = { workspace = true, features = ["test-support"] }

View file

@@ -31,6 +31,7 @@ use settings::Settings;
use thiserror::Error;
use util::{ResultExt as _, TryFutureExt as _, post_inc};
use uuid::Uuid;
use zed_llm_client::UsageLimit;
use crate::context::{AssistantContext, ContextId, format_context_as_string};
use crate::thread_store::{
@@ -1070,14 +1071,22 @@ impl Thread {
) {
let pending_completion_id = post_inc(&mut self.completion_count);
let task = cx.spawn(async move |thread, cx| {
let stream = model.stream_completion(request, &cx);
let stream_completion_future = model.stream_completion_with_usage(request, &cx);
let initial_token_usage =
thread.read_with(cx, |thread, _cx| thread.cumulative_token_usage);
let stream_completion = async {
let mut events = stream.await?;
let (mut events, usage) = stream_completion_future.await?;
let mut stop_reason = StopReason::EndTurn;
let mut current_token_usage = TokenUsage::default();
if let Some(usage) = usage {
let limit = match usage.limit {
UsageLimit::Limited(limit) => limit.to_string(),
UsageLimit::Unlimited => "unlimited".to_string(),
};
log::info!("model request usage: {} / {}", usage.amount, limit);
}
while let Some(event) = events.next().await {
let event = event?;