agent: Extract usage information from response headers (#29002)

This PR updates the Agent to extract the usage information from the response headers, if they are present. For now we just log the information, but we'll be using this soon to populate some UI.

Release Notes:

- N/A
2025-04-17 16:11:07 -04:00 · 2025-04-17 16:11:07 -04:00 · d93141bded
commit d93141bded
parent b402007de6
7 changed files with 141 additions and 22 deletions
--- a/crates/agent/Cargo.toml
+++ b/crates/agent/Cargo.toml
@@ -90,6 +90,7 @@ uuid.workspace = true
 workspace-hack.workspace = true
 workspace.workspace = true
 zed_actions.workspace = true
+zed_llm_client.workspace = true

 [dev-dependencies]
 buffer_diff = { workspace = true, features = ["test-support"] }
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@@ -31,6 +31,7 @@ use settings::Settings;
 use thiserror::Error;
 use util::{ResultExt as _, TryFutureExt as _, post_inc};
 use uuid::Uuid;
+use zed_llm_client::UsageLimit;

 use crate::context::{AssistantContext, ContextId, format_context_as_string};
 use crate::thread_store::{
@@ -1070,14 +1071,22 @@ impl Thread {
    ) {
        let pending_completion_id = post_inc(&mut self.completion_count);
        let task = cx.spawn(async move |thread, cx| {
-            let stream = model.stream_completion(request, &cx);
+            let stream_completion_future = model.stream_completion_with_usage(request, &cx);
            let initial_token_usage =
                thread.read_with(cx, |thread, _cx| thread.cumulative_token_usage);
            let stream_completion = async {
-                let mut events = stream.await?;
+                let (mut events, usage) = stream_completion_future.await?;
                let mut stop_reason = StopReason::EndTurn;
                let mut current_token_usage = TokenUsage::default();

+                if let Some(usage) = usage {
+                    let limit = match usage.limit {
+                        UsageLimit::Limited(limit) => limit.to_string(),
+                        UsageLimit::Unlimited => "unlimited".to_string(),
+                    };
+                    log::info!("model request usage: {} / {}", usage.amount, limit);
+                }
+
                while let Some(event) = events.next().await {
                    let event = event?;