agent: Add token count in the thread view (#28037)

This PR adds the token count to the active thread view. It doesn't behave quite like Assistant 1 where it updates as you type, though; it updates after you submit the message. <img src="https://github.com/user-attachments/assets/82d2a180-554a-43ee-b776-3743359b609b" width="700" /> --- Release Notes: - agent: Add token count in the thread view --------- Co-authored-by: Agus Zubiaga <hi@aguz.me>
2025-04-03 15:43:58 -03:00 · 2025-04-03 15:43:58 -03:00 · b9724d9cbe
commit b9724d9cbe
parent e5b347b03a
5 changed files with 132 additions and 27 deletions
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@ -214,6 +214,21 @@ pub enum DetailedSummaryState {
    },
 }

+#[derive(Default)]
+pub struct TotalTokenUsage {
+    pub total: usize,
+    pub max: usize,
+    pub ratio: TokenUsageRatio,
+}
+
+#[derive(Default, PartialEq, Eq)]
+pub enum TokenUsageRatio {
+    #[default]
+    Normal,
+    Warning,
+    Exceeded,
+}
+
 /// A thread of conversation with the LLM.
 pub struct Thread {
    id: ThreadId,
@ -1723,26 +1738,33 @@ impl Thread {
        self.cumulative_token_usage.clone()
    }

-    pub fn is_getting_too_long(&self, cx: &App) -> bool {
+    pub fn total_token_usage(&self, cx: &App) -> TotalTokenUsage {
        let model_registry = LanguageModelRegistry::read_global(cx);
        let Some(model) = model_registry.active_model() else {
-            return false;
+            return TotalTokenUsage::default();
        };

-        let max_tokens = model.max_token_count();
-
-        let current_usage =
-            self.cumulative_token_usage.input_tokens + self.cumulative_token_usage.output_tokens;
+        let max = model.max_token_count();

        #[cfg(debug_assertions)]
        let warning_threshold: f32 = std::env::var("ZED_THREAD_WARNING_THRESHOLD")
-            .unwrap_or("0.9".to_string())
+            .unwrap_or("0.8".to_string())
            .parse()
            .unwrap();
        #[cfg(not(debug_assertions))]
-        let warning_threshold: f32 = 0.9;
+        let warning_threshold: f32 = 0.8;

-        current_usage as f32 >= (max_tokens as f32 * warning_threshold)
+        let total = self.cumulative_token_usage.total_tokens() as usize;
+
+        let ratio = if total >= max {
+            TokenUsageRatio::Exceeded
+        } else if total as f32 / max as f32 >= warning_threshold {
+            TokenUsageRatio::Warning
+        } else {
+            TokenUsageRatio::Normal
+        };
+
+        TotalTokenUsage { total, max, ratio }
    }

    pub fn deny_tool_use(