agent: Add token count in the thread view (#28037)

This PR adds the token count to the active thread view. It doesn't
behave quite like Assistant 1, where it updates as you type, though; it
updates after you submit the message.

<img
src="https://github.com/user-attachments/assets/82d2a180-554a-43ee-b776-3743359b609b"
width="700" />

---

Release Notes:

- agent: Add token count in the thread view

---------

Co-authored-by: Agus Zubiaga <hi@aguz.me>
This commit is contained in:
Danilo Leal 2025-04-03 15:43:58 -03:00 committed by GitHub
parent e5b347b03a
commit b9724d9cbe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 132 additions and 27 deletions

View file

@ -214,6 +214,21 @@ pub enum DetailedSummaryState {
},
}
/// Snapshot of a thread's token consumption relative to the model's limit.
///
/// Produced by `Thread::total_token_usage`. The `Default` value (all zeros,
/// [`TokenUsageRatio::Normal`]) is what that method returns when there is no
/// active model to measure against.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct TotalTokenUsage {
    /// Total number of tokens the thread has used so far.
    pub total: usize,
    /// Maximum token count reported by the active model.
    pub max: usize,
    /// Classification of `total` relative to `max`.
    pub ratio: TokenUsageRatio,
}

/// Coarse classification of how close a thread is to the model's token limit.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum TokenUsageRatio {
    /// Usage is below the warning threshold.
    #[default]
    Normal,
    /// Usage has reached the warning threshold but is still below the limit.
    Warning,
    /// Usage has reached or passed the model's maximum token count.
    Exceeded,
}
/// A thread of conversation with the LLM.
pub struct Thread {
id: ThreadId,
@ -1723,26 +1738,33 @@ impl Thread {
self.cumulative_token_usage.clone()
}
// NOTE(review): this span reads as a diff hunk whose +/- markers were stripped:
// lines of the removed `is_getting_too_long` and of its replacement
// `total_token_usage` appear interleaved (two signatures, two early returns,
// two threshold defaults). Confirm against the real file before treating it
// as compilable Rust.
pub fn is_getting_too_long(&self, cx: &App) -> bool {
/// Reports the thread's cumulative token usage against the active model's
/// maximum token count, together with a `TokenUsageRatio` classification.
///
/// Returns `TotalTokenUsage::default()` when the registry has no active model.
pub fn total_token_usage(&self, cx: &App) -> TotalTokenUsage {
let model_registry = LanguageModelRegistry::read_global(cx);
let Some(model) = model_registry.active_model() else {
return false;
return TotalTokenUsage::default();
};
// Presumably the removed side of the diff: usage was summed from input and
// output tokens by hand.
let max_tokens = model.max_token_count();
let current_usage =
self.cumulative_token_usage.input_tokens + self.cumulative_token_usage.output_tokens;
let max = model.max_token_count();
// Debug builds read the threshold from `ZED_THREAD_WARNING_THRESHOLD`, falling
// back to the default string when the variable is unset. The trailing
// `unwrap()` panics if the value does not parse as an `f32`.
#[cfg(debug_assertions)]
let warning_threshold: f32 = std::env::var("ZED_THREAD_WARNING_THRESHOLD")
.unwrap_or("0.9".to_string())
.unwrap_or("0.8".to_string())
.parse()
.unwrap();
// Non-debug builds use a fixed threshold.
#[cfg(not(debug_assertions))]
let warning_threshold: f32 = 0.9;
let warning_threshold: f32 = 0.8;
current_usage as f32 >= (max_tokens as f32 * warning_threshold)
let total = self.cumulative_token_usage.total_tokens() as usize;
// The `total >= max` check runs first, so `Exceeded` takes precedence over
// `Warning`. It also means the division below is only reached when
// `max > total`, i.e. never with a zero denominator.
let ratio = if total >= max {
TokenUsageRatio::Exceeded
} else if total as f32 / max as f32 >= warning_threshold {
TokenUsageRatio::Warning
} else {
TokenUsageRatio::Normal
};
TotalTokenUsage { total, max, ratio }
}
pub fn deny_tool_use(