agent: Add token count in the thread view (#28037)
This PR adds the token count to the active thread view. It doesn't behave quite like Assistant 1, where the count updates as you type, though; here it updates after you submit the message. <img src="https://github.com/user-attachments/assets/82d2a180-554a-43ee-b776-3743359b609b" width="700" /> --- Release Notes: - agent: Add token count in the thread view --------- Co-authored-by: Agus Zubiaga <hi@aguz.me>
This commit is contained in:
parent
e5b347b03a
commit
b9724d9cbe
5 changed files with 132 additions and 27 deletions
|
@ -1,5 +1,6 @@
|
|||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Result, anyhow};
|
||||
use assistant_context_editor::{
|
||||
|
@ -14,9 +15,9 @@ use client::zed_urls;
|
|||
use editor::{Editor, MultiBuffer};
|
||||
use fs::Fs;
|
||||
use gpui::{
|
||||
Action, AnyElement, App, AsyncWindowContext, Corner, Entity, EventEmitter, FocusHandle,
|
||||
Focusable, FontWeight, KeyContext, Pixels, Subscription, Task, UpdateGlobal, WeakEntity,
|
||||
action_with_deprecated_aliases, prelude::*,
|
||||
Action, Animation, AnimationExt as _, AnyElement, App, AsyncWindowContext, Corner, Entity,
|
||||
EventEmitter, FocusHandle, Focusable, FontWeight, KeyContext, Pixels, Subscription, Task,
|
||||
UpdateGlobal, WeakEntity, action_with_deprecated_aliases, prelude::*, pulsating_between,
|
||||
};
|
||||
use language::LanguageRegistry;
|
||||
use language_model::{LanguageModelProviderTosView, LanguageModelRegistry};
|
||||
|
@ -38,7 +39,7 @@ use crate::active_thread::ActiveThread;
|
|||
use crate::assistant_configuration::{AssistantConfiguration, AssistantConfigurationEvent};
|
||||
use crate::history_store::{HistoryEntry, HistoryStore};
|
||||
use crate::message_editor::MessageEditor;
|
||||
use crate::thread::{Thread, ThreadError, ThreadId};
|
||||
use crate::thread::{Thread, ThreadError, ThreadId, TokenUsageRatio};
|
||||
use crate::thread_history::{PastContext, PastThread, ThreadHistory};
|
||||
use crate::thread_store::ThreadStore;
|
||||
use crate::{
|
||||
|
@ -715,18 +716,21 @@ impl Panel for AssistantPanel {
|
|||
|
||||
impl AssistantPanel {
|
||||
fn render_toolbar(&self, _window: &mut Window, cx: &mut Context<Self>) -> impl IntoElement {
|
||||
let thread = self.thread.read(cx);
|
||||
let is_empty = thread.is_empty();
|
||||
let active_thread = self.thread.read(cx);
|
||||
let thread = active_thread.thread().read(cx);
|
||||
let token_usage = thread.total_token_usage(cx);
|
||||
let thread_id = thread.id().clone();
|
||||
|
||||
let thread_id = thread.thread().read(cx).id().clone();
|
||||
let is_generating = thread.is_generating();
|
||||
let is_empty = active_thread.is_empty();
|
||||
let focus_handle = self.focus_handle(cx);
|
||||
|
||||
let title = match self.active_view {
|
||||
ActiveView::Thread => {
|
||||
if is_empty {
|
||||
thread.summary_or_default(cx)
|
||||
active_thread.summary_or_default(cx)
|
||||
} else {
|
||||
thread
|
||||
active_thread
|
||||
.summary(cx)
|
||||
.unwrap_or_else(|| SharedString::from("Loading Summary…"))
|
||||
}
|
||||
|
@ -742,6 +746,12 @@ impl AssistantPanel {
|
|||
ActiveView::Configuration => "Settings".into(),
|
||||
};
|
||||
|
||||
let show_token_count = match self.active_view {
|
||||
ActiveView::Thread => !is_empty,
|
||||
ActiveView::PromptEditor => self.context_editor.is_some(),
|
||||
_ => false,
|
||||
};
|
||||
|
||||
h_flex()
|
||||
.id("assistant-toolbar")
|
||||
.h(Tab::container_height(cx))
|
||||
|
@ -764,12 +774,67 @@ impl AssistantPanel {
|
|||
.pl_2()
|
||||
.gap_2()
|
||||
.bg(cx.theme().colors().tab_bar_background)
|
||||
.children(if matches!(self.active_view, ActiveView::PromptEditor) {
|
||||
self.context_editor
|
||||
.as_ref()
|
||||
.and_then(|editor| render_remaining_tokens(editor, cx))
|
||||
} else {
|
||||
None
|
||||
.when(show_token_count, |parent| match self.active_view {
|
||||
ActiveView::Thread => {
|
||||
if token_usage.total == 0 {
|
||||
return parent;
|
||||
}
|
||||
|
||||
let token_color = match token_usage.ratio {
|
||||
TokenUsageRatio::Normal => Color::Muted,
|
||||
TokenUsageRatio::Warning => Color::Warning,
|
||||
TokenUsageRatio::Exceeded => Color::Error,
|
||||
};
|
||||
|
||||
parent.child(
|
||||
h_flex()
|
||||
.gap_0p5()
|
||||
.child(
|
||||
Label::new(assistant_context_editor::humanize_token_count(
|
||||
token_usage.total,
|
||||
))
|
||||
.size(LabelSize::Small)
|
||||
.color(token_color)
|
||||
.map(|label| {
|
||||
if is_generating {
|
||||
label
|
||||
.with_animation(
|
||||
"used-tokens-label",
|
||||
Animation::new(Duration::from_secs(2))
|
||||
.repeat()
|
||||
.with_easing(pulsating_between(
|
||||
0.6, 1.,
|
||||
)),
|
||||
|label, delta| label.alpha(delta),
|
||||
)
|
||||
.into_any()
|
||||
} else {
|
||||
label.into_any_element()
|
||||
}
|
||||
}),
|
||||
)
|
||||
.child(
|
||||
Label::new("/").size(LabelSize::Small).color(Color::Muted),
|
||||
)
|
||||
.child(
|
||||
Label::new(assistant_context_editor::humanize_token_count(
|
||||
token_usage.max,
|
||||
))
|
||||
.size(LabelSize::Small)
|
||||
.color(Color::Muted),
|
||||
),
|
||||
)
|
||||
}
|
||||
ActiveView::PromptEditor => {
|
||||
let Some(editor) = self.context_editor.as_ref() else {
|
||||
return parent;
|
||||
};
|
||||
let Some(element) = render_remaining_tokens(editor, cx) else {
|
||||
return parent;
|
||||
};
|
||||
parent.child(element)
|
||||
}
|
||||
_ => parent,
|
||||
})
|
||||
.child(
|
||||
h_flex()
|
||||
|
|
|
@ -28,7 +28,7 @@ use crate::context_picker::{ConfirmBehavior, ContextPicker, ContextPickerComplet
|
|||
use crate::context_store::{ContextStore, refresh_context_store_text};
|
||||
use crate::context_strip::{ContextStrip, ContextStripEvent, SuggestContextKind};
|
||||
use crate::profile_selector::ProfileSelector;
|
||||
use crate::thread::{RequestKind, Thread};
|
||||
use crate::thread::{RequestKind, Thread, TokenUsageRatio};
|
||||
use crate::thread_store::ThreadStore;
|
||||
use crate::{
|
||||
AgentDiff, Chat, ChatMode, NewThread, OpenAgentDiff, RemoveAllContext, ThreadEvent,
|
||||
|
@ -338,7 +338,7 @@ impl Render for MessageEditor {
|
|||
|
||||
let thread = self.thread.read(cx);
|
||||
let is_generating = thread.is_generating();
|
||||
let is_too_long = thread.is_getting_too_long(cx);
|
||||
let total_token_usage = thread.total_token_usage(cx);
|
||||
let is_model_selected = self.is_model_selected(cx);
|
||||
let is_editor_empty = self.is_editor_empty(cx);
|
||||
let needs_confirmation =
|
||||
|
@ -788,7 +788,7 @@ impl Render for MessageEditor {
|
|||
),
|
||||
)
|
||||
)
|
||||
.when(is_too_long, |parent| {
|
||||
.when(total_token_usage.ratio != TokenUsageRatio::Normal, |parent| {
|
||||
parent.child(
|
||||
h_flex()
|
||||
.p_2()
|
||||
|
|
|
@ -214,6 +214,21 @@ pub enum DetailedSummaryState {
|
|||
},
|
||||
}
|
||||
|
||||
/// Snapshot of a thread's token consumption relative to the active model's limit.
///
/// Produced by `Thread::total_token_usage`. The `Default` value (zero counts and
/// `TokenUsageRatio::Normal`) is what that method returns when no model is active.
#[derive(Default)]
|
||||
pub struct TotalTokenUsage {
|
||||
/// Tokens consumed so far, taken from the thread's cumulative usage
/// (`TokenUsage::total_tokens`, i.e. input plus output tokens).
pub total: usize,
|
||||
/// Token limit reported by the active model (`max_token_count`).
pub max: usize,
|
||||
/// Classification of `total` against `max`; see [`TokenUsageRatio`].
pub ratio: TokenUsageRatio,
|
||||
}
|
||||
|
||||
/// How close a thread's token usage is to the active model's limit.
///
/// Computed in `Thread::total_token_usage`. The toolbar maps it to a label color,
/// and the message editor shows its banner whenever the value is not `Normal`.
#[derive(Default, PartialEq, Eq)]
|
||||
pub enum TokenUsageRatio {
|
||||
/// Usage is below the warning threshold. This is the default.
#[default]
|
||||
Normal,
|
||||
/// `total / max` has reached the warning threshold but `total` is still below `max`.
Warning,
|
||||
/// `total` is greater than or equal to `max`.
Exceeded,
|
||||
}
|
||||
|
||||
/// A thread of conversation with the LLM.
|
||||
pub struct Thread {
|
||||
id: ThreadId,
|
||||
|
@ -1723,26 +1738,33 @@ impl Thread {
|
|||
self.cumulative_token_usage.clone()
|
||||
}
|
||||
|
||||
pub fn is_getting_too_long(&self, cx: &App) -> bool {
|
||||
pub fn total_token_usage(&self, cx: &App) -> TotalTokenUsage {
|
||||
let model_registry = LanguageModelRegistry::read_global(cx);
|
||||
let Some(model) = model_registry.active_model() else {
|
||||
return false;
|
||||
return TotalTokenUsage::default();
|
||||
};
|
||||
|
||||
let max_tokens = model.max_token_count();
|
||||
|
||||
let current_usage =
|
||||
self.cumulative_token_usage.input_tokens + self.cumulative_token_usage.output_tokens;
|
||||
let max = model.max_token_count();
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
let warning_threshold: f32 = std::env::var("ZED_THREAD_WARNING_THRESHOLD")
|
||||
.unwrap_or("0.9".to_string())
|
||||
.unwrap_or("0.8".to_string())
|
||||
.parse()
|
||||
.unwrap();
|
||||
#[cfg(not(debug_assertions))]
|
||||
let warning_threshold: f32 = 0.9;
|
||||
let warning_threshold: f32 = 0.8;
|
||||
|
||||
current_usage as f32 >= (max_tokens as f32 * warning_threshold)
|
||||
let total = self.cumulative_token_usage.total_tokens() as usize;
|
||||
|
||||
let ratio = if total >= max {
|
||||
TokenUsageRatio::Exceeded
|
||||
} else if total as f32 / max as f32 >= warning_threshold {
|
||||
TokenUsageRatio::Warning
|
||||
} else {
|
||||
TokenUsageRatio::Normal
|
||||
};
|
||||
|
||||
TotalTokenUsage { total, max, ratio }
|
||||
}
|
||||
|
||||
pub fn deny_tool_use(
|
||||
|
|
|
@ -3703,6 +3703,18 @@ pub fn humanize_token_count(count: usize) -> String {
|
|||
format!("{}.{}k", thousands, hundreds)
|
||||
}
|
||||
}
|
||||
1_000_000..=9_999_999 => {
|
||||
let millions = count / 1_000_000;
|
||||
let hundred_thousands = (count % 1_000_000 + 50_000) / 100_000;
|
||||
if hundred_thousands == 0 {
|
||||
format!("{}M", millions)
|
||||
} else if hundred_thousands == 10 {
|
||||
format!("{}M", millions + 1)
|
||||
} else {
|
||||
format!("{}.{}M", millions, hundred_thousands)
|
||||
}
|
||||
}
|
||||
10_000_000.. => format!("{}M", (count + 500_000) / 1_000_000),
|
||||
_ => format!("{}k", (count + 500) / 1000),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -95,6 +95,12 @@ pub struct TokenUsage {
|
|||
pub cache_read_input_tokens: u32,
|
||||
}
|
||||
|
||||
impl TokenUsage {
|
||||
/// Returns the sum of `input_tokens` and `output_tokens`.
///
/// Other counters on this struct (e.g. `cache_read_input_tokens`) are not
/// included in the sum.
pub fn total_tokens(&self) -> u32 {
|
||||
self.input_tokens + self.output_tokens
|
||||
}
|
||||
}
|
||||
|
||||
impl Add<TokenUsage> for TokenUsage {
|
||||
type Output = Self;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue