From 5405c2c2d3d3939c8da20dd1b6f188681b5705c3 Mon Sep 17 00:00:00 2001
From: Richard Feldman
Date: Tue, 17 Jun 2025 10:43:07 -0400
Subject: [PATCH] Standardize on u64 for token counts (#32869)

Previously we were using a mix of `u32` and `usize` for token counts, e.g.
`max_tokens: usize, max_output_tokens: Option<u32>` in the same `struct`.

Although [tiktoken](https://github.com/openai/tiktoken) uses `usize`, token
counts should be consistent across targets (the same model shouldn't suddenly
get a smaller context window just because you're compiling for wasm32), and
these counts could end up getting serialized using a binary protocol, so
`usize` is not the right choice for token counts.

I chose to standardize on `u64` over `u32` because we don't store many of them
(so the extra size should be insignificant) and future models may exceed
`u32::MAX` tokens.

Release Notes:

- N/A
---
 crates/agent/src/active_thread.rs | 4 +--
 crates/agent/src/message_editor.rs | 4 +--
 crates/agent/src/thread.rs | 12 ++++-----
 crates/agent/src/tool_use.rs | 2 +-
 crates/anthropic/src/anthropic.rs | 26 +++++++++----------
 .../assistant_context_editor/src/context.rs | 4 +--
 .../src/context_editor.rs | 14 +++++-----
 .../src/language_model_selector.rs | 4 +--
 crates/bedrock/src/bedrock.rs | 2 +-
 crates/bedrock/src/models.rs | 8 +++---
 crates/copilot/src/copilot_chat.rs | 12 ++++-----
 crates/deepseek/src/deepseek.rs | 10 +++----
 crates/google_ai/src/google_ai.rs | 22 ++++++++--------
 crates/language_model/src/fake_provider.rs | 4 +--
 crates/language_model/src/language_model.rs | 20 +++++++-------
 .../language_models/src/provider/anthropic.rs | 16 ++++++------
 .../language_models/src/provider/bedrock.rs | 20 +++++++-------
 crates/language_models/src/provider/cloud.rs | 14 +++++-----
 .../src/provider/copilot_chat.rs | 4 +--
 .../language_models/src/provider/deepseek.rs | 14 +++++-----
 crates/language_models/src/provider/google.rs | 16 ++++++------
 .../language_models/src/provider/lmstudio.rs | 8 +++---
 .../language_models/src/provider/mistral.rs | 16 ++++++------
 crates/language_models/src/provider/ollama.rs | 8 +++---
 .../language_models/src/provider/open_ai.rs | 17 ++++++------
 .../src/provider/open_router.rs | 18 ++++++-------
 crates/lmstudio/src/lmstudio.rs | 16 ++++++------
 crates/mistral/src/mistral.rs | 18 ++++++-------
 crates/ollama/src/ollama.rs | 18 ++++++-------
 crates/open_ai/src/open_ai.rs | 12 ++++-----
 crates/open_router/src/open_router.rs | 18 ++++++-------
 crates/rules_library/src/rules_library.rs | 2 +-
 32 files changed, 191 insertions(+), 192 deletions(-)

diff --git a/crates/agent/src/active_thread.rs b/crates/agent/src/active_thread.rs
index ce73885309..d69edb151f 100644
--- a/crates/agent/src/active_thread.rs
+++ b/crates/agent/src/active_thread.rs
@@ -750,7 +750,7 @@ struct EditingMessageState {
     editor: Entity,
     context_strip: Entity,
     context_picker_menu_handle: PopoverMenuHandle,
-    last_estimated_token_count: Option<usize>,
+    last_estimated_token_count: Option<u64>,
     _subscriptions: [Subscription; 2],
     _update_token_count_task: Option>,
 }
@@ -857,7 +857,7 @@ impl ActiveThread {
     }

     /// Returns the editing message id and the estimated token count in the content
-    pub fn editing_message_id(&self) -> Option<(MessageId, usize)> {
+    pub fn editing_message_id(&self) -> Option<(MessageId, u64)> {
         self.editing_message
             .as_ref()
             .map(|(id, state)| (*id, state.last_estimated_token_count.unwrap_or(0)))
diff --git a/crates/agent/src/message_editor.rs b/crates/agent/src/message_editor.rs
index 288b2b650b..389e0b41b9 100644
---
a/crates/agent/src/message_editor.rs +++ b/crates/agent/src/message_editor.rs @@ -76,7 +76,7 @@ pub struct MessageEditor { profile_selector: Entity, edits_expanded: bool, editor_is_expanded: bool, - last_estimated_token_count: Option, + last_estimated_token_count: Option, update_token_count_task: Option>, _subscriptions: Vec, } @@ -1335,7 +1335,7 @@ impl MessageEditor { ) } - pub fn last_estimated_token_count(&self) -> Option { + pub fn last_estimated_token_count(&self) -> Option { self.last_estimated_token_count } diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs index 43b6571d32..87c70b19d3 100644 --- a/crates/agent/src/thread.rs +++ b/crates/agent/src/thread.rs @@ -272,8 +272,8 @@ impl DetailedSummaryState { #[derive(Default, Debug)] pub struct TotalTokenUsage { - pub total: usize, - pub max: usize, + pub total: u64, + pub max: u64, } impl TotalTokenUsage { @@ -299,7 +299,7 @@ impl TotalTokenUsage { } } - pub fn add(&self, tokens: usize) -> TotalTokenUsage { + pub fn add(&self, tokens: u64) -> TotalTokenUsage { TotalTokenUsage { total: self.total + tokens, max: self.max, @@ -396,7 +396,7 @@ pub struct ExceededWindowError { /// Model used when last message exceeded context window model_id: LanguageModelId, /// Token count including last message - token_count: usize, + token_count: u64, } impl Thread { @@ -2769,7 +2769,7 @@ impl Thread { .unwrap_or_default(); TotalTokenUsage { - total: token_usage.total_tokens() as usize, + total: token_usage.total_tokens(), max, } } @@ -2791,7 +2791,7 @@ impl Thread { let total = self .token_usage_at_last_message() .unwrap_or_default() - .total_tokens() as usize; + .total_tokens(); Some(TotalTokenUsage { total, max }) } diff --git a/crates/agent/src/tool_use.rs b/crates/agent/src/tool_use.rs index da6adc07f0..ef54a571cb 100644 --- a/crates/agent/src/tool_use.rs +++ b/crates/agent/src/tool_use.rs @@ -427,7 +427,7 @@ impl ToolUseState { // Protect from overly large output let tool_output_limit = configured_model - .map(|model| model.model.max_token_count() * BYTES_PER_TOKEN_ESTIMATE) + .map(|model| model.model.max_token_count() as usize * BYTES_PER_TOKEN_ESTIMATE) .unwrap_or(usize::MAX); let content = match tool_result { diff --git a/crates/anthropic/src/anthropic.rs b/crates/anthropic/src/anthropic.rs index dd5a801936..97ebec710a 100644 --- a/crates/anthropic/src/anthropic.rs +++ b/crates/anthropic/src/anthropic.rs @@ -15,7 +15,7 @@ pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com"; #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] #[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)] pub struct AnthropicModelCacheConfiguration { - pub min_total_token: usize, + pub min_total_token: u64, pub should_speculate: bool, pub max_cache_anchors: usize, } @@ -68,14 +68,14 @@ pub enum Model { #[serde(rename = "custom")] Custom { name: String, - max_tokens: usize, + max_tokens: u64, /// The name displayed in the UI, such as in the assistant panel model dropdown menu. display_name: Option, /// Override this model with a different Anthropic model for tool calls. tool_override: Option, /// Indicates whether this custom model supports caching. 
cache_configuration: Option, - max_output_tokens: Option, + max_output_tokens: Option, default_temperature: Option, #[serde(default)] extra_beta_headers: Vec, @@ -211,7 +211,7 @@ impl Model { } } - pub fn max_token_count(&self) -> usize { + pub fn max_token_count(&self) -> u64 { match self { Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking @@ -228,7 +228,7 @@ impl Model { } } - pub fn max_output_tokens(&self) -> u32 { + pub fn max_output_tokens(&self) -> u64 { match self { Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking @@ -693,7 +693,7 @@ pub enum StringOrContents { #[derive(Debug, Serialize, Deserialize)] pub struct Request { pub model: String, - pub max_tokens: u32, + pub max_tokens: u64, pub messages: Vec, #[serde(default, skip_serializing_if = "Vec::is_empty")] pub tools: Vec, @@ -730,13 +730,13 @@ pub struct Metadata { #[derive(Debug, Serialize, Deserialize, Default)] pub struct Usage { #[serde(default, skip_serializing_if = "Option::is_none")] - pub input_tokens: Option, + pub input_tokens: Option, #[serde(default, skip_serializing_if = "Option::is_none")] - pub output_tokens: Option, + pub output_tokens: Option, #[serde(default, skip_serializing_if = "Option::is_none")] - pub cache_creation_input_tokens: Option, + pub cache_creation_input_tokens: Option, #[serde(default, skip_serializing_if = "Option::is_none")] - pub cache_read_input_tokens: Option, + pub cache_read_input_tokens: Option, } #[derive(Debug, Serialize, Deserialize)] @@ -846,7 +846,7 @@ impl ApiError { matches!(self.error_type.as_str(), "rate_limit_error") } - pub fn match_window_exceeded(&self) -> Option { + pub fn match_window_exceeded(&self) -> Option { let Some(ApiErrorCode::InvalidRequestError) = self.code() else { return None; }; @@ -855,12 +855,12 @@ impl ApiError { } } -pub fn parse_prompt_too_long(message: &str) -> Option { +pub fn parse_prompt_too_long(message: &str) -> Option { message .strip_prefix("prompt is too long: ")? .split_once(" tokens")? 
.0 - .parse::() + .parse() .ok() } diff --git a/crates/assistant_context_editor/src/context.rs b/crates/assistant_context_editor/src/context.rs index 2a833eb130..ef78d1b6e6 100644 --- a/crates/assistant_context_editor/src/context.rs +++ b/crates/assistant_context_editor/src/context.rs @@ -678,7 +678,7 @@ pub struct AssistantContext { summary_task: Task>, completion_count: usize, pending_completions: Vec, - token_count: Option, + token_count: Option, pending_token_count: Task>, pending_save: Task>, pending_cache_warming_task: Task>, @@ -1250,7 +1250,7 @@ impl AssistantContext { } } - pub fn token_count(&self) -> Option { + pub fn token_count(&self) -> Option { self.token_count } diff --git a/crates/assistant_context_editor/src/context_editor.rs b/crates/assistant_context_editor/src/context_editor.rs index 24e59e449c..105778117e 100644 --- a/crates/assistant_context_editor/src/context_editor.rs +++ b/crates/assistant_context_editor/src/context_editor.rs @@ -3121,12 +3121,12 @@ fn invoked_slash_command_fold_placeholder( enum TokenState { NoTokensLeft { - max_token_count: usize, - token_count: usize, + max_token_count: u64, + token_count: u64, }, HasMoreTokens { - max_token_count: usize, - token_count: usize, + max_token_count: u64, + token_count: u64, over_warn_threshold: bool, }, } @@ -3139,9 +3139,7 @@ fn token_state(context: &Entity, cx: &App) -> Option) -> Size { } } -pub fn humanize_token_count(count: usize) -> String { +pub fn humanize_token_count(count: u64) -> String { match count { 0..=999 => count.to_string(), 1000..=9999 => { diff --git a/crates/assistant_context_editor/src/language_model_selector.rs b/crates/assistant_context_editor/src/language_model_selector.rs index 732d8a326e..128d801c0c 100644 --- a/crates/assistant_context_editor/src/language_model_selector.rs +++ b/crates/assistant_context_editor/src/language_model_selector.rs @@ -664,7 +664,7 @@ mod tests { format!("{}/{}", self.provider_id.0, self.name.0) } - fn max_token_count(&self) -> usize { + fn max_token_count(&self) -> u64 { 1000 } @@ -672,7 +672,7 @@ mod tests { &self, _: LanguageModelRequest, _: &App, - ) -> BoxFuture<'static, http_client::Result> { + ) -> BoxFuture<'static, http_client::Result> { unimplemented!() } diff --git a/crates/bedrock/src/bedrock.rs b/crates/bedrock/src/bedrock.rs index 11c54fa30e..e32a456dba 100644 --- a/crates/bedrock/src/bedrock.rs +++ b/crates/bedrock/src/bedrock.rs @@ -152,7 +152,7 @@ pub enum Thinking { #[derive(Debug)] pub struct Request { pub model: String, - pub max_tokens: u32, + pub max_tokens: u64, pub messages: Vec, pub tools: Option, pub thinking: Option, diff --git a/crates/bedrock/src/models.rs b/crates/bedrock/src/models.rs index ec494c8566..7b4e7e8b43 100644 --- a/crates/bedrock/src/models.rs +++ b/crates/bedrock/src/models.rs @@ -99,10 +99,10 @@ pub enum Model { #[serde(rename = "custom")] Custom { name: String, - max_tokens: usize, + max_tokens: u64, /// The name displayed in the UI, such as in the assistant panel model dropdown menu. 
display_name: Option, - max_output_tokens: Option, + max_output_tokens: Option, default_temperature: Option, }, } @@ -309,7 +309,7 @@ impl Model { } } - pub fn max_token_count(&self) -> usize { + pub fn max_token_count(&self) -> u64 { match self { Self::Claude3_5SonnetV2 | Self::Claude3Opus @@ -328,7 +328,7 @@ impl Model { } } - pub fn max_output_tokens(&self) -> u32 { + pub fn max_output_tokens(&self) -> u64 { match self { Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3_5Haiku => 4_096, Self::Claude3_7Sonnet diff --git a/crates/copilot/src/copilot_chat.rs b/crates/copilot/src/copilot_chat.rs index c89e4cdb98..119098ef02 100644 --- a/crates/copilot/src/copilot_chat.rs +++ b/crates/copilot/src/copilot_chat.rs @@ -126,7 +126,7 @@ struct ModelLimits { #[serde(default)] max_output_tokens: usize, #[serde(default)] - max_prompt_tokens: usize, + max_prompt_tokens: u64, } #[derive(Clone, Serialize, Deserialize, Debug, Eq, PartialEq)] @@ -182,7 +182,7 @@ impl Model { self.name.as_str() } - pub fn max_token_count(&self) -> usize { + pub fn max_token_count(&self) -> u64 { self.capabilities.limits.max_prompt_tokens } @@ -316,15 +316,15 @@ pub struct ResponseEvent { #[derive(Deserialize, Debug)] pub struct Usage { - pub completion_tokens: u32, - pub prompt_tokens: u32, + pub completion_tokens: u64, + pub prompt_tokens: u64, pub prompt_tokens_details: PromptTokensDetails, - pub total_tokens: u32, + pub total_tokens: u64, } #[derive(Deserialize, Debug)] pub struct PromptTokensDetails { - pub cached_tokens: u32, + pub cached_tokens: u64, } #[derive(Debug, Deserialize)] diff --git a/crates/deepseek/src/deepseek.rs b/crates/deepseek/src/deepseek.rs index 0d638002e7..22bde8e594 100644 --- a/crates/deepseek/src/deepseek.rs +++ b/crates/deepseek/src/deepseek.rs @@ -58,8 +58,8 @@ pub enum Model { name: String, /// The name displayed in the UI, such as in the assistant panel model dropdown menu. display_name: Option, - max_tokens: usize, - max_output_tokens: Option, + max_tokens: u64, + max_output_tokens: Option, }, } @@ -94,14 +94,14 @@ impl Model { } } - pub fn max_token_count(&self) -> usize { + pub fn max_token_count(&self) -> u64 { match self { Self::Chat | Self::Reasoner => 64_000, Self::Custom { max_tokens, .. 
} => *max_tokens, } } - pub fn max_output_tokens(&self) -> Option { + pub fn max_output_tokens(&self) -> Option { match self { Self::Chat => Some(8_192), Self::Reasoner => Some(8_192), @@ -118,7 +118,7 @@ pub struct Request { pub messages: Vec, pub stream: bool, #[serde(default, skip_serializing_if = "Option::is_none")] - pub max_tokens: Option, + pub max_tokens: Option, #[serde(default, skip_serializing_if = "Option::is_none")] pub temperature: Option, #[serde(default, skip_serializing_if = "Option::is_none")] diff --git a/crates/google_ai/src/google_ai.rs b/crates/google_ai/src/google_ai.rs index a187b0043e..b8e97cfa99 100644 --- a/crates/google_ai/src/google_ai.rs +++ b/crates/google_ai/src/google_ai.rs @@ -276,17 +276,17 @@ pub struct PromptFeedback { #[serde(rename_all = "camelCase")] pub struct UsageMetadata { #[serde(skip_serializing_if = "Option::is_none")] - pub prompt_token_count: Option, + pub prompt_token_count: Option, #[serde(skip_serializing_if = "Option::is_none")] - pub cached_content_token_count: Option, + pub cached_content_token_count: Option, #[serde(skip_serializing_if = "Option::is_none")] - pub candidates_token_count: Option, + pub candidates_token_count: Option, #[serde(skip_serializing_if = "Option::is_none")] - pub tool_use_prompt_token_count: Option, + pub tool_use_prompt_token_count: Option, #[serde(skip_serializing_if = "Option::is_none")] - pub thoughts_token_count: Option, + pub thoughts_token_count: Option, #[serde(skip_serializing_if = "Option::is_none")] - pub total_token_count: Option, + pub total_token_count: Option, } #[derive(Debug, Serialize, Deserialize)] @@ -395,7 +395,7 @@ pub struct CountTokensRequest { #[derive(Debug, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct CountTokensResponse { - pub total_tokens: usize, + pub total_tokens: u64, } #[derive(Debug, Serialize, Deserialize)] @@ -523,7 +523,7 @@ pub enum Model { name: String, /// The name displayed in the UI, such as in the assistant panel model dropdown menu. 
display_name: Option, - max_tokens: usize, + max_tokens: u64, #[serde(default)] mode: GoogleModelMode, }, @@ -586,9 +586,9 @@ impl Model { } } - pub fn max_token_count(&self) -> usize { - const ONE_MILLION: usize = 1_048_576; - const TWO_MILLION: usize = 2_097_152; + pub fn max_token_count(&self) -> u64 { + const ONE_MILLION: u64 = 1_048_576; + const TWO_MILLION: u64 = 2_097_152; match self { Model::Gemini15Pro => TWO_MILLION, Model::Gemini15Flash => ONE_MILLION, diff --git a/crates/language_model/src/fake_provider.rs b/crates/language_model/src/fake_provider.rs index f04f568b72..f5191016d8 100644 --- a/crates/language_model/src/fake_provider.rs +++ b/crates/language_model/src/fake_provider.rs @@ -169,11 +169,11 @@ impl LanguageModel for FakeLanguageModel { "fake".to_string() } - fn max_token_count(&self) -> usize { + fn max_token_count(&self) -> u64 { 1000000 } - fn count_tokens(&self, _: LanguageModelRequest, _: &App) -> BoxFuture<'static, Result> { + fn count_tokens(&self, _: LanguageModelRequest, _: &App) -> BoxFuture<'static, Result> { futures::future::ready(Ok(0)).boxed() } diff --git a/crates/language_model/src/language_model.rs b/crates/language_model/src/language_model.rs index 01f005d73c..c411593213 100644 --- a/crates/language_model/src/language_model.rs +++ b/crates/language_model/src/language_model.rs @@ -53,7 +53,7 @@ pub fn init_settings(cx: &mut App) { pub struct LanguageModelCacheConfiguration { pub max_cache_anchors: usize, pub should_speculate: bool, - pub min_total_token: usize, + pub min_total_token: u64, } /// A completion event from a language model. @@ -135,17 +135,17 @@ impl RequestUsage { #[derive(Debug, PartialEq, Clone, Copy, Serialize, Deserialize, Default)] pub struct TokenUsage { #[serde(default, skip_serializing_if = "is_default")] - pub input_tokens: u32, + pub input_tokens: u64, #[serde(default, skip_serializing_if = "is_default")] - pub output_tokens: u32, + pub output_tokens: u64, #[serde(default, skip_serializing_if = "is_default")] - pub cache_creation_input_tokens: u32, + pub cache_creation_input_tokens: u64, #[serde(default, skip_serializing_if = "is_default")] - pub cache_read_input_tokens: u32, + pub cache_read_input_tokens: u64, } impl TokenUsage { - pub fn total_tokens(&self) -> u32 { + pub fn total_tokens(&self) -> u64 { self.input_tokens + self.output_tokens + self.cache_read_input_tokens @@ -254,8 +254,8 @@ pub trait LanguageModel: Send + Sync { LanguageModelToolSchemaFormat::JsonSchema } - fn max_token_count(&self) -> usize; - fn max_output_tokens(&self) -> Option { + fn max_token_count(&self) -> u64; + fn max_output_tokens(&self) -> Option { None } @@ -263,7 +263,7 @@ pub trait LanguageModel: Send + Sync { &self, request: LanguageModelRequest, cx: &App, - ) -> BoxFuture<'static, Result>; + ) -> BoxFuture<'static, Result>; fn stream_completion( &self, @@ -349,7 +349,7 @@ pub trait LanguageModel: Send + Sync { #[derive(Debug, Error)] pub enum LanguageModelKnownError { #[error("Context window limit exceeded ({tokens})")] - ContextWindowLimitExceeded { tokens: usize }, + ContextWindowLimitExceeded { tokens: u64 }, } pub trait LanguageModelTool: 'static + DeserializeOwned + JsonSchema { diff --git a/crates/language_models/src/provider/anthropic.rs b/crates/language_models/src/provider/anthropic.rs index c581f01f4c..a8423fefa5 100644 --- a/crates/language_models/src/provider/anthropic.rs +++ b/crates/language_models/src/provider/anthropic.rs @@ -51,12 +51,12 @@ pub struct AvailableModel { /// The model's name in Zed's UI, such as in the model 
selector dropdown menu in the assistant panel. pub display_name: Option, /// The model's context window size. - pub max_tokens: usize, + pub max_tokens: u64, /// A model `name` to substitute when calling tools, in case the primary model doesn't support tool calling. pub tool_override: Option, /// Configuration of Anthropic's caching API. pub cache_configuration: Option, - pub max_output_tokens: Option, + pub max_output_tokens: Option, pub default_temperature: Option, #[serde(default)] pub extra_beta_headers: Vec, @@ -321,7 +321,7 @@ pub struct AnthropicModel { pub fn count_anthropic_tokens( request: LanguageModelRequest, cx: &App, -) -> BoxFuture<'static, Result> { +) -> BoxFuture<'static, Result> { cx.background_spawn(async move { let messages = request.messages; let mut tokens_from_images = 0; @@ -377,7 +377,7 @@ pub fn count_anthropic_tokens( // Tiktoken doesn't yet support these models, so we manually use the // same tokenizer as GPT-4. tiktoken_rs::num_tokens_from_messages("gpt-4", &string_messages) - .map(|tokens| tokens + tokens_from_images) + .map(|tokens| (tokens + tokens_from_images) as u64) }) .boxed() } @@ -461,11 +461,11 @@ impl LanguageModel for AnthropicModel { self.state.read(cx).api_key.clone() } - fn max_token_count(&self) -> usize { + fn max_token_count(&self) -> u64 { self.model.max_token_count() } - fn max_output_tokens(&self) -> Option { + fn max_output_tokens(&self) -> Option { Some(self.model.max_output_tokens()) } @@ -473,7 +473,7 @@ impl LanguageModel for AnthropicModel { &self, request: LanguageModelRequest, cx: &App, - ) -> BoxFuture<'static, Result> { + ) -> BoxFuture<'static, Result> { count_anthropic_tokens(request, cx) } @@ -518,7 +518,7 @@ pub fn into_anthropic( request: LanguageModelRequest, model: String, default_temperature: f32, - max_output_tokens: u32, + max_output_tokens: u64, mode: AnthropicModelMode, ) -> anthropic::Request { let mut new_messages: Vec = Vec::new(); diff --git a/crates/language_models/src/provider/bedrock.rs b/crates/language_models/src/provider/bedrock.rs index 8ec659c97a..c377e614c1 100644 --- a/crates/language_models/src/provider/bedrock.rs +++ b/crates/language_models/src/provider/bedrock.rs @@ -88,9 +88,9 @@ pub enum BedrockAuthMethod { pub struct AvailableModel { pub name: String, pub display_name: Option, - pub max_tokens: usize, + pub max_tokens: u64, pub cache_configuration: Option, - pub max_output_tokens: Option, + pub max_output_tokens: Option, pub default_temperature: Option, pub mode: Option, } @@ -503,11 +503,11 @@ impl LanguageModel for BedrockModel { format!("bedrock/{}", self.model.id()) } - fn max_token_count(&self) -> usize { + fn max_token_count(&self) -> u64 { self.model.max_token_count() } - fn max_output_tokens(&self) -> Option { + fn max_output_tokens(&self) -> Option { Some(self.model.max_output_tokens()) } @@ -515,7 +515,7 @@ impl LanguageModel for BedrockModel { &self, request: LanguageModelRequest, cx: &App, - ) -> BoxFuture<'static, Result> { + ) -> BoxFuture<'static, Result> { get_bedrock_tokens(request, cx) } @@ -583,7 +583,7 @@ pub fn into_bedrock( request: LanguageModelRequest, model: String, default_temperature: f32, - max_output_tokens: u32, + max_output_tokens: u64, mode: BedrockModelMode, ) -> Result { let mut new_messages: Vec = Vec::new(); @@ -747,7 +747,7 @@ pub fn into_bedrock( pub fn get_bedrock_tokens( request: LanguageModelRequest, cx: &App, -) -> BoxFuture<'static, Result> { +) -> BoxFuture<'static, Result> { cx.background_executor() .spawn(async move { let messages = request.messages; 
@@ -799,7 +799,7 @@ pub fn get_bedrock_tokens( // Tiktoken doesn't yet support these models, so we manually use the // same tokenizer as GPT-4. tiktoken_rs::num_tokens_from_messages("gpt-4", &string_messages) - .map(|tokens| tokens + tokens_from_images) + .map(|tokens| (tokens + tokens_from_images) as u64) }) .boxed() } @@ -947,9 +947,9 @@ pub fn map_to_language_model_completion_events( let completion_event = LanguageModelCompletionEvent::UsageUpdate( TokenUsage { - input_tokens: metadata.input_tokens as u32, + input_tokens: metadata.input_tokens as u64, output_tokens: metadata.output_tokens - as u32, + as u64, cache_creation_input_tokens: default(), cache_read_input_tokens: default(), }, diff --git a/crates/language_models/src/provider/cloud.rs b/crates/language_models/src/provider/cloud.rs index b04971c5ed..59a5537ae9 100644 --- a/crates/language_models/src/provider/cloud.rs +++ b/crates/language_models/src/provider/cloud.rs @@ -73,9 +73,9 @@ pub struct AvailableModel { /// The size of the context window, indicating the maximum number of tokens the model can process. pub max_tokens: usize, /// The maximum number of output tokens allowed by the model. - pub max_output_tokens: Option, + pub max_output_tokens: Option, /// The maximum number of completion tokens allowed by the model (o1-* only) - pub max_completion_tokens: Option, + pub max_completion_tokens: Option, /// Override this model with a different Anthropic model for tool calls. pub tool_override: Option, /// Indicates whether this custom model supports caching. @@ -715,8 +715,8 @@ impl LanguageModel for CloudLanguageModel { } } - fn max_token_count(&self) -> usize { - self.model.max_token_count + fn max_token_count(&self) -> u64 { + self.model.max_token_count as u64 } fn cache_configuration(&self) -> Option { @@ -737,7 +737,7 @@ impl LanguageModel for CloudLanguageModel { &self, request: LanguageModelRequest, cx: &App, - ) -> BoxFuture<'static, Result> { + ) -> BoxFuture<'static, Result> { match self.model.provider { zed_llm_client::LanguageModelProvider::Anthropic => count_anthropic_tokens(request, cx), zed_llm_client::LanguageModelProvider::OpenAi => { @@ -786,7 +786,7 @@ impl LanguageModel for CloudLanguageModel { let response_body: CountTokensResponse = serde_json::from_str(&response_body)?; - Ok(response_body.tokens) + Ok(response_body.tokens as u64) } else { Err(anyhow!(ApiError { status, @@ -821,7 +821,7 @@ impl LanguageModel for CloudLanguageModel { request, self.model.id.to_string(), 1.0, - self.model.max_output_tokens as u32, + self.model.max_output_tokens as u64, if self.model.id.0.ends_with("-thinking") { AnthropicModelMode::Thinking { budget_tokens: Some(4_096), diff --git a/crates/language_models/src/provider/copilot_chat.rs b/crates/language_models/src/provider/copilot_chat.rs index e0ccbcbae6..ce1b168e30 100644 --- a/crates/language_models/src/provider/copilot_chat.rs +++ b/crates/language_models/src/provider/copilot_chat.rs @@ -237,7 +237,7 @@ impl LanguageModel for CopilotChatLanguageModel { format!("copilot_chat/{}", self.model.id()) } - fn max_token_count(&self) -> usize { + fn max_token_count(&self) -> u64 { self.model.max_token_count() } @@ -245,7 +245,7 @@ impl LanguageModel for CopilotChatLanguageModel { &self, request: LanguageModelRequest, cx: &App, - ) -> BoxFuture<'static, Result> { + ) -> BoxFuture<'static, Result> { match self.model.vendor() { ModelVendor::Anthropic => count_anthropic_tokens(request, cx), ModelVendor::Google => count_google_tokens(request, cx), diff --git 
a/crates/language_models/src/provider/deepseek.rs b/crates/language_models/src/provider/deepseek.rs index 4a4faa13a7..10030c9091 100644 --- a/crates/language_models/src/provider/deepseek.rs +++ b/crates/language_models/src/provider/deepseek.rs @@ -49,8 +49,8 @@ pub struct DeepSeekSettings { pub struct AvailableModel { pub name: String, pub display_name: Option, - pub max_tokens: usize, - pub max_output_tokens: Option, + pub max_tokens: u64, + pub max_output_tokens: Option, } pub struct DeepSeekLanguageModelProvider { @@ -306,11 +306,11 @@ impl LanguageModel for DeepSeekLanguageModel { format!("deepseek/{}", self.model.id()) } - fn max_token_count(&self) -> usize { + fn max_token_count(&self) -> u64 { self.model.max_token_count() } - fn max_output_tokens(&self) -> Option { + fn max_output_tokens(&self) -> Option { self.model.max_output_tokens() } @@ -318,7 +318,7 @@ impl LanguageModel for DeepSeekLanguageModel { &self, request: LanguageModelRequest, cx: &App, - ) -> BoxFuture<'static, Result> { + ) -> BoxFuture<'static, Result> { cx.background_spawn(async move { let messages = request .messages @@ -335,7 +335,7 @@ impl LanguageModel for DeepSeekLanguageModel { }) .collect::>(); - tiktoken_rs::num_tokens_from_messages("gpt-4", &messages) + tiktoken_rs::num_tokens_from_messages("gpt-4", &messages).map(|tokens| tokens as u64) }) .boxed() } @@ -365,7 +365,7 @@ impl LanguageModel for DeepSeekLanguageModel { pub fn into_deepseek( request: LanguageModelRequest, model: &deepseek::Model, - max_output_tokens: Option, + max_output_tokens: Option, ) -> deepseek::Request { let is_reasoner = *model == deepseek::Model::Reasoner; diff --git a/crates/language_models/src/provider/google.rs b/crates/language_models/src/provider/google.rs index e5b1aa30d4..36d7353c6a 100644 --- a/crates/language_models/src/provider/google.rs +++ b/crates/language_models/src/provider/google.rs @@ -79,7 +79,7 @@ impl From for ModelMode { pub struct AvailableModel { name: String, display_name: Option, - max_tokens: usize, + max_tokens: u64, mode: Option, } @@ -365,7 +365,7 @@ impl LanguageModel for GoogleLanguageModel { format!("google/{}", self.model.request_id()) } - fn max_token_count(&self) -> usize { + fn max_token_count(&self) -> u64 { self.model.max_token_count() } @@ -373,7 +373,7 @@ impl LanguageModel for GoogleLanguageModel { &self, request: LanguageModelRequest, cx: &App, - ) -> BoxFuture<'static, Result> { + ) -> BoxFuture<'static, Result> { let model_id = self.model.request_id().to_string(); let request = into_google(request, model_id.clone(), self.model.mode()); let http_client = self.http_client.clone(); @@ -702,7 +702,7 @@ impl GoogleEventMapper { pub fn count_google_tokens( request: LanguageModelRequest, cx: &App, -) -> BoxFuture<'static, Result> { +) -> BoxFuture<'static, Result> { // We couldn't use the GoogleLanguageModelProvider to count tokens because the github copilot doesn't have the access to google_ai directly. // So we have to use tokenizer from tiktoken_rs to count tokens. cx.background_spawn(async move { @@ -723,7 +723,7 @@ pub fn count_google_tokens( // Tiktoken doesn't yet support these models, so we manually use the // same tokenizer as GPT-4. 
- tiktoken_rs::num_tokens_from_messages("gpt-4", &messages) + tiktoken_rs::num_tokens_from_messages("gpt-4", &messages).map(|tokens| tokens as u64) }) .boxed() } @@ -750,10 +750,10 @@ fn update_usage(usage: &mut UsageMetadata, new: &UsageMetadata) { } fn convert_usage(usage: &UsageMetadata) -> language_model::TokenUsage { - let prompt_tokens = usage.prompt_token_count.unwrap_or(0) as u32; - let cached_tokens = usage.cached_content_token_count.unwrap_or(0) as u32; + let prompt_tokens = usage.prompt_token_count.unwrap_or(0); + let cached_tokens = usage.cached_content_token_count.unwrap_or(0); let input_tokens = prompt_tokens - cached_tokens; - let output_tokens = usage.candidates_token_count.unwrap_or(0) as u32; + let output_tokens = usage.candidates_token_count.unwrap_or(0); language_model::TokenUsage { input_tokens, diff --git a/crates/language_models/src/provider/lmstudio.rs b/crates/language_models/src/provider/lmstudio.rs index 0a75ef2f88..e0fcf38f38 100644 --- a/crates/language_models/src/provider/lmstudio.rs +++ b/crates/language_models/src/provider/lmstudio.rs @@ -44,7 +44,7 @@ pub struct LmStudioSettings { pub struct AvailableModel { pub name: String, pub display_name: Option, - pub max_tokens: usize, + pub max_tokens: u64, pub supports_tool_calls: bool, pub supports_images: bool, } @@ -414,7 +414,7 @@ impl LanguageModel for LmStudioLanguageModel { format!("lmstudio/{}", self.model.id()) } - fn max_token_count(&self) -> usize { + fn max_token_count(&self) -> u64 { self.model.max_token_count() } @@ -422,7 +422,7 @@ impl LanguageModel for LmStudioLanguageModel { &self, request: LanguageModelRequest, _cx: &App, - ) -> BoxFuture<'static, Result> { + ) -> BoxFuture<'static, Result> { // Endpoint for this is coming soon. In the meantime, hacky estimation let token_count = request .messages @@ -430,7 +430,7 @@ impl LanguageModel for LmStudioLanguageModel { .map(|msg| msg.string_contents().split_whitespace().count()) .sum::(); - let estimated_tokens = (token_count as f64 * 0.75) as usize; + let estimated_tokens = (token_count as f64 * 0.75) as u64; async move { Ok(estimated_tokens) }.boxed() } diff --git a/crates/language_models/src/provider/mistral.rs b/crates/language_models/src/provider/mistral.rs index 84b7131c7d..5e46c41746 100644 --- a/crates/language_models/src/provider/mistral.rs +++ b/crates/language_models/src/provider/mistral.rs @@ -43,9 +43,9 @@ pub struct MistralSettings { pub struct AvailableModel { pub name: String, pub display_name: Option, - pub max_tokens: usize, - pub max_output_tokens: Option, - pub max_completion_tokens: Option, + pub max_tokens: u64, + pub max_output_tokens: Option, + pub max_completion_tokens: Option, pub supports_tools: Option, pub supports_images: Option, } @@ -322,11 +322,11 @@ impl LanguageModel for MistralLanguageModel { format!("mistral/{}", self.model.id()) } - fn max_token_count(&self) -> usize { + fn max_token_count(&self) -> u64 { self.model.max_token_count() } - fn max_output_tokens(&self) -> Option { + fn max_output_tokens(&self) -> Option { self.model.max_output_tokens() } @@ -334,7 +334,7 @@ impl LanguageModel for MistralLanguageModel { &self, request: LanguageModelRequest, cx: &App, - ) -> BoxFuture<'static, Result> { + ) -> BoxFuture<'static, Result> { cx.background_spawn(async move { let messages = request .messages @@ -351,7 +351,7 @@ impl LanguageModel for MistralLanguageModel { }) .collect::>(); - tiktoken_rs::num_tokens_from_messages("gpt-4", &messages) + tiktoken_rs::num_tokens_from_messages("gpt-4", &messages).map(|tokens| 
tokens as u64) }) .boxed() } @@ -386,7 +386,7 @@ impl LanguageModel for MistralLanguageModel { pub fn into_mistral( request: LanguageModelRequest, model: String, - max_output_tokens: Option, + max_output_tokens: Option, ) -> mistral::Request { let stream = true; diff --git a/crates/language_models/src/provider/ollama.rs b/crates/language_models/src/provider/ollama.rs index 42ccd97089..205dab6c87 100644 --- a/crates/language_models/src/provider/ollama.rs +++ b/crates/language_models/src/provider/ollama.rs @@ -46,7 +46,7 @@ pub struct AvailableModel { /// The model's name in Zed's UI, such as in the model selector dropdown menu in the assistant panel. pub display_name: Option, /// The Context Length parameter to the model (aka num_ctx or n_ctx) - pub max_tokens: usize, + pub max_tokens: u64, /// The number of seconds to keep the connection open after the last request pub keep_alive: Option, /// Whether the model supports tools @@ -377,7 +377,7 @@ impl LanguageModel for OllamaLanguageModel { format!("ollama/{}", self.model.id()) } - fn max_token_count(&self) -> usize { + fn max_token_count(&self) -> u64 { self.model.max_token_count() } @@ -385,7 +385,7 @@ impl LanguageModel for OllamaLanguageModel { &self, request: LanguageModelRequest, _cx: &App, - ) -> BoxFuture<'static, Result> { + ) -> BoxFuture<'static, Result> { // There is no endpoint for this _yet_ in Ollama // see: https://github.com/ollama/ollama/issues/1716 and https://github.com/ollama/ollama/issues/3582 let token_count = request @@ -395,7 +395,7 @@ impl LanguageModel for OllamaLanguageModel { .sum::() / 4; - async move { Ok(token_count) }.boxed() + async move { Ok(token_count as u64) }.boxed() } fn stream_completion( diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs index 4b164d0c65..1233af9a52 100644 --- a/crates/language_models/src/provider/open_ai.rs +++ b/crates/language_models/src/provider/open_ai.rs @@ -43,9 +43,9 @@ pub struct OpenAiSettings { pub struct AvailableModel { pub name: String, pub display_name: Option, - pub max_tokens: usize, - pub max_output_tokens: Option, - pub max_completion_tokens: Option, + pub max_tokens: u64, + pub max_output_tokens: Option, + pub max_completion_tokens: Option, } pub struct OpenAiLanguageModelProvider { @@ -312,11 +312,11 @@ impl LanguageModel for OpenAiLanguageModel { format!("openai/{}", self.model.id()) } - fn max_token_count(&self) -> usize { + fn max_token_count(&self) -> u64 { self.model.max_token_count() } - fn max_output_tokens(&self) -> Option { + fn max_output_tokens(&self) -> Option { self.model.max_output_tokens() } @@ -324,7 +324,7 @@ impl LanguageModel for OpenAiLanguageModel { &self, request: LanguageModelRequest, cx: &App, - ) -> BoxFuture<'static, Result> { + ) -> BoxFuture<'static, Result> { count_open_ai_tokens(request, self.model.clone(), cx) } @@ -355,7 +355,7 @@ impl LanguageModel for OpenAiLanguageModel { pub fn into_open_ai( request: LanguageModelRequest, model: &Model, - max_output_tokens: Option, + max_output_tokens: Option, ) -> open_ai::Request { let stream = !model.id().starts_with("o1-"); @@ -606,7 +606,7 @@ pub fn count_open_ai_tokens( request: LanguageModelRequest, model: Model, cx: &App, -) -> BoxFuture<'static, Result> { +) -> BoxFuture<'static, Result> { cx.background_spawn(async move { let messages = request .messages @@ -652,6 +652,7 @@ pub fn count_open_ai_tokens( | Model::O3Mini | Model::O4Mini => tiktoken_rs::num_tokens_from_messages(model.id(), &messages), } + .map(|tokens| tokens as 
u64) }) .boxed() } diff --git a/crates/language_models/src/provider/open_router.rs b/crates/language_models/src/provider/open_router.rs index 450d56a1b2..09c5a0235a 100644 --- a/crates/language_models/src/provider/open_router.rs +++ b/crates/language_models/src/provider/open_router.rs @@ -40,9 +40,9 @@ pub struct OpenRouterSettings { pub struct AvailableModel { pub name: String, pub display_name: Option, - pub max_tokens: usize, - pub max_output_tokens: Option, - pub max_completion_tokens: Option, + pub max_tokens: u64, + pub max_output_tokens: Option, + pub max_completion_tokens: Option, pub supports_tools: Option, pub supports_images: Option, } @@ -331,11 +331,11 @@ impl LanguageModel for OpenRouterLanguageModel { format!("openrouter/{}", self.model.id()) } - fn max_token_count(&self) -> usize { + fn max_token_count(&self) -> u64 { self.model.max_token_count() } - fn max_output_tokens(&self) -> Option { + fn max_output_tokens(&self) -> Option { self.model.max_output_tokens() } @@ -355,7 +355,7 @@ impl LanguageModel for OpenRouterLanguageModel { &self, request: LanguageModelRequest, cx: &App, - ) -> BoxFuture<'static, Result> { + ) -> BoxFuture<'static, Result> { count_open_router_tokens(request, self.model.clone(), cx) } @@ -386,7 +386,7 @@ impl LanguageModel for OpenRouterLanguageModel { pub fn into_open_router( request: LanguageModelRequest, model: &Model, - max_output_tokens: Option, + max_output_tokens: Option, ) -> open_router::Request { let mut messages = Vec::new(); for message in request.messages { @@ -640,7 +640,7 @@ pub fn count_open_router_tokens( request: LanguageModelRequest, _model: open_router::Model, cx: &App, -) -> BoxFuture<'static, Result> { +) -> BoxFuture<'static, Result> { cx.background_spawn(async move { let messages = request .messages @@ -657,7 +657,7 @@ pub fn count_open_router_tokens( }) .collect::>(); - tiktoken_rs::num_tokens_from_messages("gpt-4o", &messages) + tiktoken_rs::num_tokens_from_messages("gpt-4o", &messages).map(|tokens| tokens as u64) }) .boxed() } diff --git a/crates/lmstudio/src/lmstudio.rs b/crates/lmstudio/src/lmstudio.rs index 5c6b610943..a5477994ff 100644 --- a/crates/lmstudio/src/lmstudio.rs +++ b/crates/lmstudio/src/lmstudio.rs @@ -46,7 +46,7 @@ impl From for String { pub struct Model { pub name: String, pub display_name: Option, - pub max_tokens: usize, + pub max_tokens: u64, pub supports_tool_calls: bool, pub supports_images: bool, } @@ -55,7 +55,7 @@ impl Model { pub fn new( name: &str, display_name: Option<&str>, - max_tokens: Option, + max_tokens: Option, supports_tool_calls: bool, supports_images: bool, ) -> Self { @@ -76,7 +76,7 @@ impl Model { self.display_name.as_ref().unwrap_or(&self.name) } - pub fn max_token_count(&self) -> usize { + pub fn max_token_count(&self) -> u64 { self.max_tokens } @@ -256,9 +256,9 @@ pub struct FunctionChunk { #[derive(Serialize, Deserialize, Debug)] pub struct Usage { - pub prompt_tokens: u32, - pub completion_tokens: u32, - pub total_tokens: u32, + pub prompt_tokens: u64, + pub completion_tokens: u64, + pub total_tokens: u64, } #[derive(Debug, Default, Clone, Deserialize, PartialEq)] @@ -306,8 +306,8 @@ pub struct ModelEntry { pub compatibility_type: CompatibilityType, pub quantization: Option, pub state: ModelState, - pub max_context_length: Option, - pub loaded_context_length: Option, + pub max_context_length: Option, + pub loaded_context_length: Option, #[serde(default)] pub capabilities: Capabilities, } diff --git a/crates/mistral/src/mistral.rs b/crates/mistral/src/mistral.rs index 
4fc976860c..a3a017be83 100644 --- a/crates/mistral/src/mistral.rs +++ b/crates/mistral/src/mistral.rs @@ -70,9 +70,9 @@ pub enum Model { name: String, /// The name displayed in the UI, such as in the assistant panel model dropdown menu. display_name: Option, - max_tokens: usize, - max_output_tokens: Option, - max_completion_tokens: Option, + max_tokens: u64, + max_output_tokens: Option, + max_completion_tokens: Option, supports_tools: Option, supports_images: Option, }, @@ -130,7 +130,7 @@ impl Model { } } - pub fn max_token_count(&self) -> usize { + pub fn max_token_count(&self) -> u64 { match self { Self::CodestralLatest => 256000, Self::MistralLargeLatest => 131000, @@ -145,7 +145,7 @@ impl Model { } } - pub fn max_output_tokens(&self) -> Option { + pub fn max_output_tokens(&self) -> Option { match self { Self::Custom { max_output_tokens, .. @@ -193,7 +193,7 @@ pub struct Request { pub messages: Vec, pub stream: bool, #[serde(default, skip_serializing_if = "Option::is_none")] - pub max_tokens: Option, + pub max_tokens: Option, #[serde(default, skip_serializing_if = "Option::is_none")] pub temperature: Option, #[serde(default, skip_serializing_if = "Option::is_none")] @@ -360,9 +360,9 @@ pub struct Response { #[derive(Serialize, Deserialize, Debug)] pub struct Usage { - pub prompt_tokens: u32, - pub completion_tokens: u32, - pub total_tokens: u32, + pub prompt_tokens: u64, + pub completion_tokens: u64, + pub total_tokens: u64, } #[derive(Serialize, Deserialize, Debug)] diff --git a/crates/ollama/src/ollama.rs b/crates/ollama/src/ollama.rs index e17b08cde6..109fea7353 100644 --- a/crates/ollama/src/ollama.rs +++ b/crates/ollama/src/ollama.rs @@ -35,18 +35,18 @@ impl Default for KeepAlive { pub struct Model { pub name: String, pub display_name: Option, - pub max_tokens: usize, + pub max_tokens: u64, pub keep_alive: Option, pub supports_tools: Option, pub supports_vision: Option, pub supports_thinking: Option, } -fn get_max_tokens(name: &str) -> usize { +fn get_max_tokens(name: &str) -> u64 { /// Default context length for unknown models. - const DEFAULT_TOKENS: usize = 4096; + const DEFAULT_TOKENS: u64 = 4096; /// Magic number. Lets many Ollama models work with ~16GB of ram. 
- const MAXIMUM_TOKENS: usize = 16384; + const MAXIMUM_TOKENS: u64 = 16384; match name.split(':').next().unwrap() { "phi" | "tinyllama" | "granite-code" => 2048, @@ -67,7 +67,7 @@ impl Model { pub fn new( name: &str, display_name: Option<&str>, - max_tokens: Option, + max_tokens: Option, supports_tools: Option, supports_vision: Option, supports_thinking: Option, @@ -93,7 +93,7 @@ impl Model { self.display_name.as_ref().unwrap_or(&self.name) } - pub fn max_token_count(&self) -> usize { + pub fn max_token_count(&self) -> u64 { self.max_tokens } } @@ -165,7 +165,7 @@ impl ChatRequest { // https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values #[derive(Serialize, Default, Debug)] pub struct ChatOptions { - pub num_ctx: Option, + pub num_ctx: Option, pub num_predict: Option, pub stop: Option>, pub temperature: Option, @@ -183,8 +183,8 @@ pub struct ChatResponseDelta { pub done_reason: Option, #[allow(unused)] pub done: bool, - pub prompt_eval_count: Option, - pub eval_count: Option, + pub prompt_eval_count: Option, + pub eval_count: Option, } #[derive(Serialize, Deserialize)] diff --git a/crates/open_ai/src/open_ai.rs b/crates/open_ai/src/open_ai.rs index 3ca953d766..6628b29a6c 100644 --- a/crates/open_ai/src/open_ai.rs +++ b/crates/open_ai/src/open_ai.rs @@ -80,9 +80,9 @@ pub enum Model { name: String, /// The name displayed in the UI, such as in the assistant panel model dropdown menu. display_name: Option, - max_tokens: usize, - max_output_tokens: Option, - max_completion_tokens: Option, + max_tokens: u64, + max_output_tokens: Option, + max_completion_tokens: Option, }, } @@ -147,7 +147,7 @@ impl Model { } } - pub fn max_token_count(&self) -> usize { + pub fn max_token_count(&self) -> u64 { match self { Self::ThreePointFiveTurbo => 16_385, Self::Four => 8_192, @@ -165,7 +165,7 @@ impl Model { } } - pub fn max_output_tokens(&self) -> Option { + pub fn max_output_tokens(&self) -> Option { match self { Self::Custom { max_output_tokens, .. 
@@ -209,7 +209,7 @@ pub struct Request { pub messages: Vec, pub stream: bool, #[serde(default, skip_serializing_if = "Option::is_none")] - pub max_completion_tokens: Option, + pub max_completion_tokens: Option, #[serde(default, skip_serializing_if = "Vec::is_empty")] pub stop: Vec, pub temperature: f32, diff --git a/crates/open_router/src/open_router.rs b/crates/open_router/src/open_router.rs index 407ed416ec..f0cb30e7aa 100644 --- a/crates/open_router/src/open_router.rs +++ b/crates/open_router/src/open_router.rs @@ -50,7 +50,7 @@ impl From for String { pub struct Model { pub name: String, pub display_name: Option, - pub max_tokens: usize, + pub max_tokens: u64, pub supports_tools: Option, pub supports_images: Option, } @@ -73,7 +73,7 @@ impl Model { pub fn new( name: &str, display_name: Option<&str>, - max_tokens: Option, + max_tokens: Option, supports_tools: Option, supports_images: Option, ) -> Self { @@ -94,11 +94,11 @@ impl Model { self.display_name.as_ref().unwrap_or(&self.name) } - pub fn max_token_count(&self) -> usize { + pub fn max_token_count(&self) -> u64 { self.max_tokens } - pub fn max_output_tokens(&self) -> Option { + pub fn max_output_tokens(&self) -> Option { None } @@ -117,7 +117,7 @@ pub struct Request { pub messages: Vec, pub stream: bool, #[serde(default, skip_serializing_if = "Option::is_none")] - pub max_tokens: Option, + pub max_tokens: Option, #[serde(default, skip_serializing_if = "Vec::is_empty")] pub stop: Vec, pub temperature: f32, @@ -318,9 +318,9 @@ pub struct FunctionChunk { #[derive(Serialize, Deserialize, Debug)] pub struct Usage { - pub prompt_tokens: u32, - pub completion_tokens: u32, - pub total_tokens: u32, + pub prompt_tokens: u64, + pub completion_tokens: u64, + pub total_tokens: u64, } #[derive(Serialize, Deserialize, Debug)] @@ -369,7 +369,7 @@ pub struct ModelEntry { pub created: usize, pub description: String, #[serde(default, skip_serializing_if = "Option::is_none")] - pub context_length: Option, + pub context_length: Option, #[serde(default, skip_serializing_if = "Vec::is_empty")] pub supported_parameters: Vec, #[serde(default, skip_serializing_if = "Option::is_none")] diff --git a/crates/rules_library/src/rules_library.rs b/crates/rules_library/src/rules_library.rs index e1ff7062a7..7f145f3139 100644 --- a/crates/rules_library/src/rules_library.rs +++ b/crates/rules_library/src/rules_library.rs @@ -154,7 +154,7 @@ pub struct RulesLibrary { struct RuleEditor { title_editor: Entity, body_editor: Entity, - token_count: Option, + token_count: Option, pending_token_count: Task>, next_title_and_body_to_save: Option<(String, Rope)>, pending_save: Option>>,
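
Below is a small, self-contained sketch (not the Zed code itself) of the convention this patch establishes: token counts travel as `u64` everywhere, so they are identical on 64-bit hosts and wasm32 and serialize stably, while `usize` appears only where a count is converted into an in-memory byte budget via an explicit cast, as in the `tool_use.rs` hunk. The `TokenUsage` struct and `parse_prompt_too_long` loosely mirror shapes touched by the diff; `BYTES_PER_TOKEN_ESTIMATE = 3` is an illustrative value, not the one Zed uses, and the example assumes `serde` with the `derive` feature is available.

```rust
use serde::{Deserialize, Serialize};

/// Token usage as reported by a model API. All counts are `u64`, so the values
/// are the same on every target (wasm32 included) and stable when serialized.
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
pub struct TokenUsage {
    pub input_tokens: u64,
    pub output_tokens: u64,
    pub cache_creation_input_tokens: u64,
    pub cache_read_input_tokens: u64,
}

impl TokenUsage {
    pub fn total_tokens(&self) -> u64 {
        self.input_tokens
            + self.output_tokens
            + self.cache_creation_input_tokens
            + self.cache_read_input_tokens
    }
}

/// Rough bytes-per-token factor; a made-up value for this sketch.
const BYTES_PER_TOKEN_ESTIMATE: usize = 3;

/// The one place `usize` is still appropriate: sizing an in-memory buffer.
/// The narrowing is an explicit cast, mirroring `max_token_count() as usize`
/// in the `tool_use.rs` hunk above.
fn tool_output_byte_limit(max_token_count: u64) -> usize {
    (max_token_count as usize).saturating_mul(BYTES_PER_TOKEN_ESTIMATE)
}

/// Parse a provider error like "prompt is too long: 213412 tokens ..." into a
/// `u64`, in the spirit of `parse_prompt_too_long` in the anthropic crate.
fn parse_prompt_too_long(message: &str) -> Option<u64> {
    message
        .strip_prefix("prompt is too long: ")?
        .split_once(" tokens")?
        .0
        .parse()
        .ok()
}

fn main() {
    let usage = TokenUsage {
        input_tokens: 200_000,
        output_tokens: 8_192,
        ..Default::default()
    };
    assert_eq!(usage.total_tokens(), 208_192);
    assert_eq!(tool_output_byte_limit(200_000), 600_000);
    assert_eq!(
        parse_prompt_too_long("prompt is too long: 213412 tokens > 200000 maximum"),
        Some(213_412)
    );
}
```

On a 32-bit target such as wasm32, the only narrowing happens inside `tool_output_byte_limit`, where the explicit cast (plus `saturating_mul`) bounds a local byte budget rather than silently shrinking the model's advertised context window.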