From 108162423da6d5d37bd78bffed74e7ebc2e8a83b Mon Sep 17 00:00:00 2001
From: Umesh Yadav <23421535+imumesh18@users.noreply.github.com>
Date: Wed, 25 Jun 2025 13:12:30 +0530
Subject: [PATCH] language_models: Emit UsageUpdate events for token usage in
 DeepSeek and OpenAI (#33242)

Closes #ISSUE

Release Notes:

- N/A
---
 crates/deepseek/src/deepseek.rs                 | 11 ++++++-----
 crates/language_models/src/provider/deepseek.rs | 11 ++++++++++-
 crates/language_models/src/provider/open_ai.rs  | 15 ++++++++++++---
 crates/open_ai/src/open_ai.rs                   |  6 +++---
 4 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/crates/deepseek/src/deepseek.rs b/crates/deepseek/src/deepseek.rs
index 22bde8e594..c49270febe 100644
--- a/crates/deepseek/src/deepseek.rs
+++ b/crates/deepseek/src/deepseek.rs
@@ -201,13 +201,13 @@ pub struct Response {
 
 #[derive(Serialize, Deserialize, Debug)]
 pub struct Usage {
-    pub prompt_tokens: u32,
-    pub completion_tokens: u32,
-    pub total_tokens: u32,
+    pub prompt_tokens: u64,
+    pub completion_tokens: u64,
+    pub total_tokens: u64,
     #[serde(default)]
-    pub prompt_cache_hit_tokens: u32,
+    pub prompt_cache_hit_tokens: u64,
     #[serde(default)]
-    pub prompt_cache_miss_tokens: u32,
+    pub prompt_cache_miss_tokens: u64,
 }
 
 #[derive(Serialize, Deserialize, Debug)]
@@ -224,6 +224,7 @@ pub struct StreamResponse {
     pub created: u64,
     pub model: String,
     pub choices: Vec<StreamChoice>,
+    pub usage: Option<Usage>,
 }
 
 #[derive(Serialize, Deserialize, Debug)]
diff --git a/crates/language_models/src/provider/deepseek.rs b/crates/language_models/src/provider/deepseek.rs
index 10030c9091..99a1ca70c6 100644
--- a/crates/language_models/src/provider/deepseek.rs
+++ b/crates/language_models/src/provider/deepseek.rs
@@ -14,7 +14,7 @@ use language_model::{
     LanguageModelId, LanguageModelName, LanguageModelProvider, LanguageModelProviderId,
     LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest,
     LanguageModelToolChoice, LanguageModelToolResultContent, LanguageModelToolUse, MessageContent,
-    RateLimiter, Role, StopReason,
+    RateLimiter, Role, StopReason, TokenUsage,
 };
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
@@ -513,6 +513,15 @@ impl DeepSeekEventMapper {
             }
         }
 
+        if let Some(usage) = event.usage {
+            events.push(Ok(LanguageModelCompletionEvent::UsageUpdate(TokenUsage {
+                input_tokens: usage.prompt_tokens,
+                output_tokens: usage.completion_tokens,
+                cache_creation_input_tokens: 0,
+                cache_read_input_tokens: 0,
+            })));
+        }
+
         match choice.finish_reason.as_deref() {
             Some("stop") => {
                 events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::EndTurn)));
diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs
index f6e1ea559a..3fa5334eb0 100644
--- a/crates/language_models/src/provider/open_ai.rs
+++ b/crates/language_models/src/provider/open_ai.rs
@@ -12,7 +12,7 @@ use language_model::{
     LanguageModelId, LanguageModelName, LanguageModelProvider, LanguageModelProviderId,
     LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest,
     LanguageModelToolChoice, LanguageModelToolResultContent, LanguageModelToolUse, MessageContent,
-    RateLimiter, Role, StopReason,
+    RateLimiter, Role, StopReason, TokenUsage,
 };
 use menu;
 use open_ai::{ImageUrl, Model, ResponseStreamEvent, stream_completion};
@@ -528,11 +528,20 @@ impl OpenAiEventMapper {
         &mut self,
         event: ResponseStreamEvent,
     ) -> Vec<Result<LanguageModelCompletionEvent, LanguageModelCompletionError>> {
+        let mut events = Vec::new();
+        if let Some(usage) = event.usage {
+            events.push(Ok(LanguageModelCompletionEvent::UsageUpdate(TokenUsage {
+                input_tokens: usage.prompt_tokens,
+                output_tokens: usage.completion_tokens,
+                cache_creation_input_tokens: 0,
+                cache_read_input_tokens: 0,
+            })));
+        }
+
         let Some(choice) = event.choices.first() else {
-            return Vec::new();
+            return events;
         };
 
-        let mut events = Vec::new();
         if let Some(content) = choice.delta.content.clone() {
             events.push(Ok(LanguageModelCompletionEvent::Text(content)));
         }
diff --git a/crates/open_ai/src/open_ai.rs b/crates/open_ai/src/open_ai.rs
index 034b4b358a..5b09aa5cbc 100644
--- a/crates/open_ai/src/open_ai.rs
+++ b/crates/open_ai/src/open_ai.rs
@@ -364,9 +364,9 @@ pub struct FunctionChunk {
 
 #[derive(Serialize, Deserialize, Debug)]
 pub struct Usage {
-    pub prompt_tokens: u32,
-    pub completion_tokens: u32,
-    pub total_tokens: u32,
+    pub prompt_tokens: u64,
+    pub completion_tokens: u64,
+    pub total_tokens: u64,
 }
 
 #[derive(Serialize, Deserialize, Debug)]