language_models: Improve token counting for providers (#32853)

We push the usage data to the consumer whenever we receive it from the provider, so the token count is correct after the turn has ended (a sketch of the pattern follows the provider list below).

- [x] Ollama 
- [x] Copilot 
- [x] Mistral 
- [x] OpenRouter 
- [x] LMStudio

I've put all the changes into a single PR, but I'm open to splitting them into separate PRs if that makes review and testing easier.
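To make the idea concrete, here is a minimal sketch of the pattern, using simplified stand-in types rather than Zed's actual API (`Chunk`, `map_chunk`, and the exact fields of `TokenUsage` are hypothetical): whenever a streamed chunk carries usage data, we emit it as an event instead of waiting for the stream to close.

```rust
// Simplified stand-in for the token usage struct (field names assumed).
#[derive(Debug, Default, Clone, Copy)]
struct TokenUsage {
    input_tokens: u64,
    output_tokens: u64,
    cache_creation_input_tokens: u64,
    cache_read_input_tokens: u64,
}

// Simplified completion event: either streamed text or a usage report.
enum CompletionEvent {
    Text(String),
    UsageUpdate(TokenUsage),
}

// Hypothetical provider chunk: `usage` is typically absent until the
// provider's final chunk of the turn.
struct Chunk {
    text: Option<String>,
    usage: Option<TokenUsage>,
}

fn map_chunk(chunk: Chunk, events: &mut Vec<CompletionEvent>) {
    if let Some(text) = chunk.text {
        events.push(CompletionEvent::Text(text));
    }
    // Push usage whenever the provider reports it, so the consumer always
    // holds the provider's latest count once the turn has ended.
    if let Some(usage) = chunk.usage {
        events.push(CompletionEvent::UsageUpdate(usage));
    }
}
```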

Release Notes:

- N/A

Umesh Yadav 2025-06-17 16:16:29 +05:30 committed by GitHub
parent d4c9522da7
commit ed4b29f80c
9 changed files with 74 additions and 6 deletions


@@ -13,7 +13,7 @@ use language_model::{
     LanguageModelId, LanguageModelName, LanguageModelProvider, LanguageModelProviderId,
     LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest,
     LanguageModelToolChoice, LanguageModelToolResultContent, LanguageModelToolUse, MessageContent,
-    RateLimiter, Role, StopReason,
+    RateLimiter, Role, StopReason, TokenUsage,
 };
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
@@ -626,6 +626,15 @@ impl MistralEventMapper {
             }
         }
+        if let Some(usage) = event.usage {
+            events.push(Ok(LanguageModelCompletionEvent::UsageUpdate(TokenUsage {
+                input_tokens: usage.prompt_tokens,
+                output_tokens: usage.completion_tokens,
+                cache_creation_input_tokens: 0,
+                cache_read_input_tokens: 0,
+            })));
+        }
         if let Some(finish_reason) = choice.finish_reason.as_deref() {
             match finish_reason {
                 "stop" => {