language_models: Improve token counting for providers (#32853)
We push the usage data whenever we receive it from the provider, to make sure the counting is correct after the turn has ended.

- [x] Ollama
- [x] Copilot
- [x] Mistral
- [x] OpenRouter
- [x] LMStudio

Put all the changes into a single PR; open to moving these to separate PRs if that makes review and testing easier.

Release Notes:

- N/A
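For reviewers, here is a minimal, self-contained sketch of how a consumer could fold the streamed `UsageUpdate` events into a final per-turn total. These are not Zed's real types: `CompletionEvent` and the simulated stream are stand-ins, and it assumes OpenAI-style `usage` payloads are cumulative snapshots, so keeping the latest one gives the totals for the turn.

```rust
// Minimal sketch, not Zed's actual types. Only the field names on
// `TokenUsage` mirror the ones used in this PR; everything else is a
// stand-in for illustration.

#[derive(Clone, Copy, Debug, Default)]
struct TokenUsage {
    input_tokens: u64,
    output_tokens: u64,
    cache_creation_input_tokens: u64,
    cache_read_input_tokens: u64,
}

enum CompletionEvent {
    Text(String),
    UsageUpdate(TokenUsage),
    Stop,
}

fn main() {
    // Simulated stream: providers may emit usage mid-stream, only in the
    // final chunk, or both.
    let events = vec![
        CompletionEvent::Text("Hello".into()),
        CompletionEvent::UsageUpdate(TokenUsage {
            input_tokens: 12,
            output_tokens: 3,
            ..Default::default()
        }),
        CompletionEvent::Text(", world".into()),
        CompletionEvent::UsageUpdate(TokenUsage {
            input_tokens: 12,
            output_tokens: 7,
            ..Default::default()
        }),
        CompletionEvent::Stop,
    ];

    // Assuming each usage payload is a cumulative snapshot, keeping the
    // most recent one yields correct counts once the turn has ended.
    let mut usage = TokenUsage::default();
    for event in &events {
        if let CompletionEvent::UsageUpdate(u) = event {
            usage = *u;
        }
    }
    assert_eq!(usage.output_tokens, 7);
    println!("final usage for the turn: {usage:?}");
}
```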
Parent: d4c9522da7
Commit: ed4b29f80c
9 changed files with 74 additions and 6 deletions
@@ -24,7 +24,7 @@ use language_model::{
     LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest,
     LanguageModelRequestMessage, LanguageModelToolChoice, LanguageModelToolResultContent,
     LanguageModelToolSchemaFormat, LanguageModelToolUse, MessageContent, RateLimiter, Role,
-    StopReason,
+    StopReason, TokenUsage,
 };
 use settings::SettingsStore;
 use std::time::Duration;

@@ -378,6 +378,17 @@ pub fn map_to_language_model_completion_events(
                     }
                 }

+                if let Some(usage) = event.usage {
+                    events.push(Ok(LanguageModelCompletionEvent::UsageUpdate(
+                        TokenUsage {
+                            input_tokens: usage.prompt_tokens,
+                            output_tokens: usage.completion_tokens,
+                            cache_creation_input_tokens: 0,
+                            cache_read_input_tokens: 0,
+                        },
+                    )));
+                }
+
                 match choice.finish_reason.as_deref() {
                     Some("stop") => {
                         events.push(Ok(LanguageModelCompletionEvent::Stop(
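Streaming usage as `UsageUpdate` events, rather than attaching it to a final result, keeps the counts correct even for providers that only report usage in the last chunk of the stream. The two cache fields are zeroed here, presumably because these OpenAI-compatible `usage` payloads do not report cache-read or cache-creation token counts.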