agent: Fix bug with double-counting tokens in Gemini (#31885)
We report the total number of input tokens by summing:

1. Prompt tokens
2. Cached tokens

But the Google API returns a prompt token count (1) that already includes the cached tokens (2), so we were double-counting tokens in some cases.

Release Notes:

- Fixed bug with double-counting tokens in Gemini
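As an illustration of the accounting change, here is a minimal standalone sketch; the numbers are hypothetical, and the real conversion lives in `convert_usage` in the diff below:

```rust
// Hypothetical numbers: Google reports prompt_token_count INCLUSIVE
// of cached_content_token_count.
fn main() {
    let prompt_token_count: u32 = 1200; // already includes cached tokens
    let cached_content_token_count: u32 = 800;

    // Old accounting: cached tokens were added on top of the prompt
    // count, double-counting the cached portion.
    let old_total = prompt_token_count + cached_content_token_count;
    assert_eq!(old_total, 2000); // 800 tokens counted twice

    // New accounting: subtract the cached tokens first, so the sum of
    // fresh input plus cached input reproduces Google's prompt count.
    let input_tokens = prompt_token_count - cached_content_token_count;
    assert_eq!(input_tokens, 400);
    assert_eq!(input_tokens + cached_content_token_count, 1200);
}
```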
parent 6d99c12796
commit ae219e9e99

1 changed file with 8 additions and 3 deletions
```diff
@@ -685,10 +685,15 @@ fn update_usage(usage: &mut UsageMetadata, new: &UsageMetadata) {
 }
 
 fn convert_usage(usage: &UsageMetadata) -> language_model::TokenUsage {
+    let prompt_tokens = usage.prompt_token_count.unwrap_or(0) as u32;
+    let cached_tokens = usage.cached_content_token_count.unwrap_or(0) as u32;
+    let input_tokens = prompt_tokens - cached_tokens;
+    let output_tokens = usage.candidates_token_count.unwrap_or(0) as u32;
+
     language_model::TokenUsage {
-        input_tokens: usage.prompt_token_count.unwrap_or(0) as u32,
-        output_tokens: usage.candidates_token_count.unwrap_or(0) as u32,
-        cache_read_input_tokens: usage.cached_content_token_count.unwrap_or(0) as u32,
+        input_tokens,
+        output_tokens,
+        cache_read_input_tokens: cached_tokens,
         cache_creation_input_tokens: 0,
     }
 }
```
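A sketch of a regression test for the fixed conversion, assuming the `UsageMetadata` field names visible in the diff, public fields, and a `Default` impl; treat it as illustrative rather than the repository's actual test:

```rust
#[test]
fn convert_usage_does_not_double_count_cached_tokens() {
    // Field names are taken from the diff; Default is assumed for any
    // remaining fields of UsageMetadata.
    let usage = UsageMetadata {
        prompt_token_count: Some(1200), // already includes cached tokens
        cached_content_token_count: Some(800),
        candidates_token_count: Some(50),
        ..Default::default()
    };

    let converted = convert_usage(&usage);

    // Only the uncached portion is reported as fresh input.
    assert_eq!(converted.input_tokens, 400);
    assert_eq!(converted.cache_read_input_tokens, 800);
    assert_eq!(converted.output_tokens, 50);

    // Summing fresh input and cached input now reproduces Google's
    // prompt count instead of exceeding it.
    assert_eq!(
        converted.input_tokens + converted.cache_read_input_tokens,
        1200
    );
}
```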