agent: Fix bug with double-counting tokens in Gemini (#31885)

We report the total number of input tokens by summing the numbers of
1. Prompt tokens
2. Cached tokens

But the Google API returns a prompt token count (1) that already includes the
cached tokens (2), so we were double-counting tokens in some cases.

Release Notes:

- Fixed bug with double-counting tokens in Gemini
This commit is contained in:
Oleksiy Syvokon 2025-06-02 13:18:44 +03:00 committed by GitHub
parent 6d99c12796
commit ae219e9e99
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -685,10 +685,15 @@ fn update_usage(usage: &mut UsageMetadata, new: &UsageMetadata) {
}
/// Converts Gemini usage metadata into a [`language_model::TokenUsage`].
///
/// The Google API reports a prompt token count that already includes the
/// cached-content tokens. Because total input is reported by summing prompt
/// and cached tokens, the cached portion is subtracted from the prompt count
/// here so those tokens are not counted twice.
///
/// Any count missing from `usage` is treated as 0, and
/// `cache_creation_input_tokens` is always reported as 0.
fn convert_usage(usage: &UsageMetadata) -> language_model::TokenUsage {
    let prompt_tokens = usage.prompt_token_count.unwrap_or(0) as u32;
    let cached_tokens = usage.cached_content_token_count.unwrap_or(0) as u32;
    // Saturate instead of subtracting directly: if the prompt count is absent
    // (defaulted to 0) or smaller than the cached count, a plain `-` would
    // panic in debug builds and wrap around to a huge value in release builds.
    let input_tokens = prompt_tokens.saturating_sub(cached_tokens);
    let output_tokens = usage.candidates_token_count.unwrap_or(0) as u32;
    language_model::TokenUsage {
        input_tokens,
        output_tokens,
        cache_read_input_tokens: cached_tokens,
        cache_creation_input_tokens: 0,
    }
}