language_models: Improve token counting for providers (#32853)
We push the usage data whenever we receive it from the provider to make sure the counting is correct after the turn has ended. - [x] Ollama - [x] Copilot - [x] Mistral - [x] OpenRouter - [x] LMStudio All of the changes are in a single PR; I'm open to moving these into separate PRs if that makes the review and testing easier. Release Notes: - N/A
This commit is contained in:
parent
d4c9522da7
commit
ed4b29f80c
9 changed files with 74 additions and 6 deletions
|
@ -311,6 +311,20 @@ pub struct FunctionContent {
|
|||
pub struct ResponseEvent {
|
||||
pub choices: Vec<ResponseChoice>,
|
||||
pub id: String,
|
||||
pub usage: Option<Usage>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct Usage {
|
||||
pub completion_tokens: u32,
|
||||
pub prompt_tokens: u32,
|
||||
pub prompt_tokens_details: PromptTokensDetails,
|
||||
pub total_tokens: u32,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct PromptTokensDetails {
|
||||
pub cached_tokens: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue