language_models: Improve token counting for providers (#32853)
We push the usage data whenever we receive it from the provider, to make sure the token count is correct after the turn has ended. - [x] Ollama - [x] Copilot - [x] Mistral - [x] OpenRouter - [x] LMStudio I put all the changes into a single PR; I am open to moving these into separate PRs if that makes review and testing easier. Release Notes: - N/A
This commit is contained in:
parent
d4c9522da7
commit
ed4b29f80c
9 changed files with 74 additions and 6 deletions
|
@ -183,6 +183,8 @@ pub struct ChatResponseDelta {
|
|||
pub done_reason: Option<String>,
|
||||
#[allow(unused)]
|
||||
pub done: bool,
|
||||
pub prompt_eval_count: Option<u32>,
|
||||
pub eval_count: Option<u32>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue