language_models: Improve token counting for providers (#32853)
We push the usage data whenever we receive it from the provider to make sure the counting is correct after the turn has ended. - [x] Ollama - [x] Copilot - [x] Mistral - [x] OpenRouter - [x] LMStudio Put all the changes into a single PR open to move these to separate PR if that makes the review and testing easier. Release Notes: - N/A
This commit is contained in:
parent
d4c9522da7
commit
ed4b29f80c
9 changed files with 74 additions and 6 deletions
|
@ -127,6 +127,12 @@ pub struct Request {
|
|||
pub parallel_tool_calls: Option<bool>,
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub tools: Vec<ToolDefinition>,
|
||||
pub usage: RequestUsage,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
pub struct RequestUsage {
|
||||
pub include: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue