language_models: Improve token counting for providers (#32853)

We push usage data whenever we receive it from the provider, so the
token counts are correct after the turn has ended.

- [x] Ollama 
- [x] Copilot 
- [x] Mistral 
- [x] OpenRouter 
- [x] LMStudio

I've put all the changes into a single PR, but I'm open to splitting
them into separate PRs if that makes review and testing easier.
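The approach above can be sketched in plain Rust. This is a minimal,
hypothetical illustration (the types `StreamEvent`, `TokenUsage`, and
`final_usage` are invented for this sketch, not taken from the PR): each
streamed event may or may not carry usage data, and we keep the latest
report so the count pushed at the end of the turn reflects what the
provider actually billed.

```rust
/// Token counts as a provider might report them (illustrative only).
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct TokenUsage {
    pub input_tokens: u32,
    pub output_tokens: u32,
    pub cached_tokens: u32,
}

/// One event from a streaming completion (illustrative only).
pub struct StreamEvent {
    pub delta: Option<String>,
    pub usage: Option<TokenUsage>,
}

/// Fold over the stream, overwriting with each usage report we see.
/// Providers typically send cumulative totals, so the last one wins.
pub fn final_usage(events: &[StreamEvent]) -> Option<TokenUsage> {
    let mut latest = None;
    for event in events {
        if let Some(usage) = event.usage {
            latest = Some(usage);
        }
    }
    latest
}
```

Pushing on every report (rather than only at stream end) also covers
providers that emit usage mid-stream and then close the connection.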

Release Notes:

- N/A
Umesh Yadav 2025-06-17 16:16:29 +05:30 committed by GitHub
parent d4c9522da7
commit ed4b29f80c
9 changed files with 74 additions and 6 deletions

@@ -311,6 +311,20 @@ pub struct FunctionContent {
pub struct ResponseEvent {
pub choices: Vec<ResponseChoice>,
pub id: String,
pub usage: Option<Usage>,
}
#[derive(Deserialize, Debug)]
pub struct Usage {
pub completion_tokens: u32,
pub prompt_tokens: u32,
pub prompt_tokens_details: PromptTokensDetails,
pub total_tokens: u32,
}
#[derive(Deserialize, Debug)]
pub struct PromptTokensDetails {
pub cached_tokens: u32,
}
#[derive(Debug, Deserialize)]
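The `Usage` struct in the diff reports prompt, completion, and cached
token counts. Below is a hedged sketch of how such a payload might be
mapped to a provider-agnostic count: the `TokenUsage` target type, the
`From` impl, and the assumption that `cached_tokens` is a subset of
`prompt_tokens` are all illustrative guesses, not code from the PR. To
keep the sketch self-contained (and free of the serde dependency), the
source struct is redefined here with the nested detail flattened.

```rust
/// Flattened copy of the provider payload for this sketch.
pub struct Usage {
    pub completion_tokens: u32,
    pub prompt_tokens: u32,
    pub cached_tokens: u32,
    pub total_tokens: u32,
}

/// Hypothetical provider-agnostic representation.
#[derive(Debug, PartialEq)]
pub struct TokenUsage {
    pub input_tokens: u32,
    pub output_tokens: u32,
    pub cache_read_tokens: u32,
}

impl From<Usage> for TokenUsage {
    fn from(usage: Usage) -> Self {
        TokenUsage {
            // Assumption: cached tokens are counted inside prompt_tokens,
            // so subtract them to get the uncached input count.
            input_tokens: usage.prompt_tokens.saturating_sub(usage.cached_tokens),
            output_tokens: usage.completion_tokens,
            cache_read_tokens: usage.cached_tokens,
        }
    }
}
```

`saturating_sub` guards against a provider reporting more cached tokens
than prompt tokens, which would otherwise panic in a debug build.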