language_models: Add thinking support for ollama (#31665)

This PR updates how we handle Ollama responses, leveraging the new [v0.9.0](https://github.com/ollama/ollama/releases/tag/v0.9.0) release. Previously, thinking text was embedded within the model's main content, leading to it appearing directly in the agent's response. Now, thinking content is provided as a separate parameter, allowing us to display it correctly within the agent panel, similar to other providers. I have tested this with qwen3:8b and it works nicely. ~~We can release this once the Ollama release is stable.~~ It has now been released as stable. <img width="433" alt="image" src="https://github.com/user-attachments/assets/2983ef06-6679-4033-82c2-231ea9cd6434" /> Release Notes: - Add thinking support for ollama --------- Co-authored-by: Bennet Bo Fenner <bennetbo@gmx.de>
2025-06-02 20:42:41 +05:30 · 2025-06-02 20:42:41 +05:30 · 65e3e84cbc
commit 65e3e84cbc
parent 1e1d4430c2
3 changed files with 39 additions and 7 deletions
--- a/crates/ollama/src/ollama.rs
+++ b/crates/ollama/src/ollama.rs
@ -38,6 +38,7 @@ pub struct Model {
    pub max_tokens: usize,
    pub keep_alive: Option<KeepAlive>,
    pub supports_tools: Option<bool>,
+    pub supports_thinking: Option<bool>,
 }

 fn get_max_tokens(name: &str) -> usize {
@ -67,6 +68,7 @@ impl Model {
        display_name: Option<&str>,
        max_tokens: Option<usize>,
        supports_tools: Option<bool>,
+        supports_thinking: Option<bool>,
    ) -> Self {
        Self {
            name: name.to_owned(),
@ -76,6 +78,7 @@ impl Model {
            max_tokens: max_tokens.unwrap_or_else(|| get_max_tokens(name)),
            keep_alive: Some(KeepAlive::indefinite()),
            supports_tools,
+            supports_thinking,
        }
    }

@ -98,6 +101,7 @@ pub enum ChatMessage {
    Assistant {
        content: String,
        tool_calls: Option<Vec<OllamaToolCall>>,
+        thinking: Option<String>,
    },
    User {
        content: String,
@ -140,6 +144,7 @@ pub struct ChatRequest {
    pub keep_alive: KeepAlive,
    pub options: Option<ChatOptions>,
    pub tools: Vec<OllamaTool>,
+    pub think: Option<bool>,
 }

 impl ChatRequest {
@ -215,6 +220,10 @@ impl ModelShow {
        // .contains expects &String, which would require an additional allocation
        self.capabilities.iter().any(|v| v == "tools")
    }
+
+    pub fn supports_thinking(&self) -> bool {
+        self.capabilities.iter().any(|v| v == "thinking")
+    }
 }

 pub async fn complete(
@ -459,9 +468,11 @@ mod tests {
            ChatMessage::Assistant {
                content,
                tool_calls,
+                thinking,
            } => {
                assert!(content.is_empty());
                assert!(tool_calls.is_some_and(|v| !v.is_empty()));
+                assert!(thinking.is_none());
            }
            _ => panic!("Deserialized wrong role"),
        }