language_models: Add thinking support for ollama (#31665)

This PR updates how we handle Ollama responses, leveraging the new [v0.9.0](https://github.com/ollama/ollama/releases/tag/v0.9.0) release. Previously, thinking text was embedded within the model's main content, so it appeared directly in the agent's response. Now, thinking content is provided as a separate parameter, allowing us to display it correctly within the agent panel, similar to other providers. I have tested this with qwen3:8b and it works nicely. ~~We can release this once the Ollama release is stable.~~ It has now been released as stable. <img width="433" alt="image" src="https://github.com/user-attachments/assets/2983ef06-6679-4033-82c2-231ea9cd6434" /> Release Notes: - Add thinking support for Ollama --------- Co-authored-by: Bennet Bo Fenner <bennetbo@gmx.de>
2025-06-02 20:42:41 +05:30 · 2025-06-02 20:42:41 +05:30 · 65e3e84cbc
commit 65e3e84cbc
parent 1e1d4430c2
3 changed files with 39 additions and 7 deletions
--- a/crates/agent_settings/src/agent_settings.rs
+++ b/crates/agent_settings/src/agent_settings.rs
@ -372,6 +372,7 @@ impl AgentSettingsContent {
                                None,
                                None,
                                Some(language_model.supports_tools()),
+                                None,
                            )),
                            api_url,
                        });
--- a/crates/language_models/src/provider/ollama.rs
+++ b/crates/language_models/src/provider/ollama.rs
@ -6,7 +6,7 @@ use http_client::HttpClient;
 use language_model::{
    AuthenticateError, LanguageModelCompletionError, LanguageModelCompletionEvent,
    LanguageModelRequestTool, LanguageModelToolChoice, LanguageModelToolUse,
-    LanguageModelToolUseId, StopReason,
+    LanguageModelToolUseId, MessageContent, StopReason,
 };
 use language_model::{
    LanguageModel, LanguageModelId, LanguageModelName, LanguageModelProvider,
@ -54,6 +54,8 @@ pub struct AvailableModel {
    pub keep_alive: Option<KeepAlive>,
    /// Whether the model supports tools
    pub supports_tools: Option<bool>,
+    /// Whether to enable think mode
+    pub supports_thinking: Option<bool>,
 }

 pub struct OllamaLanguageModelProvider {
@ -99,6 +101,7 @@ impl State {
                            None,
                            None,
                            Some(capabilities.supports_tools()),
+                            Some(capabilities.supports_thinking()),
                        );
                        Ok(ollama_model)
                    }
@ -219,6 +222,7 @@ impl LanguageModelProvider for OllamaLanguageModelProvider {
                    max_tokens: model.max_tokens,
                    keep_alive: model.keep_alive.clone(),
                    supports_tools: model.supports_tools,
+                    supports_thinking: model.supports_thinking,
                },
            );
        }
@ -282,10 +286,18 @@ impl OllamaLanguageModel {
                    Role::User => ChatMessage::User {
                        content: msg.string_contents(),
                    },
-                    Role::Assistant => ChatMessage::Assistant {
-                        content: msg.string_contents(),
-                        tool_calls: None,
-                    },
+                    Role::Assistant => {
+                        let content = msg.string_contents();
+                        let thinking = msg.content.into_iter().find_map(|content| match content {
+                            MessageContent::Thinking { text, .. } if !text.is_empty() => Some(text),
+                            _ => None,
+                        });
+                        ChatMessage::Assistant {
+                            content,
+                            tool_calls: None,
+                            thinking,
+                        }
+                    }
                    Role::System => ChatMessage::System {
                        content: msg.string_contents(),
                    },
@ -299,6 +311,7 @@ impl OllamaLanguageModel {
                temperature: request.temperature.or(Some(1.0)),
                ..Default::default()
            }),
+            think: self.model.supports_thinking,
            tools: request.tools.into_iter().map(tool_into_ollama).collect(),
        }
    }
@ -433,8 +446,15 @@ fn map_to_language_model_completion_events(
                ChatMessage::Assistant {
                    content,
                    tool_calls,
+                    thinking,
                } => {
-                    // Check for tool calls
+                    if let Some(text) = thinking {
+                        events.push(Ok(LanguageModelCompletionEvent::Thinking {
+                            text,
+                            signature: None,
+                        }));
+                    }
+
                    if let Some(tool_call) = tool_calls.and_then(|v| v.into_iter().next()) {
                        match tool_call {
                            OllamaToolCall::Function(function) => {
@ -455,7 +475,7 @@ fn map_to_language_model_completion_events(
                                state.used_tools = true;
                            }
                        }
-                    } else {
+                    } else if !content.is_empty() {
                        events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                    }
                }
--- a/crates/ollama/src/ollama.rs
+++ b/crates/ollama/src/ollama.rs
@ -38,6 +38,7 @@ pub struct Model {
    pub max_tokens: usize,
    pub keep_alive: Option<KeepAlive>,
    pub supports_tools: Option<bool>,
+    pub supports_thinking: Option<bool>,
 }

 fn get_max_tokens(name: &str) -> usize {
@ -67,6 +68,7 @@ impl Model {
        display_name: Option<&str>,
        max_tokens: Option<usize>,
        supports_tools: Option<bool>,
+        supports_thinking: Option<bool>,
    ) -> Self {
        Self {
            name: name.to_owned(),
@ -76,6 +78,7 @@ impl Model {
            max_tokens: max_tokens.unwrap_or_else(|| get_max_tokens(name)),
            keep_alive: Some(KeepAlive::indefinite()),
            supports_tools,
+            supports_thinking,
        }
    }

@ -98,6 +101,7 @@ pub enum ChatMessage {
    Assistant {
        content: String,
        tool_calls: Option<Vec<OllamaToolCall>>,
+        thinking: Option<String>,
    },
    User {
        content: String,
@ -140,6 +144,7 @@ pub struct ChatRequest {
    pub keep_alive: KeepAlive,
    pub options: Option<ChatOptions>,
    pub tools: Vec<OllamaTool>,
+    pub think: Option<bool>,
 }

 impl ChatRequest {
@ -215,6 +220,10 @@ impl ModelShow {
        // .contains expects &String, which would require an additional allocation
        self.capabilities.iter().any(|v| v == "tools")
    }
+
+    pub fn supports_thinking(&self) -> bool {
+        self.capabilities.iter().any(|v| v == "thinking")
+    }
 }

 pub async fn complete(
@ -459,9 +468,11 @@ mod tests {
            ChatMessage::Assistant {
                content,
                tool_calls,
+                thinking,
            } => {
                assert!(content.is_empty());
                assert!(tool_calls.is_some_and(|v| !v.is_empty()));
+                assert!(thinking.is_none());
            }
            _ => panic!("Deserialized wrong role"),
        }