From 65e3e84cbc5c1fe1e6ff86c26ce609123452194c Mon Sep 17 00:00:00 2001
From: Umesh Yadav <23421535+imumesh18@users.noreply.github.com>
Date: Mon, 2 Jun 2025 20:42:41 +0530
Subject: [PATCH] language_models: Add thinking support for ollama (#31665)

This PR updates how we handle Ollama responses, leveraging the new
[v0.9.0](https://github.com/ollama/ollama/releases/tag/v0.9.0) release.
Previously, thinking text was embedded within the model's main content,
leading to it appearing directly in the agent's response. Now, thinking
content is provided as a separate parameter, allowing us to display it
correctly within the agent panel, similar to other providers.

I have tested this with qwen3:8b and it works nicely.

~~We can release this once the ollama release is stable.~~ It's now
released as stable.

Release Notes:

- Add thinking support for ollama

---------

Co-authored-by: Bennet Bo Fenner
---
 crates/agent_settings/src/agent_settings.rs   |  1 +
 crates/language_models/src/provider/ollama.rs | 34 +++++++++++++++----
 crates/ollama/src/ollama.rs                   | 11 ++++++
 3 files changed, 39 insertions(+), 7 deletions(-)
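For readers skimming the diff below, here is a minimal sketch (not part of
the patch) of how the two new knobs relate: `supports_thinking`, detected
via the `/api/show` capabilities or set in settings, feeds the
request-level `think` flag, and the assistant message's separate `thinking`
text later becomes its own completion event. The `model`, `messages`, and
`stream` fields of `ChatRequest` are assumed from the rest of the crate and
do not appear in the hunks below.

```rust
use ollama::{ChatMessage, ChatRequest, KeepAlive, Model};

// Sketch: build a chat request that opts into thinking when the model
// advertises the capability. `think: None` keeps the server default;
// `Some(true)` asks Ollama v0.9.0+ to return reasoning in the separate
// `thinking` field instead of embedding it in the main content.
fn thinking_request(model: &Model, prompt: String) -> ChatRequest {
    ChatRequest {
        model: model.name.clone(),           // assumed field, not in these hunks
        messages: vec![ChatMessage::User { content: prompt }],
        stream: true,                        // assumed field, not in these hunks
        keep_alive: KeepAlive::indefinite(),
        options: None,
        tools: Vec::new(),
        think: model.supports_thinking,
    }
}
```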
diff --git a/crates/agent_settings/src/agent_settings.rs b/crates/agent_settings/src/agent_settings.rs
index 696b379b12..a162ce064e 100644
--- a/crates/agent_settings/src/agent_settings.rs
+++ b/crates/agent_settings/src/agent_settings.rs
@@ -372,6 +372,7 @@ impl AgentSettingsContent {
                         None,
                         None,
                         Some(language_model.supports_tools()),
+                        None,
                     )),
                     api_url,
                 });
diff --git a/crates/language_models/src/provider/ollama.rs b/crates/language_models/src/provider/ollama.rs
index 1bb46ea482..78645cc1b9 100644
--- a/crates/language_models/src/provider/ollama.rs
+++ b/crates/language_models/src/provider/ollama.rs
@@ -6,7 +6,7 @@ use http_client::HttpClient;
 use language_model::{
     AuthenticateError, LanguageModelCompletionError, LanguageModelCompletionEvent,
     LanguageModelRequestTool, LanguageModelToolChoice, LanguageModelToolUse,
-    LanguageModelToolUseId, StopReason,
+    LanguageModelToolUseId, MessageContent, StopReason,
 };
 use language_model::{
     LanguageModel, LanguageModelId, LanguageModelName, LanguageModelProvider,
@@ -54,6 +54,8 @@ pub struct AvailableModel {
     pub keep_alive: Option<KeepAlive>,
     /// Whether the model supports tools
     pub supports_tools: Option<bool>,
+    /// Whether to enable think mode
+    pub supports_thinking: Option<bool>,
 }
 
 pub struct OllamaLanguageModelProvider {
@@ -99,6 +101,7 @@ impl State {
                 None,
                 None,
                 Some(capabilities.supports_tools()),
+                Some(capabilities.supports_thinking()),
             );
             Ok(ollama_model)
         }
@@ -219,6 +222,7 @@ impl LanguageModelProvider for OllamaLanguageModelProvider {
                     max_tokens: model.max_tokens,
                     keep_alive: model.keep_alive.clone(),
                     supports_tools: model.supports_tools,
+                    supports_thinking: model.supports_thinking,
                 },
             );
         }
@@ -282,10 +286,18 @@ impl OllamaLanguageModel {
                 Role::User => ChatMessage::User {
                     content: msg.string_contents(),
                 },
-                Role::Assistant => ChatMessage::Assistant {
-                    content: msg.string_contents(),
-                    tool_calls: None,
-                },
+                Role::Assistant => {
+                    let content = msg.string_contents();
+                    let thinking = msg.content.into_iter().find_map(|content| match content {
+                        MessageContent::Thinking { text, .. } if !text.is_empty() => Some(text),
+                        _ => None,
+                    });
+                    ChatMessage::Assistant {
+                        content,
+                        tool_calls: None,
+                        thinking,
+                    }
+                }
                 Role::System => ChatMessage::System {
                     content: msg.string_contents(),
                 },
@@ -299,6 +311,7 @@ impl OllamaLanguageModel {
                 temperature: request.temperature.or(Some(1.0)),
                 ..Default::default()
             }),
+            think: self.model.supports_thinking,
             tools: request.tools.into_iter().map(tool_into_ollama).collect(),
         }
     }
@@ -433,8 +446,15 @@ fn map_to_language_model_completion_events(
                         ChatMessage::Assistant {
                             content,
                             tool_calls,
+                            thinking,
                         } => {
-                            // Check for tool calls
+                            if let Some(text) = thinking {
+                                events.push(Ok(LanguageModelCompletionEvent::Thinking {
+                                    text,
+                                    signature: None,
+                                }));
+                            }
+
                             if let Some(tool_call) = tool_calls.and_then(|v| v.into_iter().next()) {
                                 match tool_call {
                                     OllamaToolCall::Function(function) => {
@@ -455,7 +475,7 @@ fn map_to_language_model_completion_events(
                                         state.used_tools = true;
                                     }
                                 }
-                            } else {
+                            } else if !content.is_empty() {
                                 events.push(Ok(LanguageModelCompletionEvent::Text(content)));
                             }
                         }
diff --git a/crates/ollama/src/ollama.rs b/crates/ollama/src/ollama.rs
index a42510279c..b52df6e4ce 100644
--- a/crates/ollama/src/ollama.rs
+++ b/crates/ollama/src/ollama.rs
@@ -38,6 +38,7 @@ pub struct Model {
     pub max_tokens: usize,
     pub keep_alive: Option<KeepAlive>,
     pub supports_tools: Option<bool>,
+    pub supports_thinking: Option<bool>,
 }
 
 fn get_max_tokens(name: &str) -> usize {
@@ -67,6 +68,7 @@ impl Model {
         display_name: Option<&str>,
         max_tokens: Option<usize>,
         supports_tools: Option<bool>,
+        supports_thinking: Option<bool>,
     ) -> Self {
         Self {
             name: name.to_owned(),
@@ -76,6 +78,7 @@ impl Model {
             max_tokens: max_tokens.unwrap_or_else(|| get_max_tokens(name)),
             keep_alive: Some(KeepAlive::indefinite()),
             supports_tools,
+            supports_thinking,
         }
     }
 
@@ -98,6 +101,7 @@ pub enum ChatMessage {
     Assistant {
         content: String,
         tool_calls: Option<Vec<OllamaToolCall>>,
+        thinking: Option<String>,
     },
     User {
         content: String,
@@ -140,6 +144,7 @@ pub struct ChatRequest {
     pub keep_alive: KeepAlive,
    pub options: Option<ChatOptions>,
     pub tools: Vec<OllamaTool>,
+    pub think: Option<bool>,
 }
 
 impl ChatRequest {
@@ -215,6 +220,10 @@ impl ModelShow {
         // .contains expects &String, which would require an additional allocation
         self.capabilities.iter().any(|v| v == "tools")
     }
+
+    pub fn supports_thinking(&self) -> bool {
+        self.capabilities.iter().any(|v| v == "thinking")
+    }
 }
 
 pub async fn complete(
@@ -459,9 +468,11 @@ mod tests {
             ChatMessage::Assistant {
                 content,
                 tool_calls,
+                thinking,
             } => {
                 assert!(content.is_empty());
                 assert!(tool_calls.is_some_and(|v| !v.is_empty()));
+                assert!(thinking.is_none());
             }
             _ => panic!("Deserialized wrong role"),
         }
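A possible companion test for the new field, sketched under the assumption
that `ChatMessage` is internally tagged by `role` (as the existing
deserialization test above suggests); the payload is hypothetical, not
captured from a real Ollama v0.9.0 response:

```rust
#[test]
fn parse_assistant_message_with_thinking() {
    // Hypothetical payload shaped after the serde derives in this patch;
    // a real Ollama response may carry additional fields.
    let response = r#"{
        "role": "assistant",
        "content": "The sky is blue because of Rayleigh scattering.",
        "thinking": "The user asks why the sky is blue..."
    }"#;
    let message: ChatMessage = serde_json::from_str(response).unwrap();
    match message {
        ChatMessage::Assistant { content, tool_calls, thinking } => {
            assert!(!content.is_empty());
            assert!(tool_calls.is_none());
            assert!(thinking.is_some_and(|t| !t.is_empty()));
        }
        _ => panic!("Deserialized wrong role"),
    }
}
```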