From 65e3e84cbc5c1fe1e6ff86c26ce609123452194c Mon Sep 17 00:00:00 2001
From: Umesh Yadav <23421535+imumesh18@users.noreply.github.com>
Date: Mon, 2 Jun 2025 20:42:41 +0530
Subject: [PATCH] language_models: Add thinking support for ollama (#31665)
This PR updates how we handle Ollama responses, leveraging the new
[v0.9.0](https://github.com/ollama/ollama/releases/tag/v0.9.0) release.
Previously, thinking text was embedded within the model's main content,
leading to it appearing directly in the agent's response. Now, thinking
content is provided as a separate parameter, allowing us to display it
correctly within the agent panel, similar to other providers. I have
tested this with qwen3:8b and it works nicely. ~~We can release this once
the Ollama release is stable.~~ It's released now as stable.
Release Notes:
- Add thinking support for Ollama
---------
Co-authored-by: Bennet Bo Fenner
---
crates/agent_settings/src/agent_settings.rs | 1 +
crates/language_models/src/provider/ollama.rs | 34 +++++++++++++++----
crates/ollama/src/ollama.rs | 11 ++++++
3 files changed, 39 insertions(+), 7 deletions(-)
diff --git a/crates/agent_settings/src/agent_settings.rs b/crates/agent_settings/src/agent_settings.rs
index 696b379b12..a162ce064e 100644
--- a/crates/agent_settings/src/agent_settings.rs
+++ b/crates/agent_settings/src/agent_settings.rs
@@ -372,6 +372,7 @@ impl AgentSettingsContent {
None,
None,
Some(language_model.supports_tools()),
+ None,
)),
api_url,
});
diff --git a/crates/language_models/src/provider/ollama.rs b/crates/language_models/src/provider/ollama.rs
index 1bb46ea482..78645cc1b9 100644
--- a/crates/language_models/src/provider/ollama.rs
+++ b/crates/language_models/src/provider/ollama.rs
@@ -6,7 +6,7 @@ use http_client::HttpClient;
use language_model::{
AuthenticateError, LanguageModelCompletionError, LanguageModelCompletionEvent,
LanguageModelRequestTool, LanguageModelToolChoice, LanguageModelToolUse,
- LanguageModelToolUseId, StopReason,
+ LanguageModelToolUseId, MessageContent, StopReason,
};
use language_model::{
LanguageModel, LanguageModelId, LanguageModelName, LanguageModelProvider,
@@ -54,6 +54,8 @@ pub struct AvailableModel {
pub keep_alive: Option<KeepAlive>,
/// Whether the model supports tools
pub supports_tools: Option<bool>,
+ /// Whether to enable think mode
+ pub supports_thinking: Option<bool>,
}
pub struct OllamaLanguageModelProvider {
@@ -99,6 +101,7 @@ impl State {
None,
None,
Some(capabilities.supports_tools()),
+ Some(capabilities.supports_thinking()),
);
Ok(ollama_model)
}
@@ -219,6 +222,7 @@ impl LanguageModelProvider for OllamaLanguageModelProvider {
max_tokens: model.max_tokens,
keep_alive: model.keep_alive.clone(),
supports_tools: model.supports_tools,
+ supports_thinking: model.supports_thinking,
},
);
}
@@ -282,10 +286,18 @@ impl OllamaLanguageModel {
Role::User => ChatMessage::User {
content: msg.string_contents(),
},
- Role::Assistant => ChatMessage::Assistant {
- content: msg.string_contents(),
- tool_calls: None,
- },
+ Role::Assistant => {
+ let content = msg.string_contents();
+ let thinking = msg.content.into_iter().find_map(|content| match content {
+ MessageContent::Thinking { text, .. } if !text.is_empty() => Some(text),
+ _ => None,
+ });
+ ChatMessage::Assistant {
+ content,
+ tool_calls: None,
+ thinking,
+ }
+ }
Role::System => ChatMessage::System {
content: msg.string_contents(),
},
@@ -299,6 +311,7 @@ impl OllamaLanguageModel {
temperature: request.temperature.or(Some(1.0)),
..Default::default()
}),
+ think: self.model.supports_thinking,
tools: request.tools.into_iter().map(tool_into_ollama).collect(),
}
}
@@ -433,8 +446,15 @@ fn map_to_language_model_completion_events(
ChatMessage::Assistant {
content,
tool_calls,
+ thinking,
} => {
- // Check for tool calls
+ if let Some(text) = thinking {
+ events.push(Ok(LanguageModelCompletionEvent::Thinking {
+ text,
+ signature: None,
+ }));
+ }
+
if let Some(tool_call) = tool_calls.and_then(|v| v.into_iter().next()) {
match tool_call {
OllamaToolCall::Function(function) => {
@@ -455,7 +475,7 @@ fn map_to_language_model_completion_events(
state.used_tools = true;
}
}
- } else {
+ } else if !content.is_empty() {
events.push(Ok(LanguageModelCompletionEvent::Text(content)));
}
}
diff --git a/crates/ollama/src/ollama.rs b/crates/ollama/src/ollama.rs
index a42510279c..b52df6e4ce 100644
--- a/crates/ollama/src/ollama.rs
+++ b/crates/ollama/src/ollama.rs
@@ -38,6 +38,7 @@ pub struct Model {
pub max_tokens: usize,
pub keep_alive: Option<KeepAlive>,
pub supports_tools: Option<bool>,
+ pub supports_thinking: Option<bool>,
}
fn get_max_tokens(name: &str) -> usize {
@@ -67,6 +68,7 @@ impl Model {
display_name: Option<&str>,
max_tokens: Option<usize>,
supports_tools: Option<bool>,
+ supports_thinking: Option<bool>,
) -> Self {
Self {
name: name.to_owned(),
@@ -76,6 +78,7 @@ impl Model {
max_tokens: max_tokens.unwrap_or_else(|| get_max_tokens(name)),
keep_alive: Some(KeepAlive::indefinite()),
supports_tools,
+ supports_thinking,
}
}
@@ -98,6 +101,7 @@ pub enum ChatMessage {
Assistant {
content: String,
tool_calls: Option<Vec<OllamaToolCall>>,
+ thinking: Option<String>,
},
User {
content: String,
@@ -140,6 +144,7 @@ pub struct ChatRequest {
pub keep_alive: KeepAlive,
pub options: Option<ChatOptions>,
pub tools: Vec<OllamaTool>,
+ pub think: Option<bool>,
}
impl ChatRequest {
@@ -215,6 +220,10 @@ impl ModelShow {
// .contains expects &String, which would require an additional allocation
self.capabilities.iter().any(|v| v == "tools")
}
+
+ pub fn supports_thinking(&self) -> bool {
+ self.capabilities.iter().any(|v| v == "thinking")
+ }
}
pub async fn complete(
@@ -459,9 +468,11 @@ mod tests {
ChatMessage::Assistant {
content,
tool_calls,
+ thinking,
} => {
assert!(content.is_empty());
assert!(tool_calls.is_some_and(|v| !v.is_empty()));
+ assert!(thinking.is_none());
}
_ => panic!("Deserialized wrong role"),
}