language_models: Add thinking support for ollama (#31665)

This PR updates how we handle Ollama responses, leveraging the new
[v0.9.0](https://github.com/ollama/ollama/releases/tag/v0.9.0) release.
Previously, thinking text was embedded within the model's main content,
leading to it appearing directly in the agent's response. Now, thinking
content is provided as a separate parameter, allowing us to display it
correctly within the agent panel, similar to other providers. I have
tested this with qwen3:8b and it works nicely. ~~We can release this once
the Ollama release is stable.~~ It has now been released as stable.

<img width="433" alt="image"
src="https://github.com/user-attachments/assets/2983ef06-6679-4033-82c2-231ea9cd6434"
/>


Release Notes:

- Added thinking support for Ollama

---------

Co-authored-by: Bennet Bo Fenner <bennetbo@gmx.de>
This commit is contained in:
Umesh Yadav 2025-06-02 20:42:41 +05:30 committed by GitHub
parent 1e1d4430c2
commit 65e3e84cbc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 39 additions and 7 deletions

View file

@ -372,6 +372,7 @@ impl AgentSettingsContent {
None,
None,
Some(language_model.supports_tools()),
None,
)),
api_url,
});

View file

@ -6,7 +6,7 @@ use http_client::HttpClient;
use language_model::{
AuthenticateError, LanguageModelCompletionError, LanguageModelCompletionEvent,
LanguageModelRequestTool, LanguageModelToolChoice, LanguageModelToolUse,
LanguageModelToolUseId, StopReason,
LanguageModelToolUseId, MessageContent, StopReason,
};
use language_model::{
LanguageModel, LanguageModelId, LanguageModelName, LanguageModelProvider,
@ -54,6 +54,8 @@ pub struct AvailableModel {
pub keep_alive: Option<KeepAlive>,
/// Whether the model supports tools
pub supports_tools: Option<bool>,
/// Whether to enable think mode
pub supports_thinking: Option<bool>,
}
pub struct OllamaLanguageModelProvider {
@ -99,6 +101,7 @@ impl State {
None,
None,
Some(capabilities.supports_tools()),
Some(capabilities.supports_thinking()),
);
Ok(ollama_model)
}
@ -219,6 +222,7 @@ impl LanguageModelProvider for OllamaLanguageModelProvider {
max_tokens: model.max_tokens,
keep_alive: model.keep_alive.clone(),
supports_tools: model.supports_tools,
supports_thinking: model.supports_thinking,
},
);
}
@ -282,10 +286,18 @@ impl OllamaLanguageModel {
Role::User => ChatMessage::User {
content: msg.string_contents(),
},
Role::Assistant => ChatMessage::Assistant {
content: msg.string_contents(),
tool_calls: None,
},
Role::Assistant => {
let content = msg.string_contents();
let thinking = msg.content.into_iter().find_map(|content| match content {
MessageContent::Thinking { text, .. } if !text.is_empty() => Some(text),
_ => None,
});
ChatMessage::Assistant {
content,
tool_calls: None,
thinking,
}
}
Role::System => ChatMessage::System {
content: msg.string_contents(),
},
@ -299,6 +311,7 @@ impl OllamaLanguageModel {
temperature: request.temperature.or(Some(1.0)),
..Default::default()
}),
think: self.model.supports_thinking,
tools: request.tools.into_iter().map(tool_into_ollama).collect(),
}
}
@ -433,8 +446,15 @@ fn map_to_language_model_completion_events(
ChatMessage::Assistant {
content,
tool_calls,
thinking,
} => {
// Emit any thinking text first, then check for tool calls
if let Some(text) = thinking {
events.push(Ok(LanguageModelCompletionEvent::Thinking {
text,
signature: None,
}));
}
if let Some(tool_call) = tool_calls.and_then(|v| v.into_iter().next()) {
match tool_call {
OllamaToolCall::Function(function) => {
@ -455,7 +475,7 @@ fn map_to_language_model_completion_events(
state.used_tools = true;
}
}
} else {
} else if !content.is_empty() {
events.push(Ok(LanguageModelCompletionEvent::Text(content)));
}
}

View file

@ -38,6 +38,7 @@ pub struct Model {
pub max_tokens: usize,
pub keep_alive: Option<KeepAlive>,
pub supports_tools: Option<bool>,
pub supports_thinking: Option<bool>,
}
fn get_max_tokens(name: &str) -> usize {
@ -67,6 +68,7 @@ impl Model {
display_name: Option<&str>,
max_tokens: Option<usize>,
supports_tools: Option<bool>,
supports_thinking: Option<bool>,
) -> Self {
Self {
name: name.to_owned(),
@ -76,6 +78,7 @@ impl Model {
max_tokens: max_tokens.unwrap_or_else(|| get_max_tokens(name)),
keep_alive: Some(KeepAlive::indefinite()),
supports_tools,
supports_thinking,
}
}
@ -98,6 +101,7 @@ pub enum ChatMessage {
Assistant {
content: String,
tool_calls: Option<Vec<OllamaToolCall>>,
thinking: Option<String>,
},
User {
content: String,
@ -140,6 +144,7 @@ pub struct ChatRequest {
pub keep_alive: KeepAlive,
pub options: Option<ChatOptions>,
pub tools: Vec<OllamaTool>,
pub think: Option<bool>,
}
impl ChatRequest {
@ -215,6 +220,10 @@ impl ModelShow {
// .contains expects &String, which would require an additional allocation
self.capabilities.iter().any(|v| v == "tools")
}
/// Returns `true` when one of the model's reported capabilities equals `"thinking"`.
pub fn supports_thinking(&self) -> bool {
    // Compare each entry against the literal directly, so no `String` has to be
    // allocated just to call `.contains`.
    self.capabilities
        .iter()
        .any(|capability| capability == "thinking")
}
}
pub async fn complete(
@ -459,9 +468,11 @@ mod tests {
ChatMessage::Assistant {
content,
tool_calls,
thinking,
} => {
assert!(content.is_empty());
assert!(tool_calls.is_some_and(|v| !v.is_empty()));
assert!(thinking.is_none());
}
_ => panic!("Deserialized wrong role"),
}