language_models: Add thinking support for ollama (#31665)

This PR updates how we handle Ollama responses, leveraging the new
[v0.9.0](https://github.com/ollama/ollama/releases/tag/v0.9.0) release.
Previously, thinking text was embedded within the model's main content,
leading to it appearing directly in the agent's response. Now, thinking
content is provided as a separate parameter, allowing us to display it
correctly within the agent panel, similar to other providers. I have
tested this with qwen3:8b and it works nicely. ~~We can release this once
the Ollama release is stable.~~ It's now released as stable.

<img width="433" alt="image"
src="https://github.com/user-attachments/assets/2983ef06-6679-4033-82c2-231ea9cd6434"
/>


Release Notes:

- Add thinking support for Ollama

---------

Co-authored-by: Bennet Bo Fenner <bennetbo@gmx.de>
This commit is contained in:
Umesh Yadav 2025-06-02 20:42:41 +05:30 committed by GitHub
parent 1e1d4430c2
commit 65e3e84cbc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 39 additions and 7 deletions

View file

@ -38,6 +38,7 @@ pub struct Model {
pub max_tokens: usize,
pub keep_alive: Option<KeepAlive>,
pub supports_tools: Option<bool>,
pub supports_thinking: Option<bool>,
}
fn get_max_tokens(name: &str) -> usize {
@ -67,6 +68,7 @@ impl Model {
display_name: Option<&str>,
max_tokens: Option<usize>,
supports_tools: Option<bool>,
supports_thinking: Option<bool>,
) -> Self {
Self {
name: name.to_owned(),
@ -76,6 +78,7 @@ impl Model {
max_tokens: max_tokens.unwrap_or_else(|| get_max_tokens(name)),
keep_alive: Some(KeepAlive::indefinite()),
supports_tools,
supports_thinking,
}
}
@ -98,6 +101,7 @@ pub enum ChatMessage {
Assistant {
content: String,
tool_calls: Option<Vec<OllamaToolCall>>,
thinking: Option<String>,
},
User {
content: String,
@ -140,6 +144,7 @@ pub struct ChatRequest {
pub keep_alive: KeepAlive,
pub options: Option<ChatOptions>,
pub tools: Vec<OllamaTool>,
pub think: Option<bool>,
}
impl ChatRequest {
@ -215,6 +220,10 @@ impl ModelShow {
// .contains expects &String, which would require an additional allocation
self.capabilities.iter().any(|v| v == "tools")
}
/// Whether the model advertises the "thinking" capability, i.e. it can
/// emit reasoning text separately from its main response content.
pub fn supports_thinking(&self) -> bool {
    // Iterate and compare rather than `.contains`, which would take a
    // `&String` and force an allocation for the probe value.
    self.capabilities
        .iter()
        .any(|capability| capability == "thinking")
}
}
pub async fn complete(
@ -459,9 +468,11 @@ mod tests {
ChatMessage::Assistant {
content,
tool_calls,
thinking,
} => {
assert!(content.is_empty());
assert!(tool_calls.is_some_and(|v| !v.is_empty()));
assert!(thinking.is_none());
}
_ => panic!("Deserialized wrong role"),
}