language_models: Add thinking support for ollama (#31665)
This PR updates how we handle Ollama responses, leveraging the new [v0.9.0](https://github.com/ollama/ollama/releases/tag/v0.9.0) release. Previously, thinking text was embedded within the model's main content, which caused it to appear directly in the agent's response. Now, thinking content is provided as a separate parameter, allowing us to display it correctly within the agent panel, similar to other providers. I have tested this with qwen3:8b and it works nicely. ~~We can release this once the Ollama release is stable.~~ It has now been released as stable. <img width="433" alt="image" src="https://github.com/user-attachments/assets/2983ef06-6679-4033-82c2-231ea9cd6434" /> Release Notes: - Add thinking support for Ollama --------- Co-authored-by: Bennet Bo Fenner <bennetbo@gmx.de>
This commit is contained in:
parent
1e1d4430c2
commit
65e3e84cbc
3 changed files with 39 additions and 7 deletions
|
@ -38,6 +38,7 @@ pub struct Model {
|
|||
pub max_tokens: usize,
|
||||
pub keep_alive: Option<KeepAlive>,
|
||||
pub supports_tools: Option<bool>,
|
||||
pub supports_thinking: Option<bool>,
|
||||
}
|
||||
|
||||
fn get_max_tokens(name: &str) -> usize {
|
||||
|
@ -67,6 +68,7 @@ impl Model {
|
|||
display_name: Option<&str>,
|
||||
max_tokens: Option<usize>,
|
||||
supports_tools: Option<bool>,
|
||||
supports_thinking: Option<bool>,
|
||||
) -> Self {
|
||||
Self {
|
||||
name: name.to_owned(),
|
||||
|
@ -76,6 +78,7 @@ impl Model {
|
|||
max_tokens: max_tokens.unwrap_or_else(|| get_max_tokens(name)),
|
||||
keep_alive: Some(KeepAlive::indefinite()),
|
||||
supports_tools,
|
||||
supports_thinking,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -98,6 +101,7 @@ pub enum ChatMessage {
|
|||
Assistant {
|
||||
content: String,
|
||||
tool_calls: Option<Vec<OllamaToolCall>>,
|
||||
thinking: Option<String>,
|
||||
},
|
||||
User {
|
||||
content: String,
|
||||
|
@ -140,6 +144,7 @@ pub struct ChatRequest {
|
|||
pub keep_alive: KeepAlive,
|
||||
pub options: Option<ChatOptions>,
|
||||
pub tools: Vec<OllamaTool>,
|
||||
pub think: Option<bool>,
|
||||
}
|
||||
|
||||
impl ChatRequest {
|
||||
|
@ -215,6 +220,10 @@ impl ModelShow {
|
|||
// .contains expects &String, which would require an additional allocation
|
||||
self.capabilities.iter().any(|v| v == "tools")
|
||||
}
|
||||
|
||||
pub fn supports_thinking(&self) -> bool {
|
||||
self.capabilities.iter().any(|v| v == "thinking")
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn complete(
|
||||
|
@ -459,9 +468,11 @@ mod tests {
|
|||
ChatMessage::Assistant {
|
||||
content,
|
||||
tool_calls,
|
||||
thinking,
|
||||
} => {
|
||||
assert!(content.is_empty());
|
||||
assert!(tool_calls.is_some_and(|v| !v.is_empty()));
|
||||
assert!(thinking.is_none());
|
||||
}
|
||||
_ => panic!("Deserialized wrong role"),
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue