diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs index 1f2654dac5..6a20ad8f83 100644 --- a/crates/agent/src/thread.rs +++ b/crates/agent/src/thread.rs @@ -1284,6 +1284,7 @@ impl Thread { tool_choice: None, stop: Vec::new(), temperature: AgentSettings::temperature_for_model(&model, cx), + thinking_allowed: true, }; let available_tools = self.available_tools(cx, model.clone()); @@ -1449,6 +1450,7 @@ impl Thread { tool_choice: None, stop: Vec::new(), temperature: AgentSettings::temperature_for_model(model, cx), + thinking_allowed: false, }; for message in &self.messages { diff --git a/crates/agent_ui/src/active_thread.rs b/crates/agent_ui/src/active_thread.rs index a4553fc901..0e0e3756e3 100644 --- a/crates/agent_ui/src/active_thread.rs +++ b/crates/agent_ui/src/active_thread.rs @@ -1461,6 +1461,7 @@ impl ActiveThread { &configured_model.model, cx, ), + thinking_allowed: true, }; Some(configured_model.model.count_tokens(request, cx)) diff --git a/crates/agent_ui/src/buffer_codegen.rs b/crates/agent_ui/src/buffer_codegen.rs index 117dcf4f8e..64498e9281 100644 --- a/crates/agent_ui/src/buffer_codegen.rs +++ b/crates/agent_ui/src/buffer_codegen.rs @@ -475,6 +475,7 @@ impl CodegenAlternative { stop: Vec::new(), temperature, messages: vec![request_message], + thinking_allowed: false, } })) } diff --git a/crates/agent_ui/src/message_editor.rs b/crates/agent_ui/src/message_editor.rs index d1eae02246..8bc93f0f58 100644 --- a/crates/agent_ui/src/message_editor.rs +++ b/crates/agent_ui/src/message_editor.rs @@ -1454,6 +1454,7 @@ impl MessageEditor { tool_choice: None, stop: vec![], temperature: AgentSettings::temperature_for_model(&model.model, cx), + thinking_allowed: true, }; Some(model.model.count_tokens(request, cx)) diff --git a/crates/agent_ui/src/terminal_inline_assistant.rs b/crates/agent_ui/src/terminal_inline_assistant.rs index 162b45413f..91867957cd 100644 --- a/crates/agent_ui/src/terminal_inline_assistant.rs +++ b/crates/agent_ui/src/terminal_inline_assistant.rs @@ -297,6 +297,7 @@ impl TerminalInlineAssistant { tool_choice: None, stop: Vec::new(), temperature, + thinking_allowed: false, } })) } diff --git a/crates/assistant_context/src/assistant_context.rs b/crates/assistant_context/src/assistant_context.rs index aaaef15250..136468e084 100644 --- a/crates/assistant_context/src/assistant_context.rs +++ b/crates/assistant_context/src/assistant_context.rs @@ -2293,6 +2293,7 @@ impl AssistantContext { tool_choice: None, stop: Vec::new(), temperature: model.and_then(|model| AgentSettings::temperature_for_model(model, cx)), + thinking_allowed: true, }; for message in self.messages(cx) { if message.status != MessageStatus::Done { diff --git a/crates/assistant_tools/src/edit_agent.rs b/crates/assistant_tools/src/edit_agent.rs index c2540633f7..af7dae2e20 100644 --- a/crates/assistant_tools/src/edit_agent.rs +++ b/crates/assistant_tools/src/edit_agent.rs @@ -719,6 +719,7 @@ impl EditAgent { tools, stop: Vec::new(), temperature: None, + thinking_allowed: false, }; Ok(self.model.stream_completion_text(request, cx).await?.stream) diff --git a/crates/assistant_tools/src/edit_agent/evals.rs b/crates/assistant_tools/src/edit_agent/evals.rs index 8df8f677f2..d2ee03f08f 100644 --- a/crates/assistant_tools/src/edit_agent/evals.rs +++ b/crates/assistant_tools/src/edit_agent/evals.rs @@ -1263,6 +1263,7 @@ impl EvalAssertion { content: vec![prompt.into()], cache: false, }], + thinking_allowed: true, ..Default::default() }; let mut response = retry_on_rate_limit(async || { @@ -1599,6 +1600,7 @@ impl EditAgentTest { let conversation = LanguageModelRequest { messages, tools, + thinking_allowed: true, ..Default::default() }; diff --git a/crates/eval/src/instance.rs b/crates/eval/src/instance.rs index d17dc89d0b..0f2b4c18ea 100644 --- a/crates/eval/src/instance.rs +++ b/crates/eval/src/instance.rs @@ -594,6 +594,7 @@ impl ExampleInstance { tools: Vec::new(), tool_choice: None, stop: Vec::new(), + thinking_allowed: true, }; let model = model.clone(); diff --git a/crates/git_ui/src/git_panel.rs b/crates/git_ui/src/git_panel.rs index 84ce97a982..c50e2f8912 100644 --- a/crates/git_ui/src/git_panel.rs +++ b/crates/git_ui/src/git_panel.rs @@ -1830,6 +1830,7 @@ impl GitPanel { tool_choice: None, stop: Vec::new(), temperature, + thinking_allowed: false, }; let stream = model.stream_completion_text(request, &cx); diff --git a/crates/language_model/src/request.rs b/crates/language_model/src/request.rs index 451a62775e..6f3d420ad5 100644 --- a/crates/language_model/src/request.rs +++ b/crates/language_model/src/request.rs @@ -391,6 +391,7 @@ pub struct LanguageModelRequest { pub tool_choice: Option, pub stop: Vec, pub temperature: Option, + pub thinking_allowed: bool, } #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)] diff --git a/crates/language_models/src/provider/anthropic.rs b/crates/language_models/src/provider/anthropic.rs index 6ddb1a4381..959cbccf39 100644 --- a/crates/language_models/src/provider/anthropic.rs +++ b/crates/language_models/src/provider/anthropic.rs @@ -663,7 +663,9 @@ pub fn into_anthropic( } else { Some(anthropic::StringOrContents::String(system_message)) }, - thinking: if let AnthropicModelMode::Thinking { budget_tokens } = mode { + thinking: if request.thinking_allowed + && let AnthropicModelMode::Thinking { budget_tokens } = mode + { Some(anthropic::Thinking::Enabled { budget_tokens }) } else { None @@ -1108,6 +1110,7 @@ mod tests { temperature: None, tools: vec![], tool_choice: None, + thinking_allowed: true, }; let anthropic_request = into_anthropic( diff --git a/crates/language_models/src/provider/bedrock.rs b/crates/language_models/src/provider/bedrock.rs index 9c0d481607..65ce1dbc4b 100644 --- a/crates/language_models/src/provider/bedrock.rs +++ b/crates/language_models/src/provider/bedrock.rs @@ -799,7 +799,9 @@ pub fn into_bedrock( max_tokens: max_output_tokens, system: Some(system_message), tools: Some(tool_config), - thinking: if let BedrockModelMode::Thinking { budget_tokens } = mode { + thinking: if request.thinking_allowed + && let BedrockModelMode::Thinking { budget_tokens } = mode + { Some(bedrock::Thinking::Enabled { budget_tokens }) } else { None diff --git a/crates/language_models/src/provider/cloud.rs b/crates/language_models/src/provider/cloud.rs index 9b7fee228a..aaaeb478c0 100644 --- a/crates/language_models/src/provider/cloud.rs +++ b/crates/language_models/src/provider/cloud.rs @@ -849,6 +849,7 @@ impl LanguageModel for CloudLanguageModel { let use_cloud = cx .update(|cx| cx.has_flag::()) .unwrap_or(false); + let thinking_allowed = request.thinking_allowed; match self.model.provider { zed_llm_client::LanguageModelProvider::Anthropic => { let request = into_anthropic( @@ -856,7 +857,7 @@ impl LanguageModel for CloudLanguageModel { self.model.id.to_string(), 1.0, self.model.max_output_tokens as u64, - if self.model.id.0.ends_with("-thinking") { + if thinking_allowed && self.model.id.0.ends_with("-thinking") { AnthropicModelMode::Thinking { budget_tokens: Some(4_096), } diff --git a/crates/language_models/src/provider/google.rs b/crates/language_models/src/provider/google.rs index bb19a3901a..d1539dd22c 100644 --- a/crates/language_models/src/provider/google.rs +++ b/crates/language_models/src/provider/google.rs @@ -559,11 +559,11 @@ pub fn into_google( stop_sequences: Some(request.stop), max_output_tokens: None, temperature: request.temperature.map(|t| t as f64).or(Some(1.0)), - thinking_config: match mode { - GoogleModelMode::Thinking { budget_tokens } => { + thinking_config: match (request.thinking_allowed, mode) { + (true, GoogleModelMode::Thinking { budget_tokens }) => { budget_tokens.map(|thinking_budget| ThinkingConfig { thinking_budget }) } - GoogleModelMode::Default => None, + _ => None, }, top_p: None, top_k: None, diff --git a/crates/language_models/src/provider/mistral.rs b/crates/language_models/src/provider/mistral.rs index c58622d4e0..11497fda35 100644 --- a/crates/language_models/src/provider/mistral.rs +++ b/crates/language_models/src/provider/mistral.rs @@ -911,6 +911,7 @@ mod tests { intent: None, mode: None, stop: vec![], + thinking_allowed: true, }; let mistral_request = into_mistral(request, "mistral-small-latest".into(), None); @@ -943,6 +944,7 @@ mod tests { intent: None, mode: None, stop: vec![], + thinking_allowed: true, }; let mistral_request = into_mistral(request, "pixtral-12b-latest".into(), None); diff --git a/crates/language_models/src/provider/ollama.rs b/crates/language_models/src/provider/ollama.rs index 0866cfa4c8..dc81e8be18 100644 --- a/crates/language_models/src/provider/ollama.rs +++ b/crates/language_models/src/provider/ollama.rs @@ -334,7 +334,10 @@ impl OllamaLanguageModel { temperature: request.temperature.or(Some(1.0)), ..Default::default() }), - think: self.model.supports_thinking, + think: self + .model + .supports_thinking + .map(|supports_thinking| supports_thinking && request.thinking_allowed), tools: request.tools.into_iter().map(tool_into_ollama).collect(), } } diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs index 476c1715ae..76f2fbe303 100644 --- a/crates/language_models/src/provider/open_ai.rs +++ b/crates/language_models/src/provider/open_ai.rs @@ -999,6 +999,7 @@ mod tests { tool_choice: None, stop: vec![], temperature: None, + thinking_allowed: true, }; // Validate that all models are supported by tiktoken-rs diff --git a/crates/language_models/src/provider/open_router.rs b/crates/language_models/src/provider/open_router.rs index 5883da1e2f..c46135ff3e 100644 --- a/crates/language_models/src/provider/open_router.rs +++ b/crates/language_models/src/provider/open_router.rs @@ -523,7 +523,9 @@ pub fn into_open_router( None }, usage: open_router::RequestUsage { include: true }, - reasoning: if let OpenRouterModelMode::Thinking { budget_tokens } = model.mode { + reasoning: if request.thinking_allowed + && let OpenRouterModelMode::Thinking { budget_tokens } = model.mode + { Some(open_router::Reasoning { effort: None, max_tokens: budget_tokens, diff --git a/crates/rules_library/src/rules_library.rs b/crates/rules_library/src/rules_library.rs index 66f589bfd3..f871416f39 100644 --- a/crates/rules_library/src/rules_library.rs +++ b/crates/rules_library/src/rules_library.rs @@ -981,6 +981,7 @@ impl RulesLibrary { tool_choice: None, stop: Vec::new(), temperature: None, + thinking_allowed: true, }, cx, ) diff --git a/crates/semantic_index/src/summary_index.rs b/crates/semantic_index/src/summary_index.rs index 108130ebc9..6e3aae1344 100644 --- a/crates/semantic_index/src/summary_index.rs +++ b/crates/semantic_index/src/summary_index.rs @@ -570,6 +570,7 @@ impl SummaryIndex { tool_choice: None, stop: Vec::new(), temperature: None, + thinking_allowed: true, }; let code_len = code.len();