From 41fe2a2ab4600c41e94d0fa97b9fe1e76977ea33 Mon Sep 17 00:00:00 2001
From: Bennet Bo Fenner
Date: Wed, 9 Jul 2025 20:05:39 +0200
Subject: [PATCH] agent: Disable thinking when using inline assistant/edit file tool (#34141)

This introduces a new field `thinking_allowed` on `LanguageModelRequest`,
which lets us control whether thinking should be enabled if the model
supports it. We disable thinking in the Inline Assistant, the Edit File
tool, and the Git commit message generator; this should make generation
faster when using a thinking model, e.g. `claude-sonnet-4-thinking`.

Release Notes:

- N/A
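A minimal sketch of the gating pattern the providers now share (the
`Request`/`Mode` types and `thinking_budget` helper below are illustrative
stand-ins, not the real crate items):

```rust
// Simplified stand-ins for `LanguageModelRequest` and the provider model modes.
struct Request {
    thinking_allowed: bool,
}

#[allow(dead_code)]
enum Mode {
    Default,
    Thinking { budget_tokens: Option<u64> },
}

// Thinking is enabled only when the request allows it AND the model is in a
// thinking mode, mirroring the gating added in each provider below.
fn thinking_budget(request: &Request, mode: &Mode) -> Option<u64> {
    match (request.thinking_allowed, mode) {
        (true, Mode::Thinking { budget_tokens }) => *budget_tokens,
        _ => None,
    }
}

fn main() {
    // An inline-assistant-style request: thinking stays off even though
    // the model itself is in a thinking mode.
    let request = Request { thinking_allowed: false };
    let mode = Mode::Thinking { budget_tokens: Some(4_096) };
    assert_eq!(thinking_budget(&request, &mode), None);
}
```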
---
 crates/agent/src/thread.rs                         | 2 ++
 crates/agent_ui/src/active_thread.rs               | 1 +
 crates/agent_ui/src/buffer_codegen.rs              | 1 +
 crates/agent_ui/src/message_editor.rs              | 1 +
 crates/agent_ui/src/terminal_inline_assistant.rs   | 1 +
 crates/assistant_context/src/assistant_context.rs  | 1 +
 crates/assistant_tools/src/edit_agent.rs           | 1 +
 crates/assistant_tools/src/edit_agent/evals.rs     | 2 ++
 crates/eval/src/instance.rs                        | 1 +
 crates/git_ui/src/git_panel.rs                     | 1 +
 crates/language_model/src/request.rs               | 1 +
 crates/language_models/src/provider/anthropic.rs   | 5 ++++-
 crates/language_models/src/provider/bedrock.rs     | 4 +++-
 crates/language_models/src/provider/cloud.rs       | 3 ++-
 crates/language_models/src/provider/google.rs      | 6 +++---
 crates/language_models/src/provider/mistral.rs     | 2 ++
 crates/language_models/src/provider/ollama.rs      | 5 ++++-
 crates/language_models/src/provider/open_ai.rs     | 1 +
 crates/language_models/src/provider/open_router.rs | 4 +++-
 crates/rules_library/src/rules_library.rs          | 1 +
 crates/semantic_index/src/summary_index.rs         | 1 +
 21 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs
index 1f2654dac5..6a20ad8f83 100644
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@@ -1284,6 +1284,7 @@ impl Thread {
             tool_choice: None,
             stop: Vec::new(),
             temperature: AgentSettings::temperature_for_model(&model, cx),
+            thinking_allowed: true,
         };

         let available_tools = self.available_tools(cx, model.clone());
@@ -1449,6 +1450,7 @@ impl Thread {
             tool_choice: None,
             stop: Vec::new(),
             temperature: AgentSettings::temperature_for_model(model, cx),
+            thinking_allowed: false,
         };

         for message in &self.messages {
diff --git a/crates/agent_ui/src/active_thread.rs b/crates/agent_ui/src/active_thread.rs
index a4553fc901..0e0e3756e3 100644
--- a/crates/agent_ui/src/active_thread.rs
+++ b/crates/agent_ui/src/active_thread.rs
@@ -1461,6 +1461,7 @@ impl ActiveThread {
                     &configured_model.model,
                     cx,
                 ),
+                thinking_allowed: true,
             };

             Some(configured_model.model.count_tokens(request, cx))
diff --git a/crates/agent_ui/src/buffer_codegen.rs b/crates/agent_ui/src/buffer_codegen.rs
index 117dcf4f8e..64498e9281 100644
--- a/crates/agent_ui/src/buffer_codegen.rs
+++ b/crates/agent_ui/src/buffer_codegen.rs
@@ -475,6 +475,7 @@ impl CodegenAlternative {
                 stop: Vec::new(),
                 temperature,
                 messages: vec![request_message],
+                thinking_allowed: false,
             }
         }))
     }
diff --git a/crates/agent_ui/src/message_editor.rs b/crates/agent_ui/src/message_editor.rs
index d1eae02246..8bc93f0f58 100644
--- a/crates/agent_ui/src/message_editor.rs
+++ b/crates/agent_ui/src/message_editor.rs
@@ -1454,6 +1454,7 @@ impl MessageEditor {
             tool_choice: None,
             stop: vec![],
             temperature: AgentSettings::temperature_for_model(&model.model, cx),
+            thinking_allowed: true,
         };

         Some(model.model.count_tokens(request, cx))
diff --git a/crates/agent_ui/src/terminal_inline_assistant.rs b/crates/agent_ui/src/terminal_inline_assistant.rs
index 162b45413f..91867957cd 100644
--- a/crates/agent_ui/src/terminal_inline_assistant.rs
+++ b/crates/agent_ui/src/terminal_inline_assistant.rs
@@ -297,6 +297,7 @@ impl TerminalInlineAssistant {
                 tool_choice: None,
                 stop: Vec::new(),
                 temperature,
+                thinking_allowed: false,
             }
         }))
     }
diff --git a/crates/assistant_context/src/assistant_context.rs b/crates/assistant_context/src/assistant_context.rs
index aaaef15250..136468e084 100644
--- a/crates/assistant_context/src/assistant_context.rs
+++ b/crates/assistant_context/src/assistant_context.rs
@@ -2293,6 +2293,7 @@ impl AssistantContext {
             tool_choice: None,
             stop: Vec::new(),
             temperature: model.and_then(|model| AgentSettings::temperature_for_model(model, cx)),
+            thinking_allowed: true,
         };
         for message in self.messages(cx) {
             if message.status != MessageStatus::Done {
diff --git a/crates/assistant_tools/src/edit_agent.rs b/crates/assistant_tools/src/edit_agent.rs
index c2540633f7..af7dae2e20 100644
--- a/crates/assistant_tools/src/edit_agent.rs
+++ b/crates/assistant_tools/src/edit_agent.rs
@@ -719,6 +719,7 @@ impl EditAgent {
             tools,
             stop: Vec::new(),
             temperature: None,
+            thinking_allowed: false,
         };

         Ok(self.model.stream_completion_text(request, cx).await?.stream)
diff --git a/crates/assistant_tools/src/edit_agent/evals.rs b/crates/assistant_tools/src/edit_agent/evals.rs
index 8df8f677f2..d2ee03f08f 100644
--- a/crates/assistant_tools/src/edit_agent/evals.rs
+++ b/crates/assistant_tools/src/edit_agent/evals.rs
@@ -1263,6 +1263,7 @@ impl EvalAssertion {
                 content: vec![prompt.into()],
                 cache: false,
             }],
+            thinking_allowed: true,
             ..Default::default()
         };
         let mut response = retry_on_rate_limit(async || {
@@ -1599,6 +1600,7 @@ impl EditAgentTest {
         let conversation = LanguageModelRequest {
             messages,
             tools,
+            thinking_allowed: true,
             ..Default::default()
         };

diff --git a/crates/eval/src/instance.rs b/crates/eval/src/instance.rs
index d17dc89d0b..0f2b4c18ea 100644
--- a/crates/eval/src/instance.rs
+++ b/crates/eval/src/instance.rs
@@ -594,6 +594,7 @@ impl ExampleInstance {
             tools: Vec::new(),
             tool_choice: None,
             stop: Vec::new(),
+            thinking_allowed: true,
         };

         let model = model.clone();
diff --git a/crates/git_ui/src/git_panel.rs b/crates/git_ui/src/git_panel.rs
index 84ce97a982..c50e2f8912 100644
--- a/crates/git_ui/src/git_panel.rs
+++ b/crates/git_ui/src/git_panel.rs
@@ -1830,6 +1830,7 @@ impl GitPanel {
             tool_choice: None,
             stop: Vec::new(),
             temperature,
+            thinking_allowed: false,
         };

         let stream = model.stream_completion_text(request, &cx);
diff --git a/crates/language_model/src/request.rs b/crates/language_model/src/request.rs
index 451a62775e..6f3d420ad5 100644
--- a/crates/language_model/src/request.rs
+++ b/crates/language_model/src/request.rs
@@ -391,6 +391,7 @@ pub struct LanguageModelRequest {
     pub tool_choice: Option<LanguageModelToolChoice>,
     pub stop: Vec<String>,
     pub temperature: Option<f32>,
+    pub thinking_allowed: bool,
 }

 #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
diff --git a/crates/language_models/src/provider/anthropic.rs b/crates/language_models/src/provider/anthropic.rs
index 6ddb1a4381..959cbccf39 100644
--- a/crates/language_models/src/provider/anthropic.rs
+++ b/crates/language_models/src/provider/anthropic.rs
@@ -663,7 +663,9 @@ pub fn into_anthropic(
         } else {
             Some(anthropic::StringOrContents::String(system_message))
         },
-        thinking: if let AnthropicModelMode::Thinking { budget_tokens } = mode {
+        thinking: if request.thinking_allowed
+            && let AnthropicModelMode::Thinking { budget_tokens } = mode
+        {
             Some(anthropic::Thinking::Enabled { budget_tokens })
         } else {
             None
@@ -1108,6 +1110,7 @@ mod tests {
             temperature: None,
             tools: vec![],
             tool_choice: None,
+            thinking_allowed: true,
         };

         let anthropic_request = into_anthropic(
diff --git a/crates/language_models/src/provider/bedrock.rs b/crates/language_models/src/provider/bedrock.rs
index 9c0d481607..65ce1dbc4b 100644
--- a/crates/language_models/src/provider/bedrock.rs
+++ b/crates/language_models/src/provider/bedrock.rs
@@ -799,7 +799,9 @@ pub fn into_bedrock(
         max_tokens: max_output_tokens,
         system: Some(system_message),
         tools: Some(tool_config),
-        thinking: if let BedrockModelMode::Thinking { budget_tokens } = mode {
+        thinking: if request.thinking_allowed
+            && let BedrockModelMode::Thinking { budget_tokens } = mode
+        {
             Some(bedrock::Thinking::Enabled { budget_tokens })
         } else {
             None
diff --git a/crates/language_models/src/provider/cloud.rs b/crates/language_models/src/provider/cloud.rs
index 9b7fee228a..aaaeb478c0 100644
--- a/crates/language_models/src/provider/cloud.rs
+++ b/crates/language_models/src/provider/cloud.rs
@@ -849,6 +849,7 @@ impl LanguageModel for CloudLanguageModel {
         let use_cloud = cx
             .update(|cx| cx.has_flag::())
             .unwrap_or(false);
+        let thinking_allowed = request.thinking_allowed;
         match self.model.provider {
             zed_llm_client::LanguageModelProvider::Anthropic => {
                 let request = into_anthropic(
@@ -856,7 +857,7 @@ impl LanguageModel for CloudLanguageModel {
                     self.model.id.to_string(),
                     1.0,
                     self.model.max_output_tokens as u64,
-                    if self.model.id.0.ends_with("-thinking") {
+                    if thinking_allowed && self.model.id.0.ends_with("-thinking") {
                         AnthropicModelMode::Thinking {
                             budget_tokens: Some(4_096),
                         }
diff --git a/crates/language_models/src/provider/google.rs b/crates/language_models/src/provider/google.rs
index bb19a3901a..d1539dd22c 100644
--- a/crates/language_models/src/provider/google.rs
+++ b/crates/language_models/src/provider/google.rs
@@ -559,11 +559,11 @@ pub fn into_google(
         stop_sequences: Some(request.stop),
         max_output_tokens: None,
         temperature: request.temperature.map(|t| t as f64).or(Some(1.0)),
-        thinking_config: match mode {
-            GoogleModelMode::Thinking { budget_tokens } => {
+        thinking_config: match (request.thinking_allowed, mode) {
+            (true, GoogleModelMode::Thinking { budget_tokens }) => {
                 budget_tokens.map(|thinking_budget| ThinkingConfig { thinking_budget })
             }
-            GoogleModelMode::Default => None,
+            _ => None,
         },
         top_p: None,
         top_k: None,
diff --git a/crates/language_models/src/provider/mistral.rs b/crates/language_models/src/provider/mistral.rs
index c58622d4e0..11497fda35 100644
--- a/crates/language_models/src/provider/mistral.rs
+++ b/crates/language_models/src/provider/mistral.rs
@@ -911,6 +911,7 @@ mod tests {
             intent: None,
             mode: None,
             stop: vec![],
+            thinking_allowed: true,
         };

         let mistral_request = into_mistral(request, "mistral-small-latest".into(), None);
@@ -943,6 +944,7 @@ mod tests {
             intent: None,
             mode: None,
             stop: vec![],
+            thinking_allowed: true,
         };

         let mistral_request = into_mistral(request, "pixtral-12b-latest".into(), None);
diff --git a/crates/language_models/src/provider/ollama.rs b/crates/language_models/src/provider/ollama.rs
index 0866cfa4c8..dc81e8be18 100644
--- a/crates/language_models/src/provider/ollama.rs
+++ b/crates/language_models/src/provider/ollama.rs
@@ -334,7 +334,10 @@ impl OllamaLanguageModel {
                 temperature: request.temperature.or(Some(1.0)),
                 ..Default::default()
             }),
-            think: self.model.supports_thinking,
+            think: self
+                .model
+                .supports_thinking
+                .map(|supports_thinking| supports_thinking && request.thinking_allowed),
             tools: request.tools.into_iter().map(tool_into_ollama).collect(),
         }
     }
diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs
index 476c1715ae..76f2fbe303 100644
--- a/crates/language_models/src/provider/open_ai.rs
+++ b/crates/language_models/src/provider/open_ai.rs
@@ -999,6 +999,7 @@ mod tests {
             tool_choice: None,
             stop: vec![],
             temperature: None,
+            thinking_allowed: true,
         };

         // Validate that all models are supported by tiktoken-rs
diff --git a/crates/language_models/src/provider/open_router.rs b/crates/language_models/src/provider/open_router.rs
index 5883da1e2f..c46135ff3e 100644
--- a/crates/language_models/src/provider/open_router.rs
+++ b/crates/language_models/src/provider/open_router.rs
@@ -523,7 +523,9 @@ pub fn into_open_router(
             None
         },
         usage: open_router::RequestUsage { include: true },
-        reasoning: if let OpenRouterModelMode::Thinking { budget_tokens } = model.mode {
+        reasoning: if request.thinking_allowed
+            && let OpenRouterModelMode::Thinking { budget_tokens } = model.mode
+        {
             Some(open_router::Reasoning {
                 effort: None,
                 max_tokens: budget_tokens,
diff --git a/crates/rules_library/src/rules_library.rs b/crates/rules_library/src/rules_library.rs
index 66f589bfd3..f871416f39 100644
--- a/crates/rules_library/src/rules_library.rs
+++ b/crates/rules_library/src/rules_library.rs
@@ -981,6 +981,7 @@ impl RulesLibrary {
                     tool_choice: None,
                     stop: Vec::new(),
                     temperature: None,
+                    thinking_allowed: true,
                 },
                 cx,
             )
diff --git a/crates/semantic_index/src/summary_index.rs b/crates/semantic_index/src/summary_index.rs
index 108130ebc9..6e3aae1344 100644
--- a/crates/semantic_index/src/summary_index.rs
+++ b/crates/semantic_index/src/summary_index.rs
@@ -570,6 +570,7 @@ impl SummaryIndex {
             tool_choice: None,
             stop: Vec::new(),
             temperature: None,
+            thinking_allowed: true,
         };

         let code_len = code.len();
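
A reviewer note on defaults: `LanguageModelRequest` is also constructed via
`..Default::default()` (see `evals.rs` above), which is why those call sites
set `thinking_allowed: true` explicitly. Assuming the `Default` impl is
derived, the field defaults to `false`, so thinking stays opt-in wherever a
request is built from defaults; a minimal sketch of that assumption:

```rust
// Sketch only: the real LanguageModelRequest has many more fields.
#[derive(Default)]
struct LanguageModelRequest {
    thinking_allowed: bool,
}

fn main() {
    // bool::default() is false, so a request built from defaults leaves
    // thinking disabled unless the call site opts in explicitly.
    let request = LanguageModelRequest::default();
    assert!(!request.thinking_allowed);
}
```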