diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs
index 1f2654dac5..6a20ad8f83 100644
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@@ -1284,6 +1284,7 @@ impl Thread {
             tool_choice: None,
             stop: Vec::new(),
             temperature: AgentSettings::temperature_for_model(&model, cx),
+            thinking_allowed: true,
         };
 
         let available_tools = self.available_tools(cx, model.clone());
@@ -1449,6 +1450,7 @@ impl Thread {
             tool_choice: None,
             stop: Vec::new(),
             temperature: AgentSettings::temperature_for_model(model, cx),
+            thinking_allowed: false,
         };
 
         for message in &self.messages {
diff --git a/crates/agent_ui/src/active_thread.rs b/crates/agent_ui/src/active_thread.rs
index a4553fc901..0e0e3756e3 100644
--- a/crates/agent_ui/src/active_thread.rs
+++ b/crates/agent_ui/src/active_thread.rs
@@ -1461,6 +1461,7 @@ impl ActiveThread {
                             &configured_model.model,
                             cx,
                         ),
+                        thinking_allowed: true,
                     };
 
                     Some(configured_model.model.count_tokens(request, cx))
diff --git a/crates/agent_ui/src/buffer_codegen.rs b/crates/agent_ui/src/buffer_codegen.rs
index 117dcf4f8e..64498e9281 100644
--- a/crates/agent_ui/src/buffer_codegen.rs
+++ b/crates/agent_ui/src/buffer_codegen.rs
@@ -475,6 +475,7 @@ impl CodegenAlternative {
                 stop: Vec::new(),
                 temperature,
                 messages: vec![request_message],
+                thinking_allowed: false,
             }
         }))
     }
diff --git a/crates/agent_ui/src/message_editor.rs b/crates/agent_ui/src/message_editor.rs
index d1eae02246..8bc93f0f58 100644
--- a/crates/agent_ui/src/message_editor.rs
+++ b/crates/agent_ui/src/message_editor.rs
@@ -1454,6 +1454,7 @@ impl MessageEditor {
                         tool_choice: None,
                         stop: vec![],
                         temperature: AgentSettings::temperature_for_model(&model.model, cx),
+                        thinking_allowed: true,
                     };
 
                     Some(model.model.count_tokens(request, cx))
diff --git a/crates/agent_ui/src/terminal_inline_assistant.rs b/crates/agent_ui/src/terminal_inline_assistant.rs
index 162b45413f..91867957cd 100644
--- a/crates/agent_ui/src/terminal_inline_assistant.rs
+++ b/crates/agent_ui/src/terminal_inline_assistant.rs
@@ -297,6 +297,7 @@ impl TerminalInlineAssistant {
                 tool_choice: None,
                 stop: Vec::new(),
                 temperature,
+                thinking_allowed: false,
             }
         }))
     }
diff --git a/crates/assistant_context/src/assistant_context.rs b/crates/assistant_context/src/assistant_context.rs
index aaaef15250..136468e084 100644
--- a/crates/assistant_context/src/assistant_context.rs
+++ b/crates/assistant_context/src/assistant_context.rs
@@ -2293,6 +2293,7 @@ impl AssistantContext {
             tool_choice: None,
             stop: Vec::new(),
             temperature: model.and_then(|model| AgentSettings::temperature_for_model(model, cx)),
+            thinking_allowed: true,
         };
         for message in self.messages(cx) {
             if message.status != MessageStatus::Done {
diff --git a/crates/assistant_tools/src/edit_agent.rs b/crates/assistant_tools/src/edit_agent.rs
index c2540633f7..af7dae2e20 100644
--- a/crates/assistant_tools/src/edit_agent.rs
+++ b/crates/assistant_tools/src/edit_agent.rs
@@ -719,6 +719,7 @@ impl EditAgent {
             tools,
             stop: Vec::new(),
             temperature: None,
+            thinking_allowed: false,
         };
 
         Ok(self.model.stream_completion_text(request, cx).await?.stream)
diff --git a/crates/assistant_tools/src/edit_agent/evals.rs b/crates/assistant_tools/src/edit_agent/evals.rs
index 8df8f677f2..d2ee03f08f 100644
--- a/crates/assistant_tools/src/edit_agent/evals.rs
+++ b/crates/assistant_tools/src/edit_agent/evals.rs
@@ -1263,6 +1263,7 @@ impl EvalAssertion {
                     content: vec![prompt.into()],
                     cache: false,
                 }],
+                thinking_allowed: true,
                 ..Default::default()
             };
             let mut response = retry_on_rate_limit(async || {
@@ -1599,6 +1600,7 @@ impl EditAgentTest {
         let conversation = LanguageModelRequest {
             messages,
             tools,
+            thinking_allowed: true,
             ..Default::default()
         };
 
diff --git a/crates/eval/src/instance.rs b/crates/eval/src/instance.rs
index d17dc89d0b..0f2b4c18ea 100644
--- a/crates/eval/src/instance.rs
+++ b/crates/eval/src/instance.rs
@@ -594,6 +594,7 @@ impl ExampleInstance {
                 tools: Vec::new(),
                 tool_choice: None,
                 stop: Vec::new(),
+                thinking_allowed: true,
             };
 
             let model = model.clone();
diff --git a/crates/git_ui/src/git_panel.rs b/crates/git_ui/src/git_panel.rs
index 84ce97a982..c50e2f8912 100644
--- a/crates/git_ui/src/git_panel.rs
+++ b/crates/git_ui/src/git_panel.rs
@@ -1830,6 +1830,7 @@ impl GitPanel {
                     tool_choice: None,
                     stop: Vec::new(),
                     temperature,
+                    thinking_allowed: false,
                 };
 
                 let stream = model.stream_completion_text(request, &cx);
diff --git a/crates/language_model/src/request.rs b/crates/language_model/src/request.rs
index 451a62775e..6f3d420ad5 100644
--- a/crates/language_model/src/request.rs
+++ b/crates/language_model/src/request.rs
@@ -391,6 +391,7 @@ pub struct LanguageModelRequest {
     pub tool_choice: Option<LanguageModelToolChoice>,
     pub stop: Vec<String>,
     pub temperature: Option<f32>,
+    pub thinking_allowed: bool,
 }
 
 #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
diff --git a/crates/language_models/src/provider/anthropic.rs b/crates/language_models/src/provider/anthropic.rs
index 6ddb1a4381..959cbccf39 100644
--- a/crates/language_models/src/provider/anthropic.rs
+++ b/crates/language_models/src/provider/anthropic.rs
@@ -663,7 +663,9 @@ pub fn into_anthropic(
         } else {
             Some(anthropic::StringOrContents::String(system_message))
         },
-        thinking: if let AnthropicModelMode::Thinking { budget_tokens } = mode {
+        thinking: if request.thinking_allowed
+            && let AnthropicModelMode::Thinking { budget_tokens } = mode
+        {
             Some(anthropic::Thinking::Enabled { budget_tokens })
         } else {
             None
@@ -1108,6 +1110,7 @@ mod tests {
             temperature: None,
             tools: vec![],
             tool_choice: None,
+            thinking_allowed: true,
         };
 
         let anthropic_request = into_anthropic(
diff --git a/crates/language_models/src/provider/bedrock.rs b/crates/language_models/src/provider/bedrock.rs
index 9c0d481607..65ce1dbc4b 100644
--- a/crates/language_models/src/provider/bedrock.rs
+++ b/crates/language_models/src/provider/bedrock.rs
@@ -799,7 +799,9 @@ pub fn into_bedrock(
         max_tokens: max_output_tokens,
         system: Some(system_message),
         tools: Some(tool_config),
-        thinking: if let BedrockModelMode::Thinking { budget_tokens } = mode {
+        thinking: if request.thinking_allowed
+            && let BedrockModelMode::Thinking { budget_tokens } = mode
+        {
             Some(bedrock::Thinking::Enabled { budget_tokens })
         } else {
             None
diff --git a/crates/language_models/src/provider/cloud.rs b/crates/language_models/src/provider/cloud.rs
index 9b7fee228a..aaaeb478c0 100644
--- a/crates/language_models/src/provider/cloud.rs
+++ b/crates/language_models/src/provider/cloud.rs
@@ -849,6 +849,7 @@ impl LanguageModel for CloudLanguageModel {
         let use_cloud = cx
             .update(|cx| cx.has_flag::<ZedCloudFeatureFlag>())
             .unwrap_or(false);
+        let thinking_allowed = request.thinking_allowed;
         match self.model.provider {
             zed_llm_client::LanguageModelProvider::Anthropic => {
                 let request = into_anthropic(
@@ -856,7 +857,7 @@ impl LanguageModel for CloudLanguageModel {
                     self.model.id.to_string(),
                     1.0,
                     self.model.max_output_tokens as u64,
-                    if self.model.id.0.ends_with("-thinking") {
+                    if thinking_allowed && self.model.id.0.ends_with("-thinking") {
                         AnthropicModelMode::Thinking {
                             budget_tokens: Some(4_096),
                         }
diff --git a/crates/language_models/src/provider/google.rs b/crates/language_models/src/provider/google.rs
index bb19a3901a..d1539dd22c 100644
--- a/crates/language_models/src/provider/google.rs
+++ b/crates/language_models/src/provider/google.rs
@@ -559,11 +559,11 @@ pub fn into_google(
             stop_sequences: Some(request.stop),
             max_output_tokens: None,
             temperature: request.temperature.map(|t| t as f64).or(Some(1.0)),
-            thinking_config: match mode {
-                GoogleModelMode::Thinking { budget_tokens } => {
+            thinking_config: match (request.thinking_allowed, mode) {
+                (true, GoogleModelMode::Thinking { budget_tokens }) => {
                     budget_tokens.map(|thinking_budget| ThinkingConfig { thinking_budget })
                 }
-                GoogleModelMode::Default => None,
+                _ => None,
             },
             top_p: None,
             top_k: None,
diff --git a/crates/language_models/src/provider/mistral.rs b/crates/language_models/src/provider/mistral.rs
index c58622d4e0..11497fda35 100644
--- a/crates/language_models/src/provider/mistral.rs
+++ b/crates/language_models/src/provider/mistral.rs
@@ -911,6 +911,7 @@ mod tests {
             intent: None,
             mode: None,
             stop: vec![],
+            thinking_allowed: true,
         };
 
         let mistral_request = into_mistral(request, "mistral-small-latest".into(), None);
@@ -943,6 +944,7 @@ mod tests {
             intent: None,
             mode: None,
             stop: vec![],
+            thinking_allowed: true,
         };
 
         let mistral_request = into_mistral(request, "pixtral-12b-latest".into(), None);
diff --git a/crates/language_models/src/provider/ollama.rs b/crates/language_models/src/provider/ollama.rs
index 0866cfa4c8..dc81e8be18 100644
--- a/crates/language_models/src/provider/ollama.rs
+++ b/crates/language_models/src/provider/ollama.rs
@@ -334,7 +334,10 @@ impl OllamaLanguageModel {
                 temperature: request.temperature.or(Some(1.0)),
                 ..Default::default()
             }),
-            think: self.model.supports_thinking,
+            think: self
+                .model
+                .supports_thinking
+                .map(|supports_thinking| supports_thinking && request.thinking_allowed),
             tools: request.tools.into_iter().map(tool_into_ollama).collect(),
         }
     }
diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs
index 476c1715ae..76f2fbe303 100644
--- a/crates/language_models/src/provider/open_ai.rs
+++ b/crates/language_models/src/provider/open_ai.rs
@@ -999,6 +999,7 @@ mod tests {
             tool_choice: None,
             stop: vec![],
             temperature: None,
+            thinking_allowed: true,
         };
 
         // Validate that all models are supported by tiktoken-rs
diff --git a/crates/language_models/src/provider/open_router.rs b/crates/language_models/src/provider/open_router.rs
index 5883da1e2f..c46135ff3e 100644
--- a/crates/language_models/src/provider/open_router.rs
+++ b/crates/language_models/src/provider/open_router.rs
@@ -523,7 +523,9 @@ pub fn into_open_router(
             None
         },
         usage: open_router::RequestUsage { include: true },
-        reasoning: if let OpenRouterModelMode::Thinking { budget_tokens } = model.mode {
+        reasoning: if request.thinking_allowed
+            && let OpenRouterModelMode::Thinking { budget_tokens } = model.mode
+        {
             Some(open_router::Reasoning {
                 effort: None,
                 max_tokens: budget_tokens,
diff --git a/crates/rules_library/src/rules_library.rs b/crates/rules_library/src/rules_library.rs
index 66f589bfd3..f871416f39 100644
--- a/crates/rules_library/src/rules_library.rs
+++ b/crates/rules_library/src/rules_library.rs
@@ -981,6 +981,7 @@ impl RulesLibrary {
                                     tool_choice: None,
                                     stop: Vec::new(),
                                     temperature: None,
+                                    thinking_allowed: true,
                                 },
                                 cx,
                             )
diff --git a/crates/semantic_index/src/summary_index.rs b/crates/semantic_index/src/summary_index.rs
index 108130ebc9..6e3aae1344 100644
--- a/crates/semantic_index/src/summary_index.rs
+++ b/crates/semantic_index/src/summary_index.rs
@@ -570,6 +570,7 @@ impl SummaryIndex {
             tool_choice: None,
             stop: Vec::new(),
             temperature: None,
+            thinking_allowed: true,
         };
 
         let code_len = code.len();