agent: Disable thinking when using inline assistant/edit file tool (#34141)

This introduces a new field `thinking_allowed` on `LanguageModelRequest`
which lets us control whether thinking should be enabled if the model
supports it.
We disable thinking in the Inline Assistant, Edit File tool and the Git
Commit message generator; this should make generation faster when using
a thinking model, e.g. `claude-sonnet-4-thinking`

Release Notes:

- N/A
This commit is contained in:
Bennet Bo Fenner 2025-07-09 20:05:39 +02:00 committed by GitHub
parent 96ff6d86a3
commit 41fe2a2ab4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 37 additions and 8 deletions

View file

@ -663,7 +663,9 @@ pub fn into_anthropic(
} else {
Some(anthropic::StringOrContents::String(system_message))
},
thinking: if let AnthropicModelMode::Thinking { budget_tokens } = mode {
thinking: if request.thinking_allowed
&& let AnthropicModelMode::Thinking { budget_tokens } = mode
{
Some(anthropic::Thinking::Enabled { budget_tokens })
} else {
None
@ -1108,6 +1110,7 @@ mod tests {
temperature: None,
tools: vec![],
tool_choice: None,
thinking_allowed: true,
};
let anthropic_request = into_anthropic(

View file

@ -799,7 +799,9 @@ pub fn into_bedrock(
max_tokens: max_output_tokens,
system: Some(system_message),
tools: Some(tool_config),
thinking: if let BedrockModelMode::Thinking { budget_tokens } = mode {
thinking: if request.thinking_allowed
&& let BedrockModelMode::Thinking { budget_tokens } = mode
{
Some(bedrock::Thinking::Enabled { budget_tokens })
} else {
None

View file

@ -849,6 +849,7 @@ impl LanguageModel for CloudLanguageModel {
let use_cloud = cx
.update(|cx| cx.has_flag::<ZedCloudFeatureFlag>())
.unwrap_or(false);
let thinking_allowed = request.thinking_allowed;
match self.model.provider {
zed_llm_client::LanguageModelProvider::Anthropic => {
let request = into_anthropic(
@ -856,7 +857,7 @@ impl LanguageModel for CloudLanguageModel {
self.model.id.to_string(),
1.0,
self.model.max_output_tokens as u64,
if self.model.id.0.ends_with("-thinking") {
if thinking_allowed && self.model.id.0.ends_with("-thinking") {
AnthropicModelMode::Thinking {
budget_tokens: Some(4_096),
}

View file

@ -559,11 +559,11 @@ pub fn into_google(
stop_sequences: Some(request.stop),
max_output_tokens: None,
temperature: request.temperature.map(|t| t as f64).or(Some(1.0)),
thinking_config: match mode {
GoogleModelMode::Thinking { budget_tokens } => {
thinking_config: match (request.thinking_allowed, mode) {
(true, GoogleModelMode::Thinking { budget_tokens }) => {
budget_tokens.map(|thinking_budget| ThinkingConfig { thinking_budget })
}
GoogleModelMode::Default => None,
_ => None,
},
top_p: None,
top_k: None,

View file

@ -911,6 +911,7 @@ mod tests {
intent: None,
mode: None,
stop: vec![],
thinking_allowed: true,
};
let mistral_request = into_mistral(request, "mistral-small-latest".into(), None);
@ -943,6 +944,7 @@ mod tests {
intent: None,
mode: None,
stop: vec![],
thinking_allowed: true,
};
let mistral_request = into_mistral(request, "pixtral-12b-latest".into(), None);

View file

@ -334,7 +334,10 @@ impl OllamaLanguageModel {
temperature: request.temperature.or(Some(1.0)),
..Default::default()
}),
think: self.model.supports_thinking,
think: self
.model
.supports_thinking
.map(|supports_thinking| supports_thinking && request.thinking_allowed),
tools: request.tools.into_iter().map(tool_into_ollama).collect(),
}
}

View file

@ -999,6 +999,7 @@ mod tests {
tool_choice: None,
stop: vec![],
temperature: None,
thinking_allowed: true,
};
// Validate that all models are supported by tiktoken-rs

View file

@ -523,7 +523,9 @@ pub fn into_open_router(
None
},
usage: open_router::RequestUsage { include: true },
reasoning: if let OpenRouterModelMode::Thinking { budget_tokens } = model.mode {
reasoning: if request.thinking_allowed
&& let OpenRouterModelMode::Thinking { budget_tokens } = model.mode
{
Some(open_router::Reasoning {
effort: None,
max_tokens: budget_tokens,