assistant: Add support for claude-3-7-sonnet-thinking (#27085)

Closes #25671

Release Notes:

- Added support for `claude-3-7-sonnet-thinking` in the assistant panel

---------

Co-authored-by: Danilo Leal <daniloleal09@gmail.com>
Co-authored-by: Antonio Scandurra <me@as-cii.com>
Co-authored-by: Agus Zubiaga <hi@aguz.me>
Author: Bennet Bo Fenner
Date: 2025-03-21 13:29:07 +01:00 (committed by GitHub)
Commit: a709d4c7c6 (parent: 2ffce4f516)
16 changed files with 1212 additions and 177 deletions

---------

@@ -1,6 +1,6 @@
 use crate::ui::InstructionListItem;
 use crate::AllLanguageModelSettings;
-use anthropic::{AnthropicError, ContentDelta, Event, ResponseContent, Usage};
+use anthropic::{AnthropicError, AnthropicModelMode, ContentDelta, Event, ResponseContent, Usage};
 use anyhow::{anyhow, Context as _, Result};
 use collections::{BTreeMap, HashMap};
 use credentials_provider::CredentialsProvider;
@@ -55,6 +55,37 @@ pub struct AvailableModel {
     pub default_temperature: Option<f32>,
     #[serde(default)]
     pub extra_beta_headers: Vec<String>,
+    /// The model's mode (e.g. thinking)
+    pub mode: Option<ModelMode>,
 }
 
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize, JsonSchema)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum ModelMode {
+    #[default]
+    Default,
+    Thinking {
+        /// The maximum number of tokens to use for reasoning. Must be lower than the model's `max_output_tokens`.
+        budget_tokens: Option<u32>,
+    },
+}
+
+impl From<ModelMode> for AnthropicModelMode {
+    fn from(value: ModelMode) -> Self {
+        match value {
+            ModelMode::Default => AnthropicModelMode::Default,
+            ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
+        }
+    }
+}
+
+impl From<AnthropicModelMode> for ModelMode {
+    fn from(value: AnthropicModelMode) -> Self {
+        match value {
+            AnthropicModelMode::Default => ModelMode::Default,
+            AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
+        }
+    }
+}
+
 pub struct AnthropicLanguageModelProvider {
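
The `#[serde(tag = "type", rename_all = "lowercase")]` attribute makes the new `mode` field an internally tagged object in settings JSON. A minimal sketch of the shape it round-trips to, assuming `serde_json` is available alongside the `ModelMode` type above:

fn main() {
    // The thinking variant carries its reasoning budget alongside the tag.
    let mode = ModelMode::Thinking { budget_tokens: Some(4096) };
    assert_eq!(
        serde_json::to_string(&mode).unwrap(),
        r#"{"type":"thinking","budget_tokens":4096}"#
    );
    // The default variant is just the tag.
    assert_eq!(
        serde_json::to_string(&ModelMode::Default).unwrap(),
        r#"{"type":"default"}"#
    );
}

Keeping the field `Option<ModelMode>` means existing configurations without a `mode` entry continue to deserialize, falling back to `ModelMode::Default` via `unwrap_or_default()` at the use site below.
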
@@ -228,6 +259,7 @@ impl LanguageModelProvider for AnthropicLanguageModelProvider {
                     max_output_tokens: model.max_output_tokens,
                     default_temperature: model.default_temperature,
                     extra_beta_headers: model.extra_beta_headers.clone(),
+                    mode: model.mode.clone().unwrap_or_default().into(),
                 },
             );
         }
@@ -399,9 +431,10 @@ impl LanguageModel for AnthropicModel {
     ) -> BoxFuture<'static, Result<BoxStream<'static, Result<LanguageModelCompletionEvent>>>> {
         let request = into_anthropic(
             request,
-            self.model.id().into(),
+            self.model.request_id().into(),
             self.model.default_temperature(),
             self.model.max_output_tokens(),
+            self.model.mode(),
         );
         let request = self.stream_completion(request, cx);
         let future = self.request_limiter.stream(async move {
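
The switch from `id()` to `request_id()` presumably exists so that a picker-facing id such as `claude-3-7-sonnet-thinking` can map to the real Anthropic API model id, extended thinking being a per-request option on the same underlying model rather than a distinct model. A hypothetical sketch of such a mapping (neither the variant names nor the method body appear in this diff):

impl Model {
    /// Id sent to the Anthropic API, which may differ from the id shown in
    /// the model picker (hypothetical sketch, not part of this commit).
    pub fn request_id(&self) -> &str {
        match self {
            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
                "claude-3-7-sonnet-latest"
            }
            Self::Custom { name, .. } => name,
            // Other variants would keep using their regular id.
            _ => self.id(),
        }
    }
}
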
@@ -434,6 +467,7 @@ impl LanguageModel for AnthropicModel {
             self.model.tool_model_id().into(),
             self.model.default_temperature(),
             self.model.max_output_tokens(),
+            self.model.mode(),
         );
         request.tool_choice = Some(anthropic::ToolChoice::Tool {
             name: tool_name.clone(),
@@ -464,6 +498,7 @@ pub fn into_anthropic(
     model: String,
     default_temperature: f32,
     max_output_tokens: u32,
+    mode: AnthropicModelMode,
 ) -> anthropic::Request {
     let mut new_messages: Vec<anthropic::Message> = Vec::new();
     let mut system_message = String::new();
@@ -552,6 +587,11 @@ pub fn into_anthropic(
         messages: new_messages,
         max_tokens: max_output_tokens,
         system: Some(system_message),
+        thinking: if let AnthropicModelMode::Thinking { budget_tokens } = mode {
+            Some(anthropic::Thinking::Enabled { budget_tokens })
+        } else {
+            None
+        },
         tools: request
             .tools
             .into_iter()
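
For context, Anthropic's extended thinking is switched on by a `thinking` object in the request body, e.g. `"thinking": {"type": "enabled", "budget_tokens": 16000}`, with the budget required to stay below `max_tokens`. Judging from the destructuring above, `anthropic::Thinking` is a tagged enum along these lines (a sketch: the variant and field are confirmed by this hunk, the derives are assumed):

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    Enabled { budget_tokens: Option<u32> },
}
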
@@ -607,6 +647,16 @@ pub fn map_to_language_model_completion_events(
                     state,
                 ));
             }
+            ResponseContent::Thinking { thinking } => {
+                return Some((
+                    vec![Ok(LanguageModelCompletionEvent::Thinking(thinking))],
+                    state,
+                ));
+            }
+            ResponseContent::RedactedThinking { .. } => {
+                // Redacted thinking is encrypted and not accessible to the user, see:
+                // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#suggestions-for-handling-redacted-thinking-in-production
+            }
             ResponseContent::ToolUse { id, name, .. } => {
                 state.tool_uses_by_index.insert(
                     index,
@@ -625,6 +675,13 @@ pub fn map_to_language_model_completion_events(
                     state,
                 ));
             }
+            ContentDelta::ThinkingDelta { thinking } => {
+                return Some((
+                    vec![Ok(LanguageModelCompletionEvent::Thinking(thinking))],
+                    state,
+                ));
+            }
+            ContentDelta::SignatureDelta { .. } => {}
             ContentDelta::InputJsonDelta { partial_json } => {
                 if let Some(tool_use) = state.tool_uses_by_index.get_mut(&index) {
                     tool_use.input_json.push_str(&partial_json);
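
With these arms in place, reasoning output surfaces as its own `Thinking` event, so consumers can render it separately from the final answer. A minimal sketch of draining the mapped stream, assuming the crate's `BoxStream`/`Result` aliases and an existing `Text` variant for ordinary output:

use futures::StreamExt as _;

async fn split_thinking(
    mut events: BoxStream<'static, Result<LanguageModelCompletionEvent>>,
) -> Result<(String, String)> {
    let (mut reasoning, mut answer) = (String::new(), String::new());
    while let Some(event) = events.next().await {
        match event? {
            // Fed by the Thinking/ThinkingDelta arms above.
            LanguageModelCompletionEvent::Thinking(text) => reasoning.push_str(&text),
            LanguageModelCompletionEvent::Text(text) => answer.push_str(&text),
            _ => {}
        }
    }
    Ok((reasoning, answer))
}
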

---------

@@ -1,4 +1,4 @@
-use anthropic::AnthropicError;
+use anthropic::{AnthropicError, AnthropicModelMode};
 use anyhow::{anyhow, Result};
 use client::{
     zed_urls, Client, PerformCompletionParams, UserStore, EXPIRED_LLM_TOKEN_HEADER_NAME,
@@ -91,6 +91,28 @@ pub struct AvailableModel {
     /// Any extra beta headers to provide when using the model.
     #[serde(default)]
     pub extra_beta_headers: Vec<String>,
+    /// The model's mode (e.g. thinking)
+    pub mode: Option<ModelMode>,
 }
 
+#[derive(Default, Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum ModelMode {
+    #[default]
+    Default,
+    Thinking {
+        /// The maximum number of tokens to use for reasoning. Must be lower than the model's `max_output_tokens`.
+        budget_tokens: Option<u32>,
+    },
+}
+
+impl From<ModelMode> for AnthropicModelMode {
+    fn from(value: ModelMode) -> Self {
+        match value {
+            ModelMode::Default => AnthropicModelMode::Default,
+            ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
+        }
+    }
+}
+
 pub struct CloudLanguageModelProvider {
@@ -299,6 +321,7 @@ impl LanguageModelProvider for CloudLanguageModelProvider {
                 default_temperature: model.default_temperature,
                 max_output_tokens: model.max_output_tokens,
                 extra_beta_headers: model.extra_beta_headers.clone(),
+                mode: model.mode.unwrap_or_default().into(),
             }),
             AvailableProvider::OpenAi => CloudModel::OpenAi(open_ai::Model::Custom {
                 name: model.name.clone(),
@@ -567,9 +590,10 @@ impl LanguageModel for CloudLanguageModel {
             CloudModel::Anthropic(model) => {
                 let request = into_anthropic(
                     request,
-                    model.id().into(),
+                    model.request_id().into(),
                     model.default_temperature(),
                     model.max_output_tokens(),
+                    model.mode(),
                 );
                 let client = self.client.clone();
                 let llm_api_token = self.llm_api_token.clone();
@@ -669,6 +693,7 @@ impl LanguageModel for CloudLanguageModel {
                     model.tool_model_id().into(),
                     model.default_temperature(),
                     model.max_output_tokens(),
+                    model.mode(),
                 );
                 request.tool_choice = Some(anthropic::ToolChoice::Tool {
                     name: tool_name.clone(),

---------

@@ -109,6 +109,7 @@ impl AnthropicSettingsContent {
                     max_output_tokens,
                     default_temperature,
                     extra_beta_headers,
+                    mode,
                 } => Some(provider::anthropic::AvailableModel {
                     name,
                     display_name,
@@ -124,6 +125,7 @@ impl AnthropicSettingsContent {
                     max_output_tokens,
                     default_temperature,
                     extra_beta_headers,
+                    mode: Some(mode.into()),
                 }),
                 _ => None,
             })