assistant: Add support for claude-3-7-sonnet-thinking
(#27085)
Closes #25671 Release Notes: - Added support for `claude-3-7-sonnet-thinking` in the assistant panel --------- Co-authored-by: Danilo Leal <daniloleal09@gmail.com> Co-authored-by: Antonio Scandurra <me@as-cii.com> Co-authored-by: Agus Zubiaga <hi@aguz.me>
This commit is contained in:
parent
2ffce4f516
commit
a709d4c7c6
16 changed files with 1212 additions and 177 deletions
|
@ -1,6 +1,6 @@
|
|||
use crate::ui::InstructionListItem;
|
||||
use crate::AllLanguageModelSettings;
|
||||
use anthropic::{AnthropicError, ContentDelta, Event, ResponseContent, Usage};
|
||||
use anthropic::{AnthropicError, AnthropicModelMode, ContentDelta, Event, ResponseContent, Usage};
|
||||
use anyhow::{anyhow, Context as _, Result};
|
||||
use collections::{BTreeMap, HashMap};
|
||||
use credentials_provider::CredentialsProvider;
|
||||
|
@ -55,6 +55,37 @@ pub struct AvailableModel {
|
|||
pub default_temperature: Option<f32>,
|
||||
#[serde(default)]
|
||||
pub extra_beta_headers: Vec<String>,
|
||||
/// The model's mode (e.g. thinking)
|
||||
pub mode: Option<ModelMode>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize, JsonSchema)]
|
||||
#[serde(tag = "type", rename_all = "lowercase")]
|
||||
pub enum ModelMode {
|
||||
#[default]
|
||||
Default,
|
||||
Thinking {
|
||||
/// The maximum number of tokens to use for reasoning. Must be lower than the model's `max_output_tokens`.
|
||||
budget_tokens: Option<u32>,
|
||||
},
|
||||
}
|
||||
|
||||
impl From<ModelMode> for AnthropicModelMode {
|
||||
fn from(value: ModelMode) -> Self {
|
||||
match value {
|
||||
ModelMode::Default => AnthropicModelMode::Default,
|
||||
ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<AnthropicModelMode> for ModelMode {
|
||||
fn from(value: AnthropicModelMode) -> Self {
|
||||
match value {
|
||||
AnthropicModelMode::Default => ModelMode::Default,
|
||||
AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct AnthropicLanguageModelProvider {
|
||||
|
@ -228,6 +259,7 @@ impl LanguageModelProvider for AnthropicLanguageModelProvider {
|
|||
max_output_tokens: model.max_output_tokens,
|
||||
default_temperature: model.default_temperature,
|
||||
extra_beta_headers: model.extra_beta_headers.clone(),
|
||||
mode: model.mode.clone().unwrap_or_default().into(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
@ -399,9 +431,10 @@ impl LanguageModel for AnthropicModel {
|
|||
) -> BoxFuture<'static, Result<BoxStream<'static, Result<LanguageModelCompletionEvent>>>> {
|
||||
let request = into_anthropic(
|
||||
request,
|
||||
self.model.id().into(),
|
||||
self.model.request_id().into(),
|
||||
self.model.default_temperature(),
|
||||
self.model.max_output_tokens(),
|
||||
self.model.mode(),
|
||||
);
|
||||
let request = self.stream_completion(request, cx);
|
||||
let future = self.request_limiter.stream(async move {
|
||||
|
@ -434,6 +467,7 @@ impl LanguageModel for AnthropicModel {
|
|||
self.model.tool_model_id().into(),
|
||||
self.model.default_temperature(),
|
||||
self.model.max_output_tokens(),
|
||||
self.model.mode(),
|
||||
);
|
||||
request.tool_choice = Some(anthropic::ToolChoice::Tool {
|
||||
name: tool_name.clone(),
|
||||
|
@ -464,6 +498,7 @@ pub fn into_anthropic(
|
|||
model: String,
|
||||
default_temperature: f32,
|
||||
max_output_tokens: u32,
|
||||
mode: AnthropicModelMode,
|
||||
) -> anthropic::Request {
|
||||
let mut new_messages: Vec<anthropic::Message> = Vec::new();
|
||||
let mut system_message = String::new();
|
||||
|
@ -552,6 +587,11 @@ pub fn into_anthropic(
|
|||
messages: new_messages,
|
||||
max_tokens: max_output_tokens,
|
||||
system: Some(system_message),
|
||||
thinking: if let AnthropicModelMode::Thinking { budget_tokens } = mode {
|
||||
Some(anthropic::Thinking::Enabled { budget_tokens })
|
||||
} else {
|
||||
None
|
||||
},
|
||||
tools: request
|
||||
.tools
|
||||
.into_iter()
|
||||
|
@ -607,6 +647,16 @@ pub fn map_to_language_model_completion_events(
|
|||
state,
|
||||
));
|
||||
}
|
||||
ResponseContent::Thinking { thinking } => {
|
||||
return Some((
|
||||
vec![Ok(LanguageModelCompletionEvent::Thinking(thinking))],
|
||||
state,
|
||||
));
|
||||
}
|
||||
ResponseContent::RedactedThinking { .. } => {
|
||||
// Redacted thinking is encrypted and not accessible to the user, see:
|
||||
// https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#suggestions-for-handling-redacted-thinking-in-production
|
||||
}
|
||||
ResponseContent::ToolUse { id, name, .. } => {
|
||||
state.tool_uses_by_index.insert(
|
||||
index,
|
||||
|
@ -625,6 +675,13 @@ pub fn map_to_language_model_completion_events(
|
|||
state,
|
||||
));
|
||||
}
|
||||
ContentDelta::ThinkingDelta { thinking } => {
|
||||
return Some((
|
||||
vec![Ok(LanguageModelCompletionEvent::Thinking(thinking))],
|
||||
state,
|
||||
));
|
||||
}
|
||||
ContentDelta::SignatureDelta { .. } => {}
|
||||
ContentDelta::InputJsonDelta { partial_json } => {
|
||||
if let Some(tool_use) = state.tool_uses_by_index.get_mut(&index) {
|
||||
tool_use.input_json.push_str(&partial_json);
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use anthropic::AnthropicError;
|
||||
use anthropic::{AnthropicError, AnthropicModelMode};
|
||||
use anyhow::{anyhow, Result};
|
||||
use client::{
|
||||
zed_urls, Client, PerformCompletionParams, UserStore, EXPIRED_LLM_TOKEN_HEADER_NAME,
|
||||
|
@ -91,6 +91,28 @@ pub struct AvailableModel {
|
|||
/// Any extra beta headers to provide when using the model.
|
||||
#[serde(default)]
|
||||
pub extra_beta_headers: Vec<String>,
|
||||
/// The model's mode (e.g. thinking)
|
||||
pub mode: Option<ModelMode>,
|
||||
}
|
||||
|
||||
#[derive(Default, Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
|
||||
#[serde(tag = "type", rename_all = "lowercase")]
|
||||
pub enum ModelMode {
|
||||
#[default]
|
||||
Default,
|
||||
Thinking {
|
||||
/// The maximum number of tokens to use for reasoning. Must be lower than the model's `max_output_tokens`.
|
||||
budget_tokens: Option<u32>,
|
||||
},
|
||||
}
|
||||
|
||||
impl From<ModelMode> for AnthropicModelMode {
|
||||
fn from(value: ModelMode) -> Self {
|
||||
match value {
|
||||
ModelMode::Default => AnthropicModelMode::Default,
|
||||
ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct CloudLanguageModelProvider {
|
||||
|
@ -299,6 +321,7 @@ impl LanguageModelProvider for CloudLanguageModelProvider {
|
|||
default_temperature: model.default_temperature,
|
||||
max_output_tokens: model.max_output_tokens,
|
||||
extra_beta_headers: model.extra_beta_headers.clone(),
|
||||
mode: model.mode.unwrap_or_default().into(),
|
||||
}),
|
||||
AvailableProvider::OpenAi => CloudModel::OpenAi(open_ai::Model::Custom {
|
||||
name: model.name.clone(),
|
||||
|
@ -567,9 +590,10 @@ impl LanguageModel for CloudLanguageModel {
|
|||
CloudModel::Anthropic(model) => {
|
||||
let request = into_anthropic(
|
||||
request,
|
||||
model.id().into(),
|
||||
model.request_id().into(),
|
||||
model.default_temperature(),
|
||||
model.max_output_tokens(),
|
||||
model.mode(),
|
||||
);
|
||||
let client = self.client.clone();
|
||||
let llm_api_token = self.llm_api_token.clone();
|
||||
|
@ -669,6 +693,7 @@ impl LanguageModel for CloudLanguageModel {
|
|||
model.tool_model_id().into(),
|
||||
model.default_temperature(),
|
||||
model.max_output_tokens(),
|
||||
model.mode(),
|
||||
);
|
||||
request.tool_choice = Some(anthropic::ToolChoice::Tool {
|
||||
name: tool_name.clone(),
|
||||
|
|
|
@ -109,6 +109,7 @@ impl AnthropicSettingsContent {
|
|||
max_output_tokens,
|
||||
default_temperature,
|
||||
extra_beta_headers,
|
||||
mode,
|
||||
} => Some(provider::anthropic::AvailableModel {
|
||||
name,
|
||||
display_name,
|
||||
|
@ -124,6 +125,7 @@ impl AnthropicSettingsContent {
|
|||
max_output_tokens,
|
||||
default_temperature,
|
||||
extra_beta_headers,
|
||||
mode: Some(mode.into()),
|
||||
}),
|
||||
_ => None,
|
||||
})
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue