Support 8192 output tokens for Claude Sonnet 3.5 (#16358)
Release Notes:
- Added support for 8192 output tokens from Claude Sonnet 3.5 (https://x.com/alexalbert__/status/1812921642143900036)
This commit is contained in:
parent
7eab57a264
commit
b4f5f5024e
6 changed files with 37 additions and 8 deletions
|
@@ -42,6 +42,7 @@ pub enum Model {
|
||||||
tool_override: Option<String>,
|
tool_override: Option<String>,
|
||||||
/// Indicates whether this custom model supports caching.
|
/// Indicates whether this custom model supports caching.
|
||||||
cache_configuration: Option<AnthropicModelCacheConfiguration>,
|
cache_configuration: Option<AnthropicModelCacheConfiguration>,
|
||||||
|
max_output_tokens: Option<u32>,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -105,6 +106,16 @@ impl Model {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn max_output_tokens(&self) -> u32 {
|
||||||
|
match self {
|
||||||
|
Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
|
||||||
|
Self::Claude3_5Sonnet => 8_192,
|
||||||
|
Self::Custom {
|
||||||
|
max_output_tokens, ..
|
||||||
|
} => max_output_tokens.unwrap_or(4_096),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn tool_model_id(&self) -> &str {
|
pub fn tool_model_id(&self) -> &str {
|
||||||
if let Self::Custom {
|
if let Self::Custom {
|
||||||
tool_override: Some(tool_override),
|
tool_override: Some(tool_override),
|
||||||
|
@@ -131,7 +142,7 @@ pub async fn complete(
|
||||||
.header("Anthropic-Version", "2023-06-01")
|
.header("Anthropic-Version", "2023-06-01")
|
||||||
.header(
|
.header(
|
||||||
"Anthropic-Beta",
|
"Anthropic-Beta",
|
||||||
"tools-2024-04-04,prompt-caching-2024-07-31",
|
"tools-2024-04-04,prompt-caching-2024-07-31,max-tokens-3-5-sonnet-2024-07-15",
|
||||||
)
|
)
|
||||||
.header("X-Api-Key", api_key)
|
.header("X-Api-Key", api_key)
|
||||||
.header("Content-Type", "application/json");
|
.header("Content-Type", "application/json");
|
||||||
|
@@ -191,7 +202,7 @@ pub async fn stream_completion(
|
||||||
.header("Anthropic-Version", "2023-06-01")
|
.header("Anthropic-Version", "2023-06-01")
|
||||||
.header(
|
.header(
|
||||||
"Anthropic-Beta",
|
"Anthropic-Beta",
|
||||||
"tools-2024-04-04,prompt-caching-2024-07-31",
|
"tools-2024-04-04,prompt-caching-2024-07-31,max-tokens-3-5-sonnet-2024-07-15",
|
||||||
)
|
)
|
||||||
.header("X-Api-Key", api_key)
|
.header("X-Api-Key", api_key)
|
||||||
.header("Content-Type", "application/json");
|
.header("Content-Type", "application/json");
|
||||||
|
|
|
@@ -64,6 +64,9 @@ pub trait LanguageModel: Send + Sync {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn max_token_count(&self) -> usize;
|
fn max_token_count(&self) -> usize;
|
||||||
|
fn max_output_tokens(&self) -> Option<u32> {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
fn count_tokens(
|
fn count_tokens(
|
||||||
&self,
|
&self,
|
||||||
|
|
|
@@ -39,6 +39,7 @@ pub struct AvailableModel {
|
||||||
pub max_tokens: usize,
|
pub max_tokens: usize,
|
||||||
pub tool_override: Option<String>,
|
pub tool_override: Option<String>,
|
||||||
pub cache_configuration: Option<LanguageModelCacheConfiguration>,
|
pub cache_configuration: Option<LanguageModelCacheConfiguration>,
|
||||||
|
pub max_output_tokens: Option<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct AnthropicLanguageModelProvider {
|
pub struct AnthropicLanguageModelProvider {
|
||||||
|
@@ -179,6 +180,7 @@ impl LanguageModelProvider for AnthropicLanguageModelProvider {
|
||||||
min_total_token: config.min_total_token,
|
min_total_token: config.min_total_token,
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
|
max_output_tokens: model.max_output_tokens,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@@ -331,6 +333,10 @@ impl LanguageModel for AnthropicModel {
|
||||||
self.model.max_token_count()
|
self.model.max_token_count()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn max_output_tokens(&self) -> Option<u32> {
|
||||||
|
Some(self.model.max_output_tokens())
|
||||||
|
}
|
||||||
|
|
||||||
fn count_tokens(
|
fn count_tokens(
|
||||||
&self,
|
&self,
|
||||||
request: LanguageModelRequest,
|
request: LanguageModelRequest,
|
||||||
|
@@ -344,7 +350,8 @@ impl LanguageModel for AnthropicModel {
|
||||||
request: LanguageModelRequest,
|
request: LanguageModelRequest,
|
||||||
cx: &AsyncAppContext,
|
cx: &AsyncAppContext,
|
||||||
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
|
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
|
||||||
let request = request.into_anthropic(self.model.id().into());
|
let request =
|
||||||
|
request.into_anthropic(self.model.id().into(), self.model.max_output_tokens());
|
||||||
let request = self.stream_completion(request, cx);
|
let request = self.stream_completion(request, cx);
|
||||||
let future = self.request_limiter.stream(async move {
|
let future = self.request_limiter.stream(async move {
|
||||||
let response = request.await.map_err(|err| anyhow!(err))?;
|
let response = request.await.map_err(|err| anyhow!(err))?;
|
||||||
|
@@ -377,7 +384,10 @@ impl LanguageModel for AnthropicModel {
|
||||||
input_schema: serde_json::Value,
|
input_schema: serde_json::Value,
|
||||||
cx: &AsyncAppContext,
|
cx: &AsyncAppContext,
|
||||||
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
|
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
|
||||||
let mut request = request.into_anthropic(self.model.tool_model_id().into());
|
let mut request = request.into_anthropic(
|
||||||
|
self.model.tool_model_id().into(),
|
||||||
|
self.model.max_output_tokens(),
|
||||||
|
);
|
||||||
request.tool_choice = Some(anthropic::ToolChoice::Tool {
|
request.tool_choice = Some(anthropic::ToolChoice::Tool {
|
||||||
name: tool_name.clone(),
|
name: tool_name.clone(),
|
||||||
});
|
});
|
||||||
|
|
|
@@ -57,6 +57,7 @@ pub struct AvailableModel {
|
||||||
max_tokens: usize,
|
max_tokens: usize,
|
||||||
tool_override: Option<String>,
|
tool_override: Option<String>,
|
||||||
cache_configuration: Option<LanguageModelCacheConfiguration>,
|
cache_configuration: Option<LanguageModelCacheConfiguration>,
|
||||||
|
max_output_tokens: Option<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct CloudLanguageModelProvider {
|
pub struct CloudLanguageModelProvider {
|
||||||
|
@@ -210,6 +211,7 @@ impl LanguageModelProvider for CloudLanguageModelProvider {
|
||||||
min_total_token: config.min_total_token,
|
min_total_token: config.min_total_token,
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
|
max_output_tokens: model.max_output_tokens,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
AvailableProvider::OpenAi => CloudModel::OpenAi(open_ai::Model::Custom {
|
AvailableProvider::OpenAi => CloudModel::OpenAi(open_ai::Model::Custom {
|
||||||
|
@@ -446,7 +448,7 @@ impl LanguageModel for CloudLanguageModel {
|
||||||
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
|
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
|
||||||
match &self.model {
|
match &self.model {
|
||||||
CloudModel::Anthropic(model) => {
|
CloudModel::Anthropic(model) => {
|
||||||
let request = request.into_anthropic(model.id().into());
|
let request = request.into_anthropic(model.id().into(), model.max_output_tokens());
|
||||||
let client = self.client.clone();
|
let client = self.client.clone();
|
||||||
let llm_api_token = self.llm_api_token.clone();
|
let llm_api_token = self.llm_api_token.clone();
|
||||||
let future = self.request_limiter.stream(async move {
|
let future = self.request_limiter.stream(async move {
|
||||||
|
@@ -556,7 +558,8 @@ impl LanguageModel for CloudLanguageModel {
|
||||||
|
|
||||||
match &self.model {
|
match &self.model {
|
||||||
CloudModel::Anthropic(model) => {
|
CloudModel::Anthropic(model) => {
|
||||||
let mut request = request.into_anthropic(model.tool_model_id().into());
|
let mut request =
|
||||||
|
request.into_anthropic(model.tool_model_id().into(), model.max_output_tokens());
|
||||||
request.tool_choice = Some(anthropic::ToolChoice::Tool {
|
request.tool_choice = Some(anthropic::ToolChoice::Tool {
|
||||||
name: tool_name.clone(),
|
name: tool_name.clone(),
|
||||||
});
|
});
|
||||||
|
|
|
@@ -286,7 +286,7 @@ impl LanguageModelRequest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn into_anthropic(self, model: String) -> anthropic::Request {
|
pub fn into_anthropic(self, model: String, max_output_tokens: u32) -> anthropic::Request {
|
||||||
let mut new_messages: Vec<anthropic::Message> = Vec::new();
|
let mut new_messages: Vec<anthropic::Message> = Vec::new();
|
||||||
let mut system_message = String::new();
|
let mut system_message = String::new();
|
||||||
|
|
||||||
|
@@ -353,7 +353,7 @@ impl LanguageModelRequest {
|
||||||
anthropic::Request {
|
anthropic::Request {
|
||||||
model,
|
model,
|
||||||
messages: new_messages,
|
messages: new_messages,
|
||||||
max_tokens: 4092,
|
max_tokens: max_output_tokens,
|
||||||
system: Some(system_message),
|
system: Some(system_message),
|
||||||
tools: Vec::new(),
|
tools: Vec::new(),
|
||||||
tool_choice: None,
|
tool_choice: None,
|
||||||
|
|
|
@@ -97,6 +97,7 @@ impl AnthropicSettingsContent {
|
||||||
max_tokens,
|
max_tokens,
|
||||||
tool_override,
|
tool_override,
|
||||||
cache_configuration,
|
cache_configuration,
|
||||||
|
max_output_tokens,
|
||||||
} => Some(provider::anthropic::AvailableModel {
|
} => Some(provider::anthropic::AvailableModel {
|
||||||
name,
|
name,
|
||||||
max_tokens,
|
max_tokens,
|
||||||
|
@@ -108,6 +109,7 @@ impl AnthropicSettingsContent {
|
||||||
min_total_token: config.min_total_token,
|
min_total_token: config.min_total_token,
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
|
max_output_tokens,
|
||||||
}),
|
}),
|
||||||
_ => None,
|
_ => None,
|
||||||
})
|
})
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue