Support 8192 output tokens for Claude 3.5 Sonnet (#16358)

Release Notes:

- Added support for 8192 output tokens from Claude 3.5 Sonnet (https://x.com/alexalbert__/status/1812921642143900036)
Roy Williams 2024-08-16 11:47:39 -04:00 committed by GitHub
parent 7eab57a264
commit b4f5f5024e
6 changed files with 37 additions and 8 deletions
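
At a glance: the Anthropic `Model` enum gains a per-model output-token cap, the `LanguageModel` trait exposes it, and `into_anthropic` now takes the cap as a parameter instead of hard-coding `max_tokens`. A self-contained toy sketch of the core idea; the names mirror the diffs below, but this block is an illustration, not code from the commit:

```rust
// Toy model of the change, runnable on its own.
#[derive(Clone, Copy)]
enum Model {
    Claude3Opus,
    Claude3Sonnet,
    Claude3Haiku,
    Claude3_5Sonnet,
}

impl Model {
    fn max_output_tokens(self) -> u32 {
        match self {
            Model::Claude3Opus | Model::Claude3Sonnet | Model::Claude3Haiku => 4_096,
            Model::Claude3_5Sonnet => 8_192, // the new, higher cap
        }
    }
}

fn main() {
    // into_anthropic previously hard-coded max_tokens; now the
    // per-model cap is passed in by the caller.
    let max_tokens = Model::Claude3_5Sonnet.max_output_tokens();
    assert_eq!(max_tokens, 8_192);
}
```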

[File 1 of 6]

@@ -42,6 +42,7 @@ pub enum Model {
         tool_override: Option<String>,
         /// Indicates whether this custom model supports caching.
         cache_configuration: Option<AnthropicModelCacheConfiguration>,
+        max_output_tokens: Option<u32>,
     },
 }
@@ -105,6 +106,16 @@ impl Model {
         }
     }
 
+    pub fn max_output_tokens(&self) -> u32 {
+        match self {
+            Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
+            Self::Claude3_5Sonnet => 8_192,
+            Self::Custom {
+                max_output_tokens, ..
+            } => max_output_tokens.unwrap_or(4_096),
+        }
+    }
+
     pub fn tool_model_id(&self) -> &str {
         if let Self::Custom {
             tool_override: Some(tool_override),
@@ -131,7 +142,7 @@ pub async fn complete(
         .header("Anthropic-Version", "2023-06-01")
         .header(
             "Anthropic-Beta",
-            "tools-2024-04-04,prompt-caching-2024-07-31",
+            "tools-2024-04-04,prompt-caching-2024-07-31,max-tokens-3-5-sonnet-2024-07-15",
         )
         .header("X-Api-Key", api_key)
         .header("Content-Type", "application/json");
@@ -191,7 +202,7 @@ pub async fn stream_completion(
         .header("Anthropic-Version", "2023-06-01")
         .header(
             "Anthropic-Beta",
-            "tools-2024-04-04,prompt-caching-2024-07-31",
+            "tools-2024-04-04,prompt-caching-2024-07-31,max-tokens-3-5-sonnet-2024-07-15",
         )
         .header("X-Api-Key", api_key)
         .header("Content-Type", "application/json");

[File 2 of 6]

@@ -64,6 +64,9 @@ pub trait LanguageModel: Send + Sync {
     }
 
     fn max_token_count(&self) -> usize;
+    fn max_output_tokens(&self) -> Option<u32> {
+        None
+    }
 
     fn count_tokens(
         &self,
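
Because the new trait method has a default body, the change is non-breaking: implementors that don't know their output cap simply inherit `None`. A toy, runnable version of the pattern (type names here are stand-ins, not zed's):

```rust
trait LanguageModelish {
    fn max_token_count(&self) -> usize;
    // Defaulted: providers opt in by overriding.
    fn max_output_tokens(&self) -> Option<u32> {
        None
    }
}

struct OpaqueProvider;
impl LanguageModelish for OpaqueProvider {
    fn max_token_count(&self) -> usize {
        32_768
    }
    // No override: callers get None.
}

struct CappedProvider;
impl LanguageModelish for CappedProvider {
    fn max_token_count(&self) -> usize {
        200_000
    }
    fn max_output_tokens(&self) -> Option<u32> {
        Some(8_192)
    }
}

fn main() {
    assert_eq!(OpaqueProvider.max_output_tokens(), None);
    assert_eq!(CappedProvider.max_output_tokens(), Some(8_192));
}
```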

[File 3 of 6]

@@ -39,6 +39,7 @@ pub struct AvailableModel {
     pub max_tokens: usize,
     pub tool_override: Option<String>,
     pub cache_configuration: Option<LanguageModelCacheConfiguration>,
+    pub max_output_tokens: Option<u32>,
 }
 
 pub struct AnthropicLanguageModelProvider {
@@ -179,6 +180,7 @@ impl LanguageModelProvider for AnthropicLanguageModelProvider {
                         min_total_token: config.min_total_token,
                     }
                 }),
+                max_output_tokens: model.max_output_tokens,
             },
         );
     }
@@ -331,6 +333,10 @@ impl LanguageModel for AnthropicModel {
         self.model.max_token_count()
     }
 
+    fn max_output_tokens(&self) -> Option<u32> {
+        Some(self.model.max_output_tokens())
+    }
+
     fn count_tokens(
         &self,
         request: LanguageModelRequest,
@@ -344,7 +350,8 @@ impl LanguageModel for AnthropicModel {
         request: LanguageModelRequest,
         cx: &AsyncAppContext,
     ) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
-        let request = request.into_anthropic(self.model.id().into());
+        let request =
+            request.into_anthropic(self.model.id().into(), self.model.max_output_tokens());
         let request = self.stream_completion(request, cx);
         let future = self.request_limiter.stream(async move {
             let response = request.await.map_err(|err| anyhow!(err))?;
@@ -377,7 +384,10 @@ impl LanguageModel for AnthropicModel {
         input_schema: serde_json::Value,
         cx: &AsyncAppContext,
     ) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
-        let mut request = request.into_anthropic(self.model.tool_model_id().into());
+        let mut request = request.into_anthropic(
+            self.model.tool_model_id().into(),
+            self.model.max_output_tokens(),
+        );
         request.tool_choice = Some(anthropic::ToolChoice::Tool {
             name: tool_name.clone(),
         });
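
Note that the Anthropic provider's override always returns `Some(...)`: every built-in model has a known cap, and custom models fall back to `4_096` via `unwrap_or`. A small runnable sketch of that contract, with stand-in types:

```rust
// Stand-ins for the real types; the point is the Some(...) contract.
struct Anthropicish {
    configured: Option<u32>,
}

impl Anthropicish {
    fn model_cap(&self) -> u32 {
        self.configured.unwrap_or(4_096) // mirrors Model::Custom
    }
    fn max_output_tokens(&self) -> Option<u32> {
        Some(self.model_cap()) // never None for this provider
    }
}

fn main() {
    assert_eq!(Anthropicish { configured: None }.max_output_tokens(), Some(4_096));
    assert_eq!(Anthropicish { configured: Some(8_192) }.max_output_tokens(), Some(8_192));
}
```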

[File 4 of 6]

@@ -57,6 +57,7 @@ pub struct AvailableModel {
     max_tokens: usize,
     tool_override: Option<String>,
     cache_configuration: Option<LanguageModelCacheConfiguration>,
+    max_output_tokens: Option<u32>,
 }
 
 pub struct CloudLanguageModelProvider {
@@ -210,6 +211,7 @@ impl LanguageModelProvider for CloudLanguageModelProvider {
                         min_total_token: config.min_total_token,
                     }
                 }),
+                max_output_tokens: model.max_output_tokens,
             })
         }
         AvailableProvider::OpenAi => CloudModel::OpenAi(open_ai::Model::Custom {
@@ -446,7 +448,7 @@ impl LanguageModel for CloudLanguageModel {
     ) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
         match &self.model {
             CloudModel::Anthropic(model) => {
-                let request = request.into_anthropic(model.id().into());
+                let request = request.into_anthropic(model.id().into(), model.max_output_tokens());
                 let client = self.client.clone();
                 let llm_api_token = self.llm_api_token.clone();
                 let future = self.request_limiter.stream(async move {
@@ -556,7 +558,8 @@ impl LanguageModel for CloudLanguageModel {
         match &self.model {
             CloudModel::Anthropic(model) => {
-                let mut request = request.into_anthropic(model.tool_model_id().into());
+                let mut request =
+                    request.into_anthropic(model.tool_model_id().into(), model.max_output_tokens());
                 request.tool_choice = Some(anthropic::ToolChoice::Tool {
                     name: tool_name.clone(),
                 });

[File 5 of 6]

@@ -286,7 +286,7 @@ impl LanguageModelRequest {
         }
     }
 
-    pub fn into_anthropic(self, model: String) -> anthropic::Request {
+    pub fn into_anthropic(self, model: String, max_output_tokens: u32) -> anthropic::Request {
         let mut new_messages: Vec<anthropic::Message> = Vec::new();
         let mut system_message = String::new();
@@ -353,7 +353,7 @@ impl LanguageModelRequest {
         anthropic::Request {
             model,
             messages: new_messages,
-            max_tokens: 4092,
+            max_tokens: max_output_tokens,
             system: Some(system_message),
             tools: Vec::new(),
             tool_choice: None,
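
Incidentally, the removed constant was `4092`, which looks like a typo for `4_096`; either way, every model was pinned to roughly 4k output tokens before this change. A trimmed, runnable sketch of the new shape of the conversion (a stand-in for `anthropic::Request`, and an assumed model id):

```rust
// Trimmed stand-in, enough to show the change.
struct Request {
    model: String,
    max_tokens: u32,
}

fn into_anthropic(model: String, max_output_tokens: u32) -> Request {
    Request {
        model,
        // Previously: max_tokens: 4092, hard-coded for all models.
        max_tokens: max_output_tokens,
    }
}

fn main() {
    let req = into_anthropic("claude-3-5-sonnet-20240620".into(), 8_192);
    assert_eq!(req.max_tokens, 8_192);
}
```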

[File 6 of 6]

@@ -97,6 +97,7 @@ impl AnthropicSettingsContent {
                     max_tokens,
                     tool_override,
                     cache_configuration,
+                    max_output_tokens,
                 } => Some(provider::anthropic::AvailableModel {
                     name,
                     max_tokens,
@@ -108,6 +109,7 @@
                             min_total_token: config.min_total_token,
                         },
                     ),
+                    max_output_tokens,
                 }),
                 _ => None,
             })
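
Finally, the settings conversion threads the field through, so a user-defined custom model can raise its cap. An illustrative construction, with the field set trimmed to what this diff shows (a stand-in struct, and an assumed model id):

```rust
// Stand-in mirroring the fields this diff shows on AvailableModel; the
// real struct may carry more.
struct AvailableModel {
    name: String,
    max_tokens: usize,
    tool_override: Option<String>,
    cache_configuration: Option<()>, // placeholder for the cache config type
    max_output_tokens: Option<u32>,
}

fn main() {
    let model = AvailableModel {
        name: "claude-3-5-sonnet-20240620".into(), // illustrative model id
        max_tokens: 200_000,
        tool_override: None,
        cache_configuration: None,
        max_output_tokens: Some(8_192), // the new knob
    };
    assert_eq!(model.max_output_tokens, Some(8_192));
}
```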