Support 8192 output tokens for Claude Sonnet 3.5 (#16358)
Release Notes:

- Added support for 8192 output tokens from Claude Sonnet 3.5 (https://x.com/alexalbert__/status/1812921642143900036)
This commit is contained in:
parent
7eab57a264
commit
b4f5f5024e
6 changed files with 37 additions and 8 deletions
|
@@ -39,6 +39,7 @@ pub struct AvailableModel {
|
|||
pub max_tokens: usize,
|
||||
pub tool_override: Option<String>,
|
||||
pub cache_configuration: Option<LanguageModelCacheConfiguration>,
|
||||
pub max_output_tokens: Option<u32>,
|
||||
}
|
||||
|
||||
pub struct AnthropicLanguageModelProvider {
|
||||
|
@@ -179,6 +180,7 @@ impl LanguageModelProvider for AnthropicLanguageModelProvider {
|
|||
min_total_token: config.min_total_token,
|
||||
}
|
||||
}),
|
||||
max_output_tokens: model.max_output_tokens,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
@@ -331,6 +333,10 @@ impl LanguageModel for AnthropicModel {
|
|||
self.model.max_token_count()
|
||||
}
|
||||
|
||||
fn max_output_tokens(&self) -> Option<u32> {
|
||||
Some(self.model.max_output_tokens())
|
||||
}
|
||||
|
||||
fn count_tokens(
|
||||
&self,
|
||||
request: LanguageModelRequest,
|
||||
|
@@ -344,7 +350,8 @@ impl LanguageModel for AnthropicModel {
|
|||
request: LanguageModelRequest,
|
||||
cx: &AsyncAppContext,
|
||||
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
|
||||
let request = request.into_anthropic(self.model.id().into());
|
||||
let request =
|
||||
request.into_anthropic(self.model.id().into(), self.model.max_output_tokens());
|
||||
let request = self.stream_completion(request, cx);
|
||||
let future = self.request_limiter.stream(async move {
|
||||
let response = request.await.map_err(|err| anyhow!(err))?;
|
||||
|
@@ -377,7 +384,10 @@ impl LanguageModel for AnthropicModel {
|
|||
input_schema: serde_json::Value,
|
||||
cx: &AsyncAppContext,
|
||||
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
|
||||
let mut request = request.into_anthropic(self.model.tool_model_id().into());
|
||||
let mut request = request.into_anthropic(
|
||||
self.model.tool_model_id().into(),
|
||||
self.model.max_output_tokens(),
|
||||
);
|
||||
request.tool_choice = Some(anthropic::ToolChoice::Tool {
|
||||
name: tool_name.clone(),
|
||||
});
|
||||
|
|
|
@@ -57,6 +57,7 @@ pub struct AvailableModel {
|
|||
max_tokens: usize,
|
||||
tool_override: Option<String>,
|
||||
cache_configuration: Option<LanguageModelCacheConfiguration>,
|
||||
max_output_tokens: Option<u32>,
|
||||
}
|
||||
|
||||
pub struct CloudLanguageModelProvider {
|
||||
|
@@ -210,6 +211,7 @@ impl LanguageModelProvider for CloudLanguageModelProvider {
|
|||
min_total_token: config.min_total_token,
|
||||
}
|
||||
}),
|
||||
max_output_tokens: model.max_output_tokens,
|
||||
})
|
||||
}
|
||||
AvailableProvider::OpenAi => CloudModel::OpenAi(open_ai::Model::Custom {
|
||||
|
@@ -446,7 +448,7 @@ impl LanguageModel for CloudLanguageModel {
|
|||
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
|
||||
match &self.model {
|
||||
CloudModel::Anthropic(model) => {
|
||||
let request = request.into_anthropic(model.id().into());
|
||||
let request = request.into_anthropic(model.id().into(), model.max_output_tokens());
|
||||
let client = self.client.clone();
|
||||
let llm_api_token = self.llm_api_token.clone();
|
||||
let future = self.request_limiter.stream(async move {
|
||||
|
@@ -556,7 +558,8 @@ impl LanguageModel for CloudLanguageModel {
|
|||
|
||||
match &self.model {
|
||||
CloudModel::Anthropic(model) => {
|
||||
let mut request = request.into_anthropic(model.tool_model_id().into());
|
||||
let mut request =
|
||||
request.into_anthropic(model.tool_model_id().into(), model.max_output_tokens());
|
||||
request.tool_choice = Some(anthropic::ToolChoice::Tool {
|
||||
name: tool_name.clone(),
|
||||
});
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue