Implement Anthropic prompt caching (#16274)
Release Notes:

- Adds support for Prompt Caching in Anthropic. For models that support it, this can dramatically lower cost while improving performance.
parent 09b6e3f2a6
commit 46fb917e02
11 changed files with 338 additions and 70 deletions
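The new `cache_configuration` field carries the three knobs visible in the diff below: `max_cache_anchors`, `should_speculate`, and `min_total_token`. As a minimal sketch of what such a configuration struct could look like, with field types and doc comments assumed for illustration rather than taken from this commit:

```rust
// Sketch only: mirrors the field names visible in the diff below.
// Types and doc comments are assumptions for illustration, not Zed's actual definitions.
#[derive(Clone, Debug)]
pub struct LanguageModelCacheConfiguration {
    /// How many cache breakpoints ("anchors") may be placed in a single request.
    pub max_cache_anchors: usize,
    /// Whether to mark content as cacheable speculatively, before it is known to be reused.
    pub should_speculate: bool,
    /// Minimum total token count below which caching is not attempted.
    pub min_total_token: usize,
}
```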
```diff
@@ -1,7 +1,7 @@
 use super::open_ai::count_open_ai_tokens;
 use crate::{
-    settings::AllLanguageModelSettings, CloudModel, LanguageModel, LanguageModelId,
-    LanguageModelName, LanguageModelProviderId, LanguageModelProviderName,
+    settings::AllLanguageModelSettings, CloudModel, LanguageModel, LanguageModelCacheConfiguration,
+    LanguageModelId, LanguageModelName, LanguageModelProviderId, LanguageModelProviderName,
     LanguageModelProviderState, LanguageModelRequest, RateLimiter, ZedModel,
 };
 use anthropic::AnthropicError;
@@ -56,6 +56,7 @@ pub struct AvailableModel {
     name: String,
     max_tokens: usize,
     tool_override: Option<String>,
+    cache_configuration: Option<LanguageModelCacheConfiguration>,
 }
 
 pub struct CloudLanguageModelProvider {
@@ -202,6 +203,13 @@ impl LanguageModelProvider for CloudLanguageModelProvider {
                 name: model.name.clone(),
                 max_tokens: model.max_tokens,
                 tool_override: model.tool_override.clone(),
+                cache_configuration: model.cache_configuration.as_ref().map(|config| {
+                    anthropic::AnthropicModelCacheConfiguration {
+                        max_cache_anchors: config.max_cache_anchors,
+                        should_speculate: config.should_speculate,
+                        min_total_token: config.min_total_token,
+                    }
+                }),
             })
         }
         AvailableProvider::OpenAi => CloudModel::OpenAi(open_ai::Model::Custom {
```
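The last hunk converts the provider-agnostic configuration into the Anthropic-specific one field for field, and a model without a `cache_configuration` maps to `None`. A self-contained sketch of that mapping, using local stand-in types rather than the actual Zed or `anthropic` crate definitions:

```rust
// Standalone sketch of the Option-to-Option mapping shown in the final hunk.
// Both structs here are local stand-ins, not the real Zed / anthropic crate types.
#[derive(Debug)]
struct LanguageModelCacheConfiguration {
    max_cache_anchors: usize,
    should_speculate: bool,
    min_total_token: usize,
}

#[derive(Debug)]
struct AnthropicModelCacheConfiguration {
    max_cache_anchors: usize,
    should_speculate: bool,
    min_total_token: usize,
}

// Mirrors `model.cache_configuration.as_ref().map(|config| ...)` from the diff:
// a model without a cache configuration maps to `None`.
fn to_anthropic(
    cache_configuration: Option<&LanguageModelCacheConfiguration>,
) -> Option<AnthropicModelCacheConfiguration> {
    cache_configuration.map(|config| AnthropicModelCacheConfiguration {
        max_cache_anchors: config.max_cache_anchors,
        should_speculate: config.should_speculate,
        min_total_token: config.min_total_token,
    })
}

fn main() {
    // Hypothetical values, chosen only to exercise the mapping.
    let config = LanguageModelCacheConfiguration {
        max_cache_anchors: 4,
        should_speculate: true,
        min_total_token: 10_000,
    };
    println!("{:?}", to_anthropic(Some(&config)));
    println!("{:?}", to_anthropic(None));
}
```

Keeping a generic configuration on `AvailableModel` and mapping it into a provider-specific struct presumably leaves room for other providers to grow their own cache settings without changing the shared shape.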