Implement Anthropic prompt caching (#16274)

Release Notes:

- Adds support for prompt caching in Anthropic. For models that support it, this can dramatically lower cost while improving performance.
commit 46fb917e02 (parent 09b6e3f2a6)
Roy Williams, 2024-08-15 23:21:06 -04:00, committed by GitHub
11 changed files with 338 additions and 70 deletions
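
The diff below threads a new cache configuration type through the cloud provider. As a point of reference, here is a minimal sketch of what that type might look like: the field names (`max_cache_anchors`, `should_speculate`, `min_total_token`) come from the hunks below, while the derives, doc comments, and exact field types are assumptions rather than code from this commit.

```rust
// Sketch only: field names match the diff below; everything else is assumed.
#[derive(Clone, Debug)]
pub struct LanguageModelCacheConfiguration {
    /// Maximum number of cache breakpoints ("anchors") to mark in a single request.
    pub max_cache_anchors: usize,
    /// Whether to speculatively mark content for caching before it is known to be reused.
    pub should_speculate: bool,
    /// Minimum prompt size, in tokens, before any cache anchors are added.
    pub min_total_token: usize,
}
```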


@@ -1,7 +1,7 @@
 use super::open_ai::count_open_ai_tokens;
 use crate::{
-    settings::AllLanguageModelSettings, CloudModel, LanguageModel, LanguageModelId,
-    LanguageModelName, LanguageModelProviderId, LanguageModelProviderName,
+    settings::AllLanguageModelSettings, CloudModel, LanguageModel, LanguageModelCacheConfiguration,
+    LanguageModelId, LanguageModelName, LanguageModelProviderId, LanguageModelProviderName,
     LanguageModelProviderState, LanguageModelRequest, RateLimiter, ZedModel,
 };
 use anthropic::AnthropicError;
@@ -56,6 +56,7 @@ pub struct AvailableModel {
     name: String,
     max_tokens: usize,
     tool_override: Option<String>,
+    cache_configuration: Option<LanguageModelCacheConfiguration>,
 }

 pub struct CloudLanguageModelProvider {
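
For illustration, a hedged sketch of how the new `cache_configuration` field could be populated when defining a custom model. The model name and numeric values are hypothetical, not taken from this commit, and `AvailableModel` may carry fields beyond those visible in the hunk above.

```rust
// Hypothetical values, for illustration only.
let model = AvailableModel {
    name: "claude-3-5-sonnet".into(),
    max_tokens: 200_000,
    tool_override: None,
    cache_configuration: Some(LanguageModelCacheConfiguration {
        // Up to four cache breakpoints per request (illustrative).
        max_cache_anchors: 4,
        // Opt in to speculative caching of prompt prefixes (illustrative).
        should_speculate: true,
        // Only attempt caching once the prompt reaches this token count (illustrative).
        min_total_token: 10_000,
    }),
};
```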
@@ -202,6 +203,13 @@ impl LanguageModelProvider for CloudLanguageModelProvider {
                     name: model.name.clone(),
                     max_tokens: model.max_tokens,
                     tool_override: model.tool_override.clone(),
+                    cache_configuration: model.cache_configuration.as_ref().map(|config| {
+                        anthropic::AnthropicModelCacheConfiguration {
+                            max_cache_anchors: config.max_cache_anchors,
+                            should_speculate: config.should_speculate,
+                            min_total_token: config.min_total_token,
+                        }
+                    }),
                 })
             }
             AvailableProvider::OpenAi => CloudModel::OpenAi(open_ai::Model::Custom {