Implement Anthropic prompt caching (#16274)

Release Notes:

- Adds support for Prompt Caching in Anthropic. For models that support
it this can dramatically lower cost while improving performance.
This commit is contained in:
Roy Williams 2024-08-15 23:21:06 -04:00 committed by GitHub
parent 09b6e3f2a6
commit 46fb917e02
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 338 additions and 70 deletions

View file

@ -1,7 +1,7 @@
use crate::{
settings::AllLanguageModelSettings, LanguageModel, LanguageModelId, LanguageModelName,
LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
LanguageModelProviderState, LanguageModelRequest, RateLimiter, Role,
settings::AllLanguageModelSettings, LanguageModel, LanguageModelCacheConfiguration,
LanguageModelId, LanguageModelName, LanguageModelProvider, LanguageModelProviderId,
LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest, RateLimiter, Role,
};
use anthropic::AnthropicError;
use anyhow::{anyhow, Context as _, Result};
@ -38,6 +38,7 @@ pub struct AvailableModel {
pub name: String,
pub max_tokens: usize,
pub tool_override: Option<String>,
pub cache_configuration: Option<LanguageModelCacheConfiguration>,
}
pub struct AnthropicLanguageModelProvider {
@ -171,6 +172,13 @@ impl LanguageModelProvider for AnthropicLanguageModelProvider {
name: model.name.clone(),
max_tokens: model.max_tokens,
tool_override: model.tool_override.clone(),
cache_configuration: model.cache_configuration.as_ref().map(|config| {
anthropic::AnthropicModelCacheConfiguration {
max_cache_anchors: config.max_cache_anchors,
should_speculate: config.should_speculate,
min_total_token: config.min_total_token,
}
}),
},
);
}
@ -351,6 +359,16 @@ impl LanguageModel for AnthropicModel {
.boxed()
}
fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
self.model
.cache_configuration()
.map(|config| LanguageModelCacheConfiguration {
max_cache_anchors: config.max_cache_anchors,
should_speculate: config.should_speculate,
min_total_token: config.min_total_token,
})
}
fn use_any_tool(
&self,
request: LanguageModelRequest,