Implement Anthropic prompt caching (#16274)

Release Notes:

- Adds support for Prompt Caching in Anthropic. For models that support
it this can dramatically lower cost while improving performance.
This commit is contained in:
Roy Williams 2024-08-15 23:21:06 -04:00 committed by GitHub
parent 09b6e3f2a6
commit 46fb917e02
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 338 additions and 70 deletions

View file

@ -14,6 +14,14 @@ pub use supported_countries::*;
pub const ANTHROPIC_API_URL: &'static str = "https://api.anthropic.com";
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
pub min_total_token: usize,
pub should_speculate: bool,
pub max_cache_anchors: usize,
}
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
@ -32,6 +40,8 @@ pub enum Model {
max_tokens: usize,
/// Override this model with a different Anthropic model for tool calls.
tool_override: Option<String>,
/// Indicates whether this custom model supports caching.
cache_configuration: Option<AnthropicModelCacheConfiguration>,
},
}
@ -70,6 +80,21 @@ impl Model {
}
}
pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
match self {
Self::Claude3_5Sonnet | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
min_total_token: 2_048,
should_speculate: true,
max_cache_anchors: 4,
}),
Self::Custom {
cache_configuration,
..
} => cache_configuration.clone(),
_ => None,
}
}
pub fn max_token_count(&self) -> usize {
match self {
Self::Claude3_5Sonnet
@ -104,7 +129,10 @@ pub async fn complete(
.method(Method::POST)
.uri(uri)
.header("Anthropic-Version", "2023-06-01")
.header("Anthropic-Beta", "tools-2024-04-04")
.header(
"Anthropic-Beta",
"tools-2024-04-04,prompt-caching-2024-07-31",
)
.header("X-Api-Key", api_key)
.header("Content-Type", "application/json");
@ -161,7 +189,10 @@ pub async fn stream_completion(
.method(Method::POST)
.uri(uri)
.header("Anthropic-Version", "2023-06-01")
.header("Anthropic-Beta", "tools-2024-04-04")
.header(
"Anthropic-Beta",
"tools-2024-04-04,prompt-caching-2024-07-31",
)
.header("X-Api-Key", api_key)
.header("Content-Type", "application/json");
if let Some(low_speed_timeout) = low_speed_timeout {
@ -226,7 +257,7 @@ pub fn extract_text_from_events(
match response {
Ok(response) => match response {
Event::ContentBlockStart { content_block, .. } => match content_block {
Content::Text { text } => Some(Ok(text)),
Content::Text { text, .. } => Some(Ok(text)),
_ => None,
},
Event::ContentBlockDelta { delta, .. } => match delta {
@ -285,13 +316,25 @@ pub async fn extract_tool_args_from_events(
}))
}
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
Ephemeral,
}
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
#[serde(rename = "type")]
pub cache_type: CacheControlType,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
pub role: Role,
pub content: Vec<Content>,
}
#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
User,
@ -302,19 +345,31 @@ pub enum Role {
#[serde(tag = "type")]
pub enum Content {
#[serde(rename = "text")]
Text { text: String },
Text {
text: String,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
#[serde(rename = "image")]
Image { source: ImageSource },
Image {
source: ImageSource,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
#[serde(rename = "tool_use")]
ToolUse {
id: String,
name: String,
input: serde_json::Value,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
#[serde(rename = "tool_result")]
ToolResult {
tool_use_id: String,
content: String,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
}