Implement Anthropic prompt caching (#16274)
Release Notes: - Adds support for Prompt Caching in Anthropic. For models that support it this can dramatically lower cost while improving performance.
This commit is contained in:
parent
09b6e3f2a6
commit
46fb917e02
11 changed files with 338 additions and 70 deletions
|
@ -14,6 +14,14 @@ pub use supported_countries::*;
|
|||
|
||||
pub const ANTHROPIC_API_URL: &'static str = "https://api.anthropic.com";
|
||||
|
||||
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
|
||||
pub struct AnthropicModelCacheConfiguration {
|
||||
pub min_total_token: usize,
|
||||
pub should_speculate: bool,
|
||||
pub max_cache_anchors: usize,
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
|
||||
pub enum Model {
|
||||
|
@ -32,6 +40,8 @@ pub enum Model {
|
|||
max_tokens: usize,
|
||||
/// Override this model with a different Anthropic model for tool calls.
|
||||
tool_override: Option<String>,
|
||||
/// Indicates whether this custom model supports caching.
|
||||
cache_configuration: Option<AnthropicModelCacheConfiguration>,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -70,6 +80,21 @@ impl Model {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
|
||||
match self {
|
||||
Self::Claude3_5Sonnet | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
|
||||
min_total_token: 2_048,
|
||||
should_speculate: true,
|
||||
max_cache_anchors: 4,
|
||||
}),
|
||||
Self::Custom {
|
||||
cache_configuration,
|
||||
..
|
||||
} => cache_configuration.clone(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn max_token_count(&self) -> usize {
|
||||
match self {
|
||||
Self::Claude3_5Sonnet
|
||||
|
@ -104,7 +129,10 @@ pub async fn complete(
|
|||
.method(Method::POST)
|
||||
.uri(uri)
|
||||
.header("Anthropic-Version", "2023-06-01")
|
||||
.header("Anthropic-Beta", "tools-2024-04-04")
|
||||
.header(
|
||||
"Anthropic-Beta",
|
||||
"tools-2024-04-04,prompt-caching-2024-07-31",
|
||||
)
|
||||
.header("X-Api-Key", api_key)
|
||||
.header("Content-Type", "application/json");
|
||||
|
||||
|
@ -161,7 +189,10 @@ pub async fn stream_completion(
|
|||
.method(Method::POST)
|
||||
.uri(uri)
|
||||
.header("Anthropic-Version", "2023-06-01")
|
||||
.header("Anthropic-Beta", "tools-2024-04-04")
|
||||
.header(
|
||||
"Anthropic-Beta",
|
||||
"tools-2024-04-04,prompt-caching-2024-07-31",
|
||||
)
|
||||
.header("X-Api-Key", api_key)
|
||||
.header("Content-Type", "application/json");
|
||||
if let Some(low_speed_timeout) = low_speed_timeout {
|
||||
|
@ -226,7 +257,7 @@ pub fn extract_text_from_events(
|
|||
match response {
|
||||
Ok(response) => match response {
|
||||
Event::ContentBlockStart { content_block, .. } => match content_block {
|
||||
Content::Text { text } => Some(Ok(text)),
|
||||
Content::Text { text, .. } => Some(Ok(text)),
|
||||
_ => None,
|
||||
},
|
||||
Event::ContentBlockDelta { delta, .. } => match delta {
|
||||
|
@ -285,13 +316,25 @@ pub async fn extract_tool_args_from_events(
|
|||
}))
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum CacheControlType {
|
||||
Ephemeral,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
|
||||
pub struct CacheControl {
|
||||
#[serde(rename = "type")]
|
||||
pub cache_type: CacheControlType,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct Message {
|
||||
pub role: Role,
|
||||
pub content: Vec<Content>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum Role {
|
||||
User,
|
||||
|
@ -302,19 +345,31 @@ pub enum Role {
|
|||
#[serde(tag = "type")]
|
||||
pub enum Content {
|
||||
#[serde(rename = "text")]
|
||||
Text { text: String },
|
||||
Text {
|
||||
text: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
cache_control: Option<CacheControl>,
|
||||
},
|
||||
#[serde(rename = "image")]
|
||||
Image { source: ImageSource },
|
||||
Image {
|
||||
source: ImageSource,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
cache_control: Option<CacheControl>,
|
||||
},
|
||||
#[serde(rename = "tool_use")]
|
||||
ToolUse {
|
||||
id: String,
|
||||
name: String,
|
||||
input: serde_json::Value,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
cache_control: Option<CacheControl>,
|
||||
},
|
||||
#[serde(rename = "tool_result")]
|
||||
ToolResult {
|
||||
tool_use_id: String,
|
||||
content: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
cache_control: Option<CacheControl>,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue