bedrock: Add prompt caching support (#33194)
Closes https://github.com/zed-industries/zed/issues/33221 Bedrock has a caching API similar to Anthropic's: to cache messages up to a certain point, we add a special cache point block to that message. Additionally, we can cache tool definitions by adding a cache point block after the tools spec. See: [Bedrock User Guide: Prompt Caching](https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html#prompt-caching-models) Release Notes: - bedrock: Added prompt caching support --------- Co-authored-by: Oleksiy Syvokon <oleksiy@zed.dev>
This commit is contained in:
parent
59aeede50d
commit
0905255fd1
2 changed files with 101 additions and 10 deletions
|
@ -11,6 +11,13 @@ pub enum BedrockModelMode {
|
|||
},
|
||||
}
|
||||
|
||||
/// Prompt-caching parameters attached to a Bedrock model.
///
/// Returned by `Model::cache_configuration` and optionally carried by
/// `Model::Custom` through its `cache_configuration` field.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
|
||||
pub struct BedrockModelCacheConfiguration {
|
||||
// NOTE(review): presumably the maximum number of cache point blocks that may
// be placed in one request; confirm against the code that builds requests.
// Every built-in configuration in this file sets it to 4.
pub max_cache_anchors: usize,
|
||||
// NOTE(review): presumably the minimum prompt size, in tokens, before caching
// is attempted; confirm against the caller.
// Built-in configurations use 1024, except Claude 3.5 Haiku which uses 2048.
pub min_total_token: u64,
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
|
||||
pub enum Model {
|
||||
|
@ -104,6 +111,7 @@ pub enum Model {
|
|||
display_name: Option<String>,
|
||||
max_output_tokens: Option<u64>,
|
||||
default_temperature: Option<f32>,
|
||||
cache_configuration: Option<BedrockModelCacheConfiguration>,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -401,6 +409,56 @@ impl Model {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn supports_caching(&self) -> bool {
|
||||
match self {
|
||||
// Only Claude models on Bedrock support caching
|
||||
// Nova models support only text caching
|
||||
// https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html#prompt-caching-models
|
||||
Self::Claude3_5Haiku
|
||||
| Self::Claude3_7Sonnet
|
||||
| Self::Claude3_7SonnetThinking
|
||||
| Self::ClaudeSonnet4
|
||||
| Self::ClaudeSonnet4Thinking
|
||||
| Self::ClaudeOpus4
|
||||
| Self::ClaudeOpus4Thinking => true,
|
||||
|
||||
// Custom models - check if they have cache configuration
|
||||
Self::Custom {
|
||||
cache_configuration,
|
||||
..
|
||||
} => cache_configuration.is_some(),
|
||||
|
||||
// All other models don't support caching
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn cache_configuration(&self) -> Option<BedrockModelCacheConfiguration> {
|
||||
match self {
|
||||
Self::Claude3_7Sonnet
|
||||
| Self::Claude3_7SonnetThinking
|
||||
| Self::ClaudeSonnet4
|
||||
| Self::ClaudeSonnet4Thinking
|
||||
| Self::ClaudeOpus4
|
||||
| Self::ClaudeOpus4Thinking => Some(BedrockModelCacheConfiguration {
|
||||
max_cache_anchors: 4,
|
||||
min_total_token: 1024,
|
||||
}),
|
||||
|
||||
Self::Claude3_5Haiku => Some(BedrockModelCacheConfiguration {
|
||||
max_cache_anchors: 4,
|
||||
min_total_token: 2048,
|
||||
}),
|
||||
|
||||
Self::Custom {
|
||||
cache_configuration,
|
||||
..
|
||||
} => cache_configuration.clone(),
|
||||
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mode(&self) -> BedrockModelMode {
|
||||
match self {
|
||||
Model::Claude3_7SonnetThinking => BedrockModelMode::Thinking {
|
||||
|
@ -660,6 +718,7 @@ mod tests {
|
|||
display_name: Some("My Custom Model".to_string()),
|
||||
max_output_tokens: Some(8192),
|
||||
default_temperature: Some(0.7),
|
||||
cache_configuration: None,
|
||||
};
|
||||
|
||||
// Custom model should return its name unchanged
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue