Implement Anthropic prompt caching (#16274)
Release Notes:
- Adds support for Prompt Caching in Anthropic. For models that support it, this can dramatically lower cost while improving performance.
This commit is contained in:
parent 09b6e3f2a6
commit 46fb917e02

11 changed files with 338 additions and 70 deletions
```diff
@@ -7,14 +7,17 @@ use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use settings::{update_settings_file, Settings, SettingsSources};
 
-use crate::provider::{
-    self,
-    anthropic::AnthropicSettings,
-    cloud::{self, ZedDotDevSettings},
-    copilot_chat::CopilotChatSettings,
-    google::GoogleSettings,
-    ollama::OllamaSettings,
-    open_ai::OpenAiSettings,
+use crate::{
+    provider::{
+        self,
+        anthropic::AnthropicSettings,
+        cloud::{self, ZedDotDevSettings},
+        copilot_chat::CopilotChatSettings,
+        google::GoogleSettings,
+        ollama::OllamaSettings,
+        open_ai::OpenAiSettings,
+    },
+    LanguageModelCacheConfiguration,
 };
 
 /// Initializes the language model settings.
```
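For reference, here is a minimal sketch of what the `LanguageModelCacheConfiguration` type imported above might look like. The field names are taken from the second hunk below (`max_cache_anchors`, `should_speculate`, `min_total_token`); the concrete types and doc comments are assumptions, not the crate's actual definition.

```rust
// Sketch only: field names come from the diff in this commit; the
// types (usize/bool/u64) are assumed, not copied from the crate.
#[derive(Clone, Debug)]
pub struct LanguageModelCacheConfiguration {
    /// Maximum number of cache breakpoints ("anchors") to place in a prompt.
    pub max_cache_anchors: usize,
    /// Whether to speculatively populate the cache ahead of the next request.
    pub should_speculate: bool,
    /// Minimum prompt size (in tokens) before caching is worth enabling.
    pub min_total_token: u64,
}
```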
```diff
@@ -93,10 +96,18 @@ impl AnthropicSettingsContent {
                         name,
                         max_tokens,
                         tool_override,
+                        cache_configuration,
                     } => Some(provider::anthropic::AvailableModel {
                         name,
                         max_tokens,
                         tool_override,
+                        cache_configuration: cache_configuration.as_ref().map(
+                            |config| LanguageModelCacheConfiguration {
+                                max_cache_anchors: config.max_cache_anchors,
+                                should_speculate: config.should_speculate,
+                                min_total_token: config.min_total_token,
+                            },
+                        ),
                     }),
                     _ => None,
                 })
```
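To make the conversion above concrete, here is a hedged sketch of how such a configuration could gate cache-anchor placement when assembling an Anthropic request. Anthropic's prompt caching works by tagging content blocks with a `cache_control` marker of type `"ephemeral"`; everything else here (the `Block` type, `apply_cache_anchors`, the token estimate) is hypothetical illustration, not code from this commit. It reuses the `LanguageModelCacheConfiguration` sketch from earlier.

```rust
/// Hypothetical content block; real code would use the Anthropic API types.
struct Block {
    text: String,
    cache_control: Option<&'static str>, // e.g. Some("ephemeral")
}

/// Sketch: mark up to `max_cache_anchors` trailing blocks as cache
/// breakpoints, but only once the prompt is large enough to benefit.
fn apply_cache_anchors(
    blocks: &mut [Block],
    config: &LanguageModelCacheConfiguration,
    estimated_tokens: u64,
) {
    if estimated_tokens < config.min_total_token {
        return; // prompt too small for caching to pay off
    }
    for block in blocks.iter_mut().rev().take(config.max_cache_anchors) {
        block.cache_control = Some("ephemeral");
    }
}
```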