Implement Anthropic prompt caching (#16274)

Release Notes:

- Adds support for Prompt Caching in Anthropic. For models that support it, this can dramatically lower cost while improving performance.
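
For context on what the diff below wires up: Anthropic's prompt caching works by marking a breakpoint on a content block with a `cache_control` field, letting the API reuse everything up to that anchor on subsequent requests. A minimal sketch of the request shape (the `cache_control` field and block-structured `system` prompt follow Anthropic's public Messages API; the helper function itself is hypothetical):

```rust
// Sketch of an Anthropic Messages API request body with one cache anchor.
// At the time of this commit the request also needed the beta header
// `anthropic-beta: prompt-caching-2024-07-31`.
use serde_json::json;

fn cached_request_body(system_prompt: &str, user_text: &str) -> serde_json::Value {
    json!({
        "model": "claude-3-5-sonnet-20240620",
        "max_tokens": 1024,
        "system": [{
            "type": "text",
            "text": system_prompt,
            // Cache anchor: the prefix up to and including this block is
            // written to the prompt cache and reused on later requests.
            "cache_control": { "type": "ephemeral" }
        }],
        "messages": [{ "role": "user", "content": user_text }]
    })
}
```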
Roy Williams authored on 2024-08-15 23:21:06 -04:00 (committed by GitHub)
parent 09b6e3f2a6 · commit 46fb917e02
11 changed files with 338 additions and 70 deletions


@@ -20,7 +20,7 @@
 pub use registry::*;
 pub use request::*;
 pub use role::*;
 use schemars::JsonSchema;
-use serde::de::DeserializeOwned;
+use serde::{de::DeserializeOwned, Deserialize, Serialize};
 use std::{future::Future, sync::Arc};
 use ui::IconName;
@@ -43,6 +43,14 @@ pub enum LanguageModelAvailability {
     RequiresPlan(Plan),
 }
 
+/// Configuration for caching language model messages.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
+pub struct LanguageModelCacheConfiguration {
+    pub max_cache_anchors: usize,
+    pub should_speculate: bool,
+    pub min_total_token: usize,
+}
+
 pub trait LanguageModel: Send + Sync {
     fn id(&self) -> LanguageModelId;
     fn name(&self) -> LanguageModelName;
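
The struct added above gives each provider a way to advertise its caching limits. A sketch of what the Anthropic provider might return, with illustrative values rather than the ones this commit ships (Anthropic allows at most four cache breakpoints per request, and prompts below a model-specific token minimum are not cacheable):

```rust
// Illustrative values, not the commit's exact configuration.
fn anthropic_cache_configuration() -> Option<LanguageModelCacheConfiguration> {
    Some(LanguageModelCacheConfiguration {
        max_cache_anchors: 4,   // cap on `cache_control` markers per request
        should_speculate: true, // speculatively mark prefixes that may be reused
        min_total_token: 1024,  // skip caching for prompts below this size
    })
}
```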
@@ -78,6 +86,10 @@ pub trait LanguageModel: Send + Sync {
         cx: &AsyncAppContext,
     ) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>>;
 
+    fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
+        None
+    }
+
     #[cfg(any(test, feature = "test-support"))]
     fn as_fake(&self) -> &provider::fake::FakeLanguageModel {
         unimplemented!()
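
Since the default implementation returns `None`, caching stays strictly opt-in for providers. A hypothetical call site (`plan_cache_anchors` is not part of this diff) showing how the two new pieces fit together:

```rust
// Hypothetical helper: decide how many cache anchors to emit for a
// request, given the limits the model advertises.
fn plan_cache_anchors(model: &dyn LanguageModel, prompt_tokens: usize) -> usize {
    match model.cache_configuration() {
        // Large enough to be worth caching: use every available anchor.
        Some(config) if prompt_tokens >= config.min_total_token => config.max_cache_anchors,
        // Too small to cache, or the model does not support caching.
        _ => 0,
    }
}
```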