Implement Anthropic prompt caching (#16274)

Release Notes:

- Adds support for Prompt Caching in Anthropic. For models that support it, this can dramatically lower cost while improving performance.
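
For context on what the diff below wires up: Anthropic's prompt caching works by marking a breakpoint on a content block with a `cache_control` field, letting the API reuse everything up to that anchor on subsequent requests. A minimal sketch of the request shape (the `cache_control` field and block-structured `system` prompt follow Anthropic's public Messages API; the helper function itself is hypothetical):

```rust
// Sketch of an Anthropic Messages API request body with one cache anchor.
// At the time of this commit the request also needed the beta header
// `anthropic-beta: prompt-caching-2024-07-31`.
use serde_json::json;

fn cached_request_body(system_prompt: &str, user_text: &str) -> serde_json::Value {
    json!({
        "model": "claude-3-5-sonnet-20240620",
        "max_tokens": 1024,
        "system": [{
            "type": "text",
            "text": system_prompt,
            // Cache anchor: the prefix up to and including this block is
            // written to the prompt cache and reused on later requests.
            "cache_control": { "type": "ephemeral" }
        }],
        "messages": [{ "role": "user", "content": user_text }]
    })
}
```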
Roy Williams authored on 2024-08-15 23:21:06 -04:00 (committed by GitHub)
parent 09b6e3f2a6 · commit 46fb917e02
11 changed files with 338 additions and 70 deletions


@@ -20,7 +20,7 @@
 pub use registry::*;
 pub use request::*;
 pub use role::*;
 use schemars::JsonSchema;
-use serde::de::DeserializeOwned;
+use serde::{de::DeserializeOwned, Deserialize, Serialize};
 use std::{future::Future, sync::Arc};
 use ui::IconName;
@@ -43,6 +43,14 @@ pub enum LanguageModelAvailability {
     RequiresPlan(Plan),
 }
 
+/// Configuration for caching language model messages.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
+pub struct LanguageModelCacheConfiguration {
+    pub max_cache_anchors: usize,
+    pub should_speculate: bool,
+    pub min_total_token: usize,
+}
+
 pub trait LanguageModel: Send + Sync {
     fn id(&self) -> LanguageModelId;
     fn name(&self) -> LanguageModelName;
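
The struct added above gives each provider a way to advertise its caching limits. A sketch of what the Anthropic provider might return, with illustrative values rather than the ones this commit ships (Anthropic allows at most four cache breakpoints per request, and prompts below a model-specific token minimum are not cacheable):

```rust
// Illustrative values, not the commit's exact configuration.
fn anthropic_cache_configuration() -> Option<LanguageModelCacheConfiguration> {
    Some(LanguageModelCacheConfiguration {
        max_cache_anchors: 4,   // cap on `cache_control` markers per request
        should_speculate: true, // speculatively mark prefixes that may be reused
        min_total_token: 1024,  // skip caching for prompts below this size
    })
}
```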
@@ -78,6 +86,10 @@ pub trait LanguageModel: Send + Sync {
         cx: &AsyncAppContext,
     ) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>>;
 
+    fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
+        None
+    }
+
     #[cfg(any(test, feature = "test-support"))]
     fn as_fake(&self) -> &provider::fake::FakeLanguageModel {
         unimplemented!()
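
Since the default implementation returns `None`, caching stays strictly opt-in for providers. A hypothetical call site (`plan_cache_anchors` is not part of this diff) showing how the two new pieces fit together:

```rust
// Hypothetical helper: decide how many cache anchors to emit for a
// request, given the limits the model advertises.
fn plan_cache_anchors(model: &dyn LanguageModel, prompt_tokens: usize) -> usize {
    match model.cache_configuration() {
        // Large enough to be worth caching: use every available anchor.
        Some(config) if prompt_tokens >= config.min_total_token => config.max_cache_anchors,
        // Too small to cache, or the model does not support caching.
        _ => 0,
    }
}
```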