Implement Anthropic prompt caching (#16274)
Release Notes: - Adds support for prompt caching in Anthropic. For models that support it, this can dramatically lower cost while improving performance.
This commit is contained in:
parent
09b6e3f2a6
commit
46fb917e02
11 changed files with 338 additions and 70 deletions
|
@ -20,7 +20,7 @@ pub use registry::*;
|
|||
pub use request::*;
|
||||
pub use role::*;
|
||||
use schemars::JsonSchema;
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde::{de::DeserializeOwned, Deserialize, Serialize};
|
||||
use std::{future::Future, sync::Arc};
|
||||
use ui::IconName;
|
||||
|
||||
|
@ -43,6 +43,14 @@ pub enum LanguageModelAvailability {
|
|||
RequiresPlan(Plan),
|
||||
}
|
||||
|
||||
/// Configuration for caching language model messages.
///
/// A model exposes this through `LanguageModel::cache_configuration`, which
/// returns `None` by default.
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
|
||||
pub struct LanguageModelCacheConfiguration {
|
||||
/// NOTE(review): presumably the maximum number of cache anchors
/// (cache breakpoints) that may be placed in a single request; confirm
/// against the provider implementation.
pub max_cache_anchors: usize,
|
||||
/// NOTE(review): presumably whether the cache should be speculatively
/// populated ahead of the real request; confirm against the caller.
pub should_speculate: bool,
|
||||
/// NOTE(review): presumably the minimum total token count a request must
/// reach before caching is attempted; confirm against the caller.
pub min_total_token: usize,
|
||||
}
|
||||
|
||||
pub trait LanguageModel: Send + Sync {
|
||||
fn id(&self) -> LanguageModelId;
|
||||
fn name(&self) -> LanguageModelName;
|
||||
|
@ -78,6 +86,10 @@ pub trait LanguageModel: Send + Sync {
|
|||
cx: &AsyncAppContext,
|
||||
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>>;
|
||||
|
||||
/// Returns the caching configuration for this model, if it has one.
///
/// The default implementation returns `None`; implementors can override
/// this method to supply a `LanguageModelCacheConfiguration`.
fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
|
||||
None
|
||||
}
|
||||
|
||||
#[cfg(any(test, feature = "test-support"))]
|
||||
fn as_fake(&self) -> &provider::fake::FakeLanguageModel {
|
||||
unimplemented!()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue