diff --git a/crates/anthropic/src/anthropic.rs b/crates/anthropic/src/anthropic.rs
index 7c1774dee4..bdbb5e465e 100644
--- a/crates/anthropic/src/anthropic.rs
+++ b/crates/anthropic/src/anthropic.rs
@@ -14,6 +14,14 @@ pub use supported_countries::*;
 
 pub const ANTHROPIC_API_URL: &'static str = "https://api.anthropic.com";
 
+#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
+#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
+pub struct AnthropicModelCacheConfiguration {
+    pub min_total_token: usize,
+    pub should_speculate: bool,
+    pub max_cache_anchors: usize,
+}
+
 #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
 #[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
 pub enum Model {
@@ -32,6 +40,8 @@ pub enum Model {
         max_tokens: usize,
         /// Override this model with a different Anthropic model for tool calls.
         tool_override: Option<String>,
+        /// Indicates whether this custom model supports caching.
+        cache_configuration: Option<AnthropicModelCacheConfiguration>,
     },
 }
 
@@ -70,6 +80,21 @@ impl Model {
         }
     }
 
+    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
+        match self {
+            Self::Claude3_5Sonnet | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
+                min_total_token: 2_048,
+                should_speculate: true,
+                max_cache_anchors: 4,
+            }),
+            Self::Custom {
+                cache_configuration,
+                ..
+            } => cache_configuration.clone(),
+            _ => None,
+        }
+    }
+
     pub fn max_token_count(&self) -> usize {
         match self {
             Self::Claude3_5Sonnet
@@ -104,7 +129,10 @@ pub async fn complete(
         .method(Method::POST)
         .uri(uri)
         .header("Anthropic-Version", "2023-06-01")
-        .header("Anthropic-Beta", "tools-2024-04-04")
+        .header(
+            "Anthropic-Beta",
+            "tools-2024-04-04,prompt-caching-2024-07-31",
+        )
        .header("X-Api-Key", api_key)
         .header("Content-Type", "application/json");
 
@@ -161,7 +189,10 @@ pub async fn stream_completion(
         .method(Method::POST)
         .uri(uri)
         .header("Anthropic-Version", "2023-06-01")
-        .header("Anthropic-Beta", "tools-2024-04-04")
+        .header(
+            "Anthropic-Beta",
+            "tools-2024-04-04,prompt-caching-2024-07-31",
+        )
         .header("X-Api-Key", api_key)
         .header("Content-Type", "application/json");
     if let Some(low_speed_timeout) = low_speed_timeout {
@@ -226,7 +257,7 @@ pub fn extract_text_from_events(
         match response {
             Ok(response) => match response {
                 Event::ContentBlockStart { content_block, .. } => match content_block {
-                    Content::Text { text } => Some(Ok(text)),
+                    Content::Text { text, .. } => Some(Ok(text)),
                     _ => None,
                 },
                 Event::ContentBlockDelta { delta, .. } => match delta {
@@ -285,13 +316,25 @@ pub async fn extract_tool_args_from_events(
     }))
 }
 
+#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
+#[serde(rename_all = "lowercase")]
+pub enum CacheControlType {
+    Ephemeral,
+}
+
+#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
+pub struct CacheControl {
+    #[serde(rename = "type")]
+    pub cache_type: CacheControlType,
+}
+
 #[derive(Debug, Serialize, Deserialize)]
 pub struct Message {
     pub role: Role,
     pub content: Vec<Content>,
 }
 
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
 #[serde(rename_all = "lowercase")]
 pub enum Role {
     User,
@@ -302,19 +345,31 @@ pub enum Role {
 #[serde(tag = "type")]
 pub enum Content {
     #[serde(rename = "text")]
-    Text { text: String },
+    Text {
+        text: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        cache_control: Option<CacheControl>,
+    },
     #[serde(rename = "image")]
-    Image { source: ImageSource },
+    Image {
+        source: ImageSource,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        cache_control: Option<CacheControl>,
+    },
     #[serde(rename = "tool_use")]
     ToolUse {
         id: String,
         name: String,
         input: serde_json::Value,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        cache_control: Option<CacheControl>,
     },
     #[serde(rename = "tool_result")]
     ToolResult {
         tool_use_id: String,
         content: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        cache_control: Option<CacheControl>,
     },
 }
diff --git a/crates/assistant/src/context.rs b/crates/assistant/src/context.rs
index a49a624c95..5bedd80f55 100644
--- a/crates/assistant/src/context.rs
+++ b/crates/assistant/src/context.rs
@@ -21,8 +21,8 @@ use gpui::{
 use language::{AnchorRangeExt, Bias, Buffer, LanguageRegistry, OffsetRangeExt, Point, ToOffset};
 use language_model::{
-    LanguageModelImage, LanguageModelRegistry, LanguageModelRequest, LanguageModelRequestMessage,
-    Role,
+    LanguageModel, LanguageModelCacheConfiguration, LanguageModelImage, LanguageModelRegistry,
+    LanguageModelRequest, LanguageModelRequestMessage, Role,
 };
 use open_ai::Model as OpenAiModel;
 use paths::{context_images_dir, contexts_dir};
@@ -30,7 +30,7 @@ use project::Project;
 use serde::{Deserialize, Serialize};
 use smallvec::SmallVec;
 use std::{
-    cmp::Ordering,
+    cmp::{max, Ordering},
     collections::hash_map,
     fmt::Debug,
     iter, mem,
@@ -107,6 +107,8 @@ impl ContextOperation {
                         message.status.context("invalid status")?,
                     ),
                     timestamp: id.0,
+                    should_cache: false,
+                    is_cache_anchor: false,
                 },
                 version: language::proto::deserialize_version(&insert.version),
             })
@@ -121,6 +123,8 @@ impl ContextOperation {
                     timestamp: language::proto::deserialize_timestamp(
                         update.timestamp.context("invalid timestamp")?,
                     ),
+                    should_cache: false,
+                    is_cache_anchor: false,
                 },
                 version: language::proto::deserialize_version(&update.version),
             }),
@@ -313,6 +317,8 @@ pub struct MessageMetadata {
     pub role: Role,
     pub status: MessageStatus,
     timestamp: clock::Lamport,
+    should_cache: bool,
+    is_cache_anchor: bool,
 }
 
 #[derive(Clone, Debug)]
@@ -338,6 +344,7 @@ pub struct Message {
     pub anchor: language::Anchor,
     pub role: Role,
     pub status: MessageStatus,
+    pub cache: bool,
 }
 
 impl Message {
@@ -373,6 +380,7 @@ impl Message {
         LanguageModelRequestMessage {
             role: self.role,
             content,
+            cache: self.cache,
         }
     }
 }
@@ -421,6 +429,7 @@ pub struct Context {
     token_count: Option<usize>,
     pending_token_count: Task<Option<()>>,
     pending_save: Task<Result<()>>,
+    pending_cache_warming_task: Task<Option<()>>,
     path: Option<PathBuf>,
     _subscriptions: Vec<Subscription>,
     telemetry: Option<Arc<Telemetry>>,
@@ -498,6 +507,7 @@ impl Context {
             pending_completions: Default::default(),
             token_count: None,
             pending_token_count: Task::ready(None),
+            pending_cache_warming_task: Task::ready(None),
             _subscriptions: vec![cx.subscribe(&buffer, Self::handle_buffer_event)],
             pending_save: Task::ready(Ok(())),
             path: None,
@@ -524,6 +534,8 @@ impl Context {
                 role: Role::User,
                 status: MessageStatus::Done,
                 timestamp: first_message_id.0,
+                should_cache: false,
+                is_cache_anchor: false,
             },
         );
         this.message_anchors.push(message);
@@ -948,6 +960,7 @@ impl Context {
             let token_count = cx.update(|cx| model.count_tokens(request, cx))?.await?;
             this.update(&mut cx, |this, cx| {
                 this.token_count = Some(token_count);
+                this.start_cache_warming(&model, cx);
                 cx.notify()
             })
         }
@@ -955,6 +968,121 @@ impl Context {
         });
     }
 
+    pub fn mark_longest_messages_for_cache(
+        &mut self,
+        cache_configuration: &Option<LanguageModelCacheConfiguration>,
+        speculative: bool,
+        cx: &mut ModelContext<Self>,
+    ) -> bool {
+        let cache_configuration =
+            cache_configuration
+                .as_ref()
+                .unwrap_or(&LanguageModelCacheConfiguration {
+                    max_cache_anchors: 0,
+                    should_speculate: false,
+                    min_total_token: 0,
+                });
+
+        let messages: Vec<Message> = self
+            .messages_from_anchors(
+                self.message_anchors.iter().take(if speculative {
+                    self.message_anchors.len().saturating_sub(1)
+                } else {
+                    self.message_anchors.len()
+                }),
+                cx,
+            )
+            .filter(|message| message.offset_range.len() >= 5_000)
+            .collect();
+
+        let mut sorted_messages = messages.clone();
+        sorted_messages.sort_by(|a, b| b.offset_range.len().cmp(&a.offset_range.len()));
+        if cache_configuration.max_cache_anchors == 0 && cache_configuration.should_speculate {
+            // Some models support caching, but don't support anchors. In that case we want to
+            // mark the largest message as needing to be cached, but we will not mark it as an
+            // anchor.
+            sorted_messages.truncate(1);
+        } else {
+            // Save 1 anchor for the inline assistant.
+            sorted_messages.truncate(max(cache_configuration.max_cache_anchors, 1) - 1);
+        }
+
+        let longest_message_ids: HashSet<MessageId> = sorted_messages
+            .into_iter()
+            .map(|message| message.id)
+            .collect();
+
+        let cache_deltas: HashSet<MessageId> = self
+            .messages_metadata
+            .iter()
+            .filter_map(|(id, metadata)| {
+                let should_cache = longest_message_ids.contains(id);
+                let should_be_anchor = should_cache && cache_configuration.max_cache_anchors > 0;
+                if metadata.should_cache != should_cache
+                    || metadata.is_cache_anchor != should_be_anchor
+                {
+                    Some(*id)
+                } else {
+                    None
+                }
+            })
+            .collect();
+
+        let mut newly_cached_item = false;
+        for id in cache_deltas {
+            newly_cached_item = newly_cached_item || longest_message_ids.contains(&id);
+            self.update_metadata(id, cx, |metadata| {
+                metadata.should_cache = longest_message_ids.contains(&id);
+                metadata.is_cache_anchor =
+                    metadata.should_cache && (cache_configuration.max_cache_anchors > 0);
+            });
+        }
+        newly_cached_item
+    }
+
+    fn start_cache_warming(&mut self, model: &Arc<dyn LanguageModel>, cx: &mut ModelContext<Self>) {
+        let cache_configuration = model.cache_configuration();
+        if !self.mark_longest_messages_for_cache(&cache_configuration, true, cx) {
+            return;
+        }
+        if let Some(cache_configuration) = cache_configuration {
+            if !cache_configuration.should_speculate {
+                return;
+            }
+        }
+
+        let request = {
+            let mut req = self.to_completion_request(cx);
+            // Skip the last message because it's likely to change and
+            // therefore would be a waste to cache.
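+            // Instead, replace it with a minimal throwaway user turn so the
+            // speculative response stays as small (and cheap) as possible.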
+            req.messages.pop();
+            req.messages.push(LanguageModelRequestMessage {
+                role: Role::User,
+                content: vec!["Respond only with OK, nothing else.".into()],
+                cache: false,
+            });
+            req
+        };
+
+        let model = Arc::clone(model);
+        self.pending_cache_warming_task = cx.spawn(|_, cx| {
+            async move {
+                match model.stream_completion(request, &cx).await {
+                    Ok(mut stream) => {
+                        stream.next().await;
+                        log::info!("Cache warming completed successfully");
+                    }
+                    Err(e) => {
+                        log::warn!("Cache warming failed: {}", e);
+                    }
+                };
+
+                anyhow::Ok(())
+            }
+            .log_err()
+        });
+    }
+
     pub fn reparse_slash_commands(&mut self, cx: &mut ModelContext<Self>) {
         let buffer = self.buffer.read(cx);
         let mut row_ranges = self
@@ -1352,20 +1480,26 @@ impl Context {
         self.count_remaining_tokens(cx);
     }
 
-    pub fn assist(&mut self, cx: &mut ModelContext<Self>) -> Option<MessageAnchor> {
-        let provider = LanguageModelRegistry::read_global(cx).active_provider()?;
-        let model = LanguageModelRegistry::read_global(cx).active_model()?;
-        let last_message_id = self.message_anchors.iter().rev().find_map(|message| {
+    fn get_last_valid_message_id(&self, cx: &ModelContext<Self>) -> Option<MessageId> {
+        self.message_anchors.iter().rev().find_map(|message| {
             message
                 .start
                 .is_valid(self.buffer.read(cx))
                 .then_some(message.id)
-        })?;
+        })
+    }
+
+    pub fn assist(&mut self, cx: &mut ModelContext<Self>) -> Option<MessageAnchor> {
+        let provider = LanguageModelRegistry::read_global(cx).active_provider()?;
+        let model = LanguageModelRegistry::read_global(cx).active_model()?;
+        let last_message_id = self.get_last_valid_message_id(cx)?;
 
         if !provider.is_authenticated(cx) {
             log::info!("completion provider has no credentials");
             return None;
         }
+        // Compute which messages to cache, including the last one.
+        self.mark_longest_messages_for_cache(&model.cache_configuration(), false, cx);
 
         let request = self.to_completion_request(cx);
         let assistant_message = self
@@ -1580,6 +1714,8 @@ impl Context {
             role,
             status,
             timestamp: anchor.id.0,
+            should_cache: false,
+            is_cache_anchor: false,
         };
         self.insert_message(anchor.clone(), metadata.clone(), cx);
         self.push_op(
@@ -1696,6 +1832,8 @@ impl Context {
                 role,
                 status: MessageStatus::Done,
                 timestamp: suffix.id.0,
+                should_cache: false,
+                is_cache_anchor: false,
             };
             self.insert_message(suffix.clone(), suffix_metadata.clone(), cx);
             self.push_op(
@@ -1745,6 +1883,8 @@ impl Context {
                     role,
                     status: MessageStatus::Done,
                     timestamp: selection.id.0,
+                    should_cache: false,
+                    is_cache_anchor: false,
                 };
                 self.insert_message(selection.clone(), selection_metadata.clone(), cx);
                 self.push_op(
@@ -1811,6 +1951,7 @@ impl Context {
                 content: vec![
                     "Summarize the context into a short title without punctuation.".into(),
                 ],
+                cache: false,
             }));
         let request = LanguageModelRequest {
             messages: messages.collect(),
@@ -1910,14 +2051,22 @@ impl Context {
         result
     }
 
-    pub fn messages<'a>(&'a self, cx: &'a AppContext) -> impl 'a + Iterator<Item = Message> {
+    fn messages_from_anchors<'a>(
+        &'a self,
+        message_anchors: impl Iterator<Item = &'a MessageAnchor> + 'a,
+        cx: &'a AppContext,
+    ) -> impl 'a + Iterator<Item = Message> {
         let buffer = self.buffer.read(cx);
-        let messages = self.message_anchors.iter().enumerate();
+        let messages = message_anchors.enumerate();
         let images = self.image_anchors.iter();
 
         Self::messages_from_iters(buffer, &self.messages_metadata, messages, images)
     }
 
+    pub fn messages<'a>(&'a self, cx: &'a AppContext) -> impl 'a + Iterator<Item = Message> {
+        self.messages_from_anchors(self.message_anchors.iter(), cx)
+    }
+
     pub fn messages_from_iters<'a>(
         buffer: &'a Buffer,
         metadata: &'a HashMap<MessageId, MessageMetadata>,
@@ -1969,6 +2118,7 @@ impl Context {
                 anchor: message_anchor.start,
                 role: metadata.role,
                 status: metadata.status.clone(),
+                cache: metadata.is_cache_anchor,
                 image_offsets,
             });
         }
@@ -2215,6 +2365,8 @@ impl SavedContext {
                     role: message.metadata.role,
                     status: message.metadata.status,
                     timestamp: message.metadata.timestamp,
+                    should_cache: false,
+                    is_cache_anchor: false,
                 },
                 version: version.clone(),
             });
@@ -2231,6 +2383,8 @@ impl SavedContext {
                     role: metadata.role,
                     status: metadata.status,
                     timestamp,
+                    should_cache: false,
+                    is_cache_anchor: false,
                 },
                 version: version.clone(),
             });
@@ -2325,6 +2479,8 @@ impl SavedContextV0_3_0 {
                 role: metadata.role,
                 status: metadata.status.clone(),
                 timestamp,
+                should_cache: false,
+                is_cache_anchor: false,
             },
             image_offsets: Vec::new(),
         })
diff --git a/crates/assistant/src/inline_assistant.rs b/crates/assistant/src/inline_assistant.rs
index fbbe7d4224..b027870246 100644
--- a/crates/assistant/src/inline_assistant.rs
+++ b/crates/assistant/src/inline_assistant.rs
@@ -2387,6 +2387,7 @@ impl Codegen {
         messages.push(LanguageModelRequestMessage {
             role: Role::User,
             content: vec![prompt.into()],
+            cache: false,
         });
 
         Ok(LanguageModelRequest {
diff --git a/crates/assistant/src/prompt_library.rs b/crates/assistant/src/prompt_library.rs
index 124420b0cd..bc3c356267 100644
--- a/crates/assistant/src/prompt_library.rs
+++ b/crates/assistant/src/prompt_library.rs
@@ -784,6 +784,7 @@ impl PromptLibrary {
                     messages: vec![LanguageModelRequestMessage {
                         role: Role::System,
                         content: vec![body.to_string().into()],
+                        cache: false,
                     }],
                     stop: Vec::new(),
                     temperature: 1.,
diff --git a/crates/assistant/src/terminal_inline_assistant.rs b/crates/assistant/src/terminal_inline_assistant.rs
index bf464de382..05621cce1e 100644
--- a/crates/assistant/src/terminal_inline_assistant.rs
+++ b/crates/assistant/src/terminal_inline_assistant.rs
@@ -277,6 +277,7 @@ impl TerminalInlineAssistant {
         messages.push(LanguageModelRequestMessage {
             role: Role::User,
             content: vec![prompt.into()],
+            cache: false,
         });
 
         Ok(LanguageModelRequest {
diff --git a/crates/assistant/src/workflow.rs b/crates/assistant/src/workflow.rs
index f08456a35b..bd5419c627 100644
--- a/crates/assistant/src/workflow.rs
+++ b/crates/assistant/src/workflow.rs
@@ -136,6 +136,7 @@ impl WorkflowStep {
         request.messages.push(LanguageModelRequestMessage {
             role: Role::User,
             content: vec![prompt.into()],
+            cache: false,
         });
 
         // Invoke the model to get its edit suggestions for this workflow step.
diff --git a/crates/language_model/src/language_model.rs b/crates/language_model/src/language_model.rs
index 9c8f9c3a9f..1df651ad9e 100644
--- a/crates/language_model/src/language_model.rs
+++ b/crates/language_model/src/language_model.rs
@@ -20,7 +20,7 @@ pub use registry::*;
 pub use request::*;
 pub use role::*;
 use schemars::JsonSchema;
-use serde::de::DeserializeOwned;
+use serde::{de::DeserializeOwned, Deserialize, Serialize};
 use std::{future::Future, sync::Arc};
 use ui::IconName;
 
@@ -43,6 +43,14 @@ pub enum LanguageModelAvailability {
     RequiresPlan(Plan),
 }
 
+/// Configuration for caching language model messages.
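+///
+/// `min_total_token` is the token threshold below which caching is not
+/// worthwhile, `should_speculate` opts the model into speculative cache
+/// warming, and `max_cache_anchors` caps how many cache breakpoints a
+/// single request may carry.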
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
+pub struct LanguageModelCacheConfiguration {
+    pub max_cache_anchors: usize,
+    pub should_speculate: bool,
+    pub min_total_token: usize,
+}
+
 pub trait LanguageModel: Send + Sync {
     fn id(&self) -> LanguageModelId;
     fn name(&self) -> LanguageModelName;
@@ -78,6 +86,10 @@ pub trait LanguageModel: Send + Sync {
         cx: &AsyncAppContext,
     ) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>>;
 
+    fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
+        None
+    }
+
     #[cfg(any(test, feature = "test-support"))]
     fn as_fake(&self) -> &provider::fake::FakeLanguageModel {
         unimplemented!()
diff --git a/crates/language_model/src/provider/anthropic.rs b/crates/language_model/src/provider/anthropic.rs
index 7f3cbbf44f..fa5401a38f 100644
--- a/crates/language_model/src/provider/anthropic.rs
+++ b/crates/language_model/src/provider/anthropic.rs
@@ -1,7 +1,7 @@
 use crate::{
-    settings::AllLanguageModelSettings, LanguageModel, LanguageModelId, LanguageModelName,
-    LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
-    LanguageModelProviderState, LanguageModelRequest, RateLimiter, Role,
+    settings::AllLanguageModelSettings, LanguageModel, LanguageModelCacheConfiguration,
+    LanguageModelId, LanguageModelName, LanguageModelProvider, LanguageModelProviderId,
+    LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest, RateLimiter, Role,
 };
 use anthropic::AnthropicError;
 use anyhow::{anyhow, Context as _, Result};
@@ -38,6 +38,7 @@ pub struct AvailableModel {
     pub name: String,
     pub max_tokens: usize,
     pub tool_override: Option<String>,
+    pub cache_configuration: Option<LanguageModelCacheConfiguration>,
 }
 
 pub struct AnthropicLanguageModelProvider {
@@ -171,6 +172,13 @@ impl LanguageModelProvider for AnthropicLanguageModelProvider {
                     name: model.name.clone(),
                     max_tokens: model.max_tokens,
                     tool_override: model.tool_override.clone(),
+                    cache_configuration: model.cache_configuration.as_ref().map(|config| {
+                        anthropic::AnthropicModelCacheConfiguration {
+                            max_cache_anchors: config.max_cache_anchors,
+                            should_speculate: config.should_speculate,
+                            min_total_token: config.min_total_token,
+                        }
+                    }),
                 },
             );
         }
@@ -351,6 +359,16 @@ impl LanguageModel for AnthropicModel {
             .boxed()
     }
 
+    fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
+        self.model
+            .cache_configuration()
+            .map(|config| LanguageModelCacheConfiguration {
+                max_cache_anchors: config.max_cache_anchors,
+                should_speculate: config.should_speculate,
+                min_total_token: config.min_total_token,
+            })
+    }
+
     fn use_any_tool(
         &self,
         request: LanguageModelRequest,
diff --git a/crates/language_model/src/provider/cloud.rs b/crates/language_model/src/provider/cloud.rs
index 1bb68a0513..517cb13342 100644
--- a/crates/language_model/src/provider/cloud.rs
+++ b/crates/language_model/src/provider/cloud.rs
@@ -1,7 +1,7 @@
 use super::open_ai::count_open_ai_tokens;
 use crate::{
-    settings::AllLanguageModelSettings, CloudModel, LanguageModel, LanguageModelId,
-    LanguageModelName, LanguageModelProviderId, LanguageModelProviderName,
+    settings::AllLanguageModelSettings, CloudModel, LanguageModel, LanguageModelCacheConfiguration,
+    LanguageModelId, LanguageModelName, LanguageModelProviderId, LanguageModelProviderName,
     LanguageModelProviderState, LanguageModelRequest, RateLimiter, ZedModel,
 };
 use anthropic::AnthropicError;
@@ -56,6 +56,7 @@ pub struct AvailableModel {
     name: String,
     max_tokens: usize,
     tool_override: Option<String>,
+    cache_configuration: Option<LanguageModelCacheConfiguration>,
 }
 
 pub struct CloudLanguageModelProvider {
@@ -202,6 +203,13 @@ impl LanguageModelProvider for CloudLanguageModelProvider {
                     name: model.name.clone(),
                     max_tokens: model.max_tokens,
                     tool_override: model.tool_override.clone(),
+                    cache_configuration: model.cache_configuration.as_ref().map(|config| {
+                        anthropic::AnthropicModelCacheConfiguration {
+                            max_cache_anchors: config.max_cache_anchors,
+                            should_speculate: config.should_speculate,
+                            min_total_token: config.min_total_token,
+                        }
+                    }),
                 })
             }
             AvailableProvider::OpenAi => CloudModel::OpenAi(open_ai::Model::Custom {
diff --git a/crates/language_model/src/request.rs b/crates/language_model/src/request.rs
index 7a545dd814..452cb3ddac 100644
--- a/crates/language_model/src/request.rs
+++ b/crates/language_model/src/request.rs
@@ -193,6 +193,7 @@ impl From<&str> for MessageContent {
 pub struct LanguageModelRequestMessage {
     pub role: Role,
     pub content: Vec<MessageContent>,
+    pub cache: bool,
 }
 
 impl LanguageModelRequestMessage {
@@ -213,7 +214,7 @@ impl LanguageModelRequestMessage {
             .content
             .get(0)
             .map(|content| match content {
-                MessageContent::Text(s) => s.is_empty(),
+                MessageContent::Text(s) => s.trim().is_empty(),
                 MessageContent::Image(_) => true,
             })
             .unwrap_or(false)
@@ -286,7 +287,7 @@ impl LanguageModelRequest {
     }
 
     pub fn into_anthropic(self, model: String) -> anthropic::Request {
-        let mut new_messages: Vec<LanguageModelRequestMessage> = Vec::new();
+        let mut new_messages: Vec<anthropic::Message> = Vec::new();
         let mut system_message = String::new();
 
         for message in self.messages {
@@ -296,18 +297,50 @@ impl LanguageModelRequest {
 
             match message.role {
                 Role::User | Role::Assistant => {
+                    let cache_control = if message.cache {
+                        Some(anthropic::CacheControl {
+                            cache_type: anthropic::CacheControlType::Ephemeral,
+                        })
+                    } else {
+                        None
+                    };
+                    let anthropic_message_content: Vec<anthropic::Content> = message
+                        .content
+                        .into_iter()
+                        // TODO: filter out the empty messages in the message construction step
+                        .filter_map(|content| match content {
+                            MessageContent::Text(t) if !t.is_empty() => {
+                                Some(anthropic::Content::Text {
+                                    text: t,
+                                    cache_control,
+                                })
+                            }
+                            MessageContent::Image(i) => Some(anthropic::Content::Image {
+                                source: anthropic::ImageSource {
+                                    source_type: "base64".to_string(),
+                                    media_type: "image/png".to_string(),
+                                    data: i.source.to_string(),
+                                },
+                                cache_control,
+                            }),
+                            _ => None,
+                        })
+                        .collect();
+                    let anthropic_role = match message.role {
+                        Role::User => anthropic::Role::User,
+                        Role::Assistant => anthropic::Role::Assistant,
+                        Role::System => unreachable!("System role should never occur here"),
+                    };
                     if let Some(last_message) = new_messages.last_mut() {
-                        if last_message.role == message.role {
-                            // TODO: is this append done properly?
-                            last_message.content.push(MessageContent::Text(format!(
-                                "\n\n{}",
-                                message.string_contents()
-                            )));
+                        if last_message.role == anthropic_role {
+                            last_message.content.extend(anthropic_message_content);
                             continue;
                         }
                     }
-
-                    new_messages.push(message);
+                    new_messages.push(anthropic::Message {
+                        role: anthropic_role,
+                        content: anthropic_message_content,
+                    });
                 }
                 Role::System => {
                     if !system_message.is_empty() {
@@ -320,36 +353,7 @@ impl LanguageModelRequest {
 
         anthropic::Request {
             model,
-            messages: new_messages
-                .into_iter()
-                .filter_map(|message| {
-                    Some(anthropic::Message {
-                        role: match message.role {
-                            Role::User => anthropic::Role::User,
-                            Role::Assistant => anthropic::Role::Assistant,
-                            Role::System => return None,
-                        },
-                        content: message
-                            .content
-                            .into_iter()
-                            // TODO: filter out the empty messages in the message construction step
-                            .filter_map(|content| match content {
-                                MessageContent::Text(t) if !t.is_empty() => {
-                                    Some(anthropic::Content::Text { text: t })
-                                }
-                                MessageContent::Image(i) => Some(anthropic::Content::Image {
-                                    source: anthropic::ImageSource {
-                                        source_type: "base64".to_string(),
-                                        media_type: "image/png".to_string(),
-                                        data: i.source.to_string(),
-                                    },
-                                }),
-                                _ => None,
-                            })
-                            .collect(),
-                    })
-                })
-                .collect(),
+            messages: new_messages,
             max_tokens: 4092,
             system: Some(system_message),
             tools: Vec::new(),
diff --git a/crates/language_model/src/settings.rs b/crates/language_model/src/settings.rs
index 17bbaf77ad..ded797e1e5 100644
--- a/crates/language_model/src/settings.rs
+++ b/crates/language_model/src/settings.rs
@@ -7,14 +7,17 @@ use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use settings::{update_settings_file, Settings, SettingsSources};
 
-use crate::provider::{
-    self,
-    anthropic::AnthropicSettings,
-    cloud::{self, ZedDotDevSettings},
-    copilot_chat::CopilotChatSettings,
-    google::GoogleSettings,
-    ollama::OllamaSettings,
-    open_ai::OpenAiSettings,
+use crate::{
+    provider::{
+        self,
+        anthropic::AnthropicSettings,
+        cloud::{self, ZedDotDevSettings},
+        copilot_chat::CopilotChatSettings,
+        google::GoogleSettings,
+        ollama::OllamaSettings,
+        open_ai::OpenAiSettings,
+    },
+    LanguageModelCacheConfiguration,
 };
 
 /// Initializes the language model settings.
@@ -93,10 +96,18 @@ impl AnthropicSettingsContent {
                             name,
                             max_tokens,
                             tool_override,
+                            cache_configuration,
                         } => Some(provider::anthropic::AvailableModel {
                             name,
                             max_tokens,
                             tool_override,
+                            cache_configuration: cache_configuration.as_ref().map(
+                                |config| LanguageModelCacheConfiguration {
+                                    max_cache_anchors: config.max_cache_anchors,
+                                    should_speculate: config.should_speculate,
+                                    min_total_token: config.min_total_token,
+                                },
+                            ),
                         }),
                         _ => None,
                    })
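
Usage note (illustrative, not part of the patch): given the serde attributes on
`anthropic::Content` shown above, a request message marked `cache: true` should
serialize its content blocks with an ephemeral `cache_control` breakpoint. A
minimal sketch, assuming `Content` derives `Serialize` (its derive line sits
outside this diff) and `serde_json` is available:

    fn main() {
        let content = anthropic::Content::Text {
            text: "a long, stable prompt worth caching".to_string(),
            cache_control: Some(anthropic::CacheControl {
                cache_type: anthropic::CacheControlType::Ephemeral,
            }),
        };
        // With `#[serde(tag = "type")]` and the lowercase renames, this should
        // serialize to:
        //   {"type":"text","text":"...","cache_control":{"type":"ephemeral"}}
        assert_eq!(
            serde_json::to_value(&content).unwrap(),
            serde_json::json!({
                "type": "text",
                "text": "a long, stable prompt worth caching",
                "cache_control": { "type": "ephemeral" }
            })
        );
    }

When `cache: false`, `cache_control` is `None` and is skipped entirely by
`skip_serializing_if`, so uncached requests are byte-identical to those sent
before this change.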