use crate::AllLanguageModelSettings; use crate::ui::InstructionListItem; use anthropic::{AnthropicError, AnthropicModelMode, ContentDelta, Event, ResponseContent, Usage}; use anyhow::{Context as _, Result, anyhow}; use collections::{BTreeMap, HashMap}; use credentials_provider::CredentialsProvider; use editor::{Editor, EditorElement, EditorStyle}; use futures::Stream; use futures::{FutureExt, StreamExt, future::BoxFuture, stream::BoxStream}; use gpui::{ AnyView, App, AsyncApp, Context, Entity, FontStyle, Subscription, Task, TextStyle, WhiteSpace, }; use http_client::HttpClient; use language_model::{ AuthenticateError, LanguageModel, LanguageModelCacheConfiguration, LanguageModelId, LanguageModelKnownError, LanguageModelName, LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest, MessageContent, RateLimiter, Role, }; use language_model::{LanguageModelCompletionEvent, LanguageModelToolUse, StopReason}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use settings::{Settings, SettingsStore}; use std::pin::Pin; use std::str::FromStr; use std::sync::Arc; use strum::IntoEnumIterator; use theme::ThemeSettings; use ui::{Icon, IconName, List, Tooltip, prelude::*}; use util::{ResultExt, maybe}; const PROVIDER_ID: &str = language_model::ANTHROPIC_PROVIDER_ID; const PROVIDER_NAME: &str = "Anthropic"; #[derive(Default, Clone, Debug, PartialEq)] pub struct AnthropicSettings { pub api_url: String, /// Extend Zed's list of Anthropic models. pub available_models: Vec, pub needs_setting_migration: bool, } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)] pub struct AvailableModel { /// The model's name in the Anthropic API. e.g. claude-3-5-sonnet-latest, claude-3-opus-20240229, etc pub name: String, /// The model's name in Zed's UI, such as in the model selector dropdown menu in the assistant panel. pub display_name: Option, /// The model's context window size. pub max_tokens: usize, /// A model `name` to substitute when calling tools, in case the primary model doesn't support tool calling. pub tool_override: Option, /// Configuration of Anthropic's caching API. pub cache_configuration: Option, pub max_output_tokens: Option, pub default_temperature: Option, #[serde(default)] pub extra_beta_headers: Vec, /// The model's mode (e.g. thinking) pub mode: Option, } #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize, JsonSchema)] #[serde(tag = "type", rename_all = "lowercase")] pub enum ModelMode { #[default] Default, Thinking { /// The maximum number of tokens to use for reasoning. Must be lower than the model's `max_output_tokens`. budget_tokens: Option, }, } impl From for AnthropicModelMode { fn from(value: ModelMode) -> Self { match value { ModelMode::Default => AnthropicModelMode::Default, ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens }, } } } impl From for ModelMode { fn from(value: AnthropicModelMode) -> Self { match value { AnthropicModelMode::Default => ModelMode::Default, AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens }, } } } pub struct AnthropicLanguageModelProvider { http_client: Arc, state: gpui::Entity, } const ANTHROPIC_API_KEY_VAR: &str = "ANTHROPIC_API_KEY"; pub struct State { api_key: Option, api_key_from_env: bool, _subscription: Subscription, } impl State { fn reset_api_key(&self, cx: &mut Context) -> Task> { let credentials_provider = ::global(cx); let api_url = AllLanguageModelSettings::get_global(cx) .anthropic .api_url .clone(); cx.spawn(async move |this, cx| { credentials_provider .delete_credentials(&api_url, &cx) .await .ok(); this.update(cx, |this, cx| { this.api_key = None; this.api_key_from_env = false; cx.notify(); }) }) } fn set_api_key(&mut self, api_key: String, cx: &mut Context) -> Task> { let credentials_provider = ::global(cx); let api_url = AllLanguageModelSettings::get_global(cx) .anthropic .api_url .clone(); cx.spawn(async move |this, cx| { credentials_provider .write_credentials(&api_url, "Bearer", api_key.as_bytes(), &cx) .await .ok(); this.update(cx, |this, cx| { this.api_key = Some(api_key); cx.notify(); }) }) } fn is_authenticated(&self) -> bool { self.api_key.is_some() } fn authenticate(&self, cx: &mut Context) -> Task> { if self.is_authenticated() { return Task::ready(Ok(())); } let credentials_provider = ::global(cx); let api_url = AllLanguageModelSettings::get_global(cx) .anthropic .api_url .clone(); cx.spawn(async move |this, cx| { let (api_key, from_env) = if let Ok(api_key) = std::env::var(ANTHROPIC_API_KEY_VAR) { (api_key, true) } else { let (_, api_key) = credentials_provider .read_credentials(&api_url, &cx) .await? .ok_or(AuthenticateError::CredentialsNotFound)?; ( String::from_utf8(api_key).context("invalid {PROVIDER_NAME} API key")?, false, ) }; this.update(cx, |this, cx| { this.api_key = Some(api_key); this.api_key_from_env = from_env; cx.notify(); })?; Ok(()) }) } } impl AnthropicLanguageModelProvider { pub fn new(http_client: Arc, cx: &mut App) -> Self { let state = cx.new(|cx| State { api_key: None, api_key_from_env: false, _subscription: cx.observe_global::(|_, cx| { cx.notify(); }), }); Self { http_client, state } } fn create_language_model(&self, model: anthropic::Model) -> Arc { Arc::new(AnthropicModel { id: LanguageModelId::from(model.id().to_string()), model, state: self.state.clone(), http_client: self.http_client.clone(), request_limiter: RateLimiter::new(4), }) as Arc } } impl LanguageModelProviderState for AnthropicLanguageModelProvider { type ObservableEntity = State; fn observable_entity(&self) -> Option> { Some(self.state.clone()) } } impl LanguageModelProvider for AnthropicLanguageModelProvider { fn id(&self) -> LanguageModelProviderId { LanguageModelProviderId(PROVIDER_ID.into()) } fn name(&self) -> LanguageModelProviderName { LanguageModelProviderName(PROVIDER_NAME.into()) } fn icon(&self) -> IconName { IconName::AiAnthropic } fn default_model(&self, _cx: &App) -> Option> { let model = anthropic::Model::default(); Some(Arc::new(AnthropicModel { id: LanguageModelId::from(model.id().to_string()), model, state: self.state.clone(), http_client: self.http_client.clone(), request_limiter: RateLimiter::new(4), })) } fn recommended_models(&self, _cx: &App) -> Vec> { [ anthropic::Model::Claude3_7Sonnet, anthropic::Model::Claude3_7SonnetThinking, ] .into_iter() .map(|model| self.create_language_model(model)) .collect() } fn provided_models(&self, cx: &App) -> Vec> { let mut models = BTreeMap::default(); // Add base models from anthropic::Model::iter() for model in anthropic::Model::iter() { if !matches!(model, anthropic::Model::Custom { .. }) { models.insert(model.id().to_string(), model); } } // Override with available models from settings for model in AllLanguageModelSettings::get_global(cx) .anthropic .available_models .iter() { models.insert( model.name.clone(), anthropic::Model::Custom { name: model.name.clone(), display_name: model.display_name.clone(), max_tokens: model.max_tokens, tool_override: model.tool_override.clone(), cache_configuration: model.cache_configuration.as_ref().map(|config| { anthropic::AnthropicModelCacheConfiguration { max_cache_anchors: config.max_cache_anchors, should_speculate: config.should_speculate, min_total_token: config.min_total_token, } }), max_output_tokens: model.max_output_tokens, default_temperature: model.default_temperature, extra_beta_headers: model.extra_beta_headers.clone(), mode: model.mode.clone().unwrap_or_default().into(), }, ); } models .into_values() .map(|model| self.create_language_model(model)) .collect() } fn is_authenticated(&self, cx: &App) -> bool { self.state.read(cx).is_authenticated() } fn authenticate(&self, cx: &mut App) -> Task> { self.state.update(cx, |state, cx| state.authenticate(cx)) } fn configuration_view(&self, window: &mut Window, cx: &mut App) -> AnyView { cx.new(|cx| ConfigurationView::new(self.state.clone(), window, cx)) .into() } fn reset_credentials(&self, cx: &mut App) -> Task> { self.state.update(cx, |state, cx| state.reset_api_key(cx)) } } pub struct AnthropicModel { id: LanguageModelId, model: anthropic::Model, state: gpui::Entity, http_client: Arc, request_limiter: RateLimiter, } pub fn count_anthropic_tokens( request: LanguageModelRequest, cx: &App, ) -> BoxFuture<'static, Result> { cx.background_spawn(async move { let messages = request.messages; let mut tokens_from_images = 0; let mut string_messages = Vec::with_capacity(messages.len()); for message in messages { use language_model::MessageContent; let mut string_contents = String::new(); for content in message.content { match content { MessageContent::Text(text) => { string_contents.push_str(&text); } MessageContent::Image(image) => { tokens_from_images += image.estimate_tokens(); } MessageContent::ToolUse(_tool_use) => { // TODO: Estimate token usage from tool uses. } MessageContent::ToolResult(tool_result) => { string_contents.push_str(&tool_result.content); } } } if !string_contents.is_empty() { string_messages.push(tiktoken_rs::ChatCompletionRequestMessage { role: match message.role { Role::User => "user".into(), Role::Assistant => "assistant".into(), Role::System => "system".into(), }, content: Some(string_contents), name: None, function_call: None, }); } } // Tiktoken doesn't yet support these models, so we manually use the // same tokenizer as GPT-4. tiktoken_rs::num_tokens_from_messages("gpt-4", &string_messages) .map(|tokens| tokens + tokens_from_images) }) .boxed() } impl AnthropicModel { fn stream_completion( &self, request: anthropic::Request, cx: &AsyncApp, ) -> BoxFuture<'static, Result>>> { let http_client = self.http_client.clone(); let Ok((api_key, api_url)) = cx.read_entity(&self.state, |state, cx| { let settings = &AllLanguageModelSettings::get_global(cx).anthropic; (state.api_key.clone(), settings.api_url.clone()) }) else { return futures::future::ready(Err(anyhow!("App state dropped"))).boxed(); }; async move { let api_key = api_key.ok_or_else(|| anyhow!("Missing Anthropic API Key"))?; let request = anthropic::stream_completion(http_client.as_ref(), &api_url, &api_key, request); request.await.context("failed to stream completion") } .boxed() } } impl LanguageModel for AnthropicModel { fn id(&self) -> LanguageModelId { self.id.clone() } fn name(&self) -> LanguageModelName { LanguageModelName::from(self.model.display_name().to_string()) } fn provider_id(&self) -> LanguageModelProviderId { LanguageModelProviderId(PROVIDER_ID.into()) } fn provider_name(&self) -> LanguageModelProviderName { LanguageModelProviderName(PROVIDER_NAME.into()) } fn supports_tools(&self) -> bool { true } fn telemetry_id(&self) -> String { format!("anthropic/{}", self.model.id()) } fn api_key(&self, cx: &App) -> Option { self.state.read(cx).api_key.clone() } fn max_token_count(&self) -> usize { self.model.max_token_count() } fn max_output_tokens(&self) -> Option { Some(self.model.max_output_tokens()) } fn count_tokens( &self, request: LanguageModelRequest, cx: &App, ) -> BoxFuture<'static, Result> { count_anthropic_tokens(request, cx) } fn stream_completion( &self, request: LanguageModelRequest, cx: &AsyncApp, ) -> BoxFuture<'static, Result>>> { let request = into_anthropic( request, self.model.request_id().into(), self.model.default_temperature(), self.model.max_output_tokens(), self.model.mode(), ); let request = self.stream_completion(request, cx); let future = self.request_limiter.stream(async move { let response = request .await .map_err(|err| match err.downcast::() { Ok(anthropic_err) => anthropic_err_to_anyhow(anthropic_err), Err(err) => anyhow!(err), })?; Ok(map_to_language_model_completion_events(response)) }); async move { Ok(future.await?.boxed()) }.boxed() } fn cache_configuration(&self) -> Option { self.model .cache_configuration() .map(|config| LanguageModelCacheConfiguration { max_cache_anchors: config.max_cache_anchors, should_speculate: config.should_speculate, min_total_token: config.min_total_token, }) } } pub fn into_anthropic( request: LanguageModelRequest, model: String, default_temperature: f32, max_output_tokens: u32, mode: AnthropicModelMode, ) -> anthropic::Request { let mut new_messages: Vec = Vec::new(); let mut system_message = String::new(); for message in request.messages { if message.contents_empty() { continue; } match message.role { Role::User | Role::Assistant => { let cache_control = if message.cache { Some(anthropic::CacheControl { cache_type: anthropic::CacheControlType::Ephemeral, }) } else { None }; let anthropic_message_content: Vec = message .content .into_iter() .filter_map(|content| match content { MessageContent::Text(text) => { if !text.is_empty() { Some(anthropic::RequestContent::Text { text, cache_control, }) } else { None } } MessageContent::Image(image) => Some(anthropic::RequestContent::Image { source: anthropic::ImageSource { source_type: "base64".to_string(), media_type: "image/png".to_string(), data: image.source.to_string(), }, cache_control, }), MessageContent::ToolUse(tool_use) => { Some(anthropic::RequestContent::ToolUse { id: tool_use.id.to_string(), name: tool_use.name.to_string(), input: tool_use.input, cache_control, }) } MessageContent::ToolResult(tool_result) => { Some(anthropic::RequestContent::ToolResult { tool_use_id: tool_result.tool_use_id.to_string(), is_error: tool_result.is_error, content: tool_result.content.to_string(), cache_control, }) } }) .collect(); let anthropic_role = match message.role { Role::User => anthropic::Role::User, Role::Assistant => anthropic::Role::Assistant, Role::System => unreachable!("System role should never occur here"), }; if let Some(last_message) = new_messages.last_mut() { if last_message.role == anthropic_role { last_message.content.extend(anthropic_message_content); continue; } } new_messages.push(anthropic::Message { role: anthropic_role, content: anthropic_message_content, }); } Role::System => { if !system_message.is_empty() { system_message.push_str("\n\n"); } system_message.push_str(&message.string_contents()); } } } anthropic::Request { model, messages: new_messages, max_tokens: max_output_tokens, system: if system_message.is_empty() { None } else { Some(anthropic::StringOrContents::String(system_message)) }, thinking: if let AnthropicModelMode::Thinking { budget_tokens } = mode { Some(anthropic::Thinking::Enabled { budget_tokens }) } else { None }, tools: request .tools .into_iter() .map(|tool| anthropic::Tool { name: tool.name, description: tool.description, input_schema: tool.input_schema, }) .collect(), tool_choice: None, metadata: None, stop_sequences: Vec::new(), temperature: request.temperature.or(Some(default_temperature)), top_k: None, top_p: None, } } pub fn map_to_language_model_completion_events( events: Pin>>>, ) -> impl Stream> { struct RawToolUse { id: String, name: String, input_json: String, } struct State { events: Pin>>>, tool_uses_by_index: HashMap, usage: Usage, stop_reason: StopReason, } futures::stream::unfold( State { events, tool_uses_by_index: HashMap::default(), usage: Usage::default(), stop_reason: StopReason::EndTurn, }, |mut state| async move { while let Some(event) = state.events.next().await { match event { Ok(event) => match event { Event::ContentBlockStart { index, content_block, } => match content_block { ResponseContent::Text { text } => { return Some(( vec![Ok(LanguageModelCompletionEvent::Text(text))], state, )); } ResponseContent::Thinking { thinking } => { return Some(( vec![Ok(LanguageModelCompletionEvent::Thinking(thinking))], state, )); } ResponseContent::RedactedThinking { .. } => { // Redacted thinking is encrypted and not accessible to the user, see: // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#suggestions-for-handling-redacted-thinking-in-production } ResponseContent::ToolUse { id, name, .. } => { state.tool_uses_by_index.insert( index, RawToolUse { id, name, input_json: String::new(), }, ); } }, Event::ContentBlockDelta { index, delta } => match delta { ContentDelta::TextDelta { text } => { return Some(( vec![Ok(LanguageModelCompletionEvent::Text(text))], state, )); } ContentDelta::ThinkingDelta { thinking } => { return Some(( vec![Ok(LanguageModelCompletionEvent::Thinking(thinking))], state, )); } ContentDelta::SignatureDelta { .. } => {} ContentDelta::InputJsonDelta { partial_json } => { if let Some(tool_use) = state.tool_uses_by_index.get_mut(&index) { tool_use.input_json.push_str(&partial_json); } } }, Event::ContentBlockStop { index } => { if let Some(tool_use) = state.tool_uses_by_index.remove(&index) { return Some(( vec![maybe!({ Ok(LanguageModelCompletionEvent::ToolUse( LanguageModelToolUse { id: tool_use.id.into(), name: tool_use.name.into(), input: if tool_use.input_json.is_empty() { serde_json::Value::Object( serde_json::Map::default(), ) } else { serde_json::Value::from_str( &tool_use.input_json, ) .map_err(|err| anyhow!(err))? }, }, )) })], state, )); } } Event::MessageStart { message } => { update_usage(&mut state.usage, &message.usage); return Some(( vec![ Ok(LanguageModelCompletionEvent::UsageUpdate(convert_usage( &state.usage, ))), Ok(LanguageModelCompletionEvent::StartMessage { message_id: message.id, }), ], state, )); } Event::MessageDelta { delta, usage } => { update_usage(&mut state.usage, &usage); if let Some(stop_reason) = delta.stop_reason.as_deref() { state.stop_reason = match stop_reason { "end_turn" => StopReason::EndTurn, "max_tokens" => StopReason::MaxTokens, "tool_use" => StopReason::ToolUse, _ => { log::error!( "Unexpected anthropic stop_reason: {stop_reason}" ); StopReason::EndTurn } }; } return Some(( vec![Ok(LanguageModelCompletionEvent::UsageUpdate( convert_usage(&state.usage), ))], state, )); } Event::MessageStop => { return Some(( vec![Ok(LanguageModelCompletionEvent::Stop(state.stop_reason))], state, )); } Event::Error { error } => { return Some(( vec![Err(anyhow!(AnthropicError::ApiError(error)))], state, )); } _ => {} }, Err(err) => { return Some((vec![Err(anthropic_err_to_anyhow(err))], state)); } } } None }, ) .flat_map(futures::stream::iter) } pub fn anthropic_err_to_anyhow(err: AnthropicError) -> anyhow::Error { if let AnthropicError::ApiError(api_err) = &err { if let Some(tokens) = api_err.match_window_exceeded() { return anyhow!(LanguageModelKnownError::ContextWindowLimitExceeded { tokens }); } } anyhow!(err) } /// Updates usage data by preferring counts from `new`. fn update_usage(usage: &mut Usage, new: &Usage) { if let Some(input_tokens) = new.input_tokens { usage.input_tokens = Some(input_tokens); } if let Some(output_tokens) = new.output_tokens { usage.output_tokens = Some(output_tokens); } if let Some(cache_creation_input_tokens) = new.cache_creation_input_tokens { usage.cache_creation_input_tokens = Some(cache_creation_input_tokens); } if let Some(cache_read_input_tokens) = new.cache_read_input_tokens { usage.cache_read_input_tokens = Some(cache_read_input_tokens); } } fn convert_usage(usage: &Usage) -> language_model::TokenUsage { language_model::TokenUsage { input_tokens: usage.input_tokens.unwrap_or(0), output_tokens: usage.output_tokens.unwrap_or(0), cache_creation_input_tokens: usage.cache_creation_input_tokens.unwrap_or(0), cache_read_input_tokens: usage.cache_read_input_tokens.unwrap_or(0), } } struct ConfigurationView { api_key_editor: Entity, state: gpui::Entity, load_credentials_task: Option>, } impl ConfigurationView { const PLACEHOLDER_TEXT: &'static str = "sk-ant-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; fn new(state: gpui::Entity, window: &mut Window, cx: &mut Context) -> Self { cx.observe(&state, |_, _, cx| { cx.notify(); }) .detach(); let load_credentials_task = Some(cx.spawn({ let state = state.clone(); async move |this, cx| { if let Some(task) = state .update(cx, |state, cx| state.authenticate(cx)) .log_err() { // We don't log an error, because "not signed in" is also an error. let _ = task.await; } this.update(cx, |this, cx| { this.load_credentials_task = None; cx.notify(); }) .log_err(); } })); Self { api_key_editor: cx.new(|cx| { let mut editor = Editor::single_line(window, cx); editor.set_placeholder_text(Self::PLACEHOLDER_TEXT, cx); editor }), state, load_credentials_task, } } fn save_api_key(&mut self, _: &menu::Confirm, window: &mut Window, cx: &mut Context) { let api_key = self.api_key_editor.read(cx).text(cx); if api_key.is_empty() { return; } let state = self.state.clone(); cx.spawn_in(window, async move |_, cx| { state .update(cx, |state, cx| state.set_api_key(api_key, cx))? .await }) .detach_and_log_err(cx); cx.notify(); } fn reset_api_key(&mut self, window: &mut Window, cx: &mut Context) { self.api_key_editor .update(cx, |editor, cx| editor.set_text("", window, cx)); let state = self.state.clone(); cx.spawn_in(window, async move |_, cx| { state.update(cx, |state, cx| state.reset_api_key(cx))?.await }) .detach_and_log_err(cx); cx.notify(); } fn render_api_key_editor(&self, cx: &mut Context) -> impl IntoElement { let settings = ThemeSettings::get_global(cx); let text_style = TextStyle { color: cx.theme().colors().text, font_family: settings.ui_font.family.clone(), font_features: settings.ui_font.features.clone(), font_fallbacks: settings.ui_font.fallbacks.clone(), font_size: rems(0.875).into(), font_weight: settings.ui_font.weight, font_style: FontStyle::Normal, line_height: relative(1.3), white_space: WhiteSpace::Normal, ..Default::default() }; EditorElement::new( &self.api_key_editor, EditorStyle { background: cx.theme().colors().editor_background, local_player: cx.theme().players().local(), text: text_style, ..Default::default() }, ) } fn should_render_editor(&self, cx: &mut Context) -> bool { !self.state.read(cx).is_authenticated() } } impl Render for ConfigurationView { fn render(&mut self, _: &mut Window, cx: &mut Context) -> impl IntoElement { let env_var_set = self.state.read(cx).api_key_from_env; if self.load_credentials_task.is_some() { div().child(Label::new("Loading credentials...")).into_any() } else if self.should_render_editor(cx) { v_flex() .size_full() .on_action(cx.listener(Self::save_api_key)) .child(Label::new("To use Zed's assistant with Anthropic, you need to add an API key. Follow these steps:")) .child( List::new() .child( InstructionListItem::new( "Create one by visiting", Some("Anthropic's settings"), Some("https://console.anthropic.com/settings/keys") ) ) .child( InstructionListItem::text_only("Paste your API key below and hit enter to start using the assistant") ) ) .child( h_flex() .w_full() .my_2() .px_2() .py_1() .bg(cx.theme().colors().editor_background) .border_1() .border_color(cx.theme().colors().border_variant) .rounded_sm() .child(self.render_api_key_editor(cx)), ) .child( Label::new( format!("You can also assign the {ANTHROPIC_API_KEY_VAR} environment variable and restart Zed."), ) .size(LabelSize::Small) .color(Color::Muted), ) .into_any() } else { h_flex() .size_full() .justify_between() .child( h_flex() .gap_1() .child(Icon::new(IconName::Check).color(Color::Success)) .child(Label::new(if env_var_set { format!("API key set in {ANTHROPIC_API_KEY_VAR} environment variable.") } else { "API key configured.".to_string() })), ) .child( Button::new("reset-key", "Reset key") .icon(Some(IconName::Trash)) .icon_size(IconSize::Small) .icon_position(IconPosition::Start) .disabled(env_var_set) .when(env_var_set, |this| { this.tooltip(Tooltip::text(format!("To reset your API key, unset the {ANTHROPIC_API_KEY_VAR} environment variable."))) }) .on_click(cx.listener(|this, _, window, cx| this.reset_api_key(window, cx))), ) .into_any() } } }