Add language_models crate to house language model providers (#20945)

This PR adds a new `language_models` crate to house the various language
model providers.

By extracting the provider definitions out of `language_model`, we're
able to remove `language_model`'s dependency on `editor`, which improves
incremental compilation when changing `editor`.

Release Notes:

- N/A
Commit cbba44900d (parent 335b112abd) by Marshall Bowers, committed via GitHub on 2024-11-20 18:49:34 -05:00.
27 changed files with 265 additions and 199 deletions

crates/language_models/Cargo.toml

@@ -0,0 +1,49 @@
[package]
name = "language_models"
version = "0.1.0"
edition = "2021"
publish = false
license = "GPL-3.0-or-later"

[lints]
workspace = true

[lib]
path = "src/language_models.rs"

[dependencies]
anthropic = { workspace = true, features = ["schemars"] }
anyhow.workspace = true
client.workspace = true
collections.workspace = true
copilot = { workspace = true, features = ["schemars"] }
editor.workspace = true
feature_flags.workspace = true
fs.workspace = true
futures.workspace = true
google_ai = { workspace = true, features = ["schemars"] }
gpui.workspace = true
http_client.workspace = true
language_model.workspace = true
menu.workspace = true
ollama = { workspace = true, features = ["schemars"] }
open_ai = { workspace = true, features = ["schemars"] }
project.workspace = true
proto.workspace = true
schemars.workspace = true
serde.workspace = true
serde_json.workspace = true
settings.workspace = true
smol.workspace = true
strum.workspace = true
telemetry_events.workspace = true
theme.workspace = true
thiserror.workspace = true
tiktoken-rs.workspace = true
ui.workspace = true
util.workspace = true

[dev-dependencies]
editor = { workspace = true, features = ["test-support"] }
language_model = { workspace = true, features = ["test-support"] }
project = { workspace = true, features = ["test-support"] }

crates/language_models/LICENSE-GPL

@@ -0,0 +1 @@
../../LICENSE-GPL

crates/language_models/src/language_models.rs

@@ -0,0 +1,80 @@
use std::sync::Arc;
use client::{Client, UserStore};
use fs::Fs;
use gpui::{AppContext, Model, ModelContext};
use language_model::{LanguageModelProviderId, LanguageModelRegistry, ZED_CLOUD_PROVIDER_ID};
mod logging;
pub mod provider;
mod settings;
use crate::provider::anthropic::AnthropicLanguageModelProvider;
use crate::provider::cloud::{CloudLanguageModelProvider, RefreshLlmTokenListener};
use crate::provider::copilot_chat::CopilotChatLanguageModelProvider;
use crate::provider::google::GoogleLanguageModelProvider;
use crate::provider::ollama::OllamaLanguageModelProvider;
use crate::provider::open_ai::OpenAiLanguageModelProvider;
pub use crate::settings::*;
pub use logging::report_assistant_event;
pub fn init(
user_store: Model<UserStore>,
client: Arc<Client>,
fs: Arc<dyn Fs>,
cx: &mut AppContext,
) {
crate::settings::init(fs, cx);
let registry = LanguageModelRegistry::global(cx);
registry.update(cx, |registry, cx| {
register_language_model_providers(registry, user_store, client, cx);
});
}
fn register_language_model_providers(
registry: &mut LanguageModelRegistry,
user_store: Model<UserStore>,
client: Arc<Client>,
cx: &mut ModelContext<LanguageModelRegistry>,
) {
use feature_flags::FeatureFlagAppExt;
RefreshLlmTokenListener::register(client.clone(), cx);
registry.register_provider(
AnthropicLanguageModelProvider::new(client.http_client(), cx),
cx,
);
registry.register_provider(
OpenAiLanguageModelProvider::new(client.http_client(), cx),
cx,
);
registry.register_provider(
OllamaLanguageModelProvider::new(client.http_client(), cx),
cx,
);
registry.register_provider(
GoogleLanguageModelProvider::new(client.http_client(), cx),
cx,
);
registry.register_provider(CopilotChatLanguageModelProvider::new(cx), cx);
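// The Zed cloud provider is feature-flagged: register it when the
// `LanguageModels` flag is enabled, and unregister it when the flag is
// disabled.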
cx.observe_flag::<feature_flags::LanguageModels, _>(move |enabled, cx| {
let user_store = user_store.clone();
let client = client.clone();
LanguageModelRegistry::global(cx).update(cx, move |registry, cx| {
if enabled {
registry.register_provider(
CloudLanguageModelProvider::new(user_store.clone(), client.clone(), cx),
cx,
);
} else {
registry.unregister_provider(
LanguageModelProviderId::from(ZED_CLOUD_PROVIDER_ID.to_string()),
cx,
);
}
});
})
.detach();
}

crates/language_models/src/logging.rs

@@ -0,0 +1,90 @@
use anthropic::{AnthropicError, ANTHROPIC_API_URL};
use anyhow::{anyhow, Context, Result};
use client::telemetry::Telemetry;
use gpui::BackgroundExecutor;
use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
use std::env;
use std::sync::Arc;
use telemetry_events::{AssistantEvent, AssistantKind, AssistantPhase};
use util::ResultExt;
use crate::provider::anthropic::PROVIDER_ID as ANTHROPIC_PROVIDER_ID;
pub fn report_assistant_event(
event: AssistantEvent,
telemetry: Option<Arc<Telemetry>>,
client: Arc<dyn HttpClient>,
model_api_key: Option<String>,
executor: &BackgroundExecutor,
) {
if let Some(telemetry) = telemetry.as_ref() {
telemetry.report_assistant_event(event.clone());
if telemetry.metrics_enabled() && event.model_provider == ANTHROPIC_PROVIDER_ID {
executor
.spawn(async move {
report_anthropic_event(event, client, model_api_key)
.await
.log_err();
})
.detach();
}
}
}
async fn report_anthropic_event(
event: AssistantEvent,
client: Arc<dyn HttpClient>,
model_api_key: Option<String>,
) -> Result<(), AnthropicError> {
let api_key = match model_api_key {
Some(key) => key,
None => {
return Err(AnthropicError::Other(anyhow!(
"Anthropic API key is not set"
)));
}
};
let uri = format!("{ANTHROPIC_API_URL}/v1/log/zed");
let request_builder = HttpRequest::builder()
.method(Method::POST)
.uri(uri)
.header("X-Api-Key", api_key)
.header("Content-Type", "application/json");
let serialized_event: serde_json::Value = serde_json::json!({
"completion_type": match event.kind {
AssistantKind::Inline => "natural_language_completion_in_editor",
AssistantKind::InlineTerminal => "natural_language_completion_in_terminal",
AssistantKind::Panel => "conversation_message",
},
"event": match event.phase {
AssistantPhase::Response => "response",
AssistantPhase::Invoked => "invoke",
AssistantPhase::Accepted => "accept",
AssistantPhase::Rejected => "reject",
},
"metadata": {
"language_name": event.language_name,
"message_id": event.message_id,
"platform": env::consts::OS,
}
});
let request = request_builder
.body(AsyncBody::from(serialized_event.to_string()))
.context("failed to construct request body")?;
let response = client
.send(request)
.await
.context("failed to send request to Anthropic")?;
if response.status().is_success() {
return Ok(());
}
return Err(AnthropicError::Other(anyhow!(
"Failed to log: {}",
response.status(),
)));
}
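
For reference, a sketch of the payload this function builds, with made-up values (the real values come from the `AssistantEvent` fields above):

```rust
// Illustrative only; mirrors the json! call in report_anthropic_event.
let example = serde_json::json!({
    "completion_type": "conversation_message", // AssistantKind::Panel
    "event": "response",                       // AssistantPhase::Response
    "metadata": {
        "language_name": "Rust",
        "message_id": "msg_0123",
        "platform": "macos", // std::env::consts::OS
    }
});
```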

crates/language_models/src/provider.rs

@@ -0,0 +1,6 @@
pub mod anthropic;
pub mod cloud;
pub mod copilot_chat;
pub mod google;
pub mod ollama;
pub mod open_ai;

crates/language_models/src/provider/anthropic.rs

@@ -0,0 +1,742 @@
use crate::AllLanguageModelSettings;
use anthropic::{AnthropicError, ContentDelta, Event, ResponseContent};
use anyhow::{anyhow, Context as _, Result};
use collections::{BTreeMap, HashMap};
use editor::{Editor, EditorElement, EditorStyle};
use futures::Stream;
use futures::{future::BoxFuture, stream::BoxStream, FutureExt, StreamExt, TryStreamExt as _};
use gpui::{
AnyView, AppContext, AsyncAppContext, FontStyle, ModelContext, Subscription, Task, TextStyle,
View, WhiteSpace,
};
use http_client::HttpClient;
use language_model::{
LanguageModel, LanguageModelCacheConfiguration, LanguageModelId, LanguageModelName,
LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
LanguageModelProviderState, LanguageModelRequest, RateLimiter, Role,
};
use language_model::{LanguageModelCompletionEvent, LanguageModelToolUse, StopReason};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
use std::pin::Pin;
use std::str::FromStr;
use std::sync::Arc;
use strum::IntoEnumIterator;
use theme::ThemeSettings;
use ui::{prelude::*, Icon, IconName, Tooltip};
use util::{maybe, ResultExt};
pub const PROVIDER_ID: &str = "anthropic";
const PROVIDER_NAME: &str = "Anthropic";
#[derive(Default, Clone, Debug, PartialEq)]
pub struct AnthropicSettings {
pub api_url: String,
/// Extend Zed's list of Anthropic models.
pub available_models: Vec<AvailableModel>,
pub needs_setting_migration: bool,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct AvailableModel {
/// The model's name in the Anthropic API. e.g. claude-3-5-sonnet-latest, claude-3-opus-20240229, etc.
pub name: String,
/// The model's name in Zed's UI, such as in the model selector dropdown menu in the assistant panel.
pub display_name: Option<String>,
/// The model's context window size.
pub max_tokens: usize,
/// A model `name` to substitute when calling tools, in case the primary model doesn't support tool calling.
pub tool_override: Option<String>,
/// Configuration of Anthropic's caching API.
pub cache_configuration: Option<LanguageModelCacheConfiguration>,
pub max_output_tokens: Option<u32>,
pub default_temperature: Option<f32>,
}
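
As a sketch, these fields map onto the user's settings file roughly like this, assuming the `language_models` settings key (exact key names may differ):

```json
{
  "language_models": {
    "anthropic": {
      "available_models": [
        {
          "name": "claude-3-5-sonnet-latest",
          "display_name": "Claude 3.5 Sonnet",
          "max_tokens": 200000,
          "max_output_tokens": 8192,
          "default_temperature": 1.0
        }
      ]
    }
  }
}
```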
pub struct AnthropicLanguageModelProvider {
http_client: Arc<dyn HttpClient>,
state: gpui::Model<State>,
}
const ANTHROPIC_API_KEY_VAR: &str = "ANTHROPIC_API_KEY";
pub struct State {
api_key: Option<String>,
api_key_from_env: bool,
_subscription: Subscription,
}
impl State {
fn reset_api_key(&self, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
let delete_credentials =
cx.delete_credentials(&AllLanguageModelSettings::get_global(cx).anthropic.api_url);
cx.spawn(|this, mut cx| async move {
delete_credentials.await.ok();
this.update(&mut cx, |this, cx| {
this.api_key = None;
this.api_key_from_env = false;
cx.notify();
})
})
}
fn set_api_key(&mut self, api_key: String, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
let write_credentials = cx.write_credentials(
AllLanguageModelSettings::get_global(cx)
.anthropic
.api_url
.as_str(),
"Bearer",
api_key.as_bytes(),
);
cx.spawn(|this, mut cx| async move {
write_credentials.await?;
this.update(&mut cx, |this, cx| {
this.api_key = Some(api_key);
cx.notify();
})
})
}
fn is_authenticated(&self) -> bool {
self.api_key.is_some()
}
fn authenticate(&self, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
if self.is_authenticated() {
Task::ready(Ok(()))
} else {
let api_url = AllLanguageModelSettings::get_global(cx)
.anthropic
.api_url
.clone();
cx.spawn(|this, mut cx| async move {
let (api_key, from_env) = if let Ok(api_key) = std::env::var(ANTHROPIC_API_KEY_VAR)
{
(api_key, true)
} else {
let (_, api_key) = cx
.update(|cx| cx.read_credentials(&api_url))?
.await?
.ok_or_else(|| anyhow!("credentials not found"))?;
(String::from_utf8(api_key)?, false)
};
this.update(&mut cx, |this, cx| {
this.api_key = Some(api_key);
this.api_key_from_env = from_env;
cx.notify();
})
})
}
}
}
impl AnthropicLanguageModelProvider {
pub fn new(http_client: Arc<dyn HttpClient>, cx: &mut AppContext) -> Self {
let state = cx.new_model(|cx| State {
api_key: None,
api_key_from_env: false,
_subscription: cx.observe_global::<SettingsStore>(|_, cx| {
cx.notify();
}),
});
Self { http_client, state }
}
}
impl LanguageModelProviderState for AnthropicLanguageModelProvider {
type ObservableEntity = State;
fn observable_entity(&self) -> Option<gpui::Model<Self::ObservableEntity>> {
Some(self.state.clone())
}
}
impl LanguageModelProvider for AnthropicLanguageModelProvider {
fn id(&self) -> LanguageModelProviderId {
LanguageModelProviderId(PROVIDER_ID.into())
}
fn name(&self) -> LanguageModelProviderName {
LanguageModelProviderName(PROVIDER_NAME.into())
}
fn icon(&self) -> IconName {
IconName::AiAnthropic
}
fn provided_models(&self, cx: &AppContext) -> Vec<Arc<dyn LanguageModel>> {
let mut models = BTreeMap::default();
// Add base models from anthropic::Model::iter()
for model in anthropic::Model::iter() {
if !matches!(model, anthropic::Model::Custom { .. }) {
models.insert(model.id().to_string(), model);
}
}
// Override with available models from settings
for model in AllLanguageModelSettings::get_global(cx)
.anthropic
.available_models
.iter()
{
models.insert(
model.name.clone(),
anthropic::Model::Custom {
name: model.name.clone(),
display_name: model.display_name.clone(),
max_tokens: model.max_tokens,
tool_override: model.tool_override.clone(),
cache_configuration: model.cache_configuration.as_ref().map(|config| {
anthropic::AnthropicModelCacheConfiguration {
max_cache_anchors: config.max_cache_anchors,
should_speculate: config.should_speculate,
min_total_token: config.min_total_token,
}
}),
max_output_tokens: model.max_output_tokens,
default_temperature: model.default_temperature,
},
);
}
models
.into_values()
.map(|model| {
Arc::new(AnthropicModel {
id: LanguageModelId::from(model.id().to_string()),
model,
state: self.state.clone(),
http_client: self.http_client.clone(),
request_limiter: RateLimiter::new(4),
}) as Arc<dyn LanguageModel>
})
.collect()
}
fn is_authenticated(&self, cx: &AppContext) -> bool {
self.state.read(cx).is_authenticated()
}
fn authenticate(&self, cx: &mut AppContext) -> Task<Result<()>> {
self.state.update(cx, |state, cx| state.authenticate(cx))
}
fn configuration_view(&self, cx: &mut WindowContext) -> AnyView {
cx.new_view(|cx| ConfigurationView::new(self.state.clone(), cx))
.into()
}
fn reset_credentials(&self, cx: &mut AppContext) -> Task<Result<()>> {
self.state.update(cx, |state, cx| state.reset_api_key(cx))
}
}
pub struct AnthropicModel {
id: LanguageModelId,
model: anthropic::Model,
state: gpui::Model<State>,
http_client: Arc<dyn HttpClient>,
request_limiter: RateLimiter,
}
pub fn count_anthropic_tokens(
request: LanguageModelRequest,
cx: &AppContext,
) -> BoxFuture<'static, Result<usize>> {
cx.background_executor()
.spawn(async move {
let messages = request.messages;
let mut tokens_from_images = 0;
let mut string_messages = Vec::with_capacity(messages.len());
for message in messages {
use language_model::MessageContent;
let mut string_contents = String::new();
for content in message.content {
match content {
MessageContent::Text(text) => {
string_contents.push_str(&text);
}
MessageContent::Image(image) => {
tokens_from_images += image.estimate_tokens();
}
MessageContent::ToolUse(_tool_use) => {
// TODO: Estimate token usage from tool uses.
}
MessageContent::ToolResult(tool_result) => {
string_contents.push_str(&tool_result.content);
}
}
}
if !string_contents.is_empty() {
string_messages.push(tiktoken_rs::ChatCompletionRequestMessage {
role: match message.role {
Role::User => "user".into(),
Role::Assistant => "assistant".into(),
Role::System => "system".into(),
},
content: Some(string_contents),
name: None,
function_call: None,
});
}
}
// Tiktoken doesn't yet support these models, so we manually use the
// same tokenizer as GPT-4.
tiktoken_rs::num_tokens_from_messages("gpt-4", &string_messages)
.map(|tokens| tokens + tokens_from_images)
})
.boxed()
}
impl AnthropicModel {
fn stream_completion(
&self,
request: anthropic::Request,
cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<BoxStream<'static, Result<anthropic::Event, AnthropicError>>>>
{
let http_client = self.http_client.clone();
let Ok((api_key, api_url)) = cx.read_model(&self.state, |state, cx| {
let settings = &AllLanguageModelSettings::get_global(cx).anthropic;
(state.api_key.clone(), settings.api_url.clone())
}) else {
return futures::future::ready(Err(anyhow!("App state dropped"))).boxed();
};
async move {
let api_key = api_key.ok_or_else(|| anyhow!("Missing Anthropic API Key"))?;
let request =
anthropic::stream_completion(http_client.as_ref(), &api_url, &api_key, request);
request.await.context("failed to stream completion")
}
.boxed()
}
}
impl LanguageModel for AnthropicModel {
fn id(&self) -> LanguageModelId {
self.id.clone()
}
fn name(&self) -> LanguageModelName {
LanguageModelName::from(self.model.display_name().to_string())
}
fn provider_id(&self) -> LanguageModelProviderId {
LanguageModelProviderId(PROVIDER_ID.into())
}
fn provider_name(&self) -> LanguageModelProviderName {
LanguageModelProviderName(PROVIDER_NAME.into())
}
fn telemetry_id(&self) -> String {
format!("anthropic/{}", self.model.id())
}
fn api_key(&self, cx: &AppContext) -> Option<String> {
self.state.read(cx).api_key.clone()
}
fn max_token_count(&self) -> usize {
self.model.max_token_count()
}
fn max_output_tokens(&self) -> Option<u32> {
Some(self.model.max_output_tokens())
}
fn count_tokens(
&self,
request: LanguageModelRequest,
cx: &AppContext,
) -> BoxFuture<'static, Result<usize>> {
count_anthropic_tokens(request, cx)
}
fn stream_completion(
&self,
request: LanguageModelRequest,
cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<BoxStream<'static, Result<LanguageModelCompletionEvent>>>> {
let request = request.into_anthropic(
self.model.id().into(),
self.model.default_temperature(),
self.model.max_output_tokens(),
);
let request = self.stream_completion(request, cx);
let future = self.request_limiter.stream(async move {
let response = request.await.map_err(|err| anyhow!(err))?;
Ok(map_to_language_model_completion_events(response))
});
async move { Ok(future.await?.boxed()) }.boxed()
}
fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
self.model
.cache_configuration()
.map(|config| LanguageModelCacheConfiguration {
max_cache_anchors: config.max_cache_anchors,
should_speculate: config.should_speculate,
min_total_token: config.min_total_token,
})
}
fn use_any_tool(
&self,
request: LanguageModelRequest,
tool_name: String,
tool_description: String,
input_schema: serde_json::Value,
cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
let mut request = request.into_anthropic(
self.model.tool_model_id().into(),
self.model.default_temperature(),
self.model.max_output_tokens(),
);
request.tool_choice = Some(anthropic::ToolChoice::Tool {
name: tool_name.clone(),
});
request.tools = vec![anthropic::Tool {
name: tool_name.clone(),
description: tool_description,
input_schema,
}];
let response = self.stream_completion(request, cx);
self.request_limiter
.run(async move {
let response = response.await?;
Ok(anthropic::extract_tool_args_from_events(
tool_name,
Box::pin(response.map_err(|e| anyhow!(e))),
)
.await?
.boxed())
})
.boxed()
}
}
pub fn map_to_language_model_completion_events(
events: Pin<Box<dyn Send + Stream<Item = Result<Event, AnthropicError>>>>,
) -> impl Stream<Item = Result<LanguageModelCompletionEvent>> {
struct RawToolUse {
id: String,
name: String,
input_json: String,
}
struct State {
events: Pin<Box<dyn Send + Stream<Item = Result<Event, AnthropicError>>>>,
tool_uses_by_index: HashMap<usize, RawToolUse>,
}
futures::stream::unfold(
State {
events,
tool_uses_by_index: HashMap::default(),
},
|mut state| async move {
while let Some(event) = state.events.next().await {
match event {
Ok(event) => match event {
Event::ContentBlockStart {
index,
content_block,
} => match content_block {
ResponseContent::Text { text } => {
return Some((
Some(Ok(LanguageModelCompletionEvent::Text(text))),
state,
));
}
ResponseContent::ToolUse { id, name, .. } => {
state.tool_uses_by_index.insert(
index,
RawToolUse {
id,
name,
input_json: String::new(),
},
);
return Some((None, state));
}
},
Event::ContentBlockDelta { index, delta } => match delta {
ContentDelta::TextDelta { text } => {
return Some((
Some(Ok(LanguageModelCompletionEvent::Text(text))),
state,
));
}
ContentDelta::InputJsonDelta { partial_json } => {
if let Some(tool_use) = state.tool_uses_by_index.get_mut(&index) {
tool_use.input_json.push_str(&partial_json);
return Some((None, state));
}
}
},
Event::ContentBlockStop { index } => {
if let Some(tool_use) = state.tool_uses_by_index.remove(&index) {
return Some((
Some(maybe!({
Ok(LanguageModelCompletionEvent::ToolUse(
LanguageModelToolUse {
id: tool_use.id,
name: tool_use.name,
input: if tool_use.input_json.is_empty() {
serde_json::Value::Null
} else {
serde_json::Value::from_str(
&tool_use.input_json,
)
.map_err(|err| anyhow!(err))?
},
},
))
})),
state,
));
}
}
Event::MessageStart { message } => {
return Some((
Some(Ok(LanguageModelCompletionEvent::StartMessage {
message_id: message.id,
})),
state,
))
}
Event::MessageDelta { delta, .. } => {
if let Some(stop_reason) = delta.stop_reason.as_deref() {
let stop_reason = match stop_reason {
"end_turn" => StopReason::EndTurn,
"max_tokens" => StopReason::MaxTokens,
"tool_use" => StopReason::ToolUse,
_ => StopReason::EndTurn,
};
return Some((
Some(Ok(LanguageModelCompletionEvent::Stop(stop_reason))),
state,
));
}
}
Event::Error { error } => {
return Some((
Some(Err(anyhow!(AnthropicError::ApiError(error)))),
state,
));
}
_ => {}
},
Err(err) => {
return Some((Some(Err(anyhow!(err))), state));
}
}
}
None
},
)
.filter_map(|event| async move { event })
}
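
A sketch of consuming the mapped stream, matching the event variants produced in this file (assumes an async context returning `anyhow::Result<()>` and a `stream` obtained from `stream_completion`; other variants, if any, are elided):

```rust
use futures::StreamExt as _;

while let Some(event) = stream.next().await {
    match event? {
        LanguageModelCompletionEvent::StartMessage { message_id } => {
            eprintln!("started message {message_id}");
        }
        LanguageModelCompletionEvent::Text(chunk) => print!("{chunk}"),
        LanguageModelCompletionEvent::ToolUse(tool_use) => {
            eprintln!("tool use requested: {}", tool_use.name);
        }
        LanguageModelCompletionEvent::Stop(_reason) => break,
    }
}
```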
struct ConfigurationView {
api_key_editor: View<Editor>,
state: gpui::Model<State>,
load_credentials_task: Option<Task<()>>,
}
impl ConfigurationView {
const PLACEHOLDER_TEXT: &'static str = "sk-ant-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
fn new(state: gpui::Model<State>, cx: &mut ViewContext<Self>) -> Self {
cx.observe(&state, |_, _, cx| {
cx.notify();
})
.detach();
let load_credentials_task = Some(cx.spawn({
let state = state.clone();
|this, mut cx| async move {
if let Some(task) = state
.update(&mut cx, |state, cx| state.authenticate(cx))
.log_err()
{
// We don't log an error, because "not signed in" is also an error.
let _ = task.await;
}
this.update(&mut cx, |this, cx| {
this.load_credentials_task = None;
cx.notify();
})
.log_err();
}
}));
Self {
api_key_editor: cx.new_view(|cx| {
let mut editor = Editor::single_line(cx);
editor.set_placeholder_text(Self::PLACEHOLDER_TEXT, cx);
editor
}),
state,
load_credentials_task,
}
}
fn save_api_key(&mut self, _: &menu::Confirm, cx: &mut ViewContext<Self>) {
let api_key = self.api_key_editor.read(cx).text(cx);
if api_key.is_empty() {
return;
}
let state = self.state.clone();
cx.spawn(|_, mut cx| async move {
state
.update(&mut cx, |state, cx| state.set_api_key(api_key, cx))?
.await
})
.detach_and_log_err(cx);
cx.notify();
}
fn reset_api_key(&mut self, cx: &mut ViewContext<Self>) {
self.api_key_editor
.update(cx, |editor, cx| editor.set_text("", cx));
let state = self.state.clone();
cx.spawn(|_, mut cx| async move {
state
.update(&mut cx, |state, cx| state.reset_api_key(cx))?
.await
})
.detach_and_log_err(cx);
cx.notify();
}
fn render_api_key_editor(&self, cx: &mut ViewContext<Self>) -> impl IntoElement {
let settings = ThemeSettings::get_global(cx);
let text_style = TextStyle {
color: cx.theme().colors().text,
font_family: settings.ui_font.family.clone(),
font_features: settings.ui_font.features.clone(),
font_fallbacks: settings.ui_font.fallbacks.clone(),
font_size: rems(0.875).into(),
font_weight: settings.ui_font.weight,
font_style: FontStyle::Normal,
line_height: relative(1.3),
background_color: None,
underline: None,
strikethrough: None,
white_space: WhiteSpace::Normal,
truncate: None,
};
EditorElement::new(
&self.api_key_editor,
EditorStyle {
background: cx.theme().colors().editor_background,
local_player: cx.theme().players().local(),
text: text_style,
..Default::default()
},
)
}
fn should_render_editor(&self, cx: &mut ViewContext<Self>) -> bool {
!self.state.read(cx).is_authenticated()
}
}
impl Render for ConfigurationView {
fn render(&mut self, cx: &mut ViewContext<Self>) -> impl IntoElement {
const ANTHROPIC_CONSOLE_URL: &str = "https://console.anthropic.com/settings/keys";
const INSTRUCTIONS: [&str; 3] = [
"To use Zed's assistant with Anthropic, you need to add an API key. Follow these steps:",
"- Create one at:",
"- Paste your API key below and hit enter to use the assistant:",
];
let env_var_set = self.state.read(cx).api_key_from_env;
if self.load_credentials_task.is_some() {
div().child(Label::new("Loading credentials...")).into_any()
} else if self.should_render_editor(cx) {
v_flex()
.size_full()
.on_action(cx.listener(Self::save_api_key))
.child(Label::new(INSTRUCTIONS[0]))
.child(h_flex().child(Label::new(INSTRUCTIONS[1])).child(
Button::new("anthropic_console", ANTHROPIC_CONSOLE_URL)
.style(ButtonStyle::Subtle)
.icon(IconName::ExternalLink)
.icon_size(IconSize::XSmall)
.icon_color(Color::Muted)
.on_click(move |_, cx| cx.open_url(ANTHROPIC_CONSOLE_URL))
)
)
.child(Label::new(INSTRUCTIONS[2]))
.child(
h_flex()
.w_full()
.my_2()
.px_2()
.py_1()
.bg(cx.theme().colors().editor_background)
.rounded_md()
.child(self.render_api_key_editor(cx)),
)
.child(
Label::new(
format!("You can also assign the {ANTHROPIC_API_KEY_VAR} environment variable and restart Zed."),
)
.size(LabelSize::Small),
)
.into_any()
} else {
h_flex()
.size_full()
.justify_between()
.child(
h_flex()
.gap_1()
.child(Icon::new(IconName::Check).color(Color::Success))
.child(Label::new(if env_var_set {
format!("API key set in {ANTHROPIC_API_KEY_VAR} environment variable.")
} else {
"API key configured.".to_string()
})),
)
.child(
Button::new("reset-key", "Reset key")
.icon(Some(IconName::Trash))
.icon_size(IconSize::Small)
.icon_position(IconPosition::Start)
.disabled(env_var_set)
.when(env_var_set, |this| {
this.tooltip(|cx| Tooltip::text(format!("To reset your API key, unset the {ANTHROPIC_API_KEY_VAR} environment variable."), cx))
})
.on_click(cx.listener(|this, _, cx| this.reset_api_key(cx))),
)
.into_any()
}
}
}

crates/language_models/src/provider/cloud.rs

@@ -0,0 +1,959 @@
use super::open_ai::count_open_ai_tokens;
use anthropic::AnthropicError;
use anyhow::{anyhow, Result};
use client::{
zed_urls, Client, PerformCompletionParams, UserStore, EXPIRED_LLM_TOKEN_HEADER_NAME,
MAX_LLM_MONTHLY_SPEND_REACHED_HEADER_NAME,
};
use collections::BTreeMap;
use feature_flags::{FeatureFlagAppExt, LlmClosedBeta, ZedPro};
use futures::{
future::BoxFuture, stream::BoxStream, AsyncBufReadExt, FutureExt, Stream, StreamExt,
TryStreamExt as _,
};
use gpui::{
AnyElement, AnyView, AppContext, AsyncAppContext, EventEmitter, FontWeight, Global, Model,
ModelContext, ReadGlobal, Subscription, Task,
};
use http_client::{AsyncBody, HttpClient, Method, Response, StatusCode};
use language_model::{
CloudModel, LanguageModel, LanguageModelCacheConfiguration, LanguageModelId, LanguageModelName,
LanguageModelProviderId, LanguageModelProviderName, LanguageModelProviderState,
LanguageModelRequest, RateLimiter, ZED_CLOUD_PROVIDER_ID,
};
use language_model::{
LanguageModelAvailability, LanguageModelCompletionEvent, LanguageModelProvider,
};
use proto::TypedEnvelope;
use schemars::JsonSchema;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use serde_json::value::RawValue;
use settings::{Settings, SettingsStore};
use smol::{
io::{AsyncReadExt, BufReader},
lock::{RwLock, RwLockUpgradableReadGuard, RwLockWriteGuard},
};
use std::fmt;
use std::{
future,
sync::{Arc, LazyLock},
};
use strum::IntoEnumIterator;
use thiserror::Error;
use ui::{prelude::*, TintColor};
use crate::provider::anthropic::map_to_language_model_completion_events;
use crate::AllLanguageModelSettings;
use super::anthropic::count_anthropic_tokens;
pub const PROVIDER_NAME: &str = "Zed";
const ZED_CLOUD_PROVIDER_ADDITIONAL_MODELS_JSON: Option<&str> =
option_env!("ZED_CLOUD_PROVIDER_ADDITIONAL_MODELS_JSON");
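// Baked in at compile time via `option_env!`; invalid JSON here panics
// (unwrap) the first time the list is accessed.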
fn zed_cloud_provider_additional_models() -> &'static [AvailableModel] {
static ADDITIONAL_MODELS: LazyLock<Vec<AvailableModel>> = LazyLock::new(|| {
ZED_CLOUD_PROVIDER_ADDITIONAL_MODELS_JSON
.map(|json| serde_json::from_str(json).unwrap())
.unwrap_or_default()
});
ADDITIONAL_MODELS.as_slice()
}
#[derive(Default, Clone, Debug, PartialEq)]
pub struct ZedDotDevSettings {
pub available_models: Vec<AvailableModel>,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum AvailableProvider {
Anthropic,
OpenAi,
Google,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct AvailableModel {
/// The provider of the language model.
pub provider: AvailableProvider,
/// The model's name in the provider's API. e.g. claude-3-5-sonnet-20240620
pub name: String,
/// The name displayed in the UI, such as in the assistant panel model dropdown menu.
pub display_name: Option<String>,
/// The size of the context window, indicating the maximum number of tokens the model can process.
pub max_tokens: usize,
/// The maximum number of output tokens allowed by the model.
pub max_output_tokens: Option<u32>,
/// The maximum number of completion tokens allowed by the model (o1-* only)
pub max_completion_tokens: Option<u32>,
/// Override this model with a different Anthropic model for tool calls.
pub tool_override: Option<String>,
/// Indicates whether this custom model supports caching.
pub cache_configuration: Option<LanguageModelCacheConfiguration>,
/// The default temperature to use for this model.
pub default_temperature: Option<f32>,
}
struct GlobalRefreshLlmTokenListener(Model<RefreshLlmTokenListener>);
impl Global for GlobalRefreshLlmTokenListener {}
pub struct RefreshLlmTokenEvent;
pub struct RefreshLlmTokenListener {
_llm_token_subscription: client::Subscription,
}
impl EventEmitter<RefreshLlmTokenEvent> for RefreshLlmTokenListener {}
impl RefreshLlmTokenListener {
pub fn register(client: Arc<Client>, cx: &mut AppContext) {
let listener = cx.new_model(|cx| RefreshLlmTokenListener::new(client, cx));
cx.set_global(GlobalRefreshLlmTokenListener(listener));
}
pub fn global(cx: &AppContext) -> Model<Self> {
GlobalRefreshLlmTokenListener::global(cx).0.clone()
}
fn new(client: Arc<Client>, cx: &mut ModelContext<Self>) -> Self {
Self {
_llm_token_subscription: client
.add_message_handler(cx.weak_model(), Self::handle_refresh_llm_token),
}
}
async fn handle_refresh_llm_token(
this: Model<Self>,
_: TypedEnvelope<proto::RefreshLlmToken>,
mut cx: AsyncAppContext,
) -> Result<()> {
this.update(&mut cx, |_this, cx| cx.emit(RefreshLlmTokenEvent))
}
}
pub struct CloudLanguageModelProvider {
client: Arc<Client>,
state: gpui::Model<State>,
_maintain_client_status: Task<()>,
}
pub struct State {
client: Arc<Client>,
llm_api_token: LlmApiToken,
user_store: Model<UserStore>,
status: client::Status,
accept_terms: Option<Task<Result<()>>>,
_settings_subscription: Subscription,
_llm_token_subscription: Subscription,
}
impl State {
fn new(
client: Arc<Client>,
user_store: Model<UserStore>,
status: client::Status,
cx: &mut ModelContext<Self>,
) -> Self {
let refresh_llm_token_listener = RefreshLlmTokenListener::global(cx);
Self {
client: client.clone(),
llm_api_token: LlmApiToken::default(),
user_store,
status,
accept_terms: None,
_settings_subscription: cx.observe_global::<SettingsStore>(|_, cx| {
cx.notify();
}),
_llm_token_subscription: cx.subscribe(
&refresh_llm_token_listener,
|this, _listener, _event, cx| {
let client = this.client.clone();
let llm_api_token = this.llm_api_token.clone();
cx.spawn(|_this, _cx| async move {
llm_api_token.refresh(&client).await?;
anyhow::Ok(())
})
.detach_and_log_err(cx);
},
),
}
}
fn is_signed_out(&self) -> bool {
self.status.is_signed_out()
}
fn authenticate(&self, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
let client = self.client.clone();
cx.spawn(move |this, mut cx| async move {
client.authenticate_and_connect(true, &cx).await?;
this.update(&mut cx, |_, cx| cx.notify())
})
}
fn has_accepted_terms_of_service(&self, cx: &AppContext) -> bool {
self.user_store
.read(cx)
.current_user_has_accepted_terms()
.unwrap_or(false)
}
fn accept_terms_of_service(&mut self, cx: &mut ModelContext<Self>) {
let user_store = self.user_store.clone();
self.accept_terms = Some(cx.spawn(move |this, mut cx| async move {
let _ = user_store
.update(&mut cx, |store, cx| store.accept_terms_of_service(cx))?
.await;
this.update(&mut cx, |this, cx| {
this.accept_terms = None;
cx.notify()
})
}));
}
}
impl CloudLanguageModelProvider {
pub fn new(user_store: Model<UserStore>, client: Arc<Client>, cx: &mut AppContext) -> Self {
let mut status_rx = client.status();
let status = *status_rx.borrow();
let state = cx.new_model(|cx| State::new(client.clone(), user_store.clone(), status, cx));
let state_ref = state.downgrade();
let maintain_client_status = cx.spawn(|mut cx| async move {
while let Some(status) = status_rx.next().await {
if let Some(this) = state_ref.upgrade() {
_ = this.update(&mut cx, |this, cx| {
if this.status != status {
this.status = status;
cx.notify();
}
});
} else {
break;
}
}
});
Self {
client,
state: state.clone(),
_maintain_client_status: maintain_client_status,
}
}
}
impl LanguageModelProviderState for CloudLanguageModelProvider {
type ObservableEntity = State;
fn observable_entity(&self) -> Option<gpui::Model<Self::ObservableEntity>> {
Some(self.state.clone())
}
}
impl LanguageModelProvider for CloudLanguageModelProvider {
fn id(&self) -> LanguageModelProviderId {
LanguageModelProviderId(ZED_CLOUD_PROVIDER_ID.into())
}
fn name(&self) -> LanguageModelProviderName {
LanguageModelProviderName(PROVIDER_NAME.into())
}
fn icon(&self) -> IconName {
IconName::AiZed
}
fn provided_models(&self, cx: &AppContext) -> Vec<Arc<dyn LanguageModel>> {
let mut models = BTreeMap::default();
if cx.is_staff() {
for model in anthropic::Model::iter() {
if !matches!(model, anthropic::Model::Custom { .. }) {
models.insert(model.id().to_string(), CloudModel::Anthropic(model));
}
}
for model in open_ai::Model::iter() {
if !matches!(model, open_ai::Model::Custom { .. }) {
models.insert(model.id().to_string(), CloudModel::OpenAi(model));
}
}
for model in google_ai::Model::iter() {
if !matches!(model, google_ai::Model::Custom { .. }) {
models.insert(model.id().to_string(), CloudModel::Google(model));
}
}
} else {
models.insert(
anthropic::Model::Claude3_5Sonnet.id().to_string(),
CloudModel::Anthropic(anthropic::Model::Claude3_5Sonnet),
);
}
let llm_closed_beta_models = if cx.has_flag::<LlmClosedBeta>() {
zed_cloud_provider_additional_models()
} else {
&[]
};
// Override with available models from settings
for model in AllLanguageModelSettings::get_global(cx)
.zed_dot_dev
.available_models
.iter()
.chain(llm_closed_beta_models)
.cloned()
{
let model = match model.provider {
AvailableProvider::Anthropic => CloudModel::Anthropic(anthropic::Model::Custom {
name: model.name.clone(),
display_name: model.display_name.clone(),
max_tokens: model.max_tokens,
tool_override: model.tool_override.clone(),
cache_configuration: model.cache_configuration.as_ref().map(|config| {
anthropic::AnthropicModelCacheConfiguration {
max_cache_anchors: config.max_cache_anchors,
should_speculate: config.should_speculate,
min_total_token: config.min_total_token,
}
}),
default_temperature: model.default_temperature,
max_output_tokens: model.max_output_tokens,
}),
AvailableProvider::OpenAi => CloudModel::OpenAi(open_ai::Model::Custom {
name: model.name.clone(),
display_name: model.display_name.clone(),
max_tokens: model.max_tokens,
max_output_tokens: model.max_output_tokens,
max_completion_tokens: model.max_completion_tokens,
}),
AvailableProvider::Google => CloudModel::Google(google_ai::Model::Custom {
name: model.name.clone(),
display_name: model.display_name.clone(),
max_tokens: model.max_tokens,
}),
};
models.insert(model.id().to_string(), model.clone());
}
let llm_api_token = self.state.read(cx).llm_api_token.clone();
models
.into_values()
.map(|model| {
Arc::new(CloudLanguageModel {
id: LanguageModelId::from(model.id().to_string()),
model,
llm_api_token: llm_api_token.clone(),
client: self.client.clone(),
request_limiter: RateLimiter::new(4),
}) as Arc<dyn LanguageModel>
})
.collect()
}
fn is_authenticated(&self, cx: &AppContext) -> bool {
!self.state.read(cx).is_signed_out()
}
fn authenticate(&self, _cx: &mut AppContext) -> Task<Result<()>> {
Task::ready(Ok(()))
}
fn configuration_view(&self, cx: &mut WindowContext) -> AnyView {
cx.new_view(|_cx| ConfigurationView {
state: self.state.clone(),
})
.into()
}
fn must_accept_terms(&self, cx: &AppContext) -> bool {
!self.state.read(cx).has_accepted_terms_of_service(cx)
}
fn render_accept_terms(&self, cx: &mut WindowContext) -> Option<AnyElement> {
let state = self.state.read(cx);
let terms = [(
"terms_of_service",
"Terms of Service",
"https://zed.dev/terms-of-service",
)]
.map(|(id, label, url)| {
Button::new(id, label)
.style(ButtonStyle::Subtle)
.icon(IconName::ExternalLink)
.icon_size(IconSize::XSmall)
.icon_color(Color::Muted)
.on_click(move |_, cx| cx.open_url(url))
});
if state.has_accepted_terms_of_service(cx) {
None
} else {
let disabled = state.accept_terms.is_some();
Some(
v_flex()
.gap_2()
.child(
v_flex()
.child(Label::new("Terms and Conditions").weight(FontWeight::MEDIUM))
.child(
Label::new(
"Please read and accept our terms and conditions to continue.",
)
.size(LabelSize::Small),
),
)
.child(v_flex().gap_1().children(terms))
.child(
h_flex().justify_end().child(
Button::new("accept_terms", "I've read it and accept it")
.disabled(disabled)
.on_click({
let state = self.state.downgrade();
move |_, cx| {
state
.update(cx, |state, cx| {
state.accept_terms_of_service(cx)
})
.ok();
}
}),
),
)
.into_any(),
)
}
}
fn reset_credentials(&self, _cx: &mut AppContext) -> Task<Result<()>> {
Task::ready(Ok(()))
}
}
pub struct CloudLanguageModel {
id: LanguageModelId,
model: CloudModel,
llm_api_token: LlmApiToken,
client: Arc<Client>,
request_limiter: RateLimiter,
}
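/// Cached LLM API token shared across requests; the async RwLock lets
/// concurrent completions reuse one token while serializing refreshes.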
#[derive(Clone, Default)]
struct LlmApiToken(Arc<RwLock<Option<String>>>);
#[derive(Error, Debug)]
pub struct PaymentRequiredError;
impl fmt::Display for PaymentRequiredError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"Payment required to use this language model. Please upgrade your account."
)
}
}
#[derive(Error, Debug)]
pub struct MaxMonthlySpendReachedError;
impl fmt::Display for MaxMonthlySpendReachedError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"Maximum spending limit reached for this month. For more usage, increase your spending limit."
)
}
}
impl CloudLanguageModel {
async fn perform_llm_completion(
client: Arc<Client>,
llm_api_token: LlmApiToken,
body: PerformCompletionParams,
) -> Result<Response<AsyncBody>> {
let http_client = &client.http_client();
let mut token = llm_api_token.acquire(&client).await?;
let mut did_retry = false;
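// Retry at most once: if the server reports an expired LLM token, refresh
// it and resend; all other failures break out of the loop immediately.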
let response = loop {
let request_builder = http_client::Request::builder();
let request = request_builder
.method(Method::POST)
.uri(http_client.build_zed_llm_url("/completion", &[])?.as_ref())
.header("Content-Type", "application/json")
.header("Authorization", format!("Bearer {token}"))
.body(serde_json::to_string(&body)?.into())?;
let mut response = http_client.send(request).await?;
if response.status().is_success() {
break response;
} else if !did_retry
&& response
.headers()
.get(EXPIRED_LLM_TOKEN_HEADER_NAME)
.is_some()
{
did_retry = true;
token = llm_api_token.refresh(&client).await?;
} else if response.status() == StatusCode::FORBIDDEN
&& response
.headers()
.get(MAX_LLM_MONTHLY_SPEND_REACHED_HEADER_NAME)
.is_some()
{
break Err(anyhow!(MaxMonthlySpendReachedError))?;
} else if response.status() == StatusCode::PAYMENT_REQUIRED {
break Err(anyhow!(PaymentRequiredError))?;
} else {
let mut body = String::new();
response.body_mut().read_to_string(&mut body).await?;
break Err(anyhow!(
"cloud language model completion failed with status {}: {body}",
response.status()
))?;
}
};
Ok(response)
}
}
impl LanguageModel for CloudLanguageModel {
fn id(&self) -> LanguageModelId {
self.id.clone()
}
fn name(&self) -> LanguageModelName {
LanguageModelName::from(self.model.display_name().to_string())
}
fn icon(&self) -> Option<IconName> {
self.model.icon()
}
fn provider_id(&self) -> LanguageModelProviderId {
LanguageModelProviderId(ZED_CLOUD_PROVIDER_ID.into())
}
fn provider_name(&self) -> LanguageModelProviderName {
LanguageModelProviderName(PROVIDER_NAME.into())
}
fn telemetry_id(&self) -> String {
format!("zed.dev/{}", self.model.id())
}
fn availability(&self) -> LanguageModelAvailability {
self.model.availability()
}
fn max_token_count(&self) -> usize {
self.model.max_token_count()
}
fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
match &self.model {
CloudModel::Anthropic(model) => {
model
.cache_configuration()
.map(|cache| LanguageModelCacheConfiguration {
max_cache_anchors: cache.max_cache_anchors,
should_speculate: cache.should_speculate,
min_total_token: cache.min_total_token,
})
}
CloudModel::OpenAi(_) | CloudModel::Google(_) => None,
}
}
fn count_tokens(
&self,
request: LanguageModelRequest,
cx: &AppContext,
) -> BoxFuture<'static, Result<usize>> {
match self.model.clone() {
CloudModel::Anthropic(_) => count_anthropic_tokens(request, cx),
CloudModel::OpenAi(model) => count_open_ai_tokens(request, model, cx),
CloudModel::Google(model) => {
let client = self.client.clone();
let request = request.into_google(model.id().into());
let request = google_ai::CountTokensRequest {
contents: request.contents,
};
async move {
let request = serde_json::to_string(&request)?;
let response = client
.request(proto::CountLanguageModelTokens {
provider: proto::LanguageModelProvider::Google as i32,
request,
})
.await?;
Ok(response.token_count as usize)
}
.boxed()
}
}
}
fn stream_completion(
&self,
request: LanguageModelRequest,
_cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<BoxStream<'static, Result<LanguageModelCompletionEvent>>>> {
match &self.model {
CloudModel::Anthropic(model) => {
let request = request.into_anthropic(
model.id().into(),
model.default_temperature(),
model.max_output_tokens(),
);
let client = self.client.clone();
let llm_api_token = self.llm_api_token.clone();
let future = self.request_limiter.stream(async move {
let response = Self::perform_llm_completion(
client.clone(),
llm_api_token,
PerformCompletionParams {
provider: client::LanguageModelProvider::Anthropic,
model: request.model.clone(),
provider_request: RawValue::from_string(serde_json::to_string(
&request,
)?)?,
},
)
.await?;
Ok(map_to_language_model_completion_events(Box::pin(
response_lines(response).map_err(AnthropicError::Other),
)))
});
async move { Ok(future.await?.boxed()) }.boxed()
}
CloudModel::OpenAi(model) => {
let client = self.client.clone();
let request = request.into_open_ai(model.id().into(), model.max_output_tokens());
let llm_api_token = self.llm_api_token.clone();
let future = self.request_limiter.stream(async move {
let response = Self::perform_llm_completion(
client.clone(),
llm_api_token,
PerformCompletionParams {
provider: client::LanguageModelProvider::OpenAi,
model: request.model.clone(),
provider_request: RawValue::from_string(serde_json::to_string(
&request,
)?)?,
},
)
.await?;
Ok(open_ai::extract_text_from_events(response_lines(response)))
});
async move {
Ok(future
.await?
.map(|result| result.map(LanguageModelCompletionEvent::Text))
.boxed())
}
.boxed()
}
CloudModel::Google(model) => {
let client = self.client.clone();
let request = request.into_google(model.id().into());
let llm_api_token = self.llm_api_token.clone();
let future = self.request_limiter.stream(async move {
let response = Self::perform_llm_completion(
client.clone(),
llm_api_token,
PerformCompletionParams {
provider: client::LanguageModelProvider::Google,
model: request.model.clone(),
provider_request: RawValue::from_string(serde_json::to_string(
&request,
)?)?,
},
)
.await?;
Ok(google_ai::extract_text_from_events(response_lines(
response,
)))
});
async move {
Ok(future
.await?
.map(|result| result.map(LanguageModelCompletionEvent::Text))
.boxed())
}
.boxed()
}
}
}
fn use_any_tool(
&self,
request: LanguageModelRequest,
tool_name: String,
tool_description: String,
input_schema: serde_json::Value,
_cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
let client = self.client.clone();
let llm_api_token = self.llm_api_token.clone();
match &self.model {
CloudModel::Anthropic(model) => {
let mut request = request.into_anthropic(
model.tool_model_id().into(),
model.default_temperature(),
model.max_output_tokens(),
);
request.tool_choice = Some(anthropic::ToolChoice::Tool {
name: tool_name.clone(),
});
request.tools = vec![anthropic::Tool {
name: tool_name.clone(),
description: tool_description,
input_schema,
}];
self.request_limiter
.run(async move {
let response = Self::perform_llm_completion(
client.clone(),
llm_api_token,
PerformCompletionParams {
provider: client::LanguageModelProvider::Anthropic,
model: request.model.clone(),
provider_request: RawValue::from_string(serde_json::to_string(
&request,
)?)?,
},
)
.await?;
Ok(anthropic::extract_tool_args_from_events(
tool_name,
Box::pin(response_lines(response)),
)
.await?
.boxed())
})
.boxed()
}
CloudModel::OpenAi(model) => {
let mut request =
request.into_open_ai(model.id().into(), model.max_output_tokens());
request.tool_choice = Some(open_ai::ToolChoice::Other(
open_ai::ToolDefinition::Function {
function: open_ai::FunctionDefinition {
name: tool_name.clone(),
description: None,
parameters: None,
},
},
));
request.tools = vec![open_ai::ToolDefinition::Function {
function: open_ai::FunctionDefinition {
name: tool_name.clone(),
description: Some(tool_description),
parameters: Some(input_schema),
},
}];
self.request_limiter
.run(async move {
let response = Self::perform_llm_completion(
client.clone(),
llm_api_token,
PerformCompletionParams {
provider: client::LanguageModelProvider::OpenAi,
model: request.model.clone(),
provider_request: RawValue::from_string(serde_json::to_string(
&request,
)?)?,
},
)
.await?;
Ok(open_ai::extract_tool_args_from_events(
tool_name,
Box::pin(response_lines(response)),
)
.await?
.boxed())
})
.boxed()
}
CloudModel::Google(_) => {
future::ready(Err(anyhow!("tool use not implemented for Google AI"))).boxed()
}
}
}
}
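/// Reads the streaming response body line by line, deserializing each line
/// as a JSON event (the completion endpoint emits newline-delimited JSON).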
fn response_lines<T: DeserializeOwned>(
response: Response<AsyncBody>,
) -> impl Stream<Item = Result<T>> {
futures::stream::try_unfold(
(String::new(), BufReader::new(response.into_body())),
move |(mut line, mut body)| async {
match body.read_line(&mut line).await {
Ok(0) => Ok(None),
Ok(_) => {
let event: T = serde_json::from_str(&line)?;
line.clear();
Ok(Some((event, (line, body))))
}
Err(e) => Err(e.into()),
}
},
)
}
impl LlmApiToken {
async fn acquire(&self, client: &Arc<Client>) -> Result<String> {
let lock = self.0.upgradable_read().await;
if let Some(token) = lock.as_ref() {
Ok(token.to_string())
} else {
Self::fetch(RwLockUpgradableReadGuard::upgrade(lock).await, client).await
}
}
async fn refresh(&self, client: &Arc<Client>) -> Result<String> {
Self::fetch(self.0.write().await, client).await
}
async fn fetch<'a>(
mut lock: RwLockWriteGuard<'a, Option<String>>,
client: &Arc<Client>,
) -> Result<String> {
let response = client.request(proto::GetLlmToken {}).await?;
*lock = Some(response.token.clone());
Ok(response.token.clone())
}
}
struct ConfigurationView {
state: gpui::Model<State>,
}
impl ConfigurationView {
fn authenticate(&mut self, cx: &mut ViewContext<Self>) {
self.state.update(cx, |state, cx| {
state.authenticate(cx).detach_and_log_err(cx);
});
cx.notify();
}
fn render_accept_terms(&mut self, cx: &mut ViewContext<Self>) -> Option<AnyElement> {
if self.state.read(cx).has_accepted_terms_of_service(cx) {
return None;
}
let accept_terms_disabled = self.state.read(cx).accept_terms.is_some();
let terms_button = Button::new("terms_of_service", "Terms of Service")
.style(ButtonStyle::Subtle)
.icon(IconName::ExternalLink)
.icon_color(Color::Muted)
.on_click(move |_, cx| cx.open_url("https://zed.dev/terms-of-service"));
let text =
"In order to use Zed AI, please read and accept our terms and conditions to continue:";
let form = v_flex()
.gap_2()
.child(Label::new("Terms and Conditions"))
.child(Label::new(text))
.child(h_flex().justify_center().child(terms_button))
.child(
h_flex().justify_center().child(
Button::new("accept_terms", "I've read and accept the terms of service")
.style(ButtonStyle::Tinted(TintColor::Accent))
.disabled(accept_terms_disabled)
.on_click({
let state = self.state.downgrade();
move |_, cx| {
state
.update(cx, |state, cx| state.accept_terms_of_service(cx))
.ok();
}
}),
),
);
Some(form.into_any())
}
}
impl Render for ConfigurationView {
fn render(&mut self, cx: &mut ViewContext<Self>) -> impl IntoElement {
const ZED_AI_URL: &str = "https://zed.dev/ai";
let is_connected = !self.state.read(cx).is_signed_out();
let plan = self.state.read(cx).user_store.read(cx).current_plan();
let has_accepted_terms = self.state.read(cx).has_accepted_terms_of_service(cx);
let is_pro = plan == Some(proto::Plan::ZedPro);
let subscription_text = Label::new(if is_pro {
"You have full access to Zed's hosted LLMs, which include models from Anthropic, OpenAI, and Google. They come with faster speeds and higher limits through Zed Pro."
} else {
"You have basic access to models from Anthropic through the Zed AI Free plan."
});
let manage_subscription_button = if is_pro {
Some(
h_flex().child(
Button::new("manage_settings", "Manage Subscription")
.style(ButtonStyle::Tinted(TintColor::Accent))
.on_click(cx.listener(|_, _, cx| cx.open_url(&zed_urls::account_url(cx)))),
),
)
} else if cx.has_flag::<ZedPro>() {
Some(
h_flex()
.gap_2()
.child(
Button::new("learn_more", "Learn more")
.style(ButtonStyle::Subtle)
.on_click(cx.listener(|_, _, cx| cx.open_url(ZED_AI_URL))),
)
.child(
Button::new("upgrade", "Upgrade")
.style(ButtonStyle::Subtle)
.color(Color::Accent)
.on_click(
cx.listener(|_, _, cx| cx.open_url(&zed_urls::account_url(cx))),
),
),
)
} else {
None
};
if is_connected {
v_flex()
.gap_3()
.max_w_4_5()
.children(self.render_accept_terms(cx))
.when(has_accepted_terms, |this| {
this.child(subscription_text)
.children(manage_subscription_button)
})
} else {
v_flex()
.gap_2()
.child(Label::new("Use Zed AI to access hosted language models."))
.child(
Button::new("sign_in", "Sign In")
.icon_color(Color::Muted)
.icon(IconName::Github)
.icon_position(IconPosition::Start)
.on_click(cx.listener(move |this, _, cx| this.authenticate(cx))),
)
}
}
}

crates/language_models/src/provider/copilot_chat.rs

@@ -0,0 +1,401 @@
use std::future;
use std::sync::Arc;
use anyhow::{anyhow, Result};
use copilot::copilot_chat::{
ChatMessage, CopilotChat, Model as CopilotChatModel, Request as CopilotChatRequest,
Role as CopilotChatRole,
};
use copilot::{Copilot, Status};
use futures::future::BoxFuture;
use futures::stream::BoxStream;
use futures::{FutureExt, StreamExt};
use gpui::{
percentage, svg, Animation, AnimationExt, AnyView, AppContext, AsyncAppContext, Model, Render,
Subscription, Task, Transformation,
};
use language_model::{
LanguageModel, LanguageModelCompletionEvent, LanguageModelId, LanguageModelName,
LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
LanguageModelProviderState, LanguageModelRequest, RateLimiter, Role,
};
use settings::SettingsStore;
use std::time::Duration;
use strum::IntoEnumIterator;
use ui::{
div, h_flex, v_flex, Button, ButtonCommon, Clickable, Color, Context, FixedWidth, Icon,
IconName, IconPosition, IconSize, IntoElement, Label, LabelCommon, ParentElement, Styled,
ViewContext, VisualContext, WindowContext,
};
use super::anthropic::count_anthropic_tokens;
use super::open_ai::count_open_ai_tokens;
const PROVIDER_ID: &str = "copilot_chat";
const PROVIDER_NAME: &str = "GitHub Copilot Chat";
#[derive(Default, Clone, Debug, PartialEq)]
pub struct CopilotChatSettings {}
pub struct CopilotChatLanguageModelProvider {
state: Model<State>,
}
pub struct State {
_copilot_chat_subscription: Option<Subscription>,
_settings_subscription: Subscription,
}
impl State {
fn is_authenticated(&self, cx: &AppContext) -> bool {
CopilotChat::global(cx)
.map(|m| m.read(cx).is_authenticated())
.unwrap_or(false)
}
}
impl CopilotChatLanguageModelProvider {
pub fn new(cx: &mut AppContext) -> Self {
let state = cx.new_model(|cx| {
let _copilot_chat_subscription = CopilotChat::global(cx)
.map(|copilot_chat| cx.observe(&copilot_chat, |_, _, cx| cx.notify()));
State {
_copilot_chat_subscription,
_settings_subscription: cx.observe_global::<SettingsStore>(|_, cx| {
cx.notify();
}),
}
});
Self { state }
}
}
impl LanguageModelProviderState for CopilotChatLanguageModelProvider {
type ObservableEntity = State;
fn observable_entity(&self) -> Option<gpui::Model<Self::ObservableEntity>> {
Some(self.state.clone())
}
}
impl LanguageModelProvider for CopilotChatLanguageModelProvider {
fn id(&self) -> LanguageModelProviderId {
LanguageModelProviderId(PROVIDER_ID.into())
}
fn name(&self) -> LanguageModelProviderName {
LanguageModelProviderName(PROVIDER_NAME.into())
}
fn icon(&self) -> IconName {
IconName::Copilot
}
fn provided_models(&self, _cx: &AppContext) -> Vec<Arc<dyn LanguageModel>> {
CopilotChatModel::iter()
.map(|model| {
Arc::new(CopilotChatLanguageModel {
model,
request_limiter: RateLimiter::new(4),
}) as Arc<dyn LanguageModel>
})
.collect()
}
fn is_authenticated(&self, cx: &AppContext) -> bool {
self.state.read(cx).is_authenticated(cx)
}
fn authenticate(&self, cx: &mut AppContext) -> Task<Result<()>> {
let result = if self.is_authenticated(cx) {
Ok(())
} else if let Some(copilot) = Copilot::global(cx) {
let error_msg = match copilot.read(cx).status() {
Status::Disabled => anyhow::anyhow!("Copilot must be enabled for Copilot Chat to work. Please enable Copilot and try again."),
Status::Error(e) => anyhow::anyhow!(format!("Received the following error while signing into Copilot: {e}")),
Status::Starting { task: _ } => anyhow::anyhow!("Copilot is still starting, please wait for Copilot to start then try again"),
Status::Unauthorized => anyhow::anyhow!("Unable to authorize with Copilot. Please make sure that you have an active Copilot and Copilot Chat subscription."),
Status::Authorized => return Task::ready(Ok(())),
Status::SignedOut => anyhow::anyhow!("You have signed out of Copilot. Please sign in to Copilot and try again."),
Status::SigningIn { prompt: _ } => anyhow::anyhow!("Still signing into Copilot..."),
};
Err(error_msg)
} else {
Err(anyhow::anyhow!(
"Copilot must be enabled for Copilot Chat to work. Please enable Copilot and try again."
))
};
Task::ready(result)
}
fn configuration_view(&self, cx: &mut WindowContext) -> AnyView {
let state = self.state.clone();
cx.new_view(|cx| ConfigurationView::new(state, cx)).into()
}
fn reset_credentials(&self, _cx: &mut AppContext) -> Task<Result<()>> {
Task::ready(Err(anyhow!(
"Signing out of GitHub Copilot Chat is currently not supported."
)))
}
}
pub struct CopilotChatLanguageModel {
model: CopilotChatModel,
request_limiter: RateLimiter,
}
impl LanguageModel for CopilotChatLanguageModel {
fn id(&self) -> LanguageModelId {
LanguageModelId::from(self.model.id().to_string())
}
fn name(&self) -> LanguageModelName {
LanguageModelName::from(self.model.display_name().to_string())
}
fn provider_id(&self) -> LanguageModelProviderId {
LanguageModelProviderId(PROVIDER_ID.into())
}
fn provider_name(&self) -> LanguageModelProviderName {
LanguageModelProviderName(PROVIDER_NAME.into())
}
fn telemetry_id(&self) -> String {
format!("copilot_chat/{}", self.model.id())
}
fn max_token_count(&self) -> usize {
self.model.max_token_count()
}
fn count_tokens(
&self,
request: LanguageModelRequest,
cx: &AppContext,
) -> BoxFuture<'static, Result<usize>> {
match self.model {
CopilotChatModel::Claude3_5Sonnet => count_anthropic_tokens(request, cx),
_ => {
let model = match self.model {
CopilotChatModel::Gpt4o => open_ai::Model::FourOmni,
CopilotChatModel::Gpt4 => open_ai::Model::Four,
CopilotChatModel::Gpt3_5Turbo => open_ai::Model::ThreePointFiveTurbo,
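// Tiktoken doesn't support the o1 models yet, so approximate with the
// same tokenizer as GPT-4.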
CopilotChatModel::O1Preview | CopilotChatModel::O1Mini => open_ai::Model::Four,
CopilotChatModel::Claude3_5Sonnet => unreachable!(),
};
count_open_ai_tokens(request, model, cx)
}
}
}
fn stream_completion(
&self,
request: LanguageModelRequest,
cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<BoxStream<'static, Result<LanguageModelCompletionEvent>>>> {
if let Some(message) = request.messages.last() {
if message.contents_empty() {
const EMPTY_PROMPT_MSG: &str =
"Empty prompts aren't allowed. Please provide a non-empty prompt.";
return futures::future::ready(Err(anyhow::anyhow!(EMPTY_PROMPT_MSG))).boxed();
}
// Copilot Chat has a restriction that the final message must be from the user.
// While their API does return an error message for this, we can catch it earlier
// and provide a more helpful error message.
if !matches!(message.role, Role::User) {
const USER_ROLE_MSG: &str = "The final message must be from the user. To provide a system prompt, you must provide the system prompt followed by a user prompt.";
return futures::future::ready(Err(anyhow::anyhow!(USER_ROLE_MSG))).boxed();
}
}
let copilot_request = self.to_copilot_chat_request(request);
let is_streaming = copilot_request.stream;
let request_limiter = self.request_limiter.clone();
let future = cx.spawn(|cx| async move {
let response = CopilotChat::stream_completion(copilot_request, cx);
request_limiter.stream(async move {
let response = response.await?;
let stream = response
.filter_map(move |response| async move {
match response {
Ok(result) => {
let choice = result.choices.first();
match choice {
Some(choice) if !is_streaming => {
match &choice.message {
Some(msg) => Some(Ok(msg.content.clone().unwrap_or_default())),
None => Some(Err(anyhow::anyhow!(
"The Copilot Chat API returned a response with no message content"
))),
}
},
Some(choice) => {
match &choice.delta {
Some(delta) => Some(Ok(delta.content.clone().unwrap_or_default())),
None => Some(Err(anyhow::anyhow!(
"The Copilot Chat API returned a response with no delta content"
))),
}
},
None => Some(Err(anyhow::anyhow!(
"The Copilot Chat API returned a response with no choices, but hadn't finished the message yet. Please try again."
))),
}
}
Err(err) => Some(Err(err)),
}
})
.boxed();
Ok(stream)
}).await
});
async move {
Ok(future
.await?
.map(|result| result.map(LanguageModelCompletionEvent::Text))
.boxed())
}
.boxed()
}
fn use_any_tool(
&self,
_request: LanguageModelRequest,
_name: String,
_description: String,
_schema: serde_json::Value,
_cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
future::ready(Err(anyhow!("not implemented"))).boxed()
}
}
impl CopilotChatLanguageModel {
pub fn to_copilot_chat_request(&self, request: LanguageModelRequest) -> CopilotChatRequest {
CopilotChatRequest::new(
self.model.clone(),
request
.messages
.into_iter()
.map(|msg| ChatMessage {
role: match msg.role {
Role::User => CopilotChatRole::User,
Role::Assistant => CopilotChatRole::Assistant,
Role::System => CopilotChatRole::System,
},
content: msg.string_contents(),
})
.collect(),
)
}
}
struct ConfigurationView {
copilot_status: Option<copilot::Status>,
state: Model<State>,
_subscription: Option<Subscription>,
}
impl ConfigurationView {
pub fn new(state: Model<State>, cx: &mut ViewContext<Self>) -> Self {
let copilot = Copilot::global(cx);
Self {
copilot_status: copilot.as_ref().map(|copilot| copilot.read(cx).status()),
state,
_subscription: copilot.as_ref().map(|copilot| {
cx.observe(copilot, |this, model, cx| {
this.copilot_status = Some(model.read(cx).status());
cx.notify();
})
}),
}
}
}
impl Render for ConfigurationView {
fn render(&mut self, cx: &mut ViewContext<Self>) -> impl IntoElement {
if self.state.read(cx).is_authenticated(cx) {
const LABEL: &str = "Authorized.";
h_flex()
.gap_1()
.child(Icon::new(IconName::Check).color(Color::Success))
.child(Label::new(LABEL))
} else {
let loading_icon = svg()
.size_8()
.path(IconName::ArrowCircle.path())
.text_color(cx.text_style().color)
.with_animation(
"icon_circle_arrow",
Animation::new(Duration::from_secs(2)).repeat(),
|svg, delta| svg.with_transformation(Transformation::rotate(percentage(delta))),
);
const ERROR_LABEL: &str = "Copilot Chat requires an active GitHub Copilot subscription. Please ensure Copilot is configured and try again, or use a different Assistant provider.";
match &self.copilot_status {
Some(status) => match status {
Status::Disabled => v_flex().gap_6().p_4().child(Label::new(ERROR_LABEL)),
Status::Starting { task: _ } => {
const LABEL: &str = "Starting Copilot...";
v_flex()
.gap_6()
.justify_center()
.items_center()
.child(Label::new(LABEL))
.child(loading_icon)
}
Status::SigningIn { prompt: _ } => {
const LABEL: &str = "Signing in to Copilot...";
v_flex()
.gap_6()
.justify_center()
.items_center()
.child(Label::new(LABEL))
.child(loading_icon)
}
Status::Error(_) => {
const LABEL: &str = "Copilot had issues starting. Please try restarting it. If the issue persists, try reinstalling Copilot.";
v_flex()
.gap_6()
.child(Label::new(LABEL))
.child(svg().size_8().path(IconName::CopilotError.path()))
}
_ => {
const LABEL: &str =
"To use Zed's assistant with GitHub Copilot, you need to be logged in to GitHub. Note that your GitHub account must have an active Copilot Chat subscription.";
v_flex().gap_6().child(Label::new(LABEL)).child(
v_flex()
.gap_2()
.child(
Button::new("sign_in", "Sign In")
.icon_color(Color::Muted)
.icon(IconName::Github)
.icon_position(IconPosition::Start)
.icon_size(IconSize::Medium)
.style(ui::ButtonStyle::Filled)
.full_width()
.on_click(|_, cx| copilot::initiate_sign_in(cx)),
)
.child(
div().flex().w_full().items_center().child(
Label::new("Sign in to start using Github Copilot Chat.")
.color(Color::Muted)
.size(ui::LabelSize::Small),
),
),
)
}
},
None => v_flex().gap_6().child(Label::new(ERROR_LABEL)),
}
}
}
}

View file

@ -0,0 +1,508 @@
use anyhow::{anyhow, Result};
use collections::BTreeMap;
use editor::{Editor, EditorElement, EditorStyle};
use futures::{future::BoxFuture, FutureExt, StreamExt};
use google_ai::stream_generate_content;
use gpui::{
AnyView, AppContext, AsyncAppContext, FontStyle, ModelContext, Subscription, Task, TextStyle,
View, WhiteSpace,
};
use http_client::HttpClient;
use language_model::LanguageModelCompletionEvent;
use language_model::{
LanguageModel, LanguageModelId, LanguageModelName, LanguageModelProvider,
LanguageModelProviderId, LanguageModelProviderName, LanguageModelProviderState,
LanguageModelRequest, RateLimiter,
};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
use std::{future, sync::Arc};
use strum::IntoEnumIterator;
use theme::ThemeSettings;
use ui::{prelude::*, Icon, IconName, Tooltip};
use util::ResultExt;
use crate::AllLanguageModelSettings;
const PROVIDER_ID: &str = "google";
const PROVIDER_NAME: &str = "Google AI";
#[derive(Default, Clone, Debug, PartialEq)]
pub struct GoogleSettings {
pub api_url: String,
pub available_models: Vec<AvailableModel>,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct AvailableModel {
name: String,
display_name: Option<String>,
max_tokens: usize,
}
pub struct GoogleLanguageModelProvider {
http_client: Arc<dyn HttpClient>,
state: gpui::Model<State>,
}
pub struct State {
api_key: Option<String>,
api_key_from_env: bool,
_subscription: Subscription,
}
const GOOGLE_AI_API_KEY_VAR: &str = "GOOGLE_AI_API_KEY";
impl State {
fn is_authenticated(&self) -> bool {
self.api_key.is_some()
}
fn reset_api_key(&self, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
let delete_credentials =
cx.delete_credentials(&AllLanguageModelSettings::get_global(cx).google.api_url);
cx.spawn(|this, mut cx| async move {
delete_credentials.await.ok();
this.update(&mut cx, |this, cx| {
this.api_key = None;
this.api_key_from_env = false;
cx.notify();
})
})
}
fn set_api_key(&mut self, api_key: String, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
let settings = &AllLanguageModelSettings::get_global(cx).google;
let write_credentials =
cx.write_credentials(&settings.api_url, "Bearer", api_key.as_bytes());
cx.spawn(|this, mut cx| async move {
write_credentials.await?;
this.update(&mut cx, |this, cx| {
this.api_key = Some(api_key);
cx.notify();
})
})
}
fn authenticate(&self, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
if self.is_authenticated() {
Task::ready(Ok(()))
} else {
let api_url = AllLanguageModelSettings::get_global(cx)
.google
.api_url
.clone();
cx.spawn(|this, mut cx| async move {
let (api_key, from_env) = if let Ok(api_key) = std::env::var(GOOGLE_AI_API_KEY_VAR)
{
(api_key, true)
} else {
let (_, api_key) = cx
.update(|cx| cx.read_credentials(&api_url))?
.await?
.ok_or_else(|| anyhow!("credentials not found"))?;
(String::from_utf8(api_key)?, false)
};
this.update(&mut cx, |this, cx| {
this.api_key = Some(api_key);
this.api_key_from_env = from_env;
cx.notify();
})
})
}
}
}
impl GoogleLanguageModelProvider {
pub fn new(http_client: Arc<dyn HttpClient>, cx: &mut AppContext) -> Self {
let state = cx.new_model(|cx| State {
api_key: None,
api_key_from_env: false,
_subscription: cx.observe_global::<SettingsStore>(|_, cx| {
cx.notify();
}),
});
Self { http_client, state }
}
}
impl LanguageModelProviderState for GoogleLanguageModelProvider {
type ObservableEntity = State;
fn observable_entity(&self) -> Option<gpui::Model<Self::ObservableEntity>> {
Some(self.state.clone())
}
}
impl LanguageModelProvider for GoogleLanguageModelProvider {
fn id(&self) -> LanguageModelProviderId {
LanguageModelProviderId(PROVIDER_ID.into())
}
fn name(&self) -> LanguageModelProviderName {
LanguageModelProviderName(PROVIDER_NAME.into())
}
fn icon(&self) -> IconName {
IconName::AiGoogle
}
fn provided_models(&self, cx: &AppContext) -> Vec<Arc<dyn LanguageModel>> {
let mut models = BTreeMap::default();
// Add base models from google_ai::Model::iter()
for model in google_ai::Model::iter() {
if !matches!(model, google_ai::Model::Custom { .. }) {
models.insert(model.id().to_string(), model);
}
}
// Override with available models from settings
for model in &AllLanguageModelSettings::get_global(cx)
.google
.available_models
{
models.insert(
model.name.clone(),
google_ai::Model::Custom {
name: model.name.clone(),
display_name: model.display_name.clone(),
max_tokens: model.max_tokens,
},
);
}
models
.into_values()
.map(|model| {
Arc::new(GoogleLanguageModel {
id: LanguageModelId::from(model.id().to_string()),
model,
state: self.state.clone(),
http_client: self.http_client.clone(),
rate_limiter: RateLimiter::new(4),
}) as Arc<dyn LanguageModel>
})
.collect()
}
fn is_authenticated(&self, cx: &AppContext) -> bool {
self.state.read(cx).is_authenticated()
}
fn authenticate(&self, cx: &mut AppContext) -> Task<Result<()>> {
self.state.update(cx, |state, cx| state.authenticate(cx))
}
fn configuration_view(&self, cx: &mut WindowContext) -> AnyView {
cx.new_view(|cx| ConfigurationView::new(self.state.clone(), cx))
.into()
}
fn reset_credentials(&self, cx: &mut AppContext) -> Task<Result<()>> {
let state = self.state.clone();
let delete_credentials =
cx.delete_credentials(&AllLanguageModelSettings::get_global(cx).google.api_url);
cx.spawn(|mut cx| async move {
delete_credentials.await.log_err();
state.update(&mut cx, |this, cx| {
this.api_key = None;
cx.notify();
})
})
}
}
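// Hedged illustration (a hypothetical test, not part of this commit) of the
// override rule used by `provided_models` above: a settings-defined model
// replaces a built-in model with the same id, because both are inserted under
// the same BTreeMap key. The model name is illustrative only.
#[cfg(test)]
mod provided_models_override_sketch {
#[test]
fn settings_entries_replace_builtins_with_same_key() {
let mut models = std::collections::BTreeMap::new();
models.insert("gemini-1.5-pro".to_string(), "built-in");
models.insert("gemini-1.5-pro".to_string(), "from settings");
assert_eq!(models.get("gemini-1.5-pro"), Some(&"from settings"));
}
}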
pub struct GoogleLanguageModel {
id: LanguageModelId,
model: google_ai::Model,
state: gpui::Model<State>,
http_client: Arc<dyn HttpClient>,
rate_limiter: RateLimiter,
}
impl LanguageModel for GoogleLanguageModel {
fn id(&self) -> LanguageModelId {
self.id.clone()
}
fn name(&self) -> LanguageModelName {
LanguageModelName::from(self.model.display_name().to_string())
}
fn provider_id(&self) -> LanguageModelProviderId {
LanguageModelProviderId(PROVIDER_ID.into())
}
fn provider_name(&self) -> LanguageModelProviderName {
LanguageModelProviderName(PROVIDER_NAME.into())
}
fn telemetry_id(&self) -> String {
format!("google/{}", self.model.id())
}
fn max_token_count(&self) -> usize {
self.model.max_token_count()
}
fn count_tokens(
&self,
request: LanguageModelRequest,
cx: &AppContext,
) -> BoxFuture<'static, Result<usize>> {
let request = request.into_google(self.model.id().to_string());
let http_client = self.http_client.clone();
let api_key = self.state.read(cx).api_key.clone();
let settings = &AllLanguageModelSettings::get_global(cx).google;
let api_url = settings.api_url.clone();
async move {
let api_key = api_key.ok_or_else(|| anyhow!("Missing Google API key"))?;
let response = google_ai::count_tokens(
http_client.as_ref(),
&api_url,
&api_key,
google_ai::CountTokensRequest {
contents: request.contents,
},
)
.await?;
Ok(response.total_tokens)
}
.boxed()
}
fn stream_completion(
&self,
request: LanguageModelRequest,
cx: &AsyncAppContext,
) -> BoxFuture<
'static,
Result<futures::stream::BoxStream<'static, Result<LanguageModelCompletionEvent>>>,
> {
let request = request.into_google(self.model.id().to_string());
let http_client = self.http_client.clone();
let Ok((api_key, api_url)) = cx.read_model(&self.state, |state, cx| {
let settings = &AllLanguageModelSettings::get_global(cx).google;
(state.api_key.clone(), settings.api_url.clone())
}) else {
return futures::future::ready(Err(anyhow!("App state dropped"))).boxed();
};
let future = self.rate_limiter.stream(async move {
let api_key = api_key.ok_or_else(|| anyhow!("Missing Google API Key"))?;
let response =
stream_generate_content(http_client.as_ref(), &api_url, &api_key, request);
let events = response.await?;
Ok(google_ai::extract_text_from_events(events).boxed())
});
async move {
Ok(future
.await?
.map(|result| result.map(LanguageModelCompletionEvent::Text))
.boxed())
}
.boxed()
}
fn use_any_tool(
&self,
_request: LanguageModelRequest,
_name: String,
_description: String,
_schema: serde_json::Value,
_cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<futures::stream::BoxStream<'static, Result<String>>>> {
future::ready(Err(anyhow!("not implemented"))).boxed()
}
}
struct ConfigurationView {
api_key_editor: View<Editor>,
state: gpui::Model<State>,
load_credentials_task: Option<Task<()>>,
}
impl ConfigurationView {
fn new(state: gpui::Model<State>, cx: &mut ViewContext<Self>) -> Self {
cx.observe(&state, |_, _, cx| {
cx.notify();
})
.detach();
let load_credentials_task = Some(cx.spawn({
let state = state.clone();
|this, mut cx| async move {
if let Some(task) = state
.update(&mut cx, |state, cx| state.authenticate(cx))
.log_err()
{
// We don't log this error, because a failure here may just mean the user isn't signed in yet.
let _ = task.await;
}
this.update(&mut cx, |this, cx| {
this.load_credentials_task = None;
cx.notify();
})
.log_err();
}
}));
Self {
api_key_editor: cx.new_view(|cx| {
let mut editor = Editor::single_line(cx);
editor.set_placeholder_text("AIzaSy...", cx);
editor
}),
state,
load_credentials_task,
}
}
fn save_api_key(&mut self, _: &menu::Confirm, cx: &mut ViewContext<Self>) {
let api_key = self.api_key_editor.read(cx).text(cx);
if api_key.is_empty() {
return;
}
let state = self.state.clone();
cx.spawn(|_, mut cx| async move {
state
.update(&mut cx, |state, cx| state.set_api_key(api_key, cx))?
.await
})
.detach_and_log_err(cx);
cx.notify();
}
fn reset_api_key(&mut self, cx: &mut ViewContext<Self>) {
self.api_key_editor
.update(cx, |editor, cx| editor.set_text("", cx));
let state = self.state.clone();
cx.spawn(|_, mut cx| async move {
state
.update(&mut cx, |state, cx| state.reset_api_key(cx))?
.await
})
.detach_and_log_err(cx);
cx.notify();
}
fn render_api_key_editor(&self, cx: &mut ViewContext<Self>) -> impl IntoElement {
let settings = ThemeSettings::get_global(cx);
let text_style = TextStyle {
color: cx.theme().colors().text,
font_family: settings.ui_font.family.clone(),
font_features: settings.ui_font.features.clone(),
font_fallbacks: settings.ui_font.fallbacks.clone(),
font_size: rems(0.875).into(),
font_weight: settings.ui_font.weight,
font_style: FontStyle::Normal,
line_height: relative(1.3),
background_color: None,
underline: None,
strikethrough: None,
white_space: WhiteSpace::Normal,
truncate: None,
};
EditorElement::new(
&self.api_key_editor,
EditorStyle {
background: cx.theme().colors().editor_background,
local_player: cx.theme().players().local(),
text: text_style,
..Default::default()
},
)
}
fn should_render_editor(&self, cx: &mut ViewContext<Self>) -> bool {
!self.state.read(cx).is_authenticated()
}
}
impl Render for ConfigurationView {
fn render(&mut self, cx: &mut ViewContext<Self>) -> impl IntoElement {
const GOOGLE_CONSOLE_URL: &str = "https://aistudio.google.com/app/apikey";
const INSTRUCTIONS: [&str; 3] = [
"To use Zed's assistant with Google AI, you need to add an API key. Follow these steps:",
"- Create one by visiting:",
"- Paste your API key below and hit enter to use the assistant",
];
let env_var_set = self.state.read(cx).api_key_from_env;
if self.load_credentials_task.is_some() {
div().child(Label::new("Loading credentials...")).into_any()
} else if self.should_render_editor(cx) {
v_flex()
.size_full()
.on_action(cx.listener(Self::save_api_key))
.child(Label::new(INSTRUCTIONS[0]))
.child(h_flex().child(Label::new(INSTRUCTIONS[1])).child(
Button::new("google_console", GOOGLE_CONSOLE_URL)
.style(ButtonStyle::Subtle)
.icon(IconName::ExternalLink)
.icon_size(IconSize::XSmall)
.icon_color(Color::Muted)
.on_click(move |_, cx| cx.open_url(GOOGLE_CONSOLE_URL))
)
)
.child(Label::new(INSTRUCTIONS[2]))
.child(
h_flex()
.w_full()
.my_2()
.px_2()
.py_1()
.bg(cx.theme().colors().editor_background)
.rounded_md()
.child(self.render_api_key_editor(cx)),
)
.child(
Label::new(
format!("You can also assign the {GOOGLE_AI_API_KEY_VAR} environment variable and restart Zed."),
)
.size(LabelSize::Small),
)
.into_any()
} else {
h_flex()
.size_full()
.justify_between()
.child(
h_flex()
.gap_1()
.child(Icon::new(IconName::Check).color(Color::Success))
.child(Label::new(if env_var_set {
format!("API key set in {GOOGLE_AI_API_KEY_VAR} environment variable.")
} else {
"API key configured.".to_string()
})),
)
.child(
Button::new("reset-key", "Reset key")
.icon(Some(IconName::Trash))
.icon_size(IconSize::Small)
.icon_position(IconPosition::Start)
.disabled(env_var_set)
.when(env_var_set, |this| {
this.tooltip(|cx| Tooltip::text(format!("To reset your API key, unset the {GOOGLE_AI_API_KEY_VAR} environment variable."), cx))
})
.on_click(cx.listener(|this, _, cx| this.reset_api_key(cx))),
)
.into_any()
}
}
}

View file

@ -0,0 +1,554 @@
use anyhow::{anyhow, bail, Result};
use futures::{future::BoxFuture, stream::BoxStream, FutureExt, StreamExt};
use gpui::{AnyView, AppContext, AsyncAppContext, ModelContext, Subscription, Task};
use http_client::HttpClient;
use language_model::LanguageModelCompletionEvent;
use language_model::{
LanguageModel, LanguageModelId, LanguageModelName, LanguageModelProvider,
LanguageModelProviderId, LanguageModelProviderName, LanguageModelProviderState,
LanguageModelRequest, RateLimiter, Role,
};
use ollama::{
get_models, preload_model, stream_chat_completion, ChatMessage, ChatOptions, ChatRequest,
ChatResponseDelta, KeepAlive, OllamaToolCall,
};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
use std::{collections::BTreeMap, sync::Arc};
use ui::{prelude::*, ButtonLike, Indicator};
use util::ResultExt;
use crate::AllLanguageModelSettings;
const OLLAMA_DOWNLOAD_URL: &str = "https://ollama.com/download";
const OLLAMA_LIBRARY_URL: &str = "https://ollama.com/library";
const OLLAMA_SITE: &str = "https://ollama.com/";
const PROVIDER_ID: &str = "ollama";
const PROVIDER_NAME: &str = "Ollama";
#[derive(Default, Debug, Clone, PartialEq)]
pub struct OllamaSettings {
pub api_url: String,
pub available_models: Vec<AvailableModel>,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct AvailableModel {
/// The model name in the Ollama API (e.g. "llama3.2:latest")
pub name: String,
/// The model's name in Zed's UI, such as in the model selector dropdown menu in the assistant panel.
pub display_name: Option<String>,
/// The Context Length parameter to the model (aka num_ctx or n_ctx)
pub max_tokens: usize,
/// The number of seconds to keep the connection open after the last request
pub keep_alive: Option<KeepAlive>,
}
pub struct OllamaLanguageModelProvider {
http_client: Arc<dyn HttpClient>,
state: gpui::Model<State>,
}
pub struct State {
http_client: Arc<dyn HttpClient>,
available_models: Vec<ollama::Model>,
fetch_model_task: Option<Task<Result<()>>>,
_subscription: Subscription,
}
impl State {
fn is_authenticated(&self) -> bool {
!self.available_models.is_empty()
}
fn fetch_models(&mut self, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
let settings = &AllLanguageModelSettings::get_global(cx).ollama;
let http_client = self.http_client.clone();
let api_url = settings.api_url.clone();
// As a proxy for the server being "authenticated", we'll check if it's up by fetching the models.
cx.spawn(|this, mut cx| async move {
let models = get_models(http_client.as_ref(), &api_url, None).await?;
let mut models: Vec<ollama::Model> = models
.into_iter()
// Since there is no metadata from the Ollama API
// indicating which models are embedding models,
// simply filter out models with "-embed" in their name
.filter(|model| !model.name.contains("-embed"))
.map(|model| ollama::Model::new(&model.name, None, None))
.collect();
models.sort_by(|a, b| a.name.cmp(&b.name));
this.update(&mut cx, |this, cx| {
this.available_models = models;
cx.notify();
})
})
}
fn restart_fetch_models_task(&mut self, cx: &mut ModelContext<Self>) {
let task = self.fetch_models(cx);
self.fetch_model_task.replace(task);
}
fn authenticate(&mut self, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
if self.is_authenticated() {
Task::ready(Ok(()))
} else {
self.fetch_models(cx)
}
}
}
impl OllamaLanguageModelProvider {
pub fn new(http_client: Arc<dyn HttpClient>, cx: &mut AppContext) -> Self {
let this = Self {
http_client: http_client.clone(),
state: cx.new_model(|cx| {
let subscription = cx.observe_global::<SettingsStore>({
let mut settings = AllLanguageModelSettings::get_global(cx).ollama.clone();
move |this: &mut State, cx| {
let new_settings = &AllLanguageModelSettings::get_global(cx).ollama;
if &settings != new_settings {
settings = new_settings.clone();
this.restart_fetch_models_task(cx);
cx.notify();
}
}
});
State {
http_client,
available_models: Default::default(),
fetch_model_task: None,
_subscription: subscription,
}
}),
};
this.state
.update(cx, |state, cx| state.restart_fetch_models_task(cx));
this
}
}
impl LanguageModelProviderState for OllamaLanguageModelProvider {
type ObservableEntity = State;
fn observable_entity(&self) -> Option<gpui::Model<Self::ObservableEntity>> {
Some(self.state.clone())
}
}
impl LanguageModelProvider for OllamaLanguageModelProvider {
fn id(&self) -> LanguageModelProviderId {
LanguageModelProviderId(PROVIDER_ID.into())
}
fn name(&self) -> LanguageModelProviderName {
LanguageModelProviderName(PROVIDER_NAME.into())
}
fn icon(&self) -> IconName {
IconName::AiOllama
}
fn provided_models(&self, cx: &AppContext) -> Vec<Arc<dyn LanguageModel>> {
let mut models: BTreeMap<String, ollama::Model> = BTreeMap::default();
// Add models from the Ollama API
for model in self.state.read(cx).available_models.iter() {
models.insert(model.name.clone(), model.clone());
}
// Override with available models from settings
for model in AllLanguageModelSettings::get_global(cx)
.ollama
.available_models
.iter()
{
models.insert(
model.name.clone(),
ollama::Model {
name: model.name.clone(),
display_name: model.display_name.clone(),
max_tokens: model.max_tokens,
keep_alive: model.keep_alive.clone(),
},
);
}
models
.into_values()
.map(|model| {
Arc::new(OllamaLanguageModel {
id: LanguageModelId::from(model.name.clone()),
model: model.clone(),
http_client: self.http_client.clone(),
request_limiter: RateLimiter::new(4),
}) as Arc<dyn LanguageModel>
})
.collect()
}
fn load_model(&self, model: Arc<dyn LanguageModel>, cx: &AppContext) {
let settings = &AllLanguageModelSettings::get_global(cx).ollama;
let http_client = self.http_client.clone();
let api_url = settings.api_url.clone();
let id = model.id().0.to_string();
cx.spawn(|_| async move { preload_model(http_client, &api_url, &id).await })
.detach_and_log_err(cx);
}
fn is_authenticated(&self, cx: &AppContext) -> bool {
self.state.read(cx).is_authenticated()
}
fn authenticate(&self, cx: &mut AppContext) -> Task<Result<()>> {
self.state.update(cx, |state, cx| state.authenticate(cx))
}
fn configuration_view(&self, cx: &mut WindowContext) -> AnyView {
let state = self.state.clone();
cx.new_view(|cx| ConfigurationView::new(state, cx)).into()
}
fn reset_credentials(&self, cx: &mut AppContext) -> Task<Result<()>> {
self.state.update(cx, |state, cx| state.fetch_models(cx))
}
}
pub struct OllamaLanguageModel {
id: LanguageModelId,
model: ollama::Model,
http_client: Arc<dyn HttpClient>,
request_limiter: RateLimiter,
}
impl OllamaLanguageModel {
fn to_ollama_request(&self, request: LanguageModelRequest) -> ChatRequest {
ChatRequest {
model: self.model.name.clone(),
messages: request
.messages
.into_iter()
.map(|msg| match msg.role {
Role::User => ChatMessage::User {
content: msg.string_contents(),
},
Role::Assistant => ChatMessage::Assistant {
content: msg.string_contents(),
tool_calls: None,
},
Role::System => ChatMessage::System {
content: msg.string_contents(),
},
})
.collect(),
keep_alive: self.model.keep_alive.clone().unwrap_or_default(),
stream: true,
options: Some(ChatOptions {
num_ctx: Some(self.model.max_tokens),
stop: Some(request.stop),
temperature: request.temperature.or(Some(1.0)),
..Default::default()
}),
tools: vec![],
}
}
fn request_completion(
&self,
request: ChatRequest,
cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<ChatResponseDelta>> {
let http_client = self.http_client.clone();
let Ok(api_url) = cx.update(|cx| {
let settings = &AllLanguageModelSettings::get_global(cx).ollama;
settings.api_url.clone()
}) else {
return futures::future::ready(Err(anyhow!("App state dropped"))).boxed();
};
async move { ollama::complete(http_client.as_ref(), &api_url, request).await }.boxed()
}
}
impl LanguageModel for OllamaLanguageModel {
fn id(&self) -> LanguageModelId {
self.id.clone()
}
fn name(&self) -> LanguageModelName {
LanguageModelName::from(self.model.display_name().to_string())
}
fn provider_id(&self) -> LanguageModelProviderId {
LanguageModelProviderId(PROVIDER_ID.into())
}
fn provider_name(&self) -> LanguageModelProviderName {
LanguageModelProviderName(PROVIDER_NAME.into())
}
fn telemetry_id(&self) -> String {
format!("ollama/{}", self.model.id())
}
fn max_token_count(&self) -> usize {
self.model.max_token_count()
}
fn count_tokens(
&self,
request: LanguageModelRequest,
_cx: &AppContext,
) -> BoxFuture<'static, Result<usize>> {
// There is no token-counting endpoint in Ollama _yet_;
// see: https://github.com/ollama/ollama/issues/1716 and https://github.com/ollama/ollama/issues/3582.
// In the meantime, fall back to a rough estimate of one token per four characters.
let token_count = request
.messages
.iter()
.map(|msg| msg.string_contents().chars().count())
.sum::<usize>()
/ 4;
async move { Ok(token_count) }.boxed()
}
fn stream_completion(
&self,
request: LanguageModelRequest,
cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<BoxStream<'static, Result<LanguageModelCompletionEvent>>>> {
let request = self.to_ollama_request(request);
let http_client = self.http_client.clone();
let Ok(api_url) = cx.update(|cx| {
let settings = &AllLanguageModelSettings::get_global(cx).ollama;
settings.api_url.clone()
}) else {
return futures::future::ready(Err(anyhow!("App state dropped"))).boxed();
};
let future = self.request_limiter.stream(async move {
let response = stream_chat_completion(http_client.as_ref(), &api_url, request).await?;
let stream = response
.filter_map(|response| async move {
match response {
Ok(delta) => {
let content = match delta.message {
ChatMessage::User { content } => content,
ChatMessage::Assistant { content, .. } => content,
ChatMessage::System { content } => content,
};
Some(Ok(content))
}
Err(error) => Some(Err(error)),
}
})
.boxed();
Ok(stream)
});
async move {
Ok(future
.await?
.map(|result| result.map(LanguageModelCompletionEvent::Text))
.boxed())
}
.boxed()
}
fn use_any_tool(
&self,
request: LanguageModelRequest,
tool_name: String,
tool_description: String,
schema: serde_json::Value,
cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
use ollama::{OllamaFunctionTool, OllamaTool};
let function = OllamaFunctionTool {
name: tool_name.clone(),
description: Some(tool_description),
parameters: Some(schema),
};
let tools = vec![OllamaTool::Function { function }];
let request = self.to_ollama_request(request).with_tools(tools);
let response = self.request_completion(request, cx);
self.request_limiter
.run(async move {
let response = response.await?;
let ChatMessage::Assistant { tool_calls, .. } = response.message else {
bail!("message does not have an assistant role");
};
if let Some(tool_calls) = tool_calls.filter(|calls| !calls.is_empty()) {
for call in tool_calls {
let OllamaToolCall::Function(function) = call;
if function.name == tool_name {
return Ok(futures::stream::once(async move {
Ok(function.arguments.to_string())
})
.boxed());
}
}
} else {
bail!("assistant message does not have any tool calls");
};
bail!("tool not used")
})
.boxed()
}
}
struct ConfigurationView {
state: gpui::Model<State>,
loading_models_task: Option<Task<()>>,
}
impl ConfigurationView {
pub fn new(state: gpui::Model<State>, cx: &mut ViewContext<Self>) -> Self {
let loading_models_task = Some(cx.spawn({
let state = state.clone();
|this, mut cx| async move {
if let Some(task) = state
.update(&mut cx, |state, cx| state.authenticate(cx))
.log_err()
{
task.await.log_err();
}
this.update(&mut cx, |this, cx| {
this.loading_models_task = None;
cx.notify();
})
.log_err();
}
}));
Self {
state,
loading_models_task,
}
}
fn retry_connection(&self, cx: &mut WindowContext) {
self.state
.update(cx, |state, cx| state.fetch_models(cx))
.detach_and_log_err(cx);
}
}
impl Render for ConfigurationView {
fn render(&mut self, cx: &mut ViewContext<Self>) -> impl IntoElement {
let is_authenticated = self.state.read(cx).is_authenticated();
let ollama_intro = "Get up and running with Llama 3.2, Mistral, Gemma 2, and other large language models with Ollama.";
let ollama_reqs =
"Ollama must be running with at least one model installed to use it in the assistant.";
let mut inline_code_bg = cx.theme().colors().editor_background;
inline_code_bg.fade_out(0.5);
if self.loading_models_task.is_some() {
div().child(Label::new("Loading models...")).into_any()
} else {
v_flex()
.size_full()
.gap_3()
.child(
v_flex()
.size_full()
.gap_2()
.p_1()
.child(Label::new(ollama_intro))
.child(Label::new(ollama_reqs))
.child(
h_flex()
.gap_0p5()
.child(Label::new("Once installed, try "))
.child(
div()
.bg(inline_code_bg)
.px_1p5()
.rounded_md()
.child(Label::new("ollama run llama3.2")),
),
),
)
.child(
h_flex()
.w_full()
.pt_2()
.justify_between()
.gap_2()
.child(
h_flex()
.w_full()
.gap_2()
.map(|this| {
if is_authenticated {
this.child(
Button::new("ollama-site", "Ollama")
.style(ButtonStyle::Subtle)
.icon(IconName::ExternalLink)
.icon_size(IconSize::XSmall)
.icon_color(Color::Muted)
.on_click(move |_, cx| cx.open_url(OLLAMA_SITE))
.into_any_element(),
)
} else {
this.child(
Button::new(
"download_ollama_button",
"Download Ollama",
)
.style(ButtonStyle::Subtle)
.icon(IconName::ExternalLink)
.icon_size(IconSize::XSmall)
.icon_color(Color::Muted)
.on_click(move |_, cx| cx.open_url(OLLAMA_DOWNLOAD_URL))
.into_any_element(),
)
}
})
.child(
Button::new("view-models", "All Models")
.style(ButtonStyle::Subtle)
.icon(IconName::ExternalLink)
.icon_size(IconSize::XSmall)
.icon_color(Color::Muted)
.on_click(move |_, cx| cx.open_url(OLLAMA_LIBRARY_URL)),
),
)
.child(if is_authenticated {
// This is a button only to keep the spacing consistent with the
// "Connect" button shown when not connected; it should stay disabled.
ButtonLike::new("connected")
.disabled(true)
// Since this won't ever be clickable, we can use the arrow cursor
.cursor_style(gpui::CursorStyle::Arrow)
.child(
h_flex()
.gap_2()
.child(Indicator::dot().color(Color::Success))
.child(Label::new("Connected"))
.into_any_element(),
)
.into_any_element()
} else {
Button::new("retry_ollama_models", "Connect")
.icon_position(IconPosition::Start)
.icon(IconName::ArrowCircle)
.on_click(cx.listener(move |this, _, cx| this.retry_connection(cx)))
.into_any_element()
}),
)
.into_any()
}
}
}

View file

@ -0,0 +1,568 @@
use anyhow::{anyhow, Result};
use collections::BTreeMap;
use editor::{Editor, EditorElement, EditorStyle};
use futures::{future::BoxFuture, FutureExt, StreamExt};
use gpui::{
AnyView, AppContext, AsyncAppContext, FontStyle, ModelContext, Subscription, Task, TextStyle,
View, WhiteSpace,
};
use http_client::HttpClient;
use language_model::{
LanguageModel, LanguageModelCompletionEvent, LanguageModelId, LanguageModelName,
LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
LanguageModelProviderState, LanguageModelRequest, RateLimiter, Role,
};
use open_ai::{
stream_completion, FunctionDefinition, ResponseStreamEvent, ToolChoice, ToolDefinition,
};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
use std::sync::Arc;
use strum::IntoEnumIterator;
use theme::ThemeSettings;
use ui::{prelude::*, Icon, IconName, Tooltip};
use util::ResultExt;
use crate::AllLanguageModelSettings;
const PROVIDER_ID: &str = "openai";
const PROVIDER_NAME: &str = "OpenAI";
#[derive(Default, Clone, Debug, PartialEq)]
pub struct OpenAiSettings {
pub api_url: String,
pub available_models: Vec<AvailableModel>,
pub needs_setting_migration: bool,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct AvailableModel {
pub name: String,
pub display_name: Option<String>,
pub max_tokens: usize,
pub max_output_tokens: Option<u32>,
pub max_completion_tokens: Option<u32>,
}
pub struct OpenAiLanguageModelProvider {
http_client: Arc<dyn HttpClient>,
state: gpui::Model<State>,
}
pub struct State {
api_key: Option<String>,
api_key_from_env: bool,
_subscription: Subscription,
}
const OPENAI_API_KEY_VAR: &str = "OPENAI_API_KEY";
impl State {
fn is_authenticated(&self) -> bool {
self.api_key.is_some()
}
fn reset_api_key(&self, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
let settings = &AllLanguageModelSettings::get_global(cx).openai;
let delete_credentials = cx.delete_credentials(&settings.api_url);
cx.spawn(|this, mut cx| async move {
delete_credentials.await.log_err();
this.update(&mut cx, |this, cx| {
this.api_key = None;
this.api_key_from_env = false;
cx.notify();
})
})
}
fn set_api_key(&mut self, api_key: String, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
let settings = &AllLanguageModelSettings::get_global(cx).openai;
let write_credentials =
cx.write_credentials(&settings.api_url, "Bearer", api_key.as_bytes());
cx.spawn(|this, mut cx| async move {
write_credentials.await?;
this.update(&mut cx, |this, cx| {
this.api_key = Some(api_key);
cx.notify();
})
})
}
fn authenticate(&self, cx: &mut ModelContext<Self>) -> Task<Result<()>> {
if self.is_authenticated() {
Task::ready(Ok(()))
} else {
let api_url = AllLanguageModelSettings::get_global(cx)
.openai
.api_url
.clone();
cx.spawn(|this, mut cx| async move {
let (api_key, from_env) = if let Ok(api_key) = std::env::var(OPENAI_API_KEY_VAR) {
(api_key, true)
} else {
let (_, api_key) = cx
.update(|cx| cx.read_credentials(&api_url))?
.await?
.ok_or_else(|| anyhow!("credentials not found"))?;
(String::from_utf8(api_key)?, false)
};
this.update(&mut cx, |this, cx| {
this.api_key = Some(api_key);
this.api_key_from_env = from_env;
cx.notify();
})
})
}
}
}
impl OpenAiLanguageModelProvider {
pub fn new(http_client: Arc<dyn HttpClient>, cx: &mut AppContext) -> Self {
let state = cx.new_model(|cx| State {
api_key: None,
api_key_from_env: false,
_subscription: cx.observe_global::<SettingsStore>(|_this: &mut State, cx| {
cx.notify();
}),
});
Self { http_client, state }
}
}
impl LanguageModelProviderState for OpenAiLanguageModelProvider {
type ObservableEntity = State;
fn observable_entity(&self) -> Option<gpui::Model<Self::ObservableEntity>> {
Some(self.state.clone())
}
}
impl LanguageModelProvider for OpenAiLanguageModelProvider {
fn id(&self) -> LanguageModelProviderId {
LanguageModelProviderId(PROVIDER_ID.into())
}
fn name(&self) -> LanguageModelProviderName {
LanguageModelProviderName(PROVIDER_NAME.into())
}
fn icon(&self) -> IconName {
IconName::AiOpenAi
}
fn provided_models(&self, cx: &AppContext) -> Vec<Arc<dyn LanguageModel>> {
let mut models = BTreeMap::default();
// Add base models from open_ai::Model::iter()
for model in open_ai::Model::iter() {
if !matches!(model, open_ai::Model::Custom { .. }) {
models.insert(model.id().to_string(), model);
}
}
// Override with available models from settings
for model in &AllLanguageModelSettings::get_global(cx)
.openai
.available_models
{
models.insert(
model.name.clone(),
open_ai::Model::Custom {
name: model.name.clone(),
display_name: model.display_name.clone(),
max_tokens: model.max_tokens,
max_output_tokens: model.max_output_tokens,
max_completion_tokens: model.max_completion_tokens,
},
);
}
models
.into_values()
.map(|model| {
Arc::new(OpenAiLanguageModel {
id: LanguageModelId::from(model.id().to_string()),
model,
state: self.state.clone(),
http_client: self.http_client.clone(),
request_limiter: RateLimiter::new(4),
}) as Arc<dyn LanguageModel>
})
.collect()
}
fn is_authenticated(&self, cx: &AppContext) -> bool {
self.state.read(cx).is_authenticated()
}
fn authenticate(&self, cx: &mut AppContext) -> Task<Result<()>> {
self.state.update(cx, |state, cx| state.authenticate(cx))
}
fn configuration_view(&self, cx: &mut WindowContext) -> AnyView {
cx.new_view(|cx| ConfigurationView::new(self.state.clone(), cx))
.into()
}
fn reset_credentials(&self, cx: &mut AppContext) -> Task<Result<()>> {
self.state.update(cx, |state, cx| state.reset_api_key(cx))
}
}
pub struct OpenAiLanguageModel {
id: LanguageModelId,
model: open_ai::Model,
state: gpui::Model<State>,
http_client: Arc<dyn HttpClient>,
request_limiter: RateLimiter,
}
impl OpenAiLanguageModel {
fn stream_completion(
&self,
request: open_ai::Request,
cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<futures::stream::BoxStream<'static, Result<ResponseStreamEvent>>>>
{
let http_client = self.http_client.clone();
let Ok((api_key, api_url)) = cx.read_model(&self.state, |state, cx| {
let settings = &AllLanguageModelSettings::get_global(cx).openai;
(state.api_key.clone(), settings.api_url.clone())
}) else {
return futures::future::ready(Err(anyhow!("App state dropped"))).boxed();
};
let future = self.request_limiter.stream(async move {
let api_key = api_key.ok_or_else(|| anyhow!("Missing OpenAI API Key"))?;
let request = stream_completion(http_client.as_ref(), &api_url, &api_key, request);
let response = request.await?;
Ok(response)
});
async move { Ok(future.await?.boxed()) }.boxed()
}
}
impl LanguageModel for OpenAiLanguageModel {
fn id(&self) -> LanguageModelId {
self.id.clone()
}
fn name(&self) -> LanguageModelName {
LanguageModelName::from(self.model.display_name().to_string())
}
fn provider_id(&self) -> LanguageModelProviderId {
LanguageModelProviderId(PROVIDER_ID.into())
}
fn provider_name(&self) -> LanguageModelProviderName {
LanguageModelProviderName(PROVIDER_NAME.into())
}
fn telemetry_id(&self) -> String {
format!("openai/{}", self.model.id())
}
fn max_token_count(&self) -> usize {
self.model.max_token_count()
}
fn max_output_tokens(&self) -> Option<u32> {
self.model.max_output_tokens()
}
fn count_tokens(
&self,
request: LanguageModelRequest,
cx: &AppContext,
) -> BoxFuture<'static, Result<usize>> {
count_open_ai_tokens(request, self.model.clone(), cx)
}
fn stream_completion(
&self,
request: LanguageModelRequest,
cx: &AsyncAppContext,
) -> BoxFuture<
'static,
Result<futures::stream::BoxStream<'static, Result<LanguageModelCompletionEvent>>>,
> {
let request = request.into_open_ai(self.model.id().into(), self.max_output_tokens());
let completions = self.stream_completion(request, cx);
async move {
Ok(open_ai::extract_text_from_events(completions.await?)
.map(|result| result.map(LanguageModelCompletionEvent::Text))
.boxed())
}
.boxed()
}
fn use_any_tool(
&self,
request: LanguageModelRequest,
tool_name: String,
tool_description: String,
schema: serde_json::Value,
cx: &AsyncAppContext,
) -> BoxFuture<'static, Result<futures::stream::BoxStream<'static, Result<String>>>> {
let mut request = request.into_open_ai(self.model.id().into(), self.max_output_tokens());
request.tool_choice = Some(ToolChoice::Other(ToolDefinition::Function {
function: FunctionDefinition {
name: tool_name.clone(),
description: None,
parameters: None,
},
}));
request.tools = vec![ToolDefinition::Function {
function: FunctionDefinition {
name: tool_name.clone(),
description: Some(tool_description),
parameters: Some(schema),
},
}];
let response = self.stream_completion(request, cx);
self.request_limiter
.run(async move {
let response = response.await?;
Ok(
open_ai::extract_tool_args_from_events(tool_name, Box::pin(response))
.await?
.boxed(),
)
})
.boxed()
}
}
pub fn count_open_ai_tokens(
request: LanguageModelRequest,
model: open_ai::Model,
cx: &AppContext,
) -> BoxFuture<'static, Result<usize>> {
cx.background_executor()
.spawn(async move {
let messages = request
.messages
.into_iter()
.map(|message| tiktoken_rs::ChatCompletionRequestMessage {
role: match message.role {
Role::User => "user".into(),
Role::Assistant => "assistant".into(),
Role::System => "system".into(),
},
content: Some(message.string_contents()),
name: None,
function_call: None,
})
.collect::<Vec<_>>();
match model {
open_ai::Model::Custom { .. }
| open_ai::Model::O1Mini
| open_ai::Model::O1Preview => {
tiktoken_rs::num_tokens_from_messages("gpt-4", &messages)
}
_ => tiktoken_rs::num_tokens_from_messages(model.id(), &messages),
}
})
.boxed()
}
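// A hedged restatement (illustrative helper, not part of this commit) of the
// fallback rule above: custom and o1 model ids are counted with the gpt-4
// tokenizer, likely because tiktoken-rs has no tokenizer registered for them,
// so the result is an approximation.
#[allow(dead_code)]
fn tiktoken_model_id(model: &open_ai::Model) -> &str {
match model {
open_ai::Model::Custom { .. } | open_ai::Model::O1Mini | open_ai::Model::O1Preview => {
"gpt-4"
}
other => other.id(),
}
}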
struct ConfigurationView {
api_key_editor: View<Editor>,
state: gpui::Model<State>,
load_credentials_task: Option<Task<()>>,
}
impl ConfigurationView {
fn new(state: gpui::Model<State>, cx: &mut ViewContext<Self>) -> Self {
let api_key_editor = cx.new_view(|cx| {
let mut editor = Editor::single_line(cx);
editor.set_placeholder_text("sk-000000000000000000000000000000000000000000000000", cx);
editor
});
cx.observe(&state, |_, _, cx| {
cx.notify();
})
.detach();
let load_credentials_task = Some(cx.spawn({
let state = state.clone();
|this, mut cx| async move {
if let Some(task) = state
.update(&mut cx, |state, cx| state.authenticate(cx))
.log_err()
{
// We don't log this error, because a failure here may just mean the user isn't signed in yet.
let _ = task.await;
}
this.update(&mut cx, |this, cx| {
this.load_credentials_task = None;
cx.notify();
})
.log_err();
}
}));
Self {
api_key_editor,
state,
load_credentials_task,
}
}
fn save_api_key(&mut self, _: &menu::Confirm, cx: &mut ViewContext<Self>) {
let api_key = self.api_key_editor.read(cx).text(cx);
if api_key.is_empty() {
return;
}
let state = self.state.clone();
cx.spawn(|_, mut cx| async move {
state
.update(&mut cx, |state, cx| state.set_api_key(api_key, cx))?
.await
})
.detach_and_log_err(cx);
cx.notify();
}
fn reset_api_key(&mut self, cx: &mut ViewContext<Self>) {
self.api_key_editor
.update(cx, |editor, cx| editor.set_text("", cx));
let state = self.state.clone();
cx.spawn(|_, mut cx| async move {
state
.update(&mut cx, |state, cx| state.reset_api_key(cx))?
.await
})
.detach_and_log_err(cx);
cx.notify();
}
fn render_api_key_editor(&self, cx: &mut ViewContext<Self>) -> impl IntoElement {
let settings = ThemeSettings::get_global(cx);
let text_style = TextStyle {
color: cx.theme().colors().text,
font_family: settings.ui_font.family.clone(),
font_features: settings.ui_font.features.clone(),
font_fallbacks: settings.ui_font.fallbacks.clone(),
font_size: rems(0.875).into(),
font_weight: settings.ui_font.weight,
font_style: FontStyle::Normal,
line_height: relative(1.3),
background_color: None,
underline: None,
strikethrough: None,
white_space: WhiteSpace::Normal,
truncate: None,
};
EditorElement::new(
&self.api_key_editor,
EditorStyle {
background: cx.theme().colors().editor_background,
local_player: cx.theme().players().local(),
text: text_style,
..Default::default()
},
)
}
fn should_render_editor(&self, cx: &mut ViewContext<Self>) -> bool {
!self.state.read(cx).is_authenticated()
}
}
impl Render for ConfigurationView {
fn render(&mut self, cx: &mut ViewContext<Self>) -> impl IntoElement {
const OPENAI_CONSOLE_URL: &str = "https://platform.openai.com/api-keys";
const INSTRUCTIONS: [&str; 4] = [
"To use Zed's assistant with OpenAI, you need to add an API key. Follow these steps:",
" - Create one by visiting:",
" - Ensure your OpenAI account has credits",
" - Paste your API key below and hit enter to start using the assistant",
];
let env_var_set = self.state.read(cx).api_key_from_env;
if self.load_credentials_task.is_some() {
div().child(Label::new("Loading credentials...")).into_any()
} else if self.should_render_editor(cx) {
v_flex()
.size_full()
.on_action(cx.listener(Self::save_api_key))
.child(Label::new(INSTRUCTIONS[0]))
.child(h_flex().child(Label::new(INSTRUCTIONS[1])).child(
Button::new("openai_console", OPENAI_CONSOLE_URL)
.style(ButtonStyle::Subtle)
.icon(IconName::ExternalLink)
.icon_size(IconSize::XSmall)
.icon_color(Color::Muted)
.on_click(move |_, cx| cx.open_url(OPENAI_CONSOLE_URL))
)
)
.children(
(2..INSTRUCTIONS.len())
.map(|n| Label::new(INSTRUCTIONS[n]))
.collect::<Vec<_>>(),
)
.child(
h_flex()
.w_full()
.my_2()
.px_2()
.py_1()
.bg(cx.theme().colors().editor_background)
.rounded_md()
.child(self.render_api_key_editor(cx)),
)
.child(
Label::new(
format!("You can also assign the {OPENAI_API_KEY_VAR} environment variable and restart Zed."),
)
.size(LabelSize::Small),
)
.child(
Label::new(
"Note that having a subscription for another service like GitHub Copilot won't work.".to_string(),
)
.size(LabelSize::Small),
)
.into_any()
} else {
h_flex()
.size_full()
.justify_between()
.child(
h_flex()
.gap_1()
.child(Icon::new(IconName::Check).color(Color::Success))
.child(Label::new(if env_var_set {
format!("API key set in {OPENAI_API_KEY_VAR} environment variable.")
} else {
"API key configured.".to_string()
})),
)
.child(
Button::new("reset-key", "Reset key")
.icon(Some(IconName::Trash))
.icon_size(IconSize::Small)
.icon_position(IconPosition::Start)
.disabled(env_var_set)
.when(env_var_set, |this| {
this.tooltip(|cx| Tooltip::text(format!("To reset your API key, unset the {OPENAI_API_KEY_VAR} environment variable."), cx))
})
.on_click(cx.listener(|this, _, cx| this.reset_api_key(cx))),
)
.into_any()
}
}
}

View file

@ -0,0 +1,319 @@
use std::sync::Arc;
use anyhow::Result;
use gpui::AppContext;
use language_model::LanguageModelCacheConfiguration;
use project::Fs;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{update_settings_file, Settings, SettingsSources};
use crate::provider::{
self,
anthropic::AnthropicSettings,
cloud::{self, ZedDotDevSettings},
copilot_chat::CopilotChatSettings,
google::GoogleSettings,
ollama::OllamaSettings,
open_ai::OpenAiSettings,
};
/// Initializes the language model settings.
pub fn init(fs: Arc<dyn Fs>, cx: &mut AppContext) {
AllLanguageModelSettings::register(cx);
if AllLanguageModelSettings::get_global(cx)
.openai
.needs_setting_migration
{
update_settings_file::<AllLanguageModelSettings>(fs.clone(), cx, move |setting, _| {
if let Some(settings) = setting.openai.clone() {
let (newest_version, _) = settings.upgrade();
setting.openai = Some(OpenAiSettingsContent::Versioned(
VersionedOpenAiSettingsContent::V1(newest_version),
));
}
});
}
if AllLanguageModelSettings::get_global(cx)
.anthropic
.needs_setting_migration
{
update_settings_file::<AllLanguageModelSettings>(fs, cx, move |setting, _| {
if let Some(settings) = setting.anthropic.clone() {
let (newest_version, _) = settings.upgrade();
setting.anthropic = Some(AnthropicSettingsContent::Versioned(
VersionedAnthropicSettingsContent::V1(newest_version),
));
}
});
}
}
#[derive(Default)]
pub struct AllLanguageModelSettings {
pub anthropic: AnthropicSettings,
pub ollama: OllamaSettings,
pub openai: OpenAiSettings,
pub zed_dot_dev: ZedDotDevSettings,
pub google: GoogleSettings,
pub copilot_chat: CopilotChatSettings,
}
#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct AllLanguageModelSettingsContent {
pub anthropic: Option<AnthropicSettingsContent>,
pub ollama: Option<OllamaSettingsContent>,
pub openai: Option<OpenAiSettingsContent>,
#[serde(rename = "zed.dev")]
pub zed_dot_dev: Option<ZedDotDevSettingsContent>,
pub google: Option<GoogleSettingsContent>,
pub copilot_chat: Option<CopilotChatSettingsContent>,
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
#[serde(untagged)]
pub enum AnthropicSettingsContent {
Legacy(LegacyAnthropicSettingsContent),
Versioned(VersionedAnthropicSettingsContent),
}
impl AnthropicSettingsContent {
pub fn upgrade(self) -> (AnthropicSettingsContentV1, bool) {
match self {
AnthropicSettingsContent::Legacy(content) => (
AnthropicSettingsContentV1 {
api_url: content.api_url,
available_models: content.available_models.map(|models| {
models
.into_iter()
.filter_map(|model| match model {
anthropic::Model::Custom {
name,
display_name,
max_tokens,
tool_override,
cache_configuration,
max_output_tokens,
default_temperature,
} => Some(provider::anthropic::AvailableModel {
name,
display_name,
max_tokens,
tool_override,
cache_configuration: cache_configuration.as_ref().map(
|config| LanguageModelCacheConfiguration {
max_cache_anchors: config.max_cache_anchors,
should_speculate: config.should_speculate,
min_total_token: config.min_total_token,
},
),
max_output_tokens,
default_temperature,
}),
_ => None,
})
.collect()
}),
},
true,
),
AnthropicSettingsContent::Versioned(content) => match content {
VersionedAnthropicSettingsContent::V1(content) => (content, false),
},
}
}
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct LegacyAnthropicSettingsContent {
pub api_url: Option<String>,
pub available_models: Option<Vec<anthropic::Model>>,
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
#[serde(tag = "version")]
pub enum VersionedAnthropicSettingsContent {
#[serde(rename = "1")]
V1(AnthropicSettingsContentV1),
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct AnthropicSettingsContentV1 {
pub api_url: Option<String>,
pub available_models: Option<Vec<provider::anthropic::AvailableModel>>,
}
#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct OllamaSettingsContent {
pub api_url: Option<String>,
pub available_models: Option<Vec<provider::ollama::AvailableModel>>,
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
#[serde(untagged)]
pub enum OpenAiSettingsContent {
Legacy(LegacyOpenAiSettingsContent),
Versioned(VersionedOpenAiSettingsContent),
}
impl OpenAiSettingsContent {
pub fn upgrade(self) -> (OpenAiSettingsContentV1, bool) {
match self {
OpenAiSettingsContent::Legacy(content) => (
OpenAiSettingsContentV1 {
api_url: content.api_url,
available_models: content.available_models.map(|models| {
models
.into_iter()
.filter_map(|model| match model {
open_ai::Model::Custom {
name,
display_name,
max_tokens,
max_output_tokens,
max_completion_tokens,
} => Some(provider::open_ai::AvailableModel {
name,
max_tokens,
max_output_tokens,
display_name,
max_completion_tokens,
}),
_ => None,
})
.collect()
}),
},
true,
),
OpenAiSettingsContent::Versioned(content) => match content {
VersionedOpenAiSettingsContent::V1(content) => (content, false),
},
}
}
}
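// A hedged sketch (hypothetical test, not part of this commit) of the
// legacy-to-versioned upgrade above: legacy content is rewritten as V1 and
// reports `true`, which is what flags the settings file for migration in
// `init`.
#[cfg(test)]
mod open_ai_settings_upgrade_sketch {
use super::*;
#[test]
fn legacy_settings_upgrade_to_v1() {
let legacy = OpenAiSettingsContent::Legacy(LegacyOpenAiSettingsContent {
api_url: Some("https://api.openai.com/v1".into()),
available_models: None,
});
let (v1, upgraded) = legacy.upgrade();
assert!(upgraded, "legacy content always needs migration");
assert_eq!(v1.api_url.as_deref(), Some("https://api.openai.com/v1"));
}
}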
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct LegacyOpenAiSettingsContent {
pub api_url: Option<String>,
pub available_models: Option<Vec<open_ai::Model>>,
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
#[serde(tag = "version")]
pub enum VersionedOpenAiSettingsContent {
#[serde(rename = "1")]
V1(OpenAiSettingsContentV1),
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct OpenAiSettingsContentV1 {
pub api_url: Option<String>,
pub available_models: Option<Vec<provider::open_ai::AvailableModel>>,
}
#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct GoogleSettingsContent {
pub api_url: Option<String>,
pub available_models: Option<Vec<provider::google::AvailableModel>>,
}
#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct ZedDotDevSettingsContent {
available_models: Option<Vec<cloud::AvailableModel>>,
}
#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct CopilotChatSettingsContent {}
impl settings::Settings for AllLanguageModelSettings {
const KEY: Option<&'static str> = Some("language_models");
const PRESERVED_KEYS: Option<&'static [&'static str]> = Some(&["version"]);
type FileContent = AllLanguageModelSettingsContent;
fn load(sources: SettingsSources<Self::FileContent>, _: &mut AppContext) -> Result<Self> {
fn merge<T>(target: &mut T, value: Option<T>) {
if let Some(value) = value {
*target = value;
}
}
let mut settings = AllLanguageModelSettings::default();
for value in sources.defaults_and_customizations() {
// Anthropic
let (anthropic, upgraded) = match value.anthropic.clone().map(|s| s.upgrade()) {
Some((content, upgraded)) => (Some(content), upgraded),
None => (None, false),
};
if upgraded {
settings.anthropic.needs_setting_migration = true;
}
merge(
&mut settings.anthropic.api_url,
anthropic.as_ref().and_then(|s| s.api_url.clone()),
);
merge(
&mut settings.anthropic.available_models,
anthropic.as_ref().and_then(|s| s.available_models.clone()),
);
// Ollama
let ollama = value.ollama.clone();
merge(
&mut settings.ollama.api_url,
ollama.as_ref().and_then(|s| s.api_url.clone()),
);
merge(
&mut settings.ollama.available_models,
ollama.as_ref().and_then(|s| s.available_models.clone()),
);
// OpenAI
let (openai, upgraded) = match value.openai.clone().map(|s| s.upgrade()) {
Some((content, upgraded)) => (Some(content), upgraded),
None => (None, false),
};
if upgraded {
settings.openai.needs_setting_migration = true;
}
merge(
&mut settings.openai.api_url,
openai.as_ref().and_then(|s| s.api_url.clone()),
);
merge(
&mut settings.openai.available_models,
openai.as_ref().and_then(|s| s.available_models.clone()),
);
merge(
&mut settings.zed_dot_dev.available_models,
value
.zed_dot_dev
.as_ref()
.and_then(|s| s.available_models.clone()),
);
merge(
&mut settings.google.api_url,
value.google.as_ref().and_then(|s| s.api_url.clone()),
);
merge(
&mut settings.google.available_models,
value
.google
.as_ref()
.and_then(|s| s.available_models.clone()),
);
}
Ok(settings)
}
}
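// A hedged sketch (hypothetical test, not part of this commit) of the on-disk
// shape this settings section accepts, derived from the definitions above:
// the section lives under `KEY` ("language_models") in the user's settings
// file, "zed.dev" comes from the serde rename, and "version": "1" from the
// versioned-content tag. Values are illustrative only.
#[cfg(test)]
mod settings_shape_sketch {
#[test]
fn example_settings_content_deserializes() {
let example = serde_json::json!({
"openai": {
"version": "1",
"api_url": "https://api.openai.com/v1",
"available_models": [
{ "name": "gpt-4o", "max_tokens": 128000 }
]
},
"zed.dev": { "available_models": [] }
});
let content: super::AllLanguageModelSettingsContent =
serde_json::from_value(example).unwrap();
assert!(content.openai.is_some());
assert!(content.zed_dot_dev.is_some());
}
}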