diff --git a/Cargo.lock b/Cargo.lock
index 527b99f3c2..ff51a57145 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -90,6 +90,7 @@ dependencies = [
  "assistant_tools",
  "chrono",
  "client",
+ "cloud_llm_client",
  "collections",
  "component",
  "context_server",
@@ -132,7 +133,6 @@ dependencies = [
  "uuid",
  "workspace",
  "workspace-hack",
- "zed_llm_client",
  "zstd",
 ]
 
@@ -189,6 +189,7 @@ name = "agent_settings"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "cloud_llm_client",
  "collections",
  "fs",
  "gpui",
@@ -200,7 +201,6 @@ dependencies = [
  "serde_json_lenient",
  "settings",
  "workspace-hack",
- "zed_llm_client",
 ]
 
 [[package]]
@@ -223,6 +223,7 @@ dependencies = [
  "buffer_diff",
  "chrono",
  "client",
+ "cloud_llm_client",
  "collections",
  "command_palette_hooks",
  "component",
@@ -294,7 +295,6 @@ dependencies = [
  "workspace",
  "workspace-hack",
  "zed_actions",
- "zed_llm_client",
 ]
 
 [[package]]
@@ -687,6 +687,7 @@ dependencies = [
  "chrono",
  "client",
  "clock",
+ "cloud_llm_client",
  "collections",
  "context_server",
  "fs",
@@ -720,7 +721,6 @@ dependencies = [
  "uuid",
  "workspace",
  "workspace-hack",
- "zed_llm_client",
 ]
 
 [[package]]
@@ -828,6 +828,7 @@ dependencies = [
  "chrono",
  "client",
  "clock",
+ "cloud_llm_client",
  "collections",
  "component",
  "derive_more 0.99.19",
@@ -881,7 +882,6 @@ dependencies = [
  "which 6.0.3",
  "workspace",
  "workspace-hack",
- "zed_llm_client",
  "zlog",
 ]
 
@@ -2976,6 +2976,7 @@ dependencies = [
  "base64 0.22.1",
  "chrono",
  "clock",
+ "cloud_llm_client",
  "cocoa 0.26.0",
  "collections",
  "credentials_provider",
@@ -3018,7 +3019,6 @@ dependencies = [
  "windows 0.61.1",
  "workspace-hack",
  "worktree",
- "zed_llm_client",
 ]
 
 [[package]]
@@ -3170,6 +3170,7 @@ dependencies = [
  "chrono",
  "client",
  "clock",
+ "cloud_llm_client",
  "collab_ui",
  "collections",
  "command_palette_hooks",
@@ -3256,7 +3257,6 @@ dependencies = [
  "workspace",
  "workspace-hack",
  "worktree",
- "zed_llm_client",
  "zlog",
 ]
 
@@ -5242,6 +5242,7 @@ dependencies = [
  "chrono",
  "clap",
  "client",
+ "cloud_llm_client",
  "collections",
  "debug_adapter_extension",
  "dirs 4.0.0",
@@ -5281,7 +5282,6 @@ dependencies = [
  "uuid",
  "watch",
  "workspace-hack",
- "zed_llm_client",
 ]
 
 [[package]]
@@ -6363,6 +6363,7 @@ dependencies = [
  "call",
  "chrono",
  "client",
+ "cloud_llm_client",
  "collections",
  "command_palette_hooks",
  "component",
@@ -6405,7 +6406,6 @@ dependencies = [
  "workspace",
  "workspace-hack",
  "zed_actions",
- "zed_llm_client",
  "zlog",
 ]
 
@@ -8366,6 +8366,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "client",
+ "cloud_llm_client",
  "copilot",
  "editor",
  "feature_flags",
@@ -8388,7 +8389,6 @@ dependencies = [
  "workspace",
  "workspace-hack",
  "zed_actions",
- "zed_llm_client",
  "zeta",
 ]
 
@@ -9070,6 +9070,7 @@ dependencies = [
  "anyhow",
  "base64 0.22.1",
  "client",
+ "cloud_llm_client",
  "collections",
  "futures 0.3.31",
  "gpui",
@@ -9087,7 +9088,6 @@ dependencies = [
  "thiserror 2.0.12",
  "util",
  "workspace-hack",
- "zed_llm_client",
 ]
 
 [[package]]
@@ -9103,6 +9103,7 @@ dependencies = [
  "bedrock",
  "chrono",
  "client",
+ "cloud_llm_client",
  "collections",
  "component",
  "convert_case 0.8.0",
@@ -9144,7 +9145,6 @@ dependencies = [
  "vercel",
  "workspace-hack",
  "x_ai",
- "zed_llm_client",
 ]
 
 [[package]]
@@ -18531,11 +18531,11 @@ name = "web_search"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "cloud_llm_client",
  "collections",
  "gpui",
  "serde",
  "workspace-hack",
- "zed_llm_client",
 ]
 
 [[package]]
@@ -18544,6 +18544,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "client",
+ "cloud_llm_client",
  "futures 0.3.31",
  "gpui",
  "http_client",
@@ -18552,7 +18553,6 @@ dependencies = [
  "serde_json",
  "web_search",
  "workspace-hack",
- "zed_llm_client",
 ]
 
 [[package]]
@@ -20419,19 +20419,6 @@ dependencies = [
  "zed_extension_api 0.1.0",
 ]
 
-[[package]]
-name = "zed_llm_client"
-version = "0.8.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6607f74dee2a18a9ce0f091844944a0e59881359ab62e0768fb0618f55d4c1dc"
-dependencies = [
- "anyhow",
- "serde",
- "serde_json",
- "strum 0.27.1",
- "uuid",
-]
-
 [[package]]
 name = "zed_proto"
 version = "0.2.2"
@@ -20611,6 +20598,7 @@ dependencies = [
  "call",
  "client",
  "clock",
+ "cloud_llm_client",
  "collections",
  "command_palette_hooks",
  "copilot",
@@ -20652,7 +20640,6 @@ dependencies = [
  "workspace-hack",
  "worktree",
  "zed_actions",
- "zed_llm_client",
  "zlog",
 ]
 
diff --git a/Cargo.toml b/Cargo.toml
index e08736e38e..a6428d897b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -646,7 +646,6 @@ which = "6.0.0"
 windows-core = "0.61"
 wit-component = "0.221"
 workspace-hack = "0.1.0"
-zed_llm_client = "= 0.8.6"
 zstd = "0.11"
 
 [workspace.dependencies.async-stripe]
diff --git a/crates/agent/Cargo.toml b/crates/agent/Cargo.toml
index 135363ab65..c89a7f3303 100644
--- a/crates/agent/Cargo.toml
+++ b/crates/agent/Cargo.toml
@@ -25,6 +25,7 @@ assistant_context.workspace = true
 assistant_tool.workspace = true
 chrono.workspace = true
 client.workspace = true
+cloud_llm_client.workspace = true
 collections.workspace = true
 component.workspace = true
 context_server.workspace = true
@@ -35,9 +36,9 @@ futures.workspace = true
 git.workspace = true
 gpui.workspace = true
 heed.workspace = true
+http_client.workspace = true
 icons.workspace = true
 indoc.workspace = true
-http_client.workspace = true
 itertools.workspace = true
 language.workspace = true
 language_model.workspace = true
@@ -63,7 +64,6 @@ time.workspace = true
 util.workspace = true
 uuid.workspace = true
 workspace-hack.workspace = true
-zed_llm_client.workspace = true
 zstd.workspace = true
 
 [dev-dependencies]
diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs
index 1b8aa012a1..0e5da2d43b 100644
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@@ -13,6 +13,7 @@ use anyhow::{Result, anyhow};
 use assistant_tool::{ActionLog, AnyToolCard, Tool, ToolWorkingSet};
 use chrono::{DateTime, Utc};
 use client::{ModelRequestUsage, RequestUsage};
+use cloud_llm_client::{CompletionIntent, CompletionRequestStatus, UsageLimit};
 use collections::HashMap;
 use feature_flags::{self, FeatureFlagAppExt};
 use futures::{FutureExt, StreamExt as _, future::Shared};
@@ -49,7 +50,6 @@ use std::{
 use thiserror::Error;
 use util::{ResultExt as _, post_inc};
 use uuid::Uuid;
-use zed_llm_client::{CompletionIntent, CompletionRequestStatus, UsageLimit};
 
 const MAX_RETRY_ATTEMPTS: u8 = 4;
 const BASE_RETRY_DELAY: Duration = Duration::from_secs(5);
@@ -1681,7 +1681,7 @@ impl Thread {
 
         let completion_mode = request
             .mode
-            .unwrap_or(zed_llm_client::CompletionMode::Normal);
+            .unwrap_or(cloud_llm_client::CompletionMode::Normal);
 
         self.last_received_chunk_at = Some(Instant::now());
 
diff --git a/crates/agent_settings/Cargo.toml b/crates/agent_settings/Cargo.toml
index 3afe5ae547..d34396a5d3 100644
--- a/crates/agent_settings/Cargo.toml
+++ b/crates/agent_settings/Cargo.toml
@@ -13,6 +13,7 @@ path = "src/agent_settings.rs"
 
 [dependencies]
 anyhow.workspace = true
+cloud_llm_client.workspace = true
 collections.workspace = true
 gpui.workspace = true
 language_model.workspace = true
@@ -20,7 +21,6 @@ schemars.workspace = true
 serde.workspace = true
 settings.workspace = true
 workspace-hack.workspace = true
-zed_llm_client.workspace = true
 
 [dev-dependencies]
 fs.workspace = true
diff --git a/crates/agent_settings/src/agent_settings.rs b/crates/agent_settings/src/agent_settings.rs
index 13b966608c..4e872c78d7 100644
--- a/crates/agent_settings/src/agent_settings.rs
+++ b/crates/agent_settings/src/agent_settings.rs
@@ -321,11 +321,11 @@ pub enum CompletionMode {
     Burn,
 }
 
-impl From<CompletionMode> for zed_llm_client::CompletionMode {
+impl From<CompletionMode> for cloud_llm_client::CompletionMode {
     fn from(value: CompletionMode) -> Self {
         match value {
-            CompletionMode::Normal => zed_llm_client::CompletionMode::Normal,
-            CompletionMode::Burn => zed_llm_client::CompletionMode::Max,
+            CompletionMode::Normal => cloud_llm_client::CompletionMode::Normal,
+            CompletionMode::Burn => cloud_llm_client::CompletionMode::Max,
         }
     }
 }
diff --git a/crates/agent_ui/Cargo.toml b/crates/agent_ui/Cargo.toml
index fbd53e8d09..95fd2b1757 100644
--- a/crates/agent_ui/Cargo.toml
+++ b/crates/agent_ui/Cargo.toml
@@ -31,6 +31,7 @@ audio.workspace = true
 buffer_diff.workspace = true
 chrono.workspace = true
 client.workspace = true
+cloud_llm_client.workspace = true
 collections.workspace = true
 command_palette_hooks.workspace = true
 component.workspace = true
@@ -46,9 +47,9 @@ futures.workspace = true
 fuzzy.workspace = true
 gpui.workspace = true
 html_to_markdown.workspace = true
-indoc.workspace = true
 http_client.workspace = true
 indexed_docs.workspace = true
+indoc.workspace = true
 inventory.workspace = true
 itertools.workspace = true
 jsonschema.workspace = true
@@ -97,7 +98,6 @@ watch.workspace = true
 workspace-hack.workspace = true
 workspace.workspace = true
 zed_actions.workspace = true
-zed_llm_client.workspace = true
 
 [dev-dependencies]
 assistant_tools.workspace = true
diff --git a/crates/agent_ui/src/active_thread.rs b/crates/agent_ui/src/active_thread.rs
index e27c318221..04a093c7d0 100644
--- a/crates/agent_ui/src/active_thread.rs
+++ b/crates/agent_ui/src/active_thread.rs
@@ -14,6 +14,7 @@ use agent_settings::{AgentSettings, NotifyWhenAgentWaiting};
 use anyhow::Context as _;
 use assistant_tool::ToolUseStatus;
 use audio::{Audio, Sound};
+use cloud_llm_client::CompletionIntent;
 use collections::{HashMap, HashSet};
 use editor::actions::{MoveUp, Paste};
 use editor::scroll::Autoscroll;
@@ -52,7 +53,6 @@ use util::ResultExt as _;
 use util::markdown::MarkdownCodeBlock;
 use workspace::{CollaboratorId, Workspace};
 use zed_actions::assistant::OpenRulesLibrary;
-use zed_llm_client::CompletionIntent;
 
 const CODEBLOCK_CONTAINER_GROUP: &str = "codeblock_container";
 const EDIT_PREVIOUS_MESSAGE_MIN_LINES: usize = 1;
diff --git a/crates/agent_ui/src/agent_panel.rs b/crates/agent_ui/src/agent_panel.rs
index 61a65de50b..91217cb030 100644
--- a/crates/agent_ui/src/agent_panel.rs
+++ b/crates/agent_ui/src/agent_panel.rs
@@ -44,6 +44,7 @@ use assistant_context::{AssistantContext, ContextEvent, ContextSummary};
 use assistant_slash_command::SlashCommandWorkingSet;
 use assistant_tool::ToolWorkingSet;
 use client::{DisableAiSettings, UserStore, zed_urls};
+use cloud_llm_client::{CompletionIntent, UsageLimit};
 use editor::{Anchor, AnchorRangeExt as _, Editor, EditorEvent, MultiBuffer};
 use feature_flags::{self, FeatureFlagAppExt};
 use fs::Fs;
@@ -80,7 +81,6 @@ use zed_actions::{
     agent::{OpenConfiguration, OpenOnboardingModal, ResetOnboarding, ToggleModelSelector},
     assistant::{OpenRulesLibrary, ToggleFocus},
 };
-use zed_llm_client::{CompletionIntent, UsageLimit};
 
 const AGENT_PANEL_KEY: &str = "agent_panel";
 
diff --git a/crates/agent_ui/src/buffer_codegen.rs b/crates/agent_ui/src/buffer_codegen.rs
index 64498e9281..615142b73d 100644
--- a/crates/agent_ui/src/buffer_codegen.rs
+++ b/crates/agent_ui/src/buffer_codegen.rs
@@ -6,6 +6,7 @@ use agent::{
 use agent_settings::AgentSettings;
 use anyhow::{Context as _, Result};
 use client::telemetry::Telemetry;
+use cloud_llm_client::CompletionIntent;
 use collections::HashSet;
 use editor::{Anchor, AnchorRangeExt, MultiBuffer, MultiBufferSnapshot, ToOffset as _, ToPoint};
 use futures::{
@@ -35,7 +36,6 @@ use std::{
 };
 use streaming_diff::{CharOperation, LineDiff, LineOperation, StreamingDiff};
 use telemetry_events::{AssistantEventData, AssistantKind, AssistantPhase};
-use zed_llm_client::CompletionIntent;
 
 pub struct BufferCodegen {
     alternatives: Vec<Entity<CodegenAlternative>>,
diff --git a/crates/agent_ui/src/debug.rs b/crates/agent_ui/src/debug.rs
index ff6538dc85..bd34659210 100644
--- a/crates/agent_ui/src/debug.rs
+++ b/crates/agent_ui/src/debug.rs
@@ -1,10 +1,10 @@
 #![allow(unused, dead_code)]
 
 use client::{ModelRequestUsage, RequestUsage};
+use cloud_llm_client::{Plan, UsageLimit};
 use gpui::Global;
 use std::ops::{Deref, DerefMut};
 use ui::prelude::*;
-use zed_llm_client::{Plan, UsageLimit};
 
 /// Debug only: Used for testing various account states
 ///
diff --git a/crates/agent_ui/src/message_editor.rs b/crates/agent_ui/src/message_editor.rs
index c160f1de04..082d1dfb51 100644
--- a/crates/agent_ui/src/message_editor.rs
+++ b/crates/agent_ui/src/message_editor.rs
@@ -18,6 +18,7 @@ use agent_settings::{AgentSettings, CompletionMode};
 use ai_onboarding::ApiKeysWithProviders;
 use buffer_diff::BufferDiff;
 use client::UserStore;
+use cloud_llm_client::CompletionIntent;
 use collections::{HashMap, HashSet};
 use editor::actions::{MoveUp, Paste};
 use editor::display_map::CreaseId;
@@ -53,7 +54,6 @@ use util::ResultExt as _;
 use workspace::{CollaboratorId, Workspace};
 use zed_actions::agent::Chat;
 use zed_actions::agent::ToggleModelSelector;
-use zed_llm_client::CompletionIntent;
 
 use crate::context_picker::{ContextPicker, ContextPickerCompletionProvider, crease_for_mention};
 use crate::context_strip::{ContextStrip, ContextStripEvent, SuggestContextKind};
@@ -1300,11 +1300,11 @@ impl MessageEditor {
         let plan = user_store
             .current_plan()
             .map(|plan| match plan {
-                Plan::Free => zed_llm_client::Plan::ZedFree,
-                Plan::ZedPro => zed_llm_client::Plan::ZedPro,
-                Plan::ZedProTrial => zed_llm_client::Plan::ZedProTrial,
+                Plan::Free => cloud_llm_client::Plan::ZedFree,
+                Plan::ZedPro => cloud_llm_client::Plan::ZedPro,
+                Plan::ZedProTrial => cloud_llm_client::Plan::ZedProTrial,
             })
-            .unwrap_or(zed_llm_client::Plan::ZedFree);
+            .unwrap_or(cloud_llm_client::Plan::ZedFree);
 
         let usage = user_store.model_request_usage()?;
 
diff --git a/crates/agent_ui/src/terminal_inline_assistant.rs b/crates/agent_ui/src/terminal_inline_assistant.rs
index 91867957cd..bcbc308c99 100644
--- a/crates/agent_ui/src/terminal_inline_assistant.rs
+++ b/crates/agent_ui/src/terminal_inline_assistant.rs
@@ -10,6 +10,7 @@ use agent::{
 use agent_settings::AgentSettings;
 use anyhow::{Context as _, Result};
 use client::telemetry::Telemetry;
+use cloud_llm_client::CompletionIntent;
 use collections::{HashMap, VecDeque};
 use editor::{MultiBuffer, actions::SelectAll};
 use fs::Fs;
@@ -27,7 +28,6 @@ use terminal_view::TerminalView;
 use ui::prelude::*;
 use util::ResultExt;
 use workspace::{Toast, Workspace, notifications::NotificationId};
-use zed_llm_client::CompletionIntent;
 
 pub fn init(
     fs: Arc<dyn Fs>,
diff --git a/crates/agent_ui/src/ui/preview/usage_callouts.rs b/crates/agent_ui/src/ui/preview/usage_callouts.rs
index 45af41395b..64869a6ec7 100644
--- a/crates/agent_ui/src/ui/preview/usage_callouts.rs
+++ b/crates/agent_ui/src/ui/preview/usage_callouts.rs
@@ -1,8 +1,8 @@
 use client::{ModelRequestUsage, RequestUsage, zed_urls};
+use cloud_llm_client::{Plan, UsageLimit};
 use component::{empty_example, example_group_with_title, single_example};
 use gpui::{AnyElement, App, IntoElement, RenderOnce, Window};
 use ui::{Callout, prelude::*};
-use zed_llm_client::{Plan, UsageLimit};
 
 #[derive(IntoElement, RegisterComponent)]
 pub struct UsageCallout {
diff --git a/crates/assistant_context/Cargo.toml b/crates/assistant_context/Cargo.toml
index f35dc43340..8f5ff98790 100644
--- a/crates/assistant_context/Cargo.toml
+++ b/crates/assistant_context/Cargo.toml
@@ -19,6 +19,7 @@ assistant_slash_commands.workspace = true
 chrono.workspace = true
 client.workspace = true
 clock.workspace = true
+cloud_llm_client.workspace = true
 collections.workspace = true
 context_server.workspace = true
 fs.workspace = true
@@ -48,7 +49,6 @@ util.workspace = true
 uuid.workspace = true
 workspace-hack.workspace = true
 workspace.workspace = true
-zed_llm_client.workspace = true
 
 [dev-dependencies]
 indoc.workspace = true
diff --git a/crates/assistant_context/src/assistant_context.rs b/crates/assistant_context/src/assistant_context.rs
index 136468e084..4518bbff79 100644
--- a/crates/assistant_context/src/assistant_context.rs
+++ b/crates/assistant_context/src/assistant_context.rs
@@ -11,6 +11,7 @@ use assistant_slash_command::{
 use assistant_slash_commands::FileCommandMetadata;
 use client::{self, Client, proto, telemetry::Telemetry};
 use clock::ReplicaId;
+use cloud_llm_client::CompletionIntent;
 use collections::{HashMap, HashSet};
 use fs::{Fs, RenameOptions};
 use futures::{FutureExt, StreamExt, future::Shared};
@@ -46,7 +47,6 @@ use text::{BufferSnapshot, ToPoint};
 use ui::IconName;
 use util::{ResultExt, TryFutureExt, post_inc};
 use uuid::Uuid;
-use zed_llm_client::CompletionIntent;
 
 pub use crate::context_store::*;
 
diff --git a/crates/assistant_tools/Cargo.toml b/crates/assistant_tools/Cargo.toml
index 146800e094..d4b8fa3afc 100644
--- a/crates/assistant_tools/Cargo.toml
+++ b/crates/assistant_tools/Cargo.toml
@@ -21,9 +21,11 @@ assistant_tool.workspace = true
 buffer_diff.workspace = true
 chrono.workspace = true
 client.workspace = true
+cloud_llm_client.workspace = true
 collections.workspace = true
 component.workspace = true
 derive_more.workspace = true
+diffy = "0.4.2"
 editor.workspace = true
 feature_flags.workspace = true
 futures.workspace = true
@@ -63,8 +65,6 @@ web_search.workspace = true
 which.workspace = true
 workspace-hack.workspace = true
 workspace.workspace = true
-zed_llm_client.workspace = true
-diffy = "0.4.2"
 
 [dev-dependencies]
 lsp = { workspace = true, features = ["test-support"] }
diff --git a/crates/assistant_tools/src/edit_agent.rs b/crates/assistant_tools/src/edit_agent.rs
index 0184dff36c..fed79434bb 100644
--- a/crates/assistant_tools/src/edit_agent.rs
+++ b/crates/assistant_tools/src/edit_agent.rs
@@ -7,6 +7,7 @@ mod streaming_fuzzy_matcher;
 use crate::{Template, Templates};
 use anyhow::Result;
 use assistant_tool::ActionLog;
+use cloud_llm_client::CompletionIntent;
 use create_file_parser::{CreateFileParser, CreateFileParserEvent};
 pub use edit_parser::EditFormat;
 use edit_parser::{EditParser, EditParserEvent, EditParserMetrics};
@@ -29,7 +30,6 @@ use std::{cmp, iter, mem, ops::Range, path::PathBuf, pin::Pin, sync::Arc, task::
 use streaming_diff::{CharOperation, StreamingDiff};
 use streaming_fuzzy_matcher::StreamingFuzzyMatcher;
 use util::debug_panic;
-use zed_llm_client::CompletionIntent;
 
 #[derive(Serialize)]
 struct CreateFilePromptTemplate {
diff --git a/crates/assistant_tools/src/web_search_tool.rs b/crates/assistant_tools/src/web_search_tool.rs
index 5eeca9c2c4..d4a12f22c5 100644
--- a/crates/assistant_tools/src/web_search_tool.rs
+++ b/crates/assistant_tools/src/web_search_tool.rs
@@ -6,6 +6,7 @@ use anyhow::{Context as _, Result, anyhow};
 use assistant_tool::{
     ActionLog, Tool, ToolCard, ToolResult, ToolResultContent, ToolResultOutput, ToolUseStatus,
 };
+use cloud_llm_client::{WebSearchResponse, WebSearchResult};
 use futures::{Future, FutureExt, TryFutureExt};
 use gpui::{
     AnyWindowHandle, App, AppContext, Context, Entity, IntoElement, Task, WeakEntity, Window,
@@ -17,7 +18,6 @@ use serde::{Deserialize, Serialize};
 use ui::{IconName, Tooltip, prelude::*};
 use web_search::WebSearchRegistry;
 use workspace::Workspace;
-use zed_llm_client::{WebSearchResponse, WebSearchResult};
 
 #[derive(Debug, Serialize, Deserialize, JsonSchema)]
 pub struct WebSearchToolInput {
diff --git a/crates/client/Cargo.toml b/crates/client/Cargo.toml
index b741f515fd..dd97bd9ca4 100644
--- a/crates/client/Cargo.toml
+++ b/crates/client/Cargo.toml
@@ -22,6 +22,7 @@ async-tungstenite = { workspace = true, features = ["tokio", "tokio-rustls-manua
 base64.workspace = true
 chrono = { workspace = true, features = ["serde"] }
 clock.workspace = true
+cloud_llm_client.workspace = true
 collections.workspace = true
 credentials_provider.workspace = true
 derive_more.workspace = true
@@ -33,8 +34,8 @@ http_client.workspace = true
 http_client_tls.workspace = true
 httparse = "1.10"
 log.workspace = true
-paths.workspace = true
 parking_lot.workspace = true
+paths.workspace = true
 postage.workspace = true
 rand.workspace = true
 regex.workspace = true
@@ -46,19 +47,18 @@ serde_json.workspace = true
 settings.workspace = true
 sha2.workspace = true
 smol.workspace = true
+telemetry.workspace = true
 telemetry_events.workspace = true
 text.workspace = true
 thiserror.workspace = true
 time.workspace = true
 tiny_http.workspace = true
 tokio-socks = { version = "0.5.2", default-features = false, features = ["futures-io"] }
+tokio.workspace = true
 url.workspace = true
 util.workspace = true
-worktree.workspace = true
-telemetry.workspace = true
-tokio.workspace = true
 workspace-hack.workspace = true
-zed_llm_client.workspace = true
+worktree.workspace = true
 
 [dev-dependencies]
 clock = { workspace = true, features = ["test-support"] }
diff --git a/crates/client/src/user.rs b/crates/client/src/user.rs
index 5ed258aa8e..a7dab2a8d3 100644
--- a/crates/client/src/user.rs
+++ b/crates/client/src/user.rs
@@ -1,6 +1,10 @@
 use super::{Client, Status, TypedEnvelope, proto};
 use anyhow::{Context as _, Result, anyhow};
 use chrono::{DateTime, Utc};
+use cloud_llm_client::{
+    EDIT_PREDICTIONS_USAGE_AMOUNT_HEADER_NAME, EDIT_PREDICTIONS_USAGE_LIMIT_HEADER_NAME,
+    MODEL_REQUESTS_USAGE_AMOUNT_HEADER_NAME, MODEL_REQUESTS_USAGE_LIMIT_HEADER_NAME, UsageLimit,
+};
 use collections::{HashMap, HashSet, hash_map::Entry};
 use derive_more::Deref;
 use feature_flags::FeatureFlagAppExt;
@@ -17,10 +21,6 @@ use std::{
 };
 use text::ReplicaId;
 use util::{TryFutureExt as _, maybe};
-use zed_llm_client::{
-    EDIT_PREDICTIONS_USAGE_AMOUNT_HEADER_NAME, EDIT_PREDICTIONS_USAGE_LIMIT_HEADER_NAME,
-    MODEL_REQUESTS_USAGE_AMOUNT_HEADER_NAME, MODEL_REQUESTS_USAGE_LIMIT_HEADER_NAME, UsageLimit,
-};
 
 pub type UserId = u64;
 
diff --git a/crates/collab/Cargo.toml b/crates/collab/Cargo.toml
index d3b5048283..9af95317e6 100644
--- a/crates/collab/Cargo.toml
+++ b/crates/collab/Cargo.toml
@@ -23,13 +23,14 @@ async-stripe.workspace = true
 async-trait.workspace = true
 async-tungstenite.workspace = true
 aws-config = { version = "1.1.5" }
-aws-sdk-s3 = { version = "1.15.0" }
 aws-sdk-kinesis = "1.51.0"
+aws-sdk-s3 = { version = "1.15.0" }
 axum = { version = "0.6", features = ["json", "headers", "ws"] }
 axum-extra = { version = "0.4", features = ["erased-json"] }
 base64.workspace = true
 chrono.workspace = true
 clock.workspace = true
+cloud_llm_client.workspace = true
 collections.workspace = true
 dashmap.workspace = true
 derive_more.workspace = true
@@ -75,7 +76,6 @@ tracing-subscriber = { version = "0.3.18", features = ["env-filter", "json", "re
 util.workspace = true
 uuid.workspace = true
 workspace-hack.workspace = true
-zed_llm_client.workspace = true
 
 [dev-dependencies]
 agent_settings.workspace = true
diff --git a/crates/collab/src/api.rs b/crates/collab/src/api.rs
index 050a15dd4e..6cf3f68f54 100644
--- a/crates/collab/src/api.rs
+++ b/crates/collab/src/api.rs
@@ -310,9 +310,9 @@ async fn refresh_llm_tokens(
 
 #[derive(Debug, Serialize, Deserialize)]
 struct UpdatePlanBody {
-    pub plan: zed_llm_client::Plan,
+    pub plan: cloud_llm_client::Plan,
     pub subscription_period: SubscriptionPeriod,
-    pub usage: zed_llm_client::CurrentUsage,
+    pub usage: cloud_llm_client::CurrentUsage,
     pub trial_started_at: Option<DateTime<Utc>>,
     pub is_usage_based_billing_enabled: bool,
     pub is_account_too_young: bool,
@@ -334,9 +334,9 @@ async fn update_plan(
     extract::Json(body): extract::Json<UpdatePlanBody>,
 ) -> Result<Json<UpdatePlanResponse>> {
     let plan = match body.plan {
-        zed_llm_client::Plan::ZedFree => proto::Plan::Free,
-        zed_llm_client::Plan::ZedPro => proto::Plan::ZedPro,
-        zed_llm_client::Plan::ZedProTrial => proto::Plan::ZedProTrial,
+        cloud_llm_client::Plan::ZedFree => proto::Plan::Free,
+        cloud_llm_client::Plan::ZedPro => proto::Plan::ZedPro,
+        cloud_llm_client::Plan::ZedProTrial => proto::Plan::ZedProTrial,
     };
 
     let update_user_plan = proto::UpdateUserPlan {
@@ -368,15 +368,15 @@ async fn update_plan(
     Ok(Json(UpdatePlanResponse {}))
 }
 
-fn usage_limit_to_proto(limit: zed_llm_client::UsageLimit) -> proto::UsageLimit {
+fn usage_limit_to_proto(limit: cloud_llm_client::UsageLimit) -> proto::UsageLimit {
     proto::UsageLimit {
         variant: Some(match limit {
-            zed_llm_client::UsageLimit::Limited(limit) => {
+            cloud_llm_client::UsageLimit::Limited(limit) => {
                 proto::usage_limit::Variant::Limited(proto::usage_limit::Limited {
                     limit: limit as u32,
                 })
             }
-            zed_llm_client::UsageLimit::Unlimited => {
+            cloud_llm_client::UsageLimit::Unlimited => {
                 proto::usage_limit::Variant::Unlimited(proto::usage_limit::Unlimited {})
             }
         }),
diff --git a/crates/collab/src/api/billing.rs b/crates/collab/src/api/billing.rs
index 9d0c617ab9..0e15308ffe 100644
--- a/crates/collab/src/api/billing.rs
+++ b/crates/collab/src/api/billing.rs
@@ -1,11 +1,11 @@
 use anyhow::{Context as _, bail};
 use chrono::{DateTime, Utc};
+use cloud_llm_client::LanguageModelProvider;
 use collections::{HashMap, HashSet};
 use sea_orm::ActiveValue;
 use std::{sync::Arc, time::Duration};
 use stripe::{CancellationDetailsReason, EventObject, EventType, ListEvents, SubscriptionStatus};
 use util::{ResultExt, maybe};
-use zed_llm_client::LanguageModelProvider;
 
 use crate::AppState;
 use crate::db::billing_subscription::{
diff --git a/crates/collab/src/db/tables/billing_subscription.rs b/crates/collab/src/db/tables/billing_subscription.rs
index 43198f9859..522973dbc9 100644
--- a/crates/collab/src/db/tables/billing_subscription.rs
+++ b/crates/collab/src/db/tables/billing_subscription.rs
@@ -95,7 +95,7 @@ pub enum SubscriptionKind {
     ZedFree,
 }
 
-impl From<SubscriptionKind> for zed_llm_client::Plan {
+impl From<SubscriptionKind> for cloud_llm_client::Plan {
     fn from(value: SubscriptionKind) -> Self {
         match value {
             SubscriptionKind::ZedPro => Self::ZedPro,
diff --git a/crates/collab/src/llm/db.rs b/crates/collab/src/llm/db.rs
index 6a6efca0de..18ad624dab 100644
--- a/crates/collab/src/llm/db.rs
+++ b/crates/collab/src/llm/db.rs
@@ -6,11 +6,11 @@ mod tables;
 #[cfg(test)]
 mod tests;
 
+use cloud_llm_client::LanguageModelProvider;
 use collections::HashMap;
 pub use ids::*;
 pub use seed::*;
 pub use tables::*;
-use zed_llm_client::LanguageModelProvider;
 
 #[cfg(test)]
 pub use tests::TestLlmDb;
diff --git a/crates/collab/src/llm/db/tests/provider_tests.rs b/crates/collab/src/llm/db/tests/provider_tests.rs
index 7d52964b93..f4e1de40ec 100644
--- a/crates/collab/src/llm/db/tests/provider_tests.rs
+++ b/crates/collab/src/llm/db/tests/provider_tests.rs
@@ -1,5 +1,5 @@
+use cloud_llm_client::LanguageModelProvider;
 use pretty_assertions::assert_eq;
-use zed_llm_client::LanguageModelProvider;
 
 use crate::llm::db::LlmDatabase;
 use crate::test_llm_db;
diff --git a/crates/collab/src/llm/token.rs b/crates/collab/src/llm/token.rs
index d4566ffcb4..da01c7f3be 100644
--- a/crates/collab/src/llm/token.rs
+++ b/crates/collab/src/llm/token.rs
@@ -4,12 +4,12 @@ use crate::llm::{AGENT_EXTENDED_TRIAL_FEATURE_FLAG, BYPASS_ACCOUNT_AGE_CHECK_FEA
 use crate::{Config, db::billing_preference};
 use anyhow::{Context as _, Result};
 use chrono::{NaiveDateTime, Utc};
+use cloud_llm_client::Plan;
 use jsonwebtoken::{DecodingKey, EncodingKey, Header, Validation};
 use serde::{Deserialize, Serialize};
 use std::time::Duration;
 use thiserror::Error;
 use uuid::Uuid;
-use zed_llm_client::Plan;
 
 #[derive(Clone, Debug, Default, Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
diff --git a/crates/collab/src/rpc.rs b/crates/collab/src/rpc.rs
index b7e5ce0739..5c35394e1d 100644
--- a/crates/collab/src/rpc.rs
+++ b/crates/collab/src/rpc.rs
@@ -2868,12 +2868,12 @@ async fn make_update_user_plan_message(
 }
 
 fn model_requests_limit(
-    plan: zed_llm_client::Plan,
+    plan: cloud_llm_client::Plan,
     feature_flags: &Vec<String>,
-) -> zed_llm_client::UsageLimit {
+) -> cloud_llm_client::UsageLimit {
     match plan.model_requests_limit() {
-        zed_llm_client::UsageLimit::Limited(limit) => {
-            let limit = if plan == zed_llm_client::Plan::ZedProTrial
+        cloud_llm_client::UsageLimit::Limited(limit) => {
+            let limit = if plan == cloud_llm_client::Plan::ZedProTrial
                 && feature_flags
                     .iter()
                     .any(|flag| flag == AGENT_EXTENDED_TRIAL_FEATURE_FLAG)
@@ -2883,9 +2883,9 @@ fn model_requests_limit(
                 limit
             };
 
-            zed_llm_client::UsageLimit::Limited(limit)
+            cloud_llm_client::UsageLimit::Limited(limit)
         }
-        zed_llm_client::UsageLimit::Unlimited => zed_llm_client::UsageLimit::Unlimited,
+        cloud_llm_client::UsageLimit::Unlimited => cloud_llm_client::UsageLimit::Unlimited,
     }
 }
 
@@ -2895,21 +2895,21 @@ fn subscription_usage_to_proto(
     feature_flags: &Vec<String>,
 ) -> proto::SubscriptionUsage {
     let plan = match plan {
-        proto::Plan::Free => zed_llm_client::Plan::ZedFree,
-        proto::Plan::ZedPro => zed_llm_client::Plan::ZedPro,
-        proto::Plan::ZedProTrial => zed_llm_client::Plan::ZedProTrial,
+        proto::Plan::Free => cloud_llm_client::Plan::ZedFree,
+        proto::Plan::ZedPro => cloud_llm_client::Plan::ZedPro,
+        proto::Plan::ZedProTrial => cloud_llm_client::Plan::ZedProTrial,
     };
 
     proto::SubscriptionUsage {
         model_requests_usage_amount: usage.model_requests as u32,
         model_requests_usage_limit: Some(proto::UsageLimit {
             variant: Some(match model_requests_limit(plan, feature_flags) {
-                zed_llm_client::UsageLimit::Limited(limit) => {
+                cloud_llm_client::UsageLimit::Limited(limit) => {
                     proto::usage_limit::Variant::Limited(proto::usage_limit::Limited {
                         limit: limit as u32,
                     })
                 }
-                zed_llm_client::UsageLimit::Unlimited => {
+                cloud_llm_client::UsageLimit::Unlimited => {
                     proto::usage_limit::Variant::Unlimited(proto::usage_limit::Unlimited {})
                 }
             }),
@@ -2917,12 +2917,12 @@ fn subscription_usage_to_proto(
         edit_predictions_usage_amount: usage.edit_predictions as u32,
         edit_predictions_usage_limit: Some(proto::UsageLimit {
             variant: Some(match plan.edit_predictions_limit() {
-                zed_llm_client::UsageLimit::Limited(limit) => {
+                cloud_llm_client::UsageLimit::Limited(limit) => {
                     proto::usage_limit::Variant::Limited(proto::usage_limit::Limited {
                         limit: limit as u32,
                     })
                 }
-                zed_llm_client::UsageLimit::Unlimited => {
+                cloud_llm_client::UsageLimit::Unlimited => {
                     proto::usage_limit::Variant::Unlimited(proto::usage_limit::Unlimited {})
                 }
             }),
@@ -2935,21 +2935,21 @@ fn make_default_subscription_usage(
     feature_flags: &Vec<String>,
 ) -> proto::SubscriptionUsage {
     let plan = match plan {
-        proto::Plan::Free => zed_llm_client::Plan::ZedFree,
-        proto::Plan::ZedPro => zed_llm_client::Plan::ZedPro,
-        proto::Plan::ZedProTrial => zed_llm_client::Plan::ZedProTrial,
+        proto::Plan::Free => cloud_llm_client::Plan::ZedFree,
+        proto::Plan::ZedPro => cloud_llm_client::Plan::ZedPro,
+        proto::Plan::ZedProTrial => cloud_llm_client::Plan::ZedProTrial,
     };
 
     proto::SubscriptionUsage {
         model_requests_usage_amount: 0,
         model_requests_usage_limit: Some(proto::UsageLimit {
             variant: Some(match model_requests_limit(plan, feature_flags) {
-                zed_llm_client::UsageLimit::Limited(limit) => {
+                cloud_llm_client::UsageLimit::Limited(limit) => {
                     proto::usage_limit::Variant::Limited(proto::usage_limit::Limited {
                         limit: limit as u32,
                     })
                 }
-                zed_llm_client::UsageLimit::Unlimited => {
+                cloud_llm_client::UsageLimit::Unlimited => {
                     proto::usage_limit::Variant::Unlimited(proto::usage_limit::Unlimited {})
                 }
             }),
@@ -2957,12 +2957,12 @@ fn make_default_subscription_usage(
         edit_predictions_usage_amount: 0,
         edit_predictions_usage_limit: Some(proto::UsageLimit {
             variant: Some(match plan.edit_predictions_limit() {
-                zed_llm_client::UsageLimit::Limited(limit) => {
+                cloud_llm_client::UsageLimit::Limited(limit) => {
                     proto::usage_limit::Variant::Limited(proto::usage_limit::Limited {
                         limit: limit as u32,
                     })
                 }
-                zed_llm_client::UsageLimit::Unlimited => {
+                cloud_llm_client::UsageLimit::Unlimited => {
                     proto::usage_limit::Variant::Unlimited(proto::usage_limit::Unlimited {})
                 }
             }),
diff --git a/crates/eval/Cargo.toml b/crates/eval/Cargo.toml
index d5db7f71a4..a0214c76a1 100644
--- a/crates/eval/Cargo.toml
+++ b/crates/eval/Cargo.toml
@@ -19,8 +19,8 @@ path = "src/explorer.rs"
 
 [dependencies]
 agent.workspace = true
-agent_ui.workspace = true
 agent_settings.workspace = true
+agent_ui.workspace = true
 anyhow.workspace = true
 assistant_tool.workspace = true
 assistant_tools.workspace = true
@@ -29,6 +29,7 @@ buffer_diff.workspace = true
 chrono.workspace = true
 clap.workspace = true
 client.workspace = true
+cloud_llm_client.workspace = true
 collections.workspace = true
 debug_adapter_extension.workspace = true
 dirs.workspace = true
@@ -68,4 +69,3 @@ util.workspace = true
 uuid.workspace = true
 watch.workspace = true
 workspace-hack.workspace = true
-zed_llm_client.workspace = true
diff --git a/crates/eval/src/example.rs b/crates/eval/src/example.rs
index 7ce3b1fdf1..23c8814916 100644
--- a/crates/eval/src/example.rs
+++ b/crates/eval/src/example.rs
@@ -15,11 +15,11 @@ use agent_settings::AgentProfileId;
 use anyhow::{Result, anyhow};
 use async_trait::async_trait;
 use buffer_diff::DiffHunkStatus;
+use cloud_llm_client::CompletionIntent;
 use collections::HashMap;
 use futures::{FutureExt as _, StreamExt, channel::mpsc, select_biased};
 use gpui::{App, AppContext, AsyncApp, Entity};
 use language_model::{LanguageModel, Role, StopReason};
-use zed_llm_client::CompletionIntent;
 
 pub const THREAD_EVENT_TIMEOUT: Duration = Duration::from_secs(60 * 2);
 
diff --git a/crates/git_ui/Cargo.toml b/crates/git_ui/Cargo.toml
index 2fb80b7e73..4c919249ee 100644
--- a/crates/git_ui/Cargo.toml
+++ b/crates/git_ui/Cargo.toml
@@ -24,6 +24,7 @@ buffer_diff.workspace = true
 call.workspace = true
 chrono.workspace = true
 client.workspace = true
+cloud_llm_client.workspace = true
 collections.workspace = true
 command_palette_hooks.workspace = true
 component.workspace = true
@@ -62,7 +63,6 @@ watch.workspace = true
 workspace-hack.workspace = true
 workspace.workspace = true
 zed_actions.workspace = true
-zed_llm_client.workspace = true
 
 [target.'cfg(windows)'.dependencies]
 windows.workspace = true
diff --git a/crates/git_ui/src/git_panel.rs b/crates/git_ui/src/git_panel.rs
index f7efada469..e196a5b139 100644
--- a/crates/git_ui/src/git_panel.rs
+++ b/crates/git_ui/src/git_panel.rs
@@ -71,12 +71,12 @@ use ui::{
 use util::{ResultExt, TryFutureExt, maybe};
 use workspace::SERIALIZATION_THROTTLE_TIME;
 
+use cloud_llm_client::CompletionIntent;
 use workspace::{
     Workspace,
     dock::{DockPosition, Panel, PanelEvent},
     notifications::{DetachAndPromptErr, ErrorMessagePrompt, NotificationId},
 };
-use zed_llm_client::CompletionIntent;
 
 actions!(
     git_panel,
diff --git a/crates/inline_completion_button/Cargo.toml b/crates/inline_completion_button/Cargo.toml
index c2a619d500..b34e59336b 100644
--- a/crates/inline_completion_button/Cargo.toml
+++ b/crates/inline_completion_button/Cargo.toml
@@ -15,6 +15,7 @@ doctest = false
 [dependencies]
 anyhow.workspace = true
 client.workspace = true
+cloud_llm_client.workspace = true
 copilot.workspace = true
 editor.workspace = true
 feature_flags.workspace = true
@@ -32,7 +33,6 @@ ui.workspace = true
 workspace-hack.workspace = true
 workspace.workspace = true
 zed_actions.workspace = true
-zed_llm_client.workspace = true
 zeta.workspace = true
 
 [dev-dependencies]
diff --git a/crates/inline_completion_button/src/inline_completion_button.rs b/crates/inline_completion_button/src/inline_completion_button.rs
index 2615a8beef..81d9181cfc 100644
--- a/crates/inline_completion_button/src/inline_completion_button.rs
+++ b/crates/inline_completion_button/src/inline_completion_button.rs
@@ -1,5 +1,6 @@
 use anyhow::Result;
 use client::{DisableAiSettings, UserStore, zed_urls};
+use cloud_llm_client::UsageLimit;
 use copilot::{Copilot, Status};
 use editor::{
     Editor, SelectionEffects,
@@ -34,7 +35,6 @@ use workspace::{
     notifications::NotificationId,
 };
 use zed_actions::OpenBrowser;
-use zed_llm_client::UsageLimit;
 use zeta::RateCompletions;
 
 actions!(
diff --git a/crates/language_model/Cargo.toml b/crates/language_model/Cargo.toml
index b718c530f5..841be60b0e 100644
--- a/crates/language_model/Cargo.toml
+++ b/crates/language_model/Cargo.toml
@@ -20,6 +20,7 @@ anthropic = { workspace = true, features = ["schemars"] }
 anyhow.workspace = true
 base64.workspace = true
 client.workspace = true
+cloud_llm_client.workspace = true
 collections.workspace = true
 futures.workspace = true
 gpui.workspace = true
@@ -37,7 +38,6 @@ telemetry_events.workspace = true
 thiserror.workspace = true
 util.workspace = true
 workspace-hack.workspace = true
-zed_llm_client.workspace = true
 
 [dev-dependencies]
 gpui = { workspace = true, features = ["test-support"] }
diff --git a/crates/language_model/src/language_model.rs b/crates/language_model/src/language_model.rs
index 54640419b6..1637d2de8a 100644
--- a/crates/language_model/src/language_model.rs
+++ b/crates/language_model/src/language_model.rs
@@ -11,6 +11,7 @@ pub mod fake_provider;
 use anthropic::{AnthropicError, parse_prompt_too_long};
 use anyhow::{Result, anyhow};
 use client::Client;
+use cloud_llm_client::{CompletionMode, CompletionRequestStatus};
 use futures::FutureExt;
 use futures::{StreamExt, future::BoxFuture, stream::BoxStream};
 use gpui::{AnyElement, AnyView, App, AsyncApp, SharedString, Task, Window};
@@ -26,7 +27,6 @@ use std::time::Duration;
 use std::{fmt, io};
 use thiserror::Error;
 use util::serde::is_default;
-use zed_llm_client::{CompletionMode, CompletionRequestStatus};
 
 pub use crate::model::*;
 pub use crate::rate_limiter::*;
diff --git a/crates/language_model/src/request.rs b/crates/language_model/src/request.rs
index 6f3d420ad5..dc485e9937 100644
--- a/crates/language_model/src/request.rs
+++ b/crates/language_model/src/request.rs
@@ -1,10 +1,9 @@
 use std::io::{Cursor, Write};
 use std::sync::Arc;
 
-use crate::role::Role;
-use crate::{LanguageModelToolUse, LanguageModelToolUseId};
 use anyhow::Result;
 use base64::write::EncoderWriter;
+use cloud_llm_client::{CompletionIntent, CompletionMode};
 use gpui::{
     App, AppContext as _, DevicePixels, Image, ImageFormat, ObjectFit, SharedString, Size, Task,
     point, px, size,
@@ -12,7 +11,9 @@ use gpui::{
 use image::codecs::png::PngEncoder;
 use serde::{Deserialize, Serialize};
 use util::ResultExt;
-use zed_llm_client::{CompletionIntent, CompletionMode};
+
+use crate::role::Role;
+use crate::{LanguageModelToolUse, LanguageModelToolUseId};
 
 #[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
 pub struct LanguageModelImage {
diff --git a/crates/language_models/Cargo.toml b/crates/language_models/Cargo.toml
index 574579aaa7..208b0d99c9 100644
--- a/crates/language_models/Cargo.toml
+++ b/crates/language_models/Cargo.toml
@@ -16,18 +16,17 @@ ai_onboarding.workspace = true
 anthropic = { workspace = true, features = ["schemars"] }
 anyhow.workspace = true
 aws-config = { workspace = true, features = ["behavior-version-latest"] }
-aws-credential-types = { workspace = true, features = [
-    "hardcoded-credentials",
-] }
+aws-credential-types = { workspace = true, features = ["hardcoded-credentials"] }
 aws_http_client.workspace = true
 bedrock.workspace = true
 chrono.workspace = true
 client.workspace = true
+cloud_llm_client.workspace = true
 collections.workspace = true
 component.workspace = true
-credentials_provider.workspace = true
 convert_case.workspace = true
 copilot.workspace = true
+credentials_provider.workspace = true
 deepseek = { workspace = true, features = ["schemars"] }
 editor.workspace = true
 futures.workspace = true
@@ -35,6 +34,7 @@ google_ai = { workspace = true, features = ["schemars"] }
 gpui.workspace = true
 gpui_tokio.workspace = true
 http_client.workspace = true
+language.workspace = true
 language_model.workspace = true
 lmstudio = { workspace = true, features = ["schemars"] }
 log.workspace = true
@@ -43,8 +43,6 @@ mistral = { workspace = true, features = ["schemars"] }
 ollama = { workspace = true, features = ["schemars"] }
 open_ai = { workspace = true, features = ["schemars"] }
 open_router = { workspace = true, features = ["schemars"] }
-vercel = { workspace = true, features = ["schemars"] }
-x_ai = { workspace = true, features = ["schemars"] }
 partial-json-fixer.workspace = true
 proto.workspace = true
 release_channel.workspace = true
@@ -61,9 +59,9 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 ui.workspace = true
 ui_input.workspace = true
 util.workspace = true
+vercel = { workspace = true, features = ["schemars"] }
 workspace-hack.workspace = true
-zed_llm_client.workspace = true
-language.workspace = true
+x_ai = { workspace = true, features = ["schemars"] }
 
 [dev-dependencies]
 editor = { workspace = true, features = ["test-support"] }
diff --git a/crates/language_models/src/provider/cloud.rs b/crates/language_models/src/provider/cloud.rs
index 1e6e7b96d0..3de135c5a2 100644
--- a/crates/language_models/src/provider/cloud.rs
+++ b/crates/language_models/src/provider/cloud.rs
@@ -3,6 +3,13 @@ use anthropic::AnthropicModelMode;
 use anyhow::{Context as _, Result, anyhow};
 use chrono::{DateTime, Utc};
 use client::{Client, ModelRequestUsage, UserStore, zed_urls};
+use cloud_llm_client::{
+    CLIENT_SUPPORTS_STATUS_MESSAGES_HEADER_NAME, CURRENT_PLAN_HEADER_NAME, CompletionBody,
+    CompletionEvent, CompletionRequestStatus, CountTokensBody, CountTokensResponse,
+    EXPIRED_LLM_TOKEN_HEADER_NAME, ListModelsResponse, MODEL_REQUESTS_RESOURCE_HEADER_VALUE,
+    SERVER_SUPPORTS_STATUS_MESSAGES_HEADER_NAME, SUBSCRIPTION_LIMIT_RESOURCE_HEADER_NAME,
+    TOOL_USE_LIMIT_REACHED_HEADER_NAME, ZED_VERSION_HEADER_NAME,
+};
 use futures::{
     AsyncBufReadExt, FutureExt, Stream, StreamExt, future::BoxFuture, stream::BoxStream,
 };
@@ -33,13 +40,6 @@ use std::time::Duration;
 use thiserror::Error;
 use ui::{TintColor, prelude::*};
 use util::{ResultExt as _, maybe};
-use zed_llm_client::{
-    CLIENT_SUPPORTS_STATUS_MESSAGES_HEADER_NAME, CURRENT_PLAN_HEADER_NAME, CompletionBody,
-    CompletionEvent, CompletionRequestStatus, CountTokensBody, CountTokensResponse,
-    EXPIRED_LLM_TOKEN_HEADER_NAME, ListModelsResponse, MODEL_REQUESTS_RESOURCE_HEADER_VALUE,
-    SERVER_SUPPORTS_STATUS_MESSAGES_HEADER_NAME, SUBSCRIPTION_LIMIT_RESOURCE_HEADER_NAME,
-    TOOL_USE_LIMIT_REACHED_HEADER_NAME, ZED_VERSION_HEADER_NAME,
-};
 
 use crate::provider::anthropic::{AnthropicEventMapper, count_anthropic_tokens, into_anthropic};
 use crate::provider::google::{GoogleEventMapper, into_google};
@@ -120,10 +120,10 @@ pub struct State {
     user_store: Entity<UserStore>,
     status: client::Status,
     accept_terms_of_service_task: Option<Task<Result<()>>>,
-    models: Vec<Arc<zed_llm_client::LanguageModel>>,
-    default_model: Option<Arc<zed_llm_client::LanguageModel>>,
-    default_fast_model: Option<Arc<zed_llm_client::LanguageModel>>,
-    recommended_models: Vec<Arc<zed_llm_client::LanguageModel>>,
+    models: Vec<Arc<cloud_llm_client::LanguageModel>>,
+    default_model: Option<Arc<cloud_llm_client::LanguageModel>>,
+    default_fast_model: Option<Arc<cloud_llm_client::LanguageModel>>,
+    recommended_models: Vec<Arc<cloud_llm_client::LanguageModel>>,
     _fetch_models_task: Task<()>,
     _settings_subscription: Subscription,
     _llm_token_subscription: Subscription,
@@ -238,8 +238,8 @@ impl State {
             // Right now we represent thinking variants of models as separate models on the client,
             // so we need to insert variants for any model that supports thinking.
             if model.supports_thinking {
-                models.push(Arc::new(zed_llm_client::LanguageModel {
-                    id: zed_llm_client::LanguageModelId(format!("{}-thinking", model.id).into()),
+                models.push(Arc::new(cloud_llm_client::LanguageModel {
+                    id: cloud_llm_client::LanguageModelId(format!("{}-thinking", model.id).into()),
                     display_name: format!("{} Thinking", model.display_name),
                     ..model
                 }));
@@ -328,7 +328,7 @@ impl CloudLanguageModelProvider {
 
     fn create_language_model(
         &self,
-        model: Arc<zed_llm_client::LanguageModel>,
+        model: Arc<cloud_llm_client::LanguageModel>,
         llm_api_token: LlmApiToken,
     ) -> Arc<dyn LanguageModel> {
         Arc::new(CloudLanguageModel {
@@ -518,7 +518,7 @@ fn render_accept_terms(
 
 pub struct CloudLanguageModel {
     id: LanguageModelId,
-    model: Arc<zed_llm_client::LanguageModel>,
+    model: Arc<cloud_llm_client::LanguageModel>,
     llm_api_token: LlmApiToken,
     client: Arc<Client>,
     request_limiter: RateLimiter,
@@ -611,12 +611,12 @@ impl CloudLanguageModel {
                         .headers()
                         .get(CURRENT_PLAN_HEADER_NAME)
                         .and_then(|plan| plan.to_str().ok())
-                        .and_then(|plan| zed_llm_client::Plan::from_str(plan).ok())
+                        .and_then(|plan| cloud_llm_client::Plan::from_str(plan).ok())
                     {
                         let plan = match plan {
-                            zed_llm_client::Plan::ZedFree => Plan::Free,
-                            zed_llm_client::Plan::ZedPro => Plan::ZedPro,
-                            zed_llm_client::Plan::ZedProTrial => Plan::ZedProTrial,
+                            cloud_llm_client::Plan::ZedFree => Plan::Free,
+                            cloud_llm_client::Plan::ZedPro => Plan::ZedPro,
+                            cloud_llm_client::Plan::ZedProTrial => Plan::ZedProTrial,
                         };
                         return Err(anyhow!(ModelRequestLimitReachedError { plan }));
                     }
@@ -729,7 +729,7 @@ impl LanguageModel for CloudLanguageModel {
     }
 
     fn upstream_provider_id(&self) -> LanguageModelProviderId {
-        use zed_llm_client::LanguageModelProvider::*;
+        use cloud_llm_client::LanguageModelProvider::*;
         match self.model.provider {
             Anthropic => language_model::ANTHROPIC_PROVIDER_ID,
             OpenAi => language_model::OPEN_AI_PROVIDER_ID,
@@ -738,7 +738,7 @@ impl LanguageModel for CloudLanguageModel {
     }
 
     fn upstream_provider_name(&self) -> LanguageModelProviderName {
-        use zed_llm_client::LanguageModelProvider::*;
+        use cloud_llm_client::LanguageModelProvider::*;
         match self.model.provider {
             Anthropic => language_model::ANTHROPIC_PROVIDER_NAME,
             OpenAi => language_model::OPEN_AI_PROVIDER_NAME,
@@ -772,11 +772,11 @@ impl LanguageModel for CloudLanguageModel {
 
     fn tool_input_format(&self) -> LanguageModelToolSchemaFormat {
         match self.model.provider {
-            zed_llm_client::LanguageModelProvider::Anthropic
-            | zed_llm_client::LanguageModelProvider::OpenAi => {
+            cloud_llm_client::LanguageModelProvider::Anthropic
+            | cloud_llm_client::LanguageModelProvider::OpenAi => {
                 LanguageModelToolSchemaFormat::JsonSchema
             }
-            zed_llm_client::LanguageModelProvider::Google => {
+            cloud_llm_client::LanguageModelProvider::Google => {
                 LanguageModelToolSchemaFormat::JsonSchemaSubset
             }
         }
@@ -795,15 +795,15 @@ impl LanguageModel for CloudLanguageModel {
 
     fn cache_configuration(&self) -> Option<LanguageModelCacheConfiguration> {
         match &self.model.provider {
-            zed_llm_client::LanguageModelProvider::Anthropic => {
+            cloud_llm_client::LanguageModelProvider::Anthropic => {
                 Some(LanguageModelCacheConfiguration {
                     min_total_token: 2_048,
                     should_speculate: true,
                     max_cache_anchors: 4,
                 })
             }
-            zed_llm_client::LanguageModelProvider::OpenAi
-            | zed_llm_client::LanguageModelProvider::Google => None,
+            cloud_llm_client::LanguageModelProvider::OpenAi
+            | cloud_llm_client::LanguageModelProvider::Google => None,
         }
     }
 
@@ -813,15 +813,17 @@ impl LanguageModel for CloudLanguageModel {
         cx: &App,
     ) -> BoxFuture<'static, Result<u64>> {
         match self.model.provider {
-            zed_llm_client::LanguageModelProvider::Anthropic => count_anthropic_tokens(request, cx),
-            zed_llm_client::LanguageModelProvider::OpenAi => {
+            cloud_llm_client::LanguageModelProvider::Anthropic => {
+                count_anthropic_tokens(request, cx)
+            }
+            cloud_llm_client::LanguageModelProvider::OpenAi => {
                 let model = match open_ai::Model::from_id(&self.model.id.0) {
                     Ok(model) => model,
                     Err(err) => return async move { Err(anyhow!(err)) }.boxed(),
                 };
                 count_open_ai_tokens(request, model, cx)
             }
-            zed_llm_client::LanguageModelProvider::Google => {
+            cloud_llm_client::LanguageModelProvider::Google => {
                 let client = self.client.clone();
                 let llm_api_token = self.llm_api_token.clone();
                 let model_id = self.model.id.to_string();
@@ -832,7 +834,7 @@ impl LanguageModel for CloudLanguageModel {
                     let token = llm_api_token.acquire(&client).await?;
 
                     let request_body = CountTokensBody {
-                        provider: zed_llm_client::LanguageModelProvider::Google,
+                        provider: cloud_llm_client::LanguageModelProvider::Google,
                         model: model_id,
                         provider_request: serde_json::to_value(&google_ai::CountTokensRequest {
                             generate_content_request,
@@ -893,7 +895,7 @@ impl LanguageModel for CloudLanguageModel {
         let app_version = cx.update(|cx| AppVersion::global(cx)).ok();
         let thinking_allowed = request.thinking_allowed;
         match self.model.provider {
-            zed_llm_client::LanguageModelProvider::Anthropic => {
+            cloud_llm_client::LanguageModelProvider::Anthropic => {
                 let request = into_anthropic(
                     request,
                     self.model.id.to_string(),
@@ -924,7 +926,7 @@ impl LanguageModel for CloudLanguageModel {
                             prompt_id,
                             intent,
                             mode,
-                            provider: zed_llm_client::LanguageModelProvider::Anthropic,
+                            provider: cloud_llm_client::LanguageModelProvider::Anthropic,
                             model: request.model.clone(),
                             provider_request: serde_json::to_value(&request)
                                 .map_err(|e| anyhow!(e))?,
@@ -948,7 +950,7 @@ impl LanguageModel for CloudLanguageModel {
                 });
                 async move { Ok(future.await?.boxed()) }.boxed()
             }
-            zed_llm_client::LanguageModelProvider::OpenAi => {
+            cloud_llm_client::LanguageModelProvider::OpenAi => {
                 let client = self.client.clone();
                 let model = match open_ai::Model::from_id(&self.model.id.0) {
                     Ok(model) => model,
@@ -976,7 +978,7 @@ impl LanguageModel for CloudLanguageModel {
                             prompt_id,
                             intent,
                             mode,
-                            provider: zed_llm_client::LanguageModelProvider::OpenAi,
+                            provider: cloud_llm_client::LanguageModelProvider::OpenAi,
                             model: request.model.clone(),
                             provider_request: serde_json::to_value(&request)
                                 .map_err(|e| anyhow!(e))?,
@@ -996,7 +998,7 @@ impl LanguageModel for CloudLanguageModel {
                 });
                 async move { Ok(future.await?.boxed()) }.boxed()
             }
-            zed_llm_client::LanguageModelProvider::Google => {
+            cloud_llm_client::LanguageModelProvider::Google => {
                 let client = self.client.clone();
                 let request =
                     into_google(request, self.model.id.to_string(), GoogleModelMode::Default);
@@ -1016,7 +1018,7 @@ impl LanguageModel for CloudLanguageModel {
                             prompt_id,
                             intent,
                             mode,
-                            provider: zed_llm_client::LanguageModelProvider::Google,
+                            provider: cloud_llm_client::LanguageModelProvider::Google,
                             model: request.model.model_id.clone(),
                             provider_request: serde_json::to_value(&request)
                                 .map_err(|e| anyhow!(e))?,
diff --git a/crates/language_models/src/provider/copilot_chat.rs b/crates/language_models/src/provider/copilot_chat.rs
index d9a84f1eb7..3cdc2e5401 100644
--- a/crates/language_models/src/provider/copilot_chat.rs
+++ b/crates/language_models/src/provider/copilot_chat.rs
@@ -3,6 +3,7 @@ use std::str::FromStr as _;
 use std::sync::Arc;
 
 use anyhow::{Result, anyhow};
+use cloud_llm_client::CompletionIntent;
 use collections::HashMap;
 use copilot::copilot_chat::{
     ChatMessage, ChatMessageContent, ChatMessagePart, CopilotChat, ImageUrl,
@@ -30,7 +31,6 @@ use settings::SettingsStore;
 use std::time::Duration;
 use ui::prelude::*;
 use util::debug_panic;
-use zed_llm_client::CompletionIntent;
 
 use super::anthropic::count_anthropic_tokens;
 use super::google::count_google_tokens;
diff --git a/crates/web_search/Cargo.toml b/crates/web_search/Cargo.toml
index e5b8ca63b2..4ba46faec4 100644
--- a/crates/web_search/Cargo.toml
+++ b/crates/web_search/Cargo.toml
@@ -13,8 +13,8 @@ path = "src/web_search.rs"
 
 [dependencies]
 anyhow.workspace = true
+cloud_llm_client.workspace = true
 collections.workspace = true
 gpui.workspace = true
 serde.workspace = true
 workspace-hack.workspace = true
-zed_llm_client.workspace = true
diff --git a/crates/web_search/src/web_search.rs b/crates/web_search/src/web_search.rs
index a131b0de71..8578cfe4aa 100644
--- a/crates/web_search/src/web_search.rs
+++ b/crates/web_search/src/web_search.rs
@@ -1,8 +1,9 @@
+use std::sync::Arc;
+
 use anyhow::Result;
+use cloud_llm_client::WebSearchResponse;
 use collections::HashMap;
 use gpui::{App, AppContext as _, Context, Entity, Global, SharedString, Task};
-use std::sync::Arc;
-use zed_llm_client::WebSearchResponse;
 
 pub fn init(cx: &mut App) {
     let registry = cx.new(|_cx| WebSearchRegistry::default());
diff --git a/crates/web_search_providers/Cargo.toml b/crates/web_search_providers/Cargo.toml
index 2e052796c4..f7a248d106 100644
--- a/crates/web_search_providers/Cargo.toml
+++ b/crates/web_search_providers/Cargo.toml
@@ -14,6 +14,7 @@ path = "src/web_search_providers.rs"
 [dependencies]
 anyhow.workspace = true
 client.workspace = true
+cloud_llm_client.workspace = true
 futures.workspace = true
 gpui.workspace = true
 http_client.workspace = true
@@ -22,4 +23,3 @@ serde.workspace = true
 serde_json.workspace = true
 web_search.workspace = true
 workspace-hack.workspace = true
-zed_llm_client.workspace = true
diff --git a/crates/web_search_providers/src/cloud.rs b/crates/web_search_providers/src/cloud.rs
index adf79b0ff6..52ee0da0d4 100644
--- a/crates/web_search_providers/src/cloud.rs
+++ b/crates/web_search_providers/src/cloud.rs
@@ -2,12 +2,12 @@ use std::sync::Arc;
 
 use anyhow::{Context as _, Result};
 use client::Client;
+use cloud_llm_client::{EXPIRED_LLM_TOKEN_HEADER_NAME, WebSearchBody, WebSearchResponse};
 use futures::AsyncReadExt as _;
 use gpui::{App, AppContext, Context, Entity, Subscription, Task};
 use http_client::{HttpClient, Method};
 use language_model::{LlmApiToken, RefreshLlmTokenListener};
 use web_search::{WebSearchProvider, WebSearchProviderId};
-use zed_llm_client::{EXPIRED_LLM_TOKEN_HEADER_NAME, WebSearchBody, WebSearchResponse};
 
 pub struct CloudWebSearchProvider {
     state: Entity<State>,
diff --git a/crates/zeta/Cargo.toml b/crates/zeta/Cargo.toml
index c2b1de08ae..294d95aefd 100644
--- a/crates/zeta/Cargo.toml
+++ b/crates/zeta/Cargo.toml
@@ -21,6 +21,7 @@ ai_onboarding.workspace = true
 anyhow.workspace = true
 arrayvec.workspace = true
 client.workspace = true
+cloud_llm_client.workspace = true
 collections.workspace = true
 command_palette_hooks.workspace = true
 copilot.workspace = true
@@ -52,11 +53,10 @@ thiserror.workspace = true
 ui.workspace = true
 util.workspace = true
 uuid.workspace = true
+workspace-hack.workspace = true
 workspace.workspace = true
 worktree.workspace = true
 zed_actions.workspace = true
-zed_llm_client.workspace = true
-workspace-hack.workspace = true
 
 [dev-dependencies]
 collections = { workspace = true, features = ["test-support"] }
diff --git a/crates/zeta/src/zeta.rs b/crates/zeta/src/zeta.rs
index d6f033899d..d5c6be278b 100644
--- a/crates/zeta/src/zeta.rs
+++ b/crates/zeta/src/zeta.rs
@@ -17,6 +17,10 @@ pub use rate_completion_modal::*;
 use anyhow::{Context as _, Result, anyhow};
 use arrayvec::ArrayVec;
 use client::{Client, EditPredictionUsage, UserStore};
+use cloud_llm_client::{
+    AcceptEditPredictionBody, EXPIRED_LLM_TOKEN_HEADER_NAME, MINIMUM_REQUIRED_VERSION_HEADER_NAME,
+    PredictEditsBody, PredictEditsResponse, ZED_VERSION_HEADER_NAME,
+};
 use collections::{HashMap, HashSet, VecDeque};
 use futures::AsyncReadExt;
 use gpui::{
@@ -53,10 +57,6 @@ use uuid::Uuid;
 use workspace::Workspace;
 use workspace::notifications::{ErrorMessagePrompt, NotificationId};
 use worktree::Worktree;
-use zed_llm_client::{
-    AcceptEditPredictionBody, EXPIRED_LLM_TOKEN_HEADER_NAME, MINIMUM_REQUIRED_VERSION_HEADER_NAME,
-    PredictEditsBody, PredictEditsResponse, ZED_VERSION_HEADER_NAME,
-};
 
 const CURSOR_MARKER: &'static str = "<|user_cursor_is_here|>";
 const START_OF_FILE_MARKER: &'static str = "<|start_of_file|>";