From eca36c502e40e724d4ac6ad2f53750a4b7a39d30 Mon Sep 17 00:00:00 2001 From: Marshall Bowers Date: Mon, 14 Jul 2025 12:03:19 -0400 Subject: [PATCH] Route all LLM traffic through `cloud.zed.dev` (#34404) This PR makes it so all LLM traffic is routed through `cloud.zed.dev`. We're already routing `llm.zed.dev` to `cloud.zed.dev` on the server, but we want to standardize on `cloud.zed.dev` moving forward. Release Notes: - N/A --- Cargo.lock | 2 -- crates/feature_flags/src/feature_flags.rs | 11 ------- crates/http_client/src/http_client.rs | 15 ++-------- crates/language_models/Cargo.toml | 1 - crates/language_models/src/provider/cloud.rs | 31 +++++--------------- crates/web_search_providers/Cargo.toml | 1 - crates/web_search_providers/src/cloud.rs | 13 ++------ crates/zeta/src/zeta.rs | 10 ++----- 8 files changed, 13 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index da46d191ef..742c3515c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9036,7 +9036,6 @@ dependencies = [ "credentials_provider", "deepseek", "editor", - "feature_flags", "fs", "futures 0.3.31", "google_ai", @@ -18390,7 +18389,6 @@ version = "0.1.0" dependencies = [ "anyhow", "client", - "feature_flags", "futures 0.3.31", "gpui", "http_client", diff --git a/crates/feature_flags/src/feature_flags.rs b/crates/feature_flags/src/feature_flags.rs index 9252977f75..da85133bb9 100644 --- a/crates/feature_flags/src/feature_flags.rs +++ b/crates/feature_flags/src/feature_flags.rs @@ -98,17 +98,6 @@ impl FeatureFlag for AcpFeatureFlag { const NAME: &'static str = "acp"; } -pub struct ZedCloudFeatureFlag {} - -impl FeatureFlag for ZedCloudFeatureFlag { - const NAME: &'static str = "zed-cloud"; - - fn enabled_for_staff() -> bool { - // Require individual opt-in, for now. - false - } -} - pub trait FeatureFlagViewExt { fn observe_flag(&mut self, window: &Window, callback: F) -> Subscription where diff --git a/crates/http_client/src/http_client.rs b/crates/http_client/src/http_client.rs index c60a56002f..eebab86e21 100644 --- a/crates/http_client/src/http_client.rs +++ b/crates/http_client/src/http_client.rs @@ -226,21 +226,10 @@ impl HttpClientWithUrl { } /// Builds a Zed LLM URL using the given path. - pub fn build_zed_llm_url( - &self, - path: &str, - query: &[(&str, &str)], - use_cloud: bool, - ) -> Result { + pub fn build_zed_llm_url(&self, path: &str, query: &[(&str, &str)]) -> Result { let base_url = self.base_url(); let base_api_url = match base_url.as_ref() { - "https://zed.dev" => { - if use_cloud { - "https://cloud.zed.dev" - } else { - "https://llm.zed.dev" - } - } + "https://zed.dev" => "https://cloud.zed.dev", "https://staging.zed.dev" => "https://llm-staging.zed.dev", "http://localhost:3000" => "http://localhost:8787", other => other, diff --git a/crates/language_models/Cargo.toml b/crates/language_models/Cargo.toml index 514443ddec..0f248edd57 100644 --- a/crates/language_models/Cargo.toml +++ b/crates/language_models/Cargo.toml @@ -28,7 +28,6 @@ credentials_provider.workspace = true copilot.workspace = true deepseek = { workspace = true, features = ["schemars"] } editor.workspace = true -feature_flags.workspace = true fs.workspace = true futures.workspace = true google_ai = { workspace = true, features = ["schemars"] } diff --git a/crates/language_models/src/provider/cloud.rs b/crates/language_models/src/provider/cloud.rs index ede84713f1..c044a318b8 100644 --- a/crates/language_models/src/provider/cloud.rs +++ b/crates/language_models/src/provider/cloud.rs @@ -2,7 +2,6 @@ use anthropic::AnthropicModelMode; use anyhow::{Context as _, Result, anyhow}; use chrono::{DateTime, Utc}; use client::{Client, ModelRequestUsage, UserStore, zed_urls}; -use feature_flags::{FeatureFlagAppExt as _, ZedCloudFeatureFlag}; use futures::{ AsyncBufReadExt, FutureExt, Stream, StreamExt, future::BoxFuture, stream::BoxStream, }; @@ -137,7 +136,6 @@ impl State { cx: &mut Context, ) -> Self { let refresh_llm_token_listener = RefreshLlmTokenListener::global(cx); - let use_cloud = cx.has_flag::(); Self { client: client.clone(), @@ -165,7 +163,7 @@ impl State { .await; } - let response = Self::fetch_models(client, llm_api_token, use_cloud).await?; + let response = Self::fetch_models(client, llm_api_token).await?; this.update(cx, |this, cx| { this.update_models(response, cx); }) @@ -184,7 +182,7 @@ impl State { let llm_api_token = this.llm_api_token.clone(); cx.spawn(async move |this, cx| { llm_api_token.refresh(&client).await?; - let response = Self::fetch_models(client, llm_api_token, use_cloud).await?; + let response = Self::fetch_models(client, llm_api_token).await?; this.update(cx, |this, cx| { this.update_models(response, cx); }) @@ -268,18 +266,13 @@ impl State { async fn fetch_models( client: Arc, llm_api_token: LlmApiToken, - use_cloud: bool, ) -> Result { let http_client = &client.http_client(); let token = llm_api_token.acquire(&client).await?; let request = http_client::Request::builder() .method(Method::GET) - .uri( - http_client - .build_zed_llm_url("/models", &[], use_cloud)? - .as_ref(), - ) + .uri(http_client.build_zed_llm_url("/models", &[])?.as_ref()) .header("Authorization", format!("Bearer {token}")) .body(AsyncBody::empty())?; let mut response = http_client @@ -543,7 +536,6 @@ impl CloudLanguageModel { llm_api_token: LlmApiToken, app_version: Option, body: CompletionBody, - use_cloud: bool, ) -> Result { let http_client = &client.http_client(); @@ -551,11 +543,9 @@ impl CloudLanguageModel { let mut refreshed_token = false; loop { - let request_builder = http_client::Request::builder().method(Method::POST).uri( - http_client - .build_zed_llm_url("/completions", &[], use_cloud)? - .as_ref(), - ); + let request_builder = http_client::Request::builder() + .method(Method::POST) + .uri(http_client.build_zed_llm_url("/completions", &[])?.as_ref()); let request_builder = if let Some(app_version) = app_version { request_builder.header(ZED_VERSION_HEADER_NAME, app_version.to_string()) } else { @@ -782,7 +772,6 @@ impl LanguageModel for CloudLanguageModel { let model_id = self.model.id.to_string(); let generate_content_request = into_google(request, model_id.clone(), GoogleModelMode::Default); - let use_cloud = cx.has_flag::(); async move { let http_client = &client.http_client(); let token = llm_api_token.acquire(&client).await?; @@ -798,7 +787,7 @@ impl LanguageModel for CloudLanguageModel { .method(Method::POST) .uri( http_client - .build_zed_llm_url("/count_tokens", &[], use_cloud)? + .build_zed_llm_url("/count_tokens", &[])? .as_ref(), ) .header("Content-Type", "application/json") @@ -847,9 +836,6 @@ impl LanguageModel for CloudLanguageModel { let intent = request.intent; let mode = request.mode; let app_version = cx.update(|cx| AppVersion::global(cx)).ok(); - let use_cloud = cx - .update(|cx| cx.has_flag::()) - .unwrap_or(false); let thinking_allowed = request.thinking_allowed; match self.model.provider { zed_llm_client::LanguageModelProvider::Anthropic => { @@ -888,7 +874,6 @@ impl LanguageModel for CloudLanguageModel { provider_request: serde_json::to_value(&request) .map_err(|e| anyhow!(e))?, }, - use_cloud, ) .await .map_err(|err| match err.downcast::() { @@ -941,7 +926,6 @@ impl LanguageModel for CloudLanguageModel { provider_request: serde_json::to_value(&request) .map_err(|e| anyhow!(e))?, }, - use_cloud, ) .await?; @@ -982,7 +966,6 @@ impl LanguageModel for CloudLanguageModel { provider_request: serde_json::to_value(&request) .map_err(|e| anyhow!(e))?, }, - use_cloud, ) .await?; diff --git a/crates/web_search_providers/Cargo.toml b/crates/web_search_providers/Cargo.toml index 208cb63593..2e052796c4 100644 --- a/crates/web_search_providers/Cargo.toml +++ b/crates/web_search_providers/Cargo.toml @@ -14,7 +14,6 @@ path = "src/web_search_providers.rs" [dependencies] anyhow.workspace = true client.workspace = true -feature_flags.workspace = true futures.workspace = true gpui.workspace = true http_client.workspace = true diff --git a/crates/web_search_providers/src/cloud.rs b/crates/web_search_providers/src/cloud.rs index 79ccf97e47..adf79b0ff6 100644 --- a/crates/web_search_providers/src/cloud.rs +++ b/crates/web_search_providers/src/cloud.rs @@ -2,7 +2,6 @@ use std::sync::Arc; use anyhow::{Context as _, Result}; use client::Client; -use feature_flags::{FeatureFlagAppExt as _, ZedCloudFeatureFlag}; use futures::AsyncReadExt as _; use gpui::{App, AppContext, Context, Entity, Subscription, Task}; use http_client::{HttpClient, Method}; @@ -63,10 +62,7 @@ impl WebSearchProvider for CloudWebSearchProvider { let client = state.client.clone(); let llm_api_token = state.llm_api_token.clone(); let body = WebSearchBody { query }; - let use_cloud = cx.has_flag::(); - cx.background_spawn(async move { - perform_web_search(client, llm_api_token, body, use_cloud).await - }) + cx.background_spawn(async move { perform_web_search(client, llm_api_token, body).await }) } } @@ -74,7 +70,6 @@ async fn perform_web_search( client: Arc, llm_api_token: LlmApiToken, body: WebSearchBody, - use_cloud: bool, ) -> Result { const MAX_RETRIES: usize = 3; @@ -91,11 +86,7 @@ async fn perform_web_search( let request = http_client::Request::builder() .method(Method::POST) - .uri( - http_client - .build_zed_llm_url("/web_search", &[], use_cloud)? - .as_ref(), - ) + .uri(http_client.build_zed_llm_url("/web_search", &[])?.as_ref()) .header("Content-Type", "application/json") .header("Authorization", format!("Bearer {token}")) .body(serde_json::to_string(&body)?.into())?; diff --git a/crates/zeta/src/zeta.rs b/crates/zeta/src/zeta.rs index 12d3d4bfbc..87cd1e604c 100644 --- a/crates/zeta/src/zeta.rs +++ b/crates/zeta/src/zeta.rs @@ -8,7 +8,6 @@ mod rate_completion_modal; pub(crate) use completion_diff_element::*; use db::kvp::KEY_VALUE_STORE; -use feature_flags::{FeatureFlagAppExt as _, ZedCloudFeatureFlag}; pub use init::*; use inline_completion::DataCollectionState; use license_detection::LICENSE_FILES_TO_CHECK; @@ -391,7 +390,6 @@ impl Zeta { let client = self.client.clone(); let llm_token = self.llm_token.clone(); let app_version = AppVersion::global(cx); - let use_cloud = cx.has_flag::(); let buffer = buffer.clone(); @@ -482,7 +480,6 @@ impl Zeta { llm_token, app_version, body, - use_cloud, }) .await; let (response, usage) = match response { @@ -748,7 +745,6 @@ and then another llm_token, app_version, body, - use_cloud, .. } = params; @@ -764,7 +760,7 @@ and then another } else { request_builder.uri( http_client - .build_zed_llm_url("/predict_edits/v2", &[], use_cloud)? + .build_zed_llm_url("/predict_edits/v2", &[])? .as_ref(), ) }; @@ -824,7 +820,6 @@ and then another let client = self.client.clone(); let llm_token = self.llm_token.clone(); let app_version = AppVersion::global(cx); - let use_cloud = cx.has_flag::(); cx.spawn(async move |this, cx| { let http_client = client.http_client(); let mut response = llm_token_retry(&llm_token, &client, |token| { @@ -835,7 +830,7 @@ and then another } else { request_builder.uri( http_client - .build_zed_llm_url("/predict_edits/accept", &[], use_cloud)? + .build_zed_llm_url("/predict_edits/accept", &[])? .as_ref(), ) }; @@ -1131,7 +1126,6 @@ struct PerformPredictEditsParams { pub llm_token: LlmApiToken, pub app_version: SemanticVersion, pub body: PredictEditsBody, - pub use_cloud: bool, } #[derive(Error, Debug)]