Add feature flag to use cloud.zed.dev instead of llm.zed.dev (#34076)

This PR adds a new `zed-cloud` feature flag that can be used to send
traffic to `cloud.zed.dev` instead of `llm.zed.dev`.

This is just so Zed staff can test the new infrastructure. When we're
ready for prime-time we'll reroute traffic on the server.

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2025-07-08 14:44:51 -04:00 committed by GitHub
parent 01bdef130b
commit 1220049089
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 70 additions and 12 deletions

2
Cargo.lock generated
View file

@@ -8955,6 +8955,7 @@ dependencies = [
 "credentials_provider",
 "deepseek",
 "editor",
 "feature_flags",
 "fs",
 "futures 0.3.31",
 "google_ai",
@@ -18296,6 +18297,7 @@ version = "0.1.0"
dependencies = [
 "anyhow",
 "client",
 "feature_flags",
 "futures 0.3.31",
 "gpui",
 "http_client",

View file

@@ -92,6 +92,17 @@ impl FeatureFlag for JjUiFeatureFlag {
    const NAME: &'static str = "jj-ui";
}
/// Feature flag that routes Zed's LLM traffic to `cloud.zed.dev`
/// instead of `llm.zed.dev`, so staff can test the new infrastructure
/// before traffic is rerouted on the server.
pub struct ZedCloudFeatureFlag {}

impl FeatureFlag for ZedCloudFeatureFlag {
    const NAME: &'static str = "zed-cloud";

    fn enabled_for_staff() -> bool {
        // Require individual opt-in, for now: staff members must enable the
        // flag explicitly rather than receiving it automatically.
        false
    }
}
pub trait FeatureFlagViewExt<V: 'static> {
    fn observe_flag<T: FeatureFlag, F>(&mut self, window: &Window, callback: F) -> Subscription
    where

View file

@@ -226,10 +226,21 @@ impl HttpClientWithUrl {
    }

    /// Builds a Zed LLM URL using the given path.
    pub fn build_zed_llm_url(
        &self,
        path: &str,
        query: &[(&str, &str)],
        use_cloud: bool,
    ) -> Result<Url> {
        let base_url = self.base_url();
        let base_api_url = match base_url.as_ref() {
            "https://zed.dev" => {
                if use_cloud {
                    "https://cloud.zed.dev"
                } else {
                    "https://llm.zed.dev"
                }
            }
            "https://staging.zed.dev" => "https://llm-staging.zed.dev",
            "http://localhost:3000" => "http://localhost:8787",
            other => other,
View file

@@ -28,6 +28,7 @@ credentials_provider.workspace = true
copilot.workspace = true
deepseek = { workspace = true, features = ["schemars"] }
editor.workspace = true
feature_flags.workspace = true
fs.workspace = true
futures.workspace = true
google_ai = { workspace = true, features = ["schemars"] }

View file

@@ -2,6 +2,7 @@ use anthropic::AnthropicModelMode;
use anyhow::{Context as _, Result, anyhow};
use chrono::{DateTime, Utc};
use client::{Client, ModelRequestUsage, UserStore, zed_urls};
use feature_flags::{FeatureFlagAppExt as _, ZedCloudFeatureFlag};
use futures::{
    AsyncBufReadExt, FutureExt, Stream, StreamExt, future::BoxFuture, stream::BoxStream,
};
@@ -136,6 +137,7 @@ impl State {
        cx: &mut Context<Self>,
    ) -> Self {
        let refresh_llm_token_listener = RefreshLlmTokenListener::global(cx);
        let use_cloud = cx.has_flag::<ZedCloudFeatureFlag>();

        Self {
            client: client.clone(),
@@ -163,7 +165,7 @@ impl State {
                    .await;
                }

                let response = Self::fetch_models(client, llm_api_token, use_cloud).await?;
                cx.update(|cx| {
                    this.update(cx, |this, cx| {
                        let mut models = Vec::new();
@@ -265,13 +267,18 @@ impl State {
    async fn fetch_models(
        client: Arc<Client>,
        llm_api_token: LlmApiToken,
        use_cloud: bool,
    ) -> Result<ListModelsResponse> {
        let http_client = &client.http_client();
        let token = llm_api_token.acquire(&client).await?;

        let request = http_client::Request::builder()
            .method(Method::GET)
            .uri(
                http_client
                    .build_zed_llm_url("/models", &[], use_cloud)?
                    .as_ref(),
            )
            .header("Authorization", format!("Bearer {token}"))
            .body(AsyncBody::empty())?;
        let mut response = http_client
@@ -535,6 +542,7 @@ impl CloudLanguageModel {
        llm_api_token: LlmApiToken,
        app_version: Option<SemanticVersion>,
        body: CompletionBody,
        use_cloud: bool,
    ) -> Result<PerformLlmCompletionResponse> {
        let http_client = &client.http_client();
@@ -542,9 +550,11 @@
        let mut refreshed_token = false;

        loop {
            let request_builder = http_client::Request::builder().method(Method::POST).uri(
                http_client
                    .build_zed_llm_url("/completions", &[], use_cloud)?
                    .as_ref(),
            );
            let request_builder = if let Some(app_version) = app_version {
                request_builder.header(ZED_VERSION_HEADER_NAME, app_version.to_string())
            } else {
@@ -771,6 +781,7 @@ impl LanguageModel for CloudLanguageModel {
        let model_id = self.model.id.to_string();
        let generate_content_request =
            into_google(request, model_id.clone(), GoogleModelMode::Default);
        let use_cloud = cx.has_flag::<ZedCloudFeatureFlag>();
        async move {
            let http_client = &client.http_client();
            let token = llm_api_token.acquire(&client).await?;
@@ -786,7 +797,7 @@
                .method(Method::POST)
                .uri(
                    http_client
                        .build_zed_llm_url("/count_tokens", &[], use_cloud)?
                        .as_ref(),
                )
                .header("Content-Type", "application/json")
@@ -835,6 +846,9 @@
        let intent = request.intent;
        let mode = request.mode;
        let app_version = cx.update(|cx| AppVersion::global(cx)).ok();
        let use_cloud = cx
            .update(|cx| cx.has_flag::<ZedCloudFeatureFlag>())
            .unwrap_or(false);
        match self.model.provider {
            zed_llm_client::LanguageModelProvider::Anthropic => {
                let request = into_anthropic(
@@ -872,6 +886,7 @@
                        provider_request: serde_json::to_value(&request)
                            .map_err(|e| anyhow!(e))?,
                    },
                    use_cloud,
                )
                .await
                .map_err(|err| match err.downcast::<ApiError>() {
@@ -924,6 +939,7 @@
                        provider_request: serde_json::to_value(&request)
                            .map_err(|e| anyhow!(e))?,
                    },
                    use_cloud,
                )
                .await?;
@@ -964,6 +980,7 @@
                        provider_request: serde_json::to_value(&request)
                            .map_err(|e| anyhow!(e))?,
                    },
                    use_cloud,
                )
                .await?;

View file

@@ -14,6 +14,7 @@ path = "src/web_search_providers.rs"
[dependencies]
anyhow.workspace = true
client.workspace = true
feature_flags.workspace = true
futures.workspace = true
gpui.workspace = true
http_client.workspace = true

View file

@@ -2,6 +2,7 @@ use std::sync::Arc;
use anyhow::{Context as _, Result};
use client::Client;
use feature_flags::{FeatureFlagAppExt as _, ZedCloudFeatureFlag};
use futures::AsyncReadExt as _;
use gpui::{App, AppContext, Context, Entity, Subscription, Task};
use http_client::{HttpClient, Method};
@@ -62,7 +63,10 @@ impl WebSearchProvider for CloudWebSearchProvider {
        let client = state.client.clone();
        let llm_api_token = state.llm_api_token.clone();
        let body = WebSearchBody { query };
        let use_cloud = cx.has_flag::<ZedCloudFeatureFlag>();
        cx.background_spawn(async move {
            perform_web_search(client, llm_api_token, body, use_cloud).await
        })
    }
}
@@ -70,6 +74,7 @@ async fn perform_web_search(
    client: Arc<Client>,
    llm_api_token: LlmApiToken,
    body: WebSearchBody,
    use_cloud: bool,
) -> Result<WebSearchResponse> {
    const MAX_RETRIES: usize = 3;
@@ -86,7 +91,11 @@ async fn perform_web_search(
        let request = http_client::Request::builder()
            .method(Method::POST)
            .uri(
                http_client
                    .build_zed_llm_url("/web_search", &[], use_cloud)?
                    .as_ref(),
            )
            .header("Content-Type", "application/json")
            .header("Authorization", format!("Bearer {token}"))
            .body(serde_json::to_string(&body)?.into())?;

View file

@@ -8,6 +8,7 @@ mod rate_completion_modal;
pub(crate) use completion_diff_element::*;
use db::kvp::KEY_VALUE_STORE;
use feature_flags::{FeatureFlagAppExt as _, ZedCloudFeatureFlag};
pub use init::*;
use inline_completion::DataCollectionState;
use license_detection::LICENSE_FILES_TO_CHECK;
@@ -390,6 +391,7 @@ impl Zeta {
        let client = self.client.clone();
        let llm_token = self.llm_token.clone();
        let app_version = AppVersion::global(cx);
        let use_cloud = cx.has_flag::<ZedCloudFeatureFlag>();
        let buffer = buffer.clone();
@@ -480,6 +482,7 @@ impl Zeta {
                    llm_token,
                    app_version,
                    body,
                    use_cloud,
                })
                .await;
            let (response, usage) = match response {
@@ -745,6 +748,7 @@ and then another
            llm_token,
            app_version,
            body,
            use_cloud,
            ..
        } = params;
@@ -760,7 +764,7 @@ and then another
            } else {
                request_builder.uri(
                    http_client
                        .build_zed_llm_url("/predict_edits/v2", &[], use_cloud)?
                        .as_ref(),
                )
            };
@@ -820,6 +824,7 @@ and then another
        let client = self.client.clone();
        let llm_token = self.llm_token.clone();
        let app_version = AppVersion::global(cx);
        let use_cloud = cx.has_flag::<ZedCloudFeatureFlag>();
        cx.spawn(async move |this, cx| {
            let http_client = client.http_client();
            let mut response = llm_token_retry(&llm_token, &client, |token| {
@@ -830,7 +835,7 @@ and then another
            } else {
                request_builder.uri(
                    http_client
                        .build_zed_llm_url("/predict_edits/accept", &[], use_cloud)?
                        .as_ref(),
                )
            };
@@ -1126,6 +1131,7 @@ struct PerformPredictEditsParams {
    pub llm_token: LlmApiToken,
    pub app_version: SemanticVersion,
    pub body: PredictEditsBody,
    pub use_cloud: bool,
}

#[derive(Error, Debug)]