Route all LLM traffic through cloud.zed.dev
(#34404)
This PR routes all LLM traffic through `cloud.zed.dev`. The server already forwards `llm.zed.dev` to `cloud.zed.dev`, but we want to standardize on `cloud.zed.dev` going forward.

Release Notes:

- N/A
This commit is contained in:
parent
6673c7cd4c
commit
eca36c502e
8 changed files with 13 additions and 71 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -9036,7 +9036,6 @@ dependencies = [
|
||||||
"credentials_provider",
|
"credentials_provider",
|
||||||
"deepseek",
|
"deepseek",
|
||||||
"editor",
|
"editor",
|
||||||
"feature_flags",
|
|
||||||
"fs",
|
"fs",
|
||||||
"futures 0.3.31",
|
"futures 0.3.31",
|
||||||
"google_ai",
|
"google_ai",
|
||||||
|
@ -18390,7 +18389,6 @@ version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"client",
|
"client",
|
||||||
"feature_flags",
|
|
||||||
"futures 0.3.31",
|
"futures 0.3.31",
|
||||||
"gpui",
|
"gpui",
|
||||||
"http_client",
|
"http_client",
|
||||||
|
|
|
@ -98,17 +98,6 @@ impl FeatureFlag for AcpFeatureFlag {
|
||||||
const NAME: &'static str = "acp";
|
const NAME: &'static str = "acp";
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ZedCloudFeatureFlag {}
|
|
||||||
|
|
||||||
impl FeatureFlag for ZedCloudFeatureFlag {
|
|
||||||
const NAME: &'static str = "zed-cloud";
|
|
||||||
|
|
||||||
fn enabled_for_staff() -> bool {
|
|
||||||
// Require individual opt-in, for now.
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub trait FeatureFlagViewExt<V: 'static> {
|
pub trait FeatureFlagViewExt<V: 'static> {
|
||||||
fn observe_flag<T: FeatureFlag, F>(&mut self, window: &Window, callback: F) -> Subscription
|
fn observe_flag<T: FeatureFlag, F>(&mut self, window: &Window, callback: F) -> Subscription
|
||||||
where
|
where
|
||||||
|
|
|
@ -226,21 +226,10 @@ impl HttpClientWithUrl {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Builds a Zed LLM URL using the given path.
|
/// Builds a Zed LLM URL using the given path.
|
||||||
pub fn build_zed_llm_url(
|
pub fn build_zed_llm_url(&self, path: &str, query: &[(&str, &str)]) -> Result<Url> {
|
||||||
&self,
|
|
||||||
path: &str,
|
|
||||||
query: &[(&str, &str)],
|
|
||||||
use_cloud: bool,
|
|
||||||
) -> Result<Url> {
|
|
||||||
let base_url = self.base_url();
|
let base_url = self.base_url();
|
||||||
let base_api_url = match base_url.as_ref() {
|
let base_api_url = match base_url.as_ref() {
|
||||||
"https://zed.dev" => {
|
"https://zed.dev" => "https://cloud.zed.dev",
|
||||||
if use_cloud {
|
|
||||||
"https://cloud.zed.dev"
|
|
||||||
} else {
|
|
||||||
"https://llm.zed.dev"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"https://staging.zed.dev" => "https://llm-staging.zed.dev",
|
"https://staging.zed.dev" => "https://llm-staging.zed.dev",
|
||||||
"http://localhost:3000" => "http://localhost:8787",
|
"http://localhost:3000" => "http://localhost:8787",
|
||||||
other => other,
|
other => other,
|
||||||
|
|
|
@ -28,7 +28,6 @@ credentials_provider.workspace = true
|
||||||
copilot.workspace = true
|
copilot.workspace = true
|
||||||
deepseek = { workspace = true, features = ["schemars"] }
|
deepseek = { workspace = true, features = ["schemars"] }
|
||||||
editor.workspace = true
|
editor.workspace = true
|
||||||
feature_flags.workspace = true
|
|
||||||
fs.workspace = true
|
fs.workspace = true
|
||||||
futures.workspace = true
|
futures.workspace = true
|
||||||
google_ai = { workspace = true, features = ["schemars"] }
|
google_ai = { workspace = true, features = ["schemars"] }
|
||||||
|
|
|
@ -2,7 +2,6 @@ use anthropic::AnthropicModelMode;
|
||||||
use anyhow::{Context as _, Result, anyhow};
|
use anyhow::{Context as _, Result, anyhow};
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use client::{Client, ModelRequestUsage, UserStore, zed_urls};
|
use client::{Client, ModelRequestUsage, UserStore, zed_urls};
|
||||||
use feature_flags::{FeatureFlagAppExt as _, ZedCloudFeatureFlag};
|
|
||||||
use futures::{
|
use futures::{
|
||||||
AsyncBufReadExt, FutureExt, Stream, StreamExt, future::BoxFuture, stream::BoxStream,
|
AsyncBufReadExt, FutureExt, Stream, StreamExt, future::BoxFuture, stream::BoxStream,
|
||||||
};
|
};
|
||||||
|
@ -137,7 +136,6 @@ impl State {
|
||||||
cx: &mut Context<Self>,
|
cx: &mut Context<Self>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let refresh_llm_token_listener = RefreshLlmTokenListener::global(cx);
|
let refresh_llm_token_listener = RefreshLlmTokenListener::global(cx);
|
||||||
let use_cloud = cx.has_flag::<ZedCloudFeatureFlag>();
|
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
client: client.clone(),
|
client: client.clone(),
|
||||||
|
@ -165,7 +163,7 @@ impl State {
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
let response = Self::fetch_models(client, llm_api_token, use_cloud).await?;
|
let response = Self::fetch_models(client, llm_api_token).await?;
|
||||||
this.update(cx, |this, cx| {
|
this.update(cx, |this, cx| {
|
||||||
this.update_models(response, cx);
|
this.update_models(response, cx);
|
||||||
})
|
})
|
||||||
|
@ -184,7 +182,7 @@ impl State {
|
||||||
let llm_api_token = this.llm_api_token.clone();
|
let llm_api_token = this.llm_api_token.clone();
|
||||||
cx.spawn(async move |this, cx| {
|
cx.spawn(async move |this, cx| {
|
||||||
llm_api_token.refresh(&client).await?;
|
llm_api_token.refresh(&client).await?;
|
||||||
let response = Self::fetch_models(client, llm_api_token, use_cloud).await?;
|
let response = Self::fetch_models(client, llm_api_token).await?;
|
||||||
this.update(cx, |this, cx| {
|
this.update(cx, |this, cx| {
|
||||||
this.update_models(response, cx);
|
this.update_models(response, cx);
|
||||||
})
|
})
|
||||||
|
@ -268,18 +266,13 @@ impl State {
|
||||||
async fn fetch_models(
|
async fn fetch_models(
|
||||||
client: Arc<Client>,
|
client: Arc<Client>,
|
||||||
llm_api_token: LlmApiToken,
|
llm_api_token: LlmApiToken,
|
||||||
use_cloud: bool,
|
|
||||||
) -> Result<ListModelsResponse> {
|
) -> Result<ListModelsResponse> {
|
||||||
let http_client = &client.http_client();
|
let http_client = &client.http_client();
|
||||||
let token = llm_api_token.acquire(&client).await?;
|
let token = llm_api_token.acquire(&client).await?;
|
||||||
|
|
||||||
let request = http_client::Request::builder()
|
let request = http_client::Request::builder()
|
||||||
.method(Method::GET)
|
.method(Method::GET)
|
||||||
.uri(
|
.uri(http_client.build_zed_llm_url("/models", &[])?.as_ref())
|
||||||
http_client
|
|
||||||
.build_zed_llm_url("/models", &[], use_cloud)?
|
|
||||||
.as_ref(),
|
|
||||||
)
|
|
||||||
.header("Authorization", format!("Bearer {token}"))
|
.header("Authorization", format!("Bearer {token}"))
|
||||||
.body(AsyncBody::empty())?;
|
.body(AsyncBody::empty())?;
|
||||||
let mut response = http_client
|
let mut response = http_client
|
||||||
|
@ -543,7 +536,6 @@ impl CloudLanguageModel {
|
||||||
llm_api_token: LlmApiToken,
|
llm_api_token: LlmApiToken,
|
||||||
app_version: Option<SemanticVersion>,
|
app_version: Option<SemanticVersion>,
|
||||||
body: CompletionBody,
|
body: CompletionBody,
|
||||||
use_cloud: bool,
|
|
||||||
) -> Result<PerformLlmCompletionResponse> {
|
) -> Result<PerformLlmCompletionResponse> {
|
||||||
let http_client = &client.http_client();
|
let http_client = &client.http_client();
|
||||||
|
|
||||||
|
@ -551,11 +543,9 @@ impl CloudLanguageModel {
|
||||||
let mut refreshed_token = false;
|
let mut refreshed_token = false;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
let request_builder = http_client::Request::builder().method(Method::POST).uri(
|
let request_builder = http_client::Request::builder()
|
||||||
http_client
|
.method(Method::POST)
|
||||||
.build_zed_llm_url("/completions", &[], use_cloud)?
|
.uri(http_client.build_zed_llm_url("/completions", &[])?.as_ref());
|
||||||
.as_ref(),
|
|
||||||
);
|
|
||||||
let request_builder = if let Some(app_version) = app_version {
|
let request_builder = if let Some(app_version) = app_version {
|
||||||
request_builder.header(ZED_VERSION_HEADER_NAME, app_version.to_string())
|
request_builder.header(ZED_VERSION_HEADER_NAME, app_version.to_string())
|
||||||
} else {
|
} else {
|
||||||
|
@ -782,7 +772,6 @@ impl LanguageModel for CloudLanguageModel {
|
||||||
let model_id = self.model.id.to_string();
|
let model_id = self.model.id.to_string();
|
||||||
let generate_content_request =
|
let generate_content_request =
|
||||||
into_google(request, model_id.clone(), GoogleModelMode::Default);
|
into_google(request, model_id.clone(), GoogleModelMode::Default);
|
||||||
let use_cloud = cx.has_flag::<ZedCloudFeatureFlag>();
|
|
||||||
async move {
|
async move {
|
||||||
let http_client = &client.http_client();
|
let http_client = &client.http_client();
|
||||||
let token = llm_api_token.acquire(&client).await?;
|
let token = llm_api_token.acquire(&client).await?;
|
||||||
|
@ -798,7 +787,7 @@ impl LanguageModel for CloudLanguageModel {
|
||||||
.method(Method::POST)
|
.method(Method::POST)
|
||||||
.uri(
|
.uri(
|
||||||
http_client
|
http_client
|
||||||
.build_zed_llm_url("/count_tokens", &[], use_cloud)?
|
.build_zed_llm_url("/count_tokens", &[])?
|
||||||
.as_ref(),
|
.as_ref(),
|
||||||
)
|
)
|
||||||
.header("Content-Type", "application/json")
|
.header("Content-Type", "application/json")
|
||||||
|
@ -847,9 +836,6 @@ impl LanguageModel for CloudLanguageModel {
|
||||||
let intent = request.intent;
|
let intent = request.intent;
|
||||||
let mode = request.mode;
|
let mode = request.mode;
|
||||||
let app_version = cx.update(|cx| AppVersion::global(cx)).ok();
|
let app_version = cx.update(|cx| AppVersion::global(cx)).ok();
|
||||||
let use_cloud = cx
|
|
||||||
.update(|cx| cx.has_flag::<ZedCloudFeatureFlag>())
|
|
||||||
.unwrap_or(false);
|
|
||||||
let thinking_allowed = request.thinking_allowed;
|
let thinking_allowed = request.thinking_allowed;
|
||||||
match self.model.provider {
|
match self.model.provider {
|
||||||
zed_llm_client::LanguageModelProvider::Anthropic => {
|
zed_llm_client::LanguageModelProvider::Anthropic => {
|
||||||
|
@ -888,7 +874,6 @@ impl LanguageModel for CloudLanguageModel {
|
||||||
provider_request: serde_json::to_value(&request)
|
provider_request: serde_json::to_value(&request)
|
||||||
.map_err(|e| anyhow!(e))?,
|
.map_err(|e| anyhow!(e))?,
|
||||||
},
|
},
|
||||||
use_cloud,
|
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.map_err(|err| match err.downcast::<ApiError>() {
|
.map_err(|err| match err.downcast::<ApiError>() {
|
||||||
|
@ -941,7 +926,6 @@ impl LanguageModel for CloudLanguageModel {
|
||||||
provider_request: serde_json::to_value(&request)
|
provider_request: serde_json::to_value(&request)
|
||||||
.map_err(|e| anyhow!(e))?,
|
.map_err(|e| anyhow!(e))?,
|
||||||
},
|
},
|
||||||
use_cloud,
|
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
|
@ -982,7 +966,6 @@ impl LanguageModel for CloudLanguageModel {
|
||||||
provider_request: serde_json::to_value(&request)
|
provider_request: serde_json::to_value(&request)
|
||||||
.map_err(|e| anyhow!(e))?,
|
.map_err(|e| anyhow!(e))?,
|
||||||
},
|
},
|
||||||
use_cloud,
|
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
|
|
|
@ -14,7 +14,6 @@ path = "src/web_search_providers.rs"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
client.workspace = true
|
client.workspace = true
|
||||||
feature_flags.workspace = true
|
|
||||||
futures.workspace = true
|
futures.workspace = true
|
||||||
gpui.workspace = true
|
gpui.workspace = true
|
||||||
http_client.workspace = true
|
http_client.workspace = true
|
||||||
|
|
|
@ -2,7 +2,6 @@ use std::sync::Arc;
|
||||||
|
|
||||||
use anyhow::{Context as _, Result};
|
use anyhow::{Context as _, Result};
|
||||||
use client::Client;
|
use client::Client;
|
||||||
use feature_flags::{FeatureFlagAppExt as _, ZedCloudFeatureFlag};
|
|
||||||
use futures::AsyncReadExt as _;
|
use futures::AsyncReadExt as _;
|
||||||
use gpui::{App, AppContext, Context, Entity, Subscription, Task};
|
use gpui::{App, AppContext, Context, Entity, Subscription, Task};
|
||||||
use http_client::{HttpClient, Method};
|
use http_client::{HttpClient, Method};
|
||||||
|
@ -63,10 +62,7 @@ impl WebSearchProvider for CloudWebSearchProvider {
|
||||||
let client = state.client.clone();
|
let client = state.client.clone();
|
||||||
let llm_api_token = state.llm_api_token.clone();
|
let llm_api_token = state.llm_api_token.clone();
|
||||||
let body = WebSearchBody { query };
|
let body = WebSearchBody { query };
|
||||||
let use_cloud = cx.has_flag::<ZedCloudFeatureFlag>();
|
cx.background_spawn(async move { perform_web_search(client, llm_api_token, body).await })
|
||||||
cx.background_spawn(async move {
|
|
||||||
perform_web_search(client, llm_api_token, body, use_cloud).await
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,7 +70,6 @@ async fn perform_web_search(
|
||||||
client: Arc<Client>,
|
client: Arc<Client>,
|
||||||
llm_api_token: LlmApiToken,
|
llm_api_token: LlmApiToken,
|
||||||
body: WebSearchBody,
|
body: WebSearchBody,
|
||||||
use_cloud: bool,
|
|
||||||
) -> Result<WebSearchResponse> {
|
) -> Result<WebSearchResponse> {
|
||||||
const MAX_RETRIES: usize = 3;
|
const MAX_RETRIES: usize = 3;
|
||||||
|
|
||||||
|
@ -91,11 +86,7 @@ async fn perform_web_search(
|
||||||
|
|
||||||
let request = http_client::Request::builder()
|
let request = http_client::Request::builder()
|
||||||
.method(Method::POST)
|
.method(Method::POST)
|
||||||
.uri(
|
.uri(http_client.build_zed_llm_url("/web_search", &[])?.as_ref())
|
||||||
http_client
|
|
||||||
.build_zed_llm_url("/web_search", &[], use_cloud)?
|
|
||||||
.as_ref(),
|
|
||||||
)
|
|
||||||
.header("Content-Type", "application/json")
|
.header("Content-Type", "application/json")
|
||||||
.header("Authorization", format!("Bearer {token}"))
|
.header("Authorization", format!("Bearer {token}"))
|
||||||
.body(serde_json::to_string(&body)?.into())?;
|
.body(serde_json::to_string(&body)?.into())?;
|
||||||
|
|
|
@ -8,7 +8,6 @@ mod rate_completion_modal;
|
||||||
|
|
||||||
pub(crate) use completion_diff_element::*;
|
pub(crate) use completion_diff_element::*;
|
||||||
use db::kvp::KEY_VALUE_STORE;
|
use db::kvp::KEY_VALUE_STORE;
|
||||||
use feature_flags::{FeatureFlagAppExt as _, ZedCloudFeatureFlag};
|
|
||||||
pub use init::*;
|
pub use init::*;
|
||||||
use inline_completion::DataCollectionState;
|
use inline_completion::DataCollectionState;
|
||||||
use license_detection::LICENSE_FILES_TO_CHECK;
|
use license_detection::LICENSE_FILES_TO_CHECK;
|
||||||
|
@ -391,7 +390,6 @@ impl Zeta {
|
||||||
let client = self.client.clone();
|
let client = self.client.clone();
|
||||||
let llm_token = self.llm_token.clone();
|
let llm_token = self.llm_token.clone();
|
||||||
let app_version = AppVersion::global(cx);
|
let app_version = AppVersion::global(cx);
|
||||||
let use_cloud = cx.has_flag::<ZedCloudFeatureFlag>();
|
|
||||||
|
|
||||||
let buffer = buffer.clone();
|
let buffer = buffer.clone();
|
||||||
|
|
||||||
|
@ -482,7 +480,6 @@ impl Zeta {
|
||||||
llm_token,
|
llm_token,
|
||||||
app_version,
|
app_version,
|
||||||
body,
|
body,
|
||||||
use_cloud,
|
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
let (response, usage) = match response {
|
let (response, usage) = match response {
|
||||||
|
@ -748,7 +745,6 @@ and then another
|
||||||
llm_token,
|
llm_token,
|
||||||
app_version,
|
app_version,
|
||||||
body,
|
body,
|
||||||
use_cloud,
|
|
||||||
..
|
..
|
||||||
} = params;
|
} = params;
|
||||||
|
|
||||||
|
@ -764,7 +760,7 @@ and then another
|
||||||
} else {
|
} else {
|
||||||
request_builder.uri(
|
request_builder.uri(
|
||||||
http_client
|
http_client
|
||||||
.build_zed_llm_url("/predict_edits/v2", &[], use_cloud)?
|
.build_zed_llm_url("/predict_edits/v2", &[])?
|
||||||
.as_ref(),
|
.as_ref(),
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
@ -824,7 +820,6 @@ and then another
|
||||||
let client = self.client.clone();
|
let client = self.client.clone();
|
||||||
let llm_token = self.llm_token.clone();
|
let llm_token = self.llm_token.clone();
|
||||||
let app_version = AppVersion::global(cx);
|
let app_version = AppVersion::global(cx);
|
||||||
let use_cloud = cx.has_flag::<ZedCloudFeatureFlag>();
|
|
||||||
cx.spawn(async move |this, cx| {
|
cx.spawn(async move |this, cx| {
|
||||||
let http_client = client.http_client();
|
let http_client = client.http_client();
|
||||||
let mut response = llm_token_retry(&llm_token, &client, |token| {
|
let mut response = llm_token_retry(&llm_token, &client, |token| {
|
||||||
|
@ -835,7 +830,7 @@ and then another
|
||||||
} else {
|
} else {
|
||||||
request_builder.uri(
|
request_builder.uri(
|
||||||
http_client
|
http_client
|
||||||
.build_zed_llm_url("/predict_edits/accept", &[], use_cloud)?
|
.build_zed_llm_url("/predict_edits/accept", &[])?
|
||||||
.as_ref(),
|
.as_ref(),
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
@ -1131,7 +1126,6 @@ struct PerformPredictEditsParams {
|
||||||
pub llm_token: LlmApiToken,
|
pub llm_token: LlmApiToken,
|
||||||
pub app_version: SemanticVersion,
|
pub app_version: SemanticVersion,
|
||||||
pub body: PredictEditsBody,
|
pub body: PredictEditsBody,
|
||||||
pub use_cloud: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Error, Debug)]
|
#[derive(Error, Debug)]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue