Route all LLM traffic through cloud.zed.dev (#34404)
This PR makes it so all LLM traffic is routed through `cloud.zed.dev`. We're already routing `llm.zed.dev` to `cloud.zed.dev` on the server, but we want to standardize on `cloud.zed.dev` moving forward.

Release Notes:

- N/A
This commit is contained in:
parent
6673c7cd4c
commit
eca36c502e
8 changed files with 13 additions and 71 deletions
|
@ -28,7 +28,6 @@ credentials_provider.workspace = true
|
|||
copilot.workspace = true
|
||||
deepseek = { workspace = true, features = ["schemars"] }
|
||||
editor.workspace = true
|
||||
feature_flags.workspace = true
|
||||
fs.workspace = true
|
||||
futures.workspace = true
|
||||
google_ai = { workspace = true, features = ["schemars"] }
|
||||
|
|
|
@ -2,7 +2,6 @@ use anthropic::AnthropicModelMode;
|
|||
use anyhow::{Context as _, Result, anyhow};
|
||||
use chrono::{DateTime, Utc};
|
||||
use client::{Client, ModelRequestUsage, UserStore, zed_urls};
|
||||
use feature_flags::{FeatureFlagAppExt as _, ZedCloudFeatureFlag};
|
||||
use futures::{
|
||||
AsyncBufReadExt, FutureExt, Stream, StreamExt, future::BoxFuture, stream::BoxStream,
|
||||
};
|
||||
|
@ -137,7 +136,6 @@ impl State {
|
|||
cx: &mut Context<Self>,
|
||||
) -> Self {
|
||||
let refresh_llm_token_listener = RefreshLlmTokenListener::global(cx);
|
||||
let use_cloud = cx.has_flag::<ZedCloudFeatureFlag>();
|
||||
|
||||
Self {
|
||||
client: client.clone(),
|
||||
|
@ -165,7 +163,7 @@ impl State {
|
|||
.await;
|
||||
}
|
||||
|
||||
let response = Self::fetch_models(client, llm_api_token, use_cloud).await?;
|
||||
let response = Self::fetch_models(client, llm_api_token).await?;
|
||||
this.update(cx, |this, cx| {
|
||||
this.update_models(response, cx);
|
||||
})
|
||||
|
@ -184,7 +182,7 @@ impl State {
|
|||
let llm_api_token = this.llm_api_token.clone();
|
||||
cx.spawn(async move |this, cx| {
|
||||
llm_api_token.refresh(&client).await?;
|
||||
let response = Self::fetch_models(client, llm_api_token, use_cloud).await?;
|
||||
let response = Self::fetch_models(client, llm_api_token).await?;
|
||||
this.update(cx, |this, cx| {
|
||||
this.update_models(response, cx);
|
||||
})
|
||||
|
@ -268,18 +266,13 @@ impl State {
|
|||
async fn fetch_models(
|
||||
client: Arc<Client>,
|
||||
llm_api_token: LlmApiToken,
|
||||
use_cloud: bool,
|
||||
) -> Result<ListModelsResponse> {
|
||||
let http_client = &client.http_client();
|
||||
let token = llm_api_token.acquire(&client).await?;
|
||||
|
||||
let request = http_client::Request::builder()
|
||||
.method(Method::GET)
|
||||
.uri(
|
||||
http_client
|
||||
.build_zed_llm_url("/models", &[], use_cloud)?
|
||||
.as_ref(),
|
||||
)
|
||||
.uri(http_client.build_zed_llm_url("/models", &[])?.as_ref())
|
||||
.header("Authorization", format!("Bearer {token}"))
|
||||
.body(AsyncBody::empty())?;
|
||||
let mut response = http_client
|
||||
|
@ -543,7 +536,6 @@ impl CloudLanguageModel {
|
|||
llm_api_token: LlmApiToken,
|
||||
app_version: Option<SemanticVersion>,
|
||||
body: CompletionBody,
|
||||
use_cloud: bool,
|
||||
) -> Result<PerformLlmCompletionResponse> {
|
||||
let http_client = &client.http_client();
|
||||
|
||||
|
@ -551,11 +543,9 @@ impl CloudLanguageModel {
|
|||
let mut refreshed_token = false;
|
||||
|
||||
loop {
|
||||
let request_builder = http_client::Request::builder().method(Method::POST).uri(
|
||||
http_client
|
||||
.build_zed_llm_url("/completions", &[], use_cloud)?
|
||||
.as_ref(),
|
||||
);
|
||||
let request_builder = http_client::Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri(http_client.build_zed_llm_url("/completions", &[])?.as_ref());
|
||||
let request_builder = if let Some(app_version) = app_version {
|
||||
request_builder.header(ZED_VERSION_HEADER_NAME, app_version.to_string())
|
||||
} else {
|
||||
|
@ -782,7 +772,6 @@ impl LanguageModel for CloudLanguageModel {
|
|||
let model_id = self.model.id.to_string();
|
||||
let generate_content_request =
|
||||
into_google(request, model_id.clone(), GoogleModelMode::Default);
|
||||
let use_cloud = cx.has_flag::<ZedCloudFeatureFlag>();
|
||||
async move {
|
||||
let http_client = &client.http_client();
|
||||
let token = llm_api_token.acquire(&client).await?;
|
||||
|
@ -798,7 +787,7 @@ impl LanguageModel for CloudLanguageModel {
|
|||
.method(Method::POST)
|
||||
.uri(
|
||||
http_client
|
||||
.build_zed_llm_url("/count_tokens", &[], use_cloud)?
|
||||
.build_zed_llm_url("/count_tokens", &[])?
|
||||
.as_ref(),
|
||||
)
|
||||
.header("Content-Type", "application/json")
|
||||
|
@ -847,9 +836,6 @@ impl LanguageModel for CloudLanguageModel {
|
|||
let intent = request.intent;
|
||||
let mode = request.mode;
|
||||
let app_version = cx.update(|cx| AppVersion::global(cx)).ok();
|
||||
let use_cloud = cx
|
||||
.update(|cx| cx.has_flag::<ZedCloudFeatureFlag>())
|
||||
.unwrap_or(false);
|
||||
let thinking_allowed = request.thinking_allowed;
|
||||
match self.model.provider {
|
||||
zed_llm_client::LanguageModelProvider::Anthropic => {
|
||||
|
@ -888,7 +874,6 @@ impl LanguageModel for CloudLanguageModel {
|
|||
provider_request: serde_json::to_value(&request)
|
||||
.map_err(|e| anyhow!(e))?,
|
||||
},
|
||||
use_cloud,
|
||||
)
|
||||
.await
|
||||
.map_err(|err| match err.downcast::<ApiError>() {
|
||||
|
@ -941,7 +926,6 @@ impl LanguageModel for CloudLanguageModel {
|
|||
provider_request: serde_json::to_value(&request)
|
||||
.map_err(|e| anyhow!(e))?,
|
||||
},
|
||||
use_cloud,
|
||||
)
|
||||
.await?;
|
||||
|
||||
|
@ -982,7 +966,6 @@ impl LanguageModel for CloudLanguageModel {
|
|||
provider_request: serde_json::to_value(&request)
|
||||
.map_err(|e| anyhow!(e))?,
|
||||
},
|
||||
use_cloud,
|
||||
)
|
||||
.await?;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue