Send up Zed version with edit prediction and completion requests (#30136)

This PR adds an `x-zed-version` header containing the client's version to requests made to llm.zed.dev for edit predictions and completions.
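
For illustration, a minimal sketch of the pattern involved: the version header is attached only when a client version is actually known. The `with_zed_version` helper, the local header-name constant, and the plain `String` version are assumptions made for this sketch; the real change threads gpui's `SemanticVersion` through `perform_llm_completion` and uses the `ZED_VERSION_HEADER_NAME` constant from `zed_llm_client`, as shown in the diff below.

    use http::request::Builder;

    // Header name as stated in the PR description; the actual code uses the
    // ZED_VERSION_HEADER_NAME constant exported by the zed_llm_client crate.
    const ZED_VERSION_HEADER_NAME: &str = "x-zed-version";

    // Hypothetical helper: attach the client's version only when one is known,
    // mirroring the Option-based handling added to perform_llm_completion.
    fn with_zed_version(builder: Builder, app_version: Option<String>) -> Builder {
        match app_version {
            Some(version) => builder.header(ZED_VERSION_HEADER_NAME, version),
            None => builder,
        }
    }

    // Usage sketch:
    // let request = with_zed_version(http::Request::post(url), Some("0.186.0".into()))
    //     .header("Content-Type", "application/json")
    //     .body(body)?;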

Release Notes:

- N/A
commit a34fb6f6b1
parent 5ca114be24
Marshall Bowers, 2025-05-07 11:44:30 -04:00, committed by GitHub
6 changed files with 26 additions and 9 deletions

Cargo.lock

@@ -7904,6 +7904,7 @@ dependencies = [
  "partial-json-fixer",
  "project",
  "proto",
+ "release_channel",
  "schemars",
  "serde",
  "serde_json",
@@ -18883,9 +18884,9 @@ dependencies = [

 [[package]]
 name = "zed_llm_client"
-version = "0.7.5"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6fe0d60001c02d0d21a4114a13bee3a905fbb9e146ada80a90435c05fda18852"
+checksum = "a23b2fd00776b0c55072f389654910ceb501eb0083d7f78905ab0e5cc86949ec"
 dependencies = [
  "anyhow",
  "serde",


@@ -610,7 +610,7 @@ wasmtime-wasi = "29"
 which = "6.0.0"
 wit-component = "0.221"
 workspace-hack = "0.1.0"
-zed_llm_client = "0.7.5"
+zed_llm_client = "0.8.0"
 zstd = "0.11"

 [workspace.dependencies.async-stripe]


@@ -1546,9 +1546,9 @@ impl Thread {
                 completion.queue_state = QueueState::Started;
             }
             CompletionRequestStatus::Failed {
-                code, message
+                code, message, request_id
             } => {
-                return Err(anyhow!("completion request failed. code: {code}, message: {message}"));
+                return Err(anyhow!("completion request failed. request_id: {request_id}, code: {code}, message: {message}"));
             }
             CompletionRequestStatus::UsageUpdated {
                 amount, limit


@@ -41,6 +41,7 @@ open_ai = { workspace = true, features = ["schemars"] }
 partial-json-fixer.workspace = true
 project.workspace = true
 proto.workspace = true
+release_channel.workspace = true
 schemars.workspace = true
 serde.workspace = true
 serde_json.workspace = true


@@ -6,7 +6,9 @@ use feature_flags::{FeatureFlagAppExt, LlmClosedBetaFeatureFlag, ZedProFeatureFl
 use futures::{
     AsyncBufReadExt, FutureExt, Stream, StreamExt, future::BoxFuture, stream::BoxStream,
 };
-use gpui::{AnyElement, AnyView, App, AsyncApp, Context, Entity, Subscription, Task};
+use gpui::{
+    AnyElement, AnyView, App, AsyncApp, Context, Entity, SemanticVersion, Subscription, Task,
+};
 use http_client::{AsyncBody, HttpClient, Method, Response, StatusCode};
 use language_model::{
     AuthenticateError, CloudModel, LanguageModel, LanguageModelCacheConfiguration,
@@ -20,6 +22,7 @@ use language_model::{
     MaxMonthlySpendReachedError, PaymentRequiredError, RefreshLlmTokenListener,
 };
 use proto::Plan;
+use release_channel::AppVersion;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize, de::DeserializeOwned};
 use settings::{Settings, SettingsStore};
@@ -39,7 +42,7 @@ use zed_llm_client::{
     CompletionRequestStatus, CountTokensBody, CountTokensResponse, EXPIRED_LLM_TOKEN_HEADER_NAME,
     MAX_LLM_MONTHLY_SPEND_REACHED_HEADER_NAME, MODEL_REQUESTS_RESOURCE_HEADER_VALUE,
     SERVER_SUPPORTS_STATUS_MESSAGES_HEADER_NAME, SUBSCRIPTION_LIMIT_RESOURCE_HEADER_NAME,
-    TOOL_USE_LIMIT_REACHED_HEADER_NAME,
+    TOOL_USE_LIMIT_REACHED_HEADER_NAME, ZED_VERSION_HEADER_NAME,
 };

 use crate::AllLanguageModelSettings;
@@ -526,6 +529,7 @@ impl CloudLanguageModel {
     async fn perform_llm_completion(
         client: Arc<Client>,
         llm_api_token: LlmApiToken,
+        app_version: Option<SemanticVersion>,
         body: CompletionBody,
     ) -> Result<PerformLlmCompletionResponse> {
         let http_client = &client.http_client();
@@ -542,6 +546,12 @@ impl CloudLanguageModel {
         } else {
             request_builder.uri(http_client.build_zed_llm_url("/completions", &[])?.as_ref())
         };
+        let request_builder = if let Some(app_version) = app_version {
+            request_builder.header(ZED_VERSION_HEADER_NAME, app_version.to_string())
+        } else {
+            request_builder
+        };
         let request = request_builder
             .header("Content-Type", "application/json")
             .header("Authorization", format!("Bearer {token}"))
@@ -774,7 +784,7 @@ impl LanguageModel for CloudLanguageModel {
     fn stream_completion(
         &self,
         request: LanguageModelRequest,
-        _cx: &AsyncApp,
+        cx: &AsyncApp,
     ) -> BoxFuture<
         'static,
         Result<
@@ -784,6 +794,7 @@ impl LanguageModel for CloudLanguageModel {
         let thread_id = request.thread_id.clone();
         let prompt_id = request.prompt_id.clone();
         let mode = request.mode;
+        let app_version = cx.update(|cx| AppVersion::global(cx)).ok();
         match &self.model {
             CloudModel::Anthropic(model) => {
                 let request = into_anthropic(
@@ -804,6 +815,7 @@ impl LanguageModel for CloudLanguageModel {
                 } = Self::perform_llm_completion(
                     client.clone(),
                     llm_api_token,
+                    app_version,
                     CompletionBody {
                         thread_id,
                         prompt_id,
@@ -855,6 +867,7 @@ impl LanguageModel for CloudLanguageModel {
                 } = Self::perform_llm_completion(
                     client.clone(),
                     llm_api_token,
+                    app_version,
                     CompletionBody {
                         thread_id,
                         prompt_id,
@@ -891,6 +904,7 @@ impl LanguageModel for CloudLanguageModel {
                 } = Self::perform_llm_completion(
                     client.clone(),
                     llm_api_token,
+                    app_version,
                     CompletionBody {
                         thread_id,
                         prompt_id,


@@ -55,7 +55,7 @@ use workspace::notifications::{ErrorMessagePrompt, NotificationId};
 use worktree::Worktree;
 use zed_llm_client::{
     EXPIRED_LLM_TOKEN_HEADER_NAME, MINIMUM_REQUIRED_VERSION_HEADER_NAME, PredictEditsBody,
-    PredictEditsResponse,
+    PredictEditsResponse, ZED_VERSION_HEADER_NAME,
 };

 const CURSOR_MARKER: &'static str = "<|user_cursor_is_here|>";
@@ -754,6 +754,7 @@ and then another
         let request = request_builder
             .header("Content-Type", "application/json")
             .header("Authorization", format!("Bearer {}", token))
+            .header(ZED_VERSION_HEADER_NAME, app_version.to_string())
             .body(serde_json::to_string(&body)?.into())?;

         let mut response = http_client.send(request).await?;