PARTIAL feat: Integrate Anthropic models via Vertex AI
This commit introduces a new language model provider, `anthropic_vertex_ai`, that integrates Anthropic's models via the Google Cloud Vertex AI platform. The implementation uses Google Cloud's Application Default Credentials (ADC) for authentication and fetches `project_id` and `location_id` from the user's settings.
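
For reference, the provider can be configured with a `settings.json` entry along these lines (illustrative values; the top-level `language_models` key and the `anthropic_vertex` field follow the settings structs added in this change and are an assumption about the final wiring):

```json
{
  "language_models": {
    "anthropic_vertex": {
      "project_id": "your-gcp-project-id",
      "location_id": "europe-west1"
    }
  }
}
```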
parent 132f0dd36a
commit 59073836c7
9 changed files with 2014 additions and 0 deletions

Cargo.toml
@@ -285,6 +285,7 @@ git_ui = { path = "crates/git_ui" }
go_to_line = { path = "crates/go_to_line" }
google_ai = { path = "crates/google_ai" }
google_vertex_ai = { path = "crates/google_vertex_ai" }
anthropic_vertex_ai = { path = "crates/anthropic_vertex_ai" }
gpui = { path = "crates/gpui", default-features = false, features = [
    "http_client",
] }
crates/anthropic_vertex_ai/Cargo.toml (new file, 29 lines)
@@ -0,0 +1,29 @@
[package]
name = "anthropic_vertex_ai"
version = "0.1.0"
edition.workspace = true
publish.workspace = true
license = "GPL-3.0-or-later"

[features]
default = []
schemars = ["dep:schemars"]

[lints]
workspace = true

[lib]
path = "src/anthropic_vertex_ai.rs"

[dependencies]
anyhow.workspace = true
chrono.workspace = true
futures.workspace = true
http_client.workspace = true
schemars = { workspace = true, optional = true }
anthropic.workspace = true
serde.workspace = true
serde_json.workspace = true
strum.workspace = true
thiserror.workspace = true
workspace-hack.workspace = true
crates/anthropic_vertex_ai/src/anthropic_vertex_ai.rs (new file, 744 lines)
@ -0,0 +1,744 @@
|
|||
use std::time::Duration;
|
||||
|
||||
use anthropic::{AnthropicError, ApiError};
|
||||
use anyhow::{Context as _, Result, anyhow};
|
||||
use chrono::{DateTime, Utc};
|
||||
use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
|
||||
use http_client::http::{HeaderMap, HeaderValue};
|
||||
use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use strum::EnumIter;
|
||||
|
||||
#[derive(Clone, Debug, Default, Deserialize)]
|
||||
pub struct AnthropicVertexAISettings {
|
||||
pub project_id: Option<String>,
|
||||
pub location: Option<String>,
|
||||
}
|
||||
|
||||
pub const ANTHROPIC_API_URL: &str = "https://aiplatform.googleapis.com";
|
||||
|
||||
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
|
||||
pub struct AnthropicVertexModelCacheConfiguration {
|
||||
pub min_total_token: u64,
|
||||
pub should_speculate: bool,
|
||||
pub max_cache_anchors: usize,
|
||||
}

#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum ModelMode {
    #[default]
    Default,
    Thinking {
        budget_tokens: Option<u32>,
    },
}

#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4@20250514")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-thinking")]
    ClaudeOpus4Thinking,
    #[default]
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4@20250514")]
    ClaudeSonnet4,
    #[serde(rename = "claude-sonnet-4-thinking")]
    ClaudeSonnet4Thinking,
    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet@20250219")]
    Claude3_7Sonnet,
    #[serde(rename = "claude-3-7-sonnet-thinking")]
    Claude3_7SonnetThinking,
    #[serde(rename = "custom")]
    Custom {
        name: String,
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicVertexModelCacheConfiguration>,
        max_output_tokens: Option<u64>,
        default_temperature: Option<f32>,
        #[serde(default)]
        mode: ModelMode,
    },
}
impl Model {
    pub fn default_fast() -> Self {
        Self::ClaudeSonnet4
    }

    pub fn from_id(id: &str) -> Result<Self> {
        // Match the `-thinking` ids first: they share a prefix with the
        // corresponding non-thinking ids, so the order of checks matters.
        if id.starts_with("claude-opus-4-thinking") {
            return Ok(Self::ClaudeOpus4Thinking);
        }

        if id.starts_with("claude-opus-4") {
            return Ok(Self::ClaudeOpus4);
        }

        if id.starts_with("claude-sonnet-4-thinking") {
            return Ok(Self::ClaudeSonnet4Thinking);
        }

        if id.starts_with("claude-sonnet-4") {
            return Ok(Self::ClaudeSonnet4);
        }

        if id.starts_with("claude-3-7-sonnet-thinking") {
            return Ok(Self::Claude3_7SonnetThinking);
        }

        if id.starts_with("claude-3-7-sonnet") {
            return Ok(Self::Claude3_7Sonnet);
        }

        Err(anyhow!("invalid model ID: {id}"))
    }
    pub fn id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4",
            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking",
            Self::ClaudeSonnet4 => "claude-sonnet-4",
            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking",
            Self::Claude3_7Sonnet => "claude-3-7-sonnet",
            Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking",
            Self::Custom { name, .. } => name,
        }
    }

    /// The id of the model that should be used for making API requests.
    pub fn request_id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4@20250514",
            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4@20250514",
            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet@20250219",
            Self::Custom { name, .. } => name,
        }
    }
    pub fn display_name(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "Claude Opus 4",
            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
            Self::ClaudeSonnet4 => "Claude Sonnet 4",
            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
            Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
            Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
            Self::Custom {
                name, display_name, ..
            } => display_name.as_ref().unwrap_or(name),
        }
    }

    pub fn cache_configuration(&self) -> Option<AnthropicVertexModelCacheConfiguration> {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking => Some(AnthropicVertexModelCacheConfiguration {
                min_total_token: 2_048,
                should_speculate: true,
                max_cache_anchors: 4,
            }),
            Self::Custom {
                cache_configuration,
                ..
            } => cache_configuration.clone(),
        }
    }

    pub fn max_token_count(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking => 200_000,
            Self::Custom { max_tokens, .. } => *max_tokens,
        }
    }

    pub fn max_output_tokens(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking => 8_192,
            Self::Custom {
                max_output_tokens, ..
            } => max_output_tokens.unwrap_or(4_096),
        }
    }

    pub fn default_temperature(&self) -> f32 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking => 1.0,
            Self::Custom {
                default_temperature,
                ..
            } => default_temperature.unwrap_or(1.0),
        }
    }

    pub fn mode(&self) -> ModelMode {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeSonnet4 | Self::Claude3_7Sonnet => ModelMode::Default,
            Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7SonnetThinking => ModelMode::Thinking {
                budget_tokens: Some(4_096),
            },
            Self::Custom { mode, .. } => mode.clone(),
        }
    }

    pub fn tool_model_id(&self) -> &str {
        if let Self::Custom {
            tool_override: Some(tool_override),
            ..
        } = self
        {
            tool_override
        } else {
            self.request_id()
        }
    }
}
pub async fn complete(
    client: &dyn HttpClient,
    api_url: &str,
    request: Request,
) -> Result<Response, AnthropicError> {
    let uri = format!("https://{api_url}/v1/messages");
    let request_builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(uri)
        .header("Content-Type", "application/json");

    let serialized_request =
        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
    let request = request_builder
        .body(AsyncBody::from(serialized_request))
        .map_err(AnthropicError::BuildRequestBody)?;

    let mut response = client
        .send(request)
        .await
        .map_err(AnthropicError::HttpSend)?;
    let status_code = response.status();
    let mut body = String::new();
    response
        .body_mut()
        .read_to_string(&mut body)
        .await
        .map_err(AnthropicError::ReadResponse)?;

    if status_code.is_success() {
        Ok(serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)?)
    } else {
        Err(AnthropicError::HttpResponseError {
            status_code,
            message: body,
        })
    }
}
pub async fn stream_completion(
    client: &dyn HttpClient,
    api_url: &str,
    project_id: &str,
    location_id: &str,
    access_token: &str,
    request: Request,
) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
    stream_completion_with_rate_limit_info(
        client,
        api_url,
        project_id,
        location_id,
        access_token,
        request,
    )
    .await
    .map(|output| output.0)
}
/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    pub limit: usize,
    pub remaining: usize,
    pub reset: DateTime<Utc>,
}

impl RateLimit {
    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
        let limit =
            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
        let remaining = get_header(
            &format!("anthropic-ratelimit-{resource}-remaining"),
            headers,
        )?
        .parse()?;
        let reset = DateTime::parse_from_rfc3339(get_header(
            &format!("anthropic-ratelimit-{resource}-reset"),
            headers,
        )?)?
        .to_utc();

        Ok(Self {
            limit,
            remaining,
            reset,
        })
    }
}

/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    pub retry_after: Option<Duration>,
    pub requests: Option<RateLimit>,
    pub tokens: Option<RateLimit>,
    pub input_tokens: Option<RateLimit>,
    pub output_tokens: Option<RateLimit>,
}

impl RateLimitInfo {
    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
        // Check whether any rate-limit headers exist.
        let has_rate_limit_headers = headers
            .keys()
            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));

        if !has_rate_limit_headers {
            return Self {
                retry_after: None,
                requests: None,
                tokens: None,
                input_tokens: None,
                output_tokens: None,
            };
        }

        Self {
            retry_after: parse_retry_after(headers),
            requests: RateLimit::from_headers("requests", headers).ok(),
            tokens: RateLimit::from_headers("tokens", headers).ok(),
            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
        }
    }
}

/// Parses the Retry-After header value as an integer number of seconds (Anthropic always uses
/// seconds). Note that other services might specify an HTTP date or some other format for this
/// header. Returns `None` if the header is not present or cannot be parsed.
pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
    headers
        .get("retry-after")
        .and_then(|v| v.to_str().ok())
        .and_then(|v| v.parse::<u64>().ok())
        .map(Duration::from_secs)
}

fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
    Ok(headers
        .get(key)
        .with_context(|| format!("missing header `{key}`"))?
        .to_str()?)
}
pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    project_id: &str,
    location_id: &str,
    access_token: &str,
    request: Request,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    let model_id = request.model.clone();
    let request = StreamingRequest {
        base: request,
        stream: true,
    };

    // The global endpoint has no location prefix; regional endpoints prefix
    // the location onto the host, e.g. `europe-west1-aiplatform.googleapis.com`.
    let endpoint = if location_id == "global" {
        format!("https://{api_url}")
    } else {
        format!("https://{location_id}-{api_url}")
    };

    let uri = format!(
        "{endpoint}/v1/projects/{project_id}/locations/{location_id}/publishers/anthropic/models/{model_id}:streamRawPredict"
    );

    // Authenticate with a bearer token obtained via Application Default
    // Credentials, rather than an API key header.
    let request_builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(uri)
        .header("Authorization", format!("Bearer {access_token}"))
        .header("Content-Type", "application/json");

    let serialized_request =
        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
    let request = request_builder
        .body(AsyncBody::from(serialized_request))
        .map_err(AnthropicError::BuildRequestBody)?;

    let mut response = client
        .send(request)
        .await
        .map_err(AnthropicError::HttpSend)?;
    let rate_limits = RateLimitInfo::from_headers(response.headers());
    if response.status().is_success() {
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        let line = line.strip_prefix("data: ")?;
                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else if response.status().as_u16() == 529 {
        Err(AnthropicError::ServerOverloaded {
            retry_after: rate_limits.retry_after,
        })
    } else if let Some(retry_after) = rate_limits.retry_after {
        Err(AnthropicError::RateLimit { retry_after })
    } else {
        let mut body = String::new();
        response
            .body_mut()
            .read_to_string(&mut body)
            .await
            .map_err(AnthropicError::ReadResponse)?;

        match serde_json::from_str::<Event>(&body) {
            Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
            Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
                status_code: response.status(),
                message: body,
            }),
        }
    }
}
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}

#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}

#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    #[serde(rename = "text")]
    Text { text: String },
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}

#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    pub input_schema: serde_json::Value,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    Tool { name: String },
    None,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    Enabled { budget_tokens: Option<u32> },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    #[serde(skip)]
    pub model: String,
    pub anthropic_version: String,
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}

#[derive(Debug, Serialize, Deserialize)]
struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}

#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    #[serde(rename = "message_stop")]
    MessageStop,
    #[serde(rename = "ping")]
    Ping,
    #[serde(rename = "error")]
    Error { error: ApiError },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}

#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}

pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    message
        .strip_prefix("prompt is too long: ")?
        .split_once(" tokens")?
        .0
        .parse()
        .ok()
}

#[test]
fn test_match_window_exceeded() {
    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: 220000 tokens > 200000".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), Some(220_000));

    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: 1234953 tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), Some(1234953));

    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "not a prompt length error".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);

    let error = ApiError {
        error_type: "rate_limit_error".to_string(),
        message: "prompt is too long: 12345 tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);

    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: invalid tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);
}
crates/anthropic_vertex_ai/task.md (new file, 79 lines)
@@ -0,0 +1,79 @@
**Task ID: TA001 - Integrate Anthropic Models via Google Vertex AI**

**Objective:**
To develop a new language model provider, `anthropic_vertex_ai`, that integrates Anthropic's models (e.g., Claude) into the Zed editor via the Google Cloud Vertex AI platform.

**Background:**
While Zed has a direct integration with Anthropic's API, many users operate within the Google Cloud ecosystem. Vertex AI provides access to third-party models such as Anthropic's through its own endpoint. This task involves creating a new provider that bridges the existing `anthropic` API logic with the authentication and endpoint requirements of Google Cloud.

This integration will not use explicit API keys. Instead, it will leverage Google's Application Default Credentials (ADC), a standard mechanism for authenticating GCP services, ensuring a secure and streamlined user experience. Configuration will be provided through `settings.json` to specify the required `project_id` and `location` for the Vertex AI endpoint.
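
The token flow is roughly the following (a minimal sketch assuming the `gcp_auth` crate mentioned in Action 1.3 below; the function name, crate version, and error handling are illustrative, not the shipped implementation):

```rust
use gcp_auth::AuthenticationManager;

/// Fetches an OAuth2 access token via Application Default Credentials.
/// Credentials are resolved from the environment: `gcloud auth
/// application-default login`, a service-account key file, or the
/// GCE/GKE metadata server.
async fn fetch_access_token() -> anyhow::Result<String> {
    let manager = AuthenticationManager::new().await?;
    let scopes = &["https://www.googleapis.com/auth/cloud-platform"];
    let token = manager.get_token(scopes).await?;
    Ok(token.as_str().to_string())
}
```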

**Key Requirements:**
- **Authentication:** Must use Google Cloud's Application Default Credentials (ADC) for all API requests. The implementation should not handle manual tokens.
- **Configuration:** The provider must be configurable via `settings.json`, allowing the user to specify their Google Cloud `project_id` and `location`.
- **Endpoint Construction:** Must dynamically construct the correct Vertex AI endpoint URL for each request, in the format: `https://$LOCATION-aiplatform.googleapis.com/v1/projects/$PROJECT_ID/locations/$LOCATION/publishers/anthropic/models/$MODEL:streamRawPredict`.
- **Payload Adaptation:** The JSON payload sent to the endpoint must be modified to (see the example after this list):
  - Include the mandatory field: `"anthropic_version": "vertex-2023-10-16"`.
  - Exclude the `model` field, as it is specified in the URL.
- **Integration:** The new provider must be a first-class citizen within Zed, appearing in the model selection list and functioning identically to other integrated providers.
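
For concreteness, a streaming request body following these rules might look like this (illustrative values; note that there is no `model` field, and `stream` is contributed by the streaming wrapper):

```json
{
  "anthropic_version": "vertex-2023-10-16",
  "max_tokens": 8192,
  "messages": [
    { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude." }] }
  ],
  "stream": true
}
```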

**Implementation Plan:**

**Step 1: Foundational Analysis & Crate Setup**

* **Action 1.1: Analyze `google_vertex` Crate:** Thoroughly examine `crates/google_vertex/src/google_vertex.rs` to understand its implementation of ADC-based authentication and how it reads settings like `project_id` and `location`. This will serve as the template for our authentication logic.
* **Action 1.2: Define Configuration Struct:** In a new file, `crates/anthropic_vertex_ai/src/lib.rs`, define the `AnthropicVertexAISettings` struct. This struct will deserialize the `project_id` and `location` from the user's `settings.json`.
* **Action 1.3: Update `Cargo.toml`:** Create/update the `Cargo.toml` file for the `anthropic_vertex_ai` crate. It should include dependencies from both `anthropic` (for serde structs) and `google_vertex` (for GCP-related dependencies like `gcp_auth`).
* **Action 1.4: Create `lib.rs`:** Ensure `crates/anthropic_vertex_ai/src/lib.rs` exists to house the `LanguageModelProvider` implementation and serve as the crate's entry point.

**Step 2: Adapt Core Anthropic Logic**

* **Action 2.1: Modify `Request` Struct:** In `crates/anthropic_vertex_ai/src/anthropic_vertex_ai.rs`, modify the main `Request` struct:
  - Add a new field: `pub anthropic_version: &'static str`.
  - Remove the existing `pub model: String` field.
* **Action 2.2: Refactor Completion Functions:** Refactor the `stream_completion_with_rate_limit_info` function to be more generic.
  - It will now accept the fully-constructed Vertex AI endpoint URL as a parameter.
  - It will accept an ADC-aware `HttpClient` instance instead of a simple API key.
  - The logic for setting the `Authorization` header will be updated to use a `Bearer` token provided by the `HttpClient`.

**Step 3: Implement the `LanguageModelProvider`**

* **Action 3.1: Define Provider Struct:** In `crates/anthropic_vertex_ai/src/lib.rs`, define the main `AnthropicVertexAIProvider` struct. It will store the settings defined in Action 1.2.
* **Action 3.2: Implement `LanguageModelProvider` Trait:** Implement the `language_model::LanguageModelProvider` trait for `AnthropicVertexAIProvider`.
* **Action 3.3: Implement Core Logic:** The trait methods will contain the central logic:
  1. On initialization, the provider will create an `HttpClient` configured to use Google's ADC, following the pattern in the `google_vertex` crate.
  2. For each completion request, it will dynamically construct the full, model-specific Vertex AI URL using the configured `project_id`, `location`, and the requested model name (see the sketch after this list).
  3. It will create an instance of the modified `Request` struct from `anthropic_vertex_ai.rs`, setting the `anthropic_version` field correctly.
  4. Finally, it will call the refactored `stream_completion_with_rate_limit_info` function, passing the authenticated client and the constructed request.
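
A minimal sketch of the URL construction described in item 2 (a hypothetical free-standing helper; the committed code inlines this logic in `stream_completion_with_rate_limit_info`):

```rust
/// Builds the Vertex AI endpoint for a model. The `global` location uses the
/// bare host; regional locations are prefixed onto it, e.g.
/// `europe-west1-aiplatform.googleapis.com`.
fn vertex_url(location: &str, project: &str, model: &str) -> String {
    let host = if location == "global" {
        "aiplatform.googleapis.com".to_string()
    } else {
        format!("{location}-aiplatform.googleapis.com")
    };
    format!(
        "https://{host}/v1/projects/{project}/locations/{location}/publishers/anthropic/models/{model}:streamRawPredict"
    )
}
```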

**Step 4: Final Integration**

* **Action 4.1: Workspace Integration:** Add `anthropic_vertex_ai` to the main workspace `Cargo.toml` to link the new crate.
* **Action 4.2: Module Declaration:** Add `pub mod anthropic_vertex;` to `crates/language_models/src/provider.rs` to make the module visible.
* **Action 4.3: Provider Registration:** In `crates/language_models/src/lib.rs`, update the central list of language model providers to include an instance of `AnthropicVertexAIProvider`.

**Verification Plan:**

* **Compile-Time Verification:** At each major step, ask the human to review the code for compilation errors and adherence to project standards.
* **Configuration Verification:** The implementation will be tested against a `settings.json` file configured as follows:
```json
"language_models": {
  "anthropic_vertex": {
    "project_id": "your-gcp-project-id",
    "location_id": "europe-west1"
  }
},
"assistant": {
  "default_model": {
    "provider": "anthropic-vertex",
    "name": "claude-sonnet-4@20250514"
  }
}
```

* **Runtime Verification:**
  1. Launch Zed with the above configuration.
  2. Ensure the local environment is authenticated with GCP (e.g., via `gcloud auth application-default login`).
  3. Open the assistant panel and confirm that `"anthropic-vertex/claude-sonnet-4@20250514"` is the selected model.
  4. Send a test prompt to the assistant.
  5. **Success Condition:** A valid, streamed response is received from the assistant, confirming that the entire chain—from configuration and authentication to request execution and response parsing—is working correctly.
crates/language_models/Cargo.toml
@@ -33,6 +33,7 @@ fs.workspace = true
futures.workspace = true
google_ai = { workspace = true, features = ["schemars"] }
google_vertex_ai = { workspace = true, features = ["schemars"] }
anthropic_vertex_ai = { workspace = true, features = ["schemars"] }
gpui.workspace = true
gpui_tokio.workspace = true
http_client.workspace = true
crates/language_models/src/lib.rs
@@ -10,6 +10,7 @@ mod settings;
pub mod ui;

use crate::provider::anthropic::AnthropicLanguageModelProvider;
use crate::provider::anthropic_vertex::AnthropicVertexLanguageModelProvider;
use crate::provider::bedrock::BedrockLanguageModelProvider;
use crate::provider::cloud::CloudLanguageModelProvider;
use crate::provider::copilot_chat::CopilotChatLanguageModelProvider;
@@ -72,6 +73,11 @@ fn register_language_model_providers(
        GoogleVertexLanguageModelProvider::new(client.http_client(), cx),
        cx,
    );
    registry.register_provider(
        AnthropicVertexLanguageModelProvider::new(client.http_client(), cx),
        cx,
    );
    registry.register_provider(
        MistralLanguageModelProvider::new(client.http_client(), cx),
        cx,
crates/language_models/src/provider.rs
@@ -1,4 +1,5 @@
pub mod anthropic;
pub mod anthropic_vertex;
pub mod bedrock;
pub mod cloud;
pub mod copilot_chat;
crates/language_models/src/provider/anthropic_vertex.rs (new file, 1119 lines)
File diff suppressed because it is too large.
crates/language_models/src/settings.rs
@@ -7,6 +7,7 @@ use settings::{Settings, SettingsSources};
use crate::provider::{
    self,
    anthropic::AnthropicSettings,
    anthropic_vertex::AnthropicVertexSettings,
    bedrock::AmazonBedrockSettings,
    cloud::{self, ZedDotDevSettings},
    deepseek::DeepSeekSettings,
@@ -33,6 +34,7 @@ pub struct AllLanguageModelSettings {
    pub deepseek: DeepSeekSettings,
    pub google: GoogleSettings,
    pub google_vertex: GoogleVertexSettings,
    pub anthropic_vertex: AnthropicVertexSettings,
    pub lmstudio: LmStudioSettings,
    pub mistral: MistralSettings,
    pub ollama: OllamaSettings,
@@ -50,6 +52,7 @@ pub struct AllLanguageModelSettingsContent {
    pub deepseek: Option<DeepseekSettingsContent>,
    pub google: Option<GoogleSettingsContent>,
    pub google_vertex: Option<GoogleVertexSettingsContent>,
    pub anthropic_vertex: Option<AnthropicVertexSettingsContent>,
    pub lmstudio: Option<LmStudioSettingsContent>,
    pub mistral: Option<MistralSettingsContent>,
    pub ollama: Option<OllamaSettingsContent>,
@@ -126,6 +129,14 @@ pub struct GoogleVertexSettingsContent {
    pub available_models: Option<Vec<provider::google_vertex::AvailableModel>>,
}

#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct AnthropicVertexSettingsContent {
    pub api_url: Option<String>,
    pub project_id: Option<String>,
    pub location_id: Option<String>,
    pub available_models: Option<Vec<provider::anthropic_vertex::AvailableModel>>,
}

#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct XAiSettingsContent {
    pub api_url: Option<String>,
@@ -322,6 +333,29 @@ impl settings::Settings for AllLanguageModelSettings {
                    .as_ref()
                    .and_then(|s| s.location_id.clone()),
            );

            // Anthropic Vertex AI
            merge(
                &mut settings.anthropic_vertex.api_url,
                value
                    .anthropic_vertex
                    .as_ref()
                    .and_then(|s| s.api_url.clone()),
            );
            merge(
                &mut settings.anthropic_vertex.project_id,
                value
                    .anthropic_vertex
                    .as_ref()
                    .and_then(|s| s.project_id.clone()),
            );
            merge(
                &mut settings.anthropic_vertex.location_id,
                value
                    .anthropic_vertex
                    .as_ref()
                    .and_then(|s| s.location_id.clone()),
            );
        }

        Ok(settings)