PARTIAL feat: Integrate Anthropic models via Vertex AI
This commit introduces a new language model provider, `anthropic_vertex_ai`, that integrates Anthropic's models via the Google Cloud Vertex AI platform. The implementation uses Google Cloud's Application Default Credentials (ADC) for authentication and fetches `project_id` and `location_id` from the user's settings.
parent 132f0dd36a
commit 59073836c7

9 changed files with 2014 additions and 0 deletions
Cargo.toml (workspace)
@@ -285,6 +285,7 @@ git_ui = { path = "crates/git_ui" }
 go_to_line = { path = "crates/go_to_line" }
 google_ai = { path = "crates/google_ai" }
 google_vertex_ai = { path = "crates/google_vertex_ai" }
+anthropic_vertex_ai = { path = "crates/anthropic_vertex_ai" }
 gpui = { path = "crates/gpui", default-features = false, features = [
     "http_client",
 ] }
crates/anthropic_vertex_ai/Cargo.toml (new file, 29 lines)
@@ -0,0 +1,29 @@
[package]
name = "anthropic_vertex_ai"
version = "0.1.0"
edition.workspace = true
publish.workspace = true
license = "GPL-3.0-or-later"

[features]
default = []
schemars = ["dep:schemars"]

[lints]
workspace = true

[lib]
path = "src/anthropic_vertex_ai.rs"

[dependencies]
anyhow.workspace = true
chrono.workspace = true
futures.workspace = true
http_client.workspace = true
schemars = { workspace = true, optional = true }
anthropic.workspace = true
serde.workspace = true
serde_json.workspace = true
strum.workspace = true
thiserror.workspace = true
workspace-hack.workspace = true
crates/anthropic_vertex_ai/src/anthropic_vertex_ai.rs (new file, 744 lines)
@@ -0,0 +1,744 @@
use std::time::Duration;

use anthropic::{AnthropicError, ApiError};
use anyhow::{Context as _, Result, anyhow};
use chrono::{DateTime, Utc};
use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
use http_client::http::{HeaderMap, HeaderValue};
use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
use serde::{Deserialize, Serialize};
use strum::EnumIter;

#[derive(Clone, Debug, Default, Deserialize)]
pub struct AnthropicVertexAISettings {
    pub project_id: Option<String>,
    pub location: Option<String>,
}

pub const ANTHROPIC_API_URL: &str = "https://aiplatform.googleapis.com";

#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicVertexModelCacheConfiguration {
    pub min_total_token: u64,
    pub should_speculate: bool,
    pub max_cache_anchors: usize,
}

#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum ModelMode {
    #[default]
    Default,
    Thinking {
        budget_tokens: Option<u32>,
    },
}

#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4@20250514")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-thinking")]
    ClaudeOpus4Thinking,
    #[default]
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4@20250514")]
    ClaudeSonnet4,
    #[serde(rename = "claude-sonnet-4-thinking")]
    ClaudeSonnet4Thinking,
    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet@20250219")]
    Claude3_7Sonnet,
    #[serde(rename = "claude-3-7-sonnet-thinking")]
    Claude3_7SonnetThinking,
    #[serde(rename = "custom")]
    Custom {
        name: String,
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicVertexModelCacheConfiguration>,
        max_output_tokens: Option<u64>,
        default_temperature: Option<f32>,
        #[serde(default)]
        mode: ModelMode,
    },
}

impl Model {
    pub fn default_fast() -> Self {
        Self::ClaudeSonnet4
    }

    pub fn from_id(id: &str) -> Result<Self> {
        // Match the "-thinking" variants first: their IDs share a prefix with
        // the non-thinking variants, so the order of these checks matters.
        if id.starts_with("claude-opus-4-thinking") {
            return Ok(Self::ClaudeOpus4Thinking);
        }
        if id.starts_with("claude-opus-4") {
            return Ok(Self::ClaudeOpus4);
        }
        if id.starts_with("claude-sonnet-4-thinking") {
            return Ok(Self::ClaudeSonnet4Thinking);
        }
        if id.starts_with("claude-sonnet-4") {
            return Ok(Self::ClaudeSonnet4);
        }
        if id.starts_with("claude-3-7-sonnet-thinking") {
            return Ok(Self::Claude3_7SonnetThinking);
        }
        if id.starts_with("claude-3-7-sonnet") {
            return Ok(Self::Claude3_7Sonnet);
        }
        Err(anyhow!("invalid model ID: {id}"))
    }

    pub fn id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4@20250514",
            Self::ClaudeOpus4Thinking => "claude-opus-4@20250514",
            Self::ClaudeSonnet4 => "claude-sonnet-4@20250514",
            Self::ClaudeSonnet4Thinking => "claude-sonnet-4@20250514",
            Self::Claude3_7Sonnet => "claude-3-7-sonnet@20250219",
            Self::Claude3_7SonnetThinking => "claude-3-7-sonnet@20250219",
            Self::Custom { name, .. } => name,
        }
    }

    /// The id of the model that should be used for making API requests.
    pub fn request_id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4@20250514",
            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4@20250514",
            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet@20250219",
            Self::Custom { name, .. } => name,
        }
    }

    pub fn display_name(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "Claude Opus 4",
            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
            Self::ClaudeSonnet4 => "Claude Sonnet 4",
            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
            Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
            Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
            Self::Custom {
                name, display_name, ..
            } => display_name.as_ref().unwrap_or(name),
        }
    }

    pub fn cache_configuration(&self) -> Option<AnthropicVertexModelCacheConfiguration> {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking => Some(AnthropicVertexModelCacheConfiguration {
                min_total_token: 2_048,
                should_speculate: true,
                max_cache_anchors: 4,
            }),
            Self::Custom {
                cache_configuration,
                ..
            } => cache_configuration.clone(),
        }
    }

    pub fn max_token_count(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking => 200_000,
            Self::Custom { max_tokens, .. } => *max_tokens,
        }
    }

    pub fn max_output_tokens(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking => 8_192,
            Self::Custom {
                max_output_tokens, ..
            } => max_output_tokens.unwrap_or(4_096),
        }
    }

    pub fn default_temperature(&self) -> f32 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking => 1.0,
            Self::Custom {
                default_temperature,
                ..
            } => default_temperature.unwrap_or(1.0),
        }
    }

    pub fn mode(&self) -> ModelMode {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeSonnet4 | Self::Claude3_7Sonnet => ModelMode::Default,
            Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7SonnetThinking => ModelMode::Thinking {
                budget_tokens: Some(4_096),
            },
            Self::Custom { mode, .. } => mode.clone(),
        }
    }

    pub fn tool_model_id(&self) -> &str {
        if let Self::Custom {
            tool_override: Some(tool_override),
            ..
        } = self
        {
            tool_override
        } else {
            self.request_id()
        }
    }
}

pub async fn complete(
    client: &dyn HttpClient,
    api_url: &str,
    request: Request,
) -> Result<Response, AnthropicError> {
    let uri = format!("{api_url}/v1/messages");
    let request_builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(uri)
        .header("Content-Type", "application/json");

    let serialized_request =
        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
    let request = request_builder
        .body(AsyncBody::from(serialized_request))
        .map_err(AnthropicError::BuildRequestBody)?;

    let mut response = client
        .send(request)
        .await
        .map_err(AnthropicError::HttpSend)?;
    let status_code = response.status();
    let mut body = String::new();
    response
        .body_mut()
        .read_to_string(&mut body)
        .await
        .map_err(AnthropicError::ReadResponse)?;

    if status_code.is_success() {
        Ok(serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)?)
    } else {
        Err(AnthropicError::HttpResponseError {
            status_code,
            message: body,
        })
    }
}

pub async fn stream_completion(
    client: &dyn HttpClient,
    api_url: &str,
    project_id: &str,
    location_id: &str,
    access_token: &str,
    request: Request,
) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
    stream_completion_with_rate_limit_info(
        client,
        api_url,
        project_id,
        location_id,
        access_token,
        request,
    )
    .await
    .map(|output| output.0)
}

/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    pub limit: usize,
    pub remaining: usize,
    pub reset: DateTime<Utc>,
}

impl RateLimit {
    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
        let limit =
            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
        let remaining = get_header(
            &format!("anthropic-ratelimit-{resource}-remaining"),
            headers,
        )?
        .parse()?;
        let reset = DateTime::parse_from_rfc3339(get_header(
            &format!("anthropic-ratelimit-{resource}-reset"),
            headers,
        )?)?
        .to_utc();

        Ok(Self {
            limit,
            remaining,
            reset,
        })
    }
}

/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    pub retry_after: Option<Duration>,
    pub requests: Option<RateLimit>,
    pub tokens: Option<RateLimit>,
    pub input_tokens: Option<RateLimit>,
    pub output_tokens: Option<RateLimit>,
}

impl RateLimitInfo {
    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
        // Check if any rate limit headers exist.
        let has_rate_limit_headers = headers
            .keys()
            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));

        if !has_rate_limit_headers {
            return Self {
                retry_after: None,
                requests: None,
                tokens: None,
                input_tokens: None,
                output_tokens: None,
            };
        }

        Self {
            retry_after: parse_retry_after(headers),
            requests: RateLimit::from_headers("requests", headers).ok(),
            tokens: RateLimit::from_headers("tokens", headers).ok(),
            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
        }
    }
}

/// Parses the Retry-After header value as an integer number of seconds (Anthropic always uses
/// seconds). Note that other services might specify an HTTP date or some other format for this
/// header. Returns `None` if the header is not present or cannot be parsed.
pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
    headers
        .get("retry-after")
        .and_then(|v| v.to_str().ok())
        .and_then(|v| v.parse::<u64>().ok())
        .map(Duration::from_secs)
}

fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
    Ok(headers
        .get(key)
        .with_context(|| format!("missing header `{key}`"))?
        .to_str()?)
}

pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    project_id: &str,
    location_id: &str,
    access_token: &str,
    request: Request,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    let model_id = request.model.clone();
    let request = StreamingRequest {
        base: request,
        stream: true,
    };

    // `api_url` is expected to be a bare host (e.g. "aiplatform.googleapis.com").
    // The "global" location uses the host as-is; regional locations are prefixed.
    let endpoint = if location_id == "global" {
        format!("https://{api_url}")
    } else {
        format!("https://{location_id}-{api_url}")
    };

    let uri = format!(
        "{endpoint}/v1/projects/{project_id}/locations/{location_id}/publishers/anthropic/models/{model_id}:streamRawPredict"
    );

    // Authorize the request with a Bearer token obtained via Application Default Credentials.
    let request_builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(uri)
        .header("Authorization", format!("Bearer {}", access_token))
        .header("Content-Type", "application/json");

    let serialized_request =
        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
    let request = request_builder
        .body(AsyncBody::from(serialized_request))
        .map_err(AnthropicError::BuildRequestBody)?;

    let mut response = client
        .send(request)
        .await
        .map_err(AnthropicError::HttpSend)?;
    let rate_limits = RateLimitInfo::from_headers(response.headers());
    if response.status().is_success() {
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        let line = line.strip_prefix("data: ")?;
                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else if response.status().as_u16() == 529 {
        Err(AnthropicError::ServerOverloaded {
            retry_after: rate_limits.retry_after,
        })
    } else if let Some(retry_after) = rate_limits.retry_after {
        Err(AnthropicError::RateLimit { retry_after })
    } else {
        let mut body = String::new();
        response
            .body_mut()
            .read_to_string(&mut body)
            .await
            .map_err(AnthropicError::ReadResponse)?;

        match serde_json::from_str::<Event>(&body) {
            Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
            Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
                status_code: response.status(),
                message: body,
            }),
        }
    }
}

#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}

#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}

#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    #[serde(rename = "text")]
    Text { text: String },
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}

#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    pub input_schema: serde_json::Value,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    Tool { name: String },
    None,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    Enabled { budget_tokens: Option<u32> },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    #[serde(skip)]
    pub model: String,
    pub anthropic_version: String,
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}

#[derive(Debug, Serialize, Deserialize)]
struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}

#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    #[serde(rename = "message_stop")]
    MessageStop,
    #[serde(rename = "ping")]
    Ping,
    #[serde(rename = "error")]
    Error { error: ApiError },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}

#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}

pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    message
        .strip_prefix("prompt is too long: ")?
        .split_once(" tokens")?
        .0
        .parse()
        .ok()
}

#[test]
fn test_match_window_exceeded() {
    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: 220000 tokens > 200000".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), Some(220_000));

    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: 1234953 tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), Some(1234953));

    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "not a prompt length error".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);

    let error = ApiError {
        error_type: "rate_limit_error".to_string(),
        message: "prompt is too long: 12345 tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);

    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: invalid tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);
}
crates/anthropic_vertex_ai/task.md (new file, 79 lines)
@@ -0,0 +1,79 @@
**Task ID: TA001 - Integrate Anthropic Models via Google Vertex AI**
**Objective:**

To develop a new language model provider, `anthropic_vertex_ai`, that seamlessly integrates Anthropic's models (e.g., Claude) into the Zed editor via the Google Cloud Vertex AI platform.

**Background:**

While Zed has a direct integration with Anthropic's API, many users operate within the Google Cloud ecosystem. Vertex AI provides access to third-party models like Anthropic's through its own endpoint. This task involves creating a new provider that bridges the existing `anthropic` API logic with the authentication and endpoint requirements of Google Cloud.

This integration will not use explicit API keys. Instead, it will leverage Google's Application Default Credentials (ADC), a standard mechanism for authenticating GCP services, ensuring a secure and streamlined user experience. Configuration will be provided through `settings.json` to specify the required `project_id` and `location` for the Vertex AI endpoint.
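
In practice the ADC flow boils down to fetching a short-lived Bearer token before each request. A minimal sketch, assuming the `gcp_auth` crate (named as a dependency in Action 1.3 below) and its `AuthenticationManager` API; the helper name and error handling are illustrative, not the final implementation:

```rust
use gcp_auth::AuthenticationManager;

/// Fetch a Bearer token via Application Default Credentials (ADC).
/// Assumes the environment is authenticated, e.g. via
/// `gcloud auth application-default login`.
async fn adc_access_token() -> anyhow::Result<String> {
    let manager = AuthenticationManager::new().await?;
    // Vertex AI requests are authorized with the cloud-platform scope.
    let scopes = &["https://www.googleapis.com/auth/cloud-platform"];
    let token = manager.get_token(scopes).await?;
    Ok(token.as_str().to_string())
}
```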

**Key Requirements:**

- **Authentication:** Must use Google Cloud's Application Default Credentials (ADC) for all API requests. The implementation should not handle manual tokens.
- **Configuration:** The provider must be configurable via `settings.json`, allowing the user to specify their Google Cloud `project_id` and `location`.
- **Endpoint Construction:** Must dynamically construct the correct Vertex AI endpoint URL for each request, in the format: `https://$LOCATION-aiplatform.googleapis.com/v1/projects/$PROJECT_ID/locations/$LOCATION/publishers/anthropic/models/$MODEL:streamRawPredict` (see the sketch after this list).
- **Payload Adaptation:** The JSON payload sent to the endpoint must be modified to:
  - Include the mandatory field: `"anthropic_version": "vertex-2023-10-16"`.
  - Exclude the `model` field, as it is specified in the URL.
- **Integration:** The new provider must be a first-class citizen within Zed, appearing in the model selection list and functioning identically to other integrated providers.

**Implementation Plan:**

**Step 1: Foundational Analysis & Crate Setup**

* **Action 1.1: Analyze `google_vertex` Crate:** Thoroughly examine `crates/google_vertex/src/google_vertex.rs` to understand its implementation of ADC-based authentication and how it reads settings like `project_id` and `location`. This will serve as the template for our authentication logic.
* **Action 1.2: Define Configuration Struct:** In a new file, `crates/anthropic_vertex_ai/src/lib.rs`, define the `AnthropicVertexAISettings` struct. This struct will deserialize the `project_id` and `location` from the user's `settings.json`.
* **Action 1.3: Update `Cargo.toml`:** Create/update the `Cargo.toml` file for the `anthropic_vertex_ai` crate. It should include dependencies from both `anthropic` (for serde structs) and `google_vertex` (for GCP-related dependencies like `gcp_auth`).
* **Action 1.4: Create `lib.rs`:** Ensure `crates/anthropic_vertex_ai/src/lib.rs` exists to house the `LanguageModelProvider` implementation and serve as the crate's entry point.

**Step 2: Adapt Core Anthropic Logic**

* **Action 2.1: Modify `Request` Struct:** In `crates/anthropic_vertex_ai/src/anthropic_vertex_ai.rs`, modify the main `Request` struct (see the sketch after this list):
  - Add a new field: `pub anthropic_version: &'static str`.
  - Remove the existing `pub model: String` field.
* **Action 2.2: Refactor Completion Functions:** Refactor the `stream_completion_with_rate_limit_info` function to be more generic.
  - It will now accept the fully-constructed Vertex AI endpoint URL as a parameter.
  - It will accept an ADC-aware `HttpClient` instance instead of a simple API key.
  - The logic for setting the `Authorization` header will be updated to use a `Bearer` token provided by the `HttpClient`.
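
Note that the committed `Request` struct takes a slightly different route than Action 2.1 describes: it keeps `model` on the struct (the URL still needs it) but marks it `#[serde(skip)]`, which yields the same wire format. A self-contained sketch of the resulting JSON, with an illustrative struct name and field subset:

```rust
use serde::Serialize;

/// Illustrative subset of the adapted request: `model` stays on the struct
/// for URL construction but is skipped during serialization, and the
/// mandatory `anthropic_version` field is added.
#[derive(Serialize)]
struct VertexRequestSketch {
    #[serde(skip)]
    model: String,
    anthropic_version: String,
    max_tokens: u64,
}

fn main() {
    let request = VertexRequestSketch {
        model: "claude-sonnet-4@20250514".into(),
        anthropic_version: "vertex-2023-10-16".into(),
        max_tokens: 8_192,
    };
    // Prints: {"anthropic_version":"vertex-2023-10-16","max_tokens":8192}
    println!("{}", serde_json::to_string(&request).unwrap());
}
```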

**Step 3: Implement the `LanguageModelProvider`**

* **Action 3.1: Define Provider Struct:** In `crates/anthropic_vertex_ai/src/lib.rs`, define the main `AnthropicVertexAIProvider` struct. It will store the settings defined in Action 1.2.
* **Action 3.2: Implement `LanguageModelProvider` Trait:** Implement the `language_model::LanguageModelProvider` trait for `AnthropicVertexAIProvider`.
* **Action 3.3: Implement Core Logic:** The trait methods will contain the central logic (see the call sketch after this list):
  1. On initialization, the provider will create an `HttpClient` configured to use Google's ADC, following the pattern in the `google_vertex` crate.
  2. For each completion request, it will dynamically construct the full, model-specific Vertex AI URL using the configured `project_id`, `location`, and the requested model name.
  3. It will create an instance of the modified `Request` struct from `anthropic_vertex_ai.rs`, setting the `anthropic_version` field correctly.
  4. Finally, it will call the refactored `stream_completion_with_rate_limit_info` function, passing the authenticated client and the constructed request.
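
A hedged sketch of that call path against the committed `stream_completion` signature; the `client`, `token`, and `request` values are assumed to be supplied by the provider, and the project/location literals are placeholders:

```rust
use anthropic_vertex_ai::{Request, stream_completion};
use futures::StreamExt;
use http_client::HttpClient;

/// Sketch of the provider's request path.
async fn send(client: &dyn HttpClient, token: &str, request: Request) -> anyhow::Result<()> {
    let mut events = stream_completion(
        client,
        "aiplatform.googleapis.com", // bare host; scheme and regional prefix are added internally
        "your-gcp-project-id",       // from settings.project_id
        "europe-west1",              // from settings.location
        token,                       // ADC Bearer token
        request,                     // Request with anthropic_version set
    )
    .await?;

    // Drain the SSE stream of `Event`s as they arrive.
    while let Some(event) = events.next().await {
        println!("{:?}", event?);
    }
    Ok(())
}
```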

**Step 4: Final Integration**

* **Action 4.1: Workspace Integration:** Add `anthropic_vertex_ai` to the main workspace `Cargo.toml` to link the new crate.
* **Action 4.2: Module Declaration:** Add `pub mod anthropic_vertex;` to `crates/language_models/src/provider.rs` to make the module visible.
* **Action 4.3: Provider Registration:** In `crates/language_models/src/lib.rs`, update the central list of language model providers to include an instance of `AnthropicVertexLanguageModelProvider`.

**Verification Plan:**

* **Compile-Time Verification:** At each major step, ask the human to review the code for compilation errors and adherence to project standards.
* **Configuration Verification:** The implementation will be tested against a `settings.json` file configured as follows:

```json
"language_models": {
  "anthropic-vertex": {
    "enabled": true,
    "project_id": "your-gcp-project-id",
    "location": "europe-west1"
  }
},
"assistant": {
  "default_model": {
    "provider": "anthropic-vertex",
    "name": "claude-sonnet-4@20250514"
  }
}
```
* **Runtime Verification:**
  1. Launch Zed with the above configuration.
  2. Ensure the local environment is authenticated with GCP (e.g., via `gcloud auth application-default login`).
  3. Open the assistant panel and confirm that `"anthropic-vertex/claude-sonnet-4@20250514"` is the selected model.
  4. Send a test prompt to the assistant.
  5. **Success Condition:** A valid, streamed response is received from the assistant, confirming that the entire chain, from configuration and authentication to request execution and response parsing, is working correctly.
crates/language_models/Cargo.toml
@@ -33,6 +33,7 @@ fs.workspace = true
 futures.workspace = true
 google_ai = { workspace = true, features = ["schemars"] }
 google_vertex_ai = { workspace = true, features = ["schemars"] }
+anthropic_vertex_ai = { workspace = true, features = ["schemars"] }
 gpui.workspace = true
 gpui_tokio.workspace = true
 http_client.workspace = true
crates/language_models/src/lib.rs
@@ -10,6 +10,7 @@ mod settings;
 pub mod ui;

 use crate::provider::anthropic::AnthropicLanguageModelProvider;
+use crate::provider::anthropic_vertex::AnthropicVertexLanguageModelProvider;
 use crate::provider::bedrock::BedrockLanguageModelProvider;
 use crate::provider::cloud::CloudLanguageModelProvider;
 use crate::provider::copilot_chat::CopilotChatLanguageModelProvider;
@@ -72,6 +73,11 @@ fn register_language_model_providers(
         GoogleVertexLanguageModelProvider::new(client.http_client(), cx),
         cx,
     );
+    // Register the Anthropic Vertex AI provider.
+    registry.register_provider(
+        AnthropicVertexLanguageModelProvider::new(client.http_client(), cx),
+        cx,
+    );
     registry.register_provider(
         MistralLanguageModelProvider::new(client.http_client(), cx),
         cx,
crates/language_models/src/provider.rs
@@ -1,4 +1,5 @@
 pub mod anthropic;
+pub mod anthropic_vertex;
 pub mod bedrock;
 pub mod cloud;
 pub mod copilot_chat;
crates/language_models/src/provider/anthropic_vertex.rs (new file, 1119 lines)
File diff suppressed because it is too large.
crates/language_models/src/settings.rs
@@ -7,6 +7,7 @@ use settings::{Settings, SettingsSources};
 use crate::provider::{
     self,
     anthropic::AnthropicSettings,
+    anthropic_vertex::AnthropicVertexSettings,
     bedrock::AmazonBedrockSettings,
     cloud::{self, ZedDotDevSettings},
     deepseek::DeepSeekSettings,
@@ -33,6 +34,7 @@ pub struct AllLanguageModelSettings {
     pub deepseek: DeepSeekSettings,
     pub google: GoogleSettings,
     pub google_vertex: GoogleVertexSettings,
+    pub anthropic_vertex: AnthropicVertexSettings,
     pub lmstudio: LmStudioSettings,
     pub mistral: MistralSettings,
     pub ollama: OllamaSettings,
@@ -50,6 +52,7 @@ pub struct AllLanguageModelSettingsContent {
     pub deepseek: Option<DeepseekSettingsContent>,
     pub google: Option<GoogleSettingsContent>,
     pub google_vertex: Option<GoogleVertexSettingsContent>,
+    pub anthropic_vertex: Option<AnthropicVertexSettingsContent>,
     pub lmstudio: Option<LmStudioSettingsContent>,
     pub mistral: Option<MistralSettingsContent>,
     pub ollama: Option<OllamaSettingsContent>,
@@ -126,6 +129,14 @@ pub struct GoogleVertexSettingsContent {
     pub available_models: Option<Vec<provider::google_vertex::AvailableModel>>,
 }

+#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
+pub struct AnthropicVertexSettingsContent {
+    pub api_url: Option<String>,
+    pub project_id: Option<String>,
+    pub location_id: Option<String>,
+    pub available_models: Option<Vec<provider::anthropic_vertex::AvailableModel>>,
+}
+
 #[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
 pub struct XAiSettingsContent {
     pub api_url: Option<String>,
@@ -322,6 +333,29 @@ impl settings::Settings for AllLanguageModelSettings {
                 .as_ref()
                 .and_then(|s| s.location_id.clone()),
         );

+        // Anthropic Vertex AI
+        merge(
+            &mut settings.anthropic_vertex.api_url,
+            value
+                .anthropic_vertex
+                .as_ref()
+                .and_then(|s| s.api_url.clone()),
+        );
+        merge(
+            &mut settings.anthropic_vertex.project_id,
+            value
+                .anthropic_vertex
+                .as_ref()
+                .and_then(|s| s.project_id.clone()),
+        );
+        merge(
+            &mut settings.anthropic_vertex.location_id,
+            value
+                .anthropic_vertex
+                .as_ref()
+                .and_then(|s| s.location_id.clone()),
+        );
     }

     Ok(settings)