PARTIAL feat: Integrate Anthropic models via Vertex AI

This commit introduces a new language model provider,
`anthropic_vertex_ai`, that integrates Anthropic's models via the Google
Cloud Vertex AI platform. The implementation uses Google Cloud's
Application Default Credentials (ADC) for authentication and fetches
`project_id` and `location_id` from the user's settings.
joe.schwerdtner 2025-07-21 21:25:00 +02:00
parent 132f0dd36a
commit 59073836c7
9 changed files with 2014 additions and 0 deletions


@@ -285,6 +285,7 @@ git_ui = { path = "crates/git_ui" }
go_to_line = { path = "crates/go_to_line" }
google_ai = { path = "crates/google_ai" }
google_vertex_ai = { path = "crates/google_vertex_ai" }
anthropic_vertex_ai = { path = "crates/anthropic_vertex_ai" }
gpui = { path = "crates/gpui", default-features = false, features = [
"http_client",
] }


@@ -0,0 +1,29 @@
[package]
name = "anthropic_vertex_ai"
version = "0.1.0"
edition.workspace = true
publish.workspace = true
license = "GPL-3.0-or-later"
[features]
default = []
schemars = ["dep:schemars"]
[lints]
workspace = true
[lib]
path = "src/anthropic_vertex_ai.rs"
[dependencies]
anyhow.workspace = true
chrono.workspace = true
futures.workspace = true
http_client.workspace = true
schemars = { workspace = true, optional = true }
anthropic.workspace = true
serde.workspace = true
serde_json.workspace = true
strum.workspace = true
thiserror.workspace = true
workspace-hack.workspace = true


@@ -0,0 +1,744 @@
use std::time::Duration;
use anthropic::{AnthropicError, ApiError};
use anyhow::{Context as _, Result, anyhow};
use chrono::{DateTime, Utc};
use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
use http_client::http::{HeaderMap, HeaderValue};
use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
use serde::{Deserialize, Serialize};
use strum::EnumIter;
#[derive(Clone, Debug, Default, Deserialize)]
pub struct AnthropicVertexAISettings {
pub project_id: Option<String>,
pub location: Option<String>,
}
pub const ANTHROPIC_API_URL: &str = "https://aiplatform.googleapis.com";
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicVertexModelCacheConfiguration {
pub min_total_token: u64,
pub should_speculate: bool,
pub max_cache_anchors: usize,
}
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum ModelMode {
#[default]
Default,
Thinking {
budget_tokens: Option<u32>,
},
}
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
#[serde(rename = "claude-opus-4", alias = "claude-opus-4@20250514")]
ClaudeOpus4,
#[serde(rename = "claude-opus-4-thinking")]
ClaudeOpus4Thinking,
#[default]
#[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4@20250514")]
ClaudeSonnet4,
#[serde(rename = "claude-sonnet-4-thinking")]
ClaudeSonnet4Thinking,
#[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet@20250219")]
Claude3_7Sonnet,
#[serde(rename = "claude-3-7-sonnet-thinking")]
Claude3_7SonnetThinking,
#[serde(rename = "custom")]
Custom {
name: String,
max_tokens: u64,
/// The name displayed in the UI, such as in the assistant panel model dropdown menu.
display_name: Option<String>,
/// Override this model with a different Anthropic model for tool calls.
tool_override: Option<String>,
/// Indicates whether this custom model supports caching.
cache_configuration: Option<AnthropicVertexModelCacheConfiguration>,
max_output_tokens: Option<u64>,
default_temperature: Option<f32>,
#[serde(default)]
mode: ModelMode,
},
}
impl Model {
pub fn default_fast() -> Self {
Self::ClaudeSonnet4
}
pub fn from_id(id: &str) -> Result<Self> {
// Match the longer "-thinking" ids first so they are not shadowed by
// their base-model prefixes.
if id.starts_with("claude-opus-4-thinking") {
return Ok(Self::ClaudeOpus4Thinking);
}
if id.starts_with("claude-opus-4") {
return Ok(Self::ClaudeOpus4);
}
if id.starts_with("claude-sonnet-4-thinking") {
return Ok(Self::ClaudeSonnet4Thinking);
}
if id.starts_with("claude-sonnet-4") {
return Ok(Self::ClaudeSonnet4);
}
if id.starts_with("claude-3-7-sonnet-thinking") {
return Ok(Self::Claude3_7SonnetThinking);
}
if id.starts_with("claude-3-7-sonnet") {
return Ok(Self::Claude3_7Sonnet);
}
Err(anyhow!("invalid model ID: {id}"))
}
pub fn id(&self) -> &str {
match self {
Self::ClaudeOpus4 => "claude-opus-4@20250514",
Self::ClaudeOpus4Thinking => "claude-opus-4-thinking",
Self::ClaudeSonnet4 => "claude-sonnet-4@20250514",
Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking",
Self::Claude3_7Sonnet => "claude-3-7-sonnet@20250219",
Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking",
Self::Custom { name, .. } => name,
}
}
/// The id of the model that should be used for making API requests
pub fn request_id(&self) -> &str {
match self {
Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4@20250514",
Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4@20250514",
Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet@20250219",
Self::Custom { name, .. } => name,
}
}
pub fn display_name(&self) -> &str {
match self {
Self::ClaudeOpus4 => "Claude Opus 4",
Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
Self::ClaudeSonnet4 => "Claude Sonnet 4",
Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
Self::Custom {
name, display_name, ..
} => display_name.as_ref().unwrap_or(name),
}
}
pub fn cache_configuration(&self) -> Option<AnthropicVertexModelCacheConfiguration> {
match self {
Self::ClaudeOpus4
| Self::ClaudeOpus4Thinking
| Self::ClaudeSonnet4
| Self::ClaudeSonnet4Thinking
| Self::Claude3_7Sonnet
| Self::Claude3_7SonnetThinking => Some(AnthropicVertexModelCacheConfiguration {
min_total_token: 2_048,
should_speculate: true,
max_cache_anchors: 4,
}),
Self::Custom {
cache_configuration,
..
} => cache_configuration.clone(),
}
}
pub fn max_token_count(&self) -> u64 {
match self {
Self::ClaudeOpus4
| Self::ClaudeOpus4Thinking
| Self::ClaudeSonnet4
| Self::ClaudeSonnet4Thinking
| Self::Claude3_7Sonnet
| Self::Claude3_7SonnetThinking => 200_000,
Self::Custom { max_tokens, .. } => *max_tokens,
}
}
pub fn max_output_tokens(&self) -> u64 {
match self {
Self::ClaudeOpus4
| Self::ClaudeOpus4Thinking
| Self::ClaudeSonnet4
| Self::ClaudeSonnet4Thinking
| Self::Claude3_7Sonnet
| Self::Claude3_7SonnetThinking => 8_192,
Self::Custom {
max_output_tokens, ..
} => max_output_tokens.unwrap_or(4_096),
}
}
pub fn default_temperature(&self) -> f32 {
match self {
Self::ClaudeOpus4
| Self::ClaudeOpus4Thinking
| Self::ClaudeSonnet4
| Self::ClaudeSonnet4Thinking
| Self::Claude3_7Sonnet
| Self::Claude3_7SonnetThinking => 1.0,
Self::Custom {
default_temperature,
..
} => default_temperature.unwrap_or(1.0),
}
}
pub fn mode(&self) -> ModelMode {
match self {
Self::ClaudeOpus4 | Self::ClaudeSonnet4 | Self::Claude3_7Sonnet => ModelMode::Default,
Self::ClaudeOpus4Thinking
| Self::ClaudeSonnet4Thinking
| Self::Claude3_7SonnetThinking => ModelMode::Thinking {
budget_tokens: Some(4_096),
},
Self::Custom { mode, .. } => mode.clone(),
}
}
pub fn tool_model_id(&self) -> &str {
if let Self::Custom {
tool_override: Some(tool_override),
..
} = self
{
tool_override
} else {
self.request_id()
}
}
}
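/// Non-streaming completion against an Anthropic-style `/v1/messages` endpoint.
/// Note: unlike `stream_completion` below, this path has not yet been adapted
/// to the Vertex AI URL scheme or bearer-token authentication.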
pub async fn complete(
client: &dyn HttpClient,
api_url: &str,
request: Request,
) -> Result<Response, AnthropicError> {
let uri = format!("{api_url}/v1/messages");
let request_builder = HttpRequest::builder()
.method(Method::POST)
.uri(uri)
.header("Content-Type", "application/json");
let serialized_request =
serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
let request = request_builder
.body(AsyncBody::from(serialized_request))
.map_err(AnthropicError::BuildRequestBody)?;
let mut response = client
.send(request)
.await
.map_err(AnthropicError::HttpSend)?;
let status_code = response.status();
let mut body = String::new();
response
.body_mut()
.read_to_string(&mut body)
.await
.map_err(AnthropicError::ReadResponse)?;
if status_code.is_success() {
Ok(serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)?)
} else {
Err(AnthropicError::HttpResponseError {
status_code,
message: body,
})
}
}
pub async fn stream_completion(
client: &dyn HttpClient,
api_url: &str,
project_id: &str,
location_id: &str,
access_token: &str,
request: Request,
) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
stream_completion_with_rate_limit_info(
client,
api_url,
project_id,
location_id,
access_token,
request,
)
.await
.map(|output| output.0)
}
/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
pub limit: usize,
pub remaining: usize,
pub reset: DateTime<Utc>,
}
impl RateLimit {
fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
let limit =
get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
let remaining = get_header(
&format!("anthropic-ratelimit-{resource}-remaining"),
headers,
)?
.parse()?;
let reset = DateTime::parse_from_rfc3339(get_header(
&format!("anthropic-ratelimit-{resource}-reset"),
headers,
)?)?
.to_utc();
Ok(Self {
limit,
remaining,
reset,
})
}
}
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
pub retry_after: Option<Duration>,
pub requests: Option<RateLimit>,
pub tokens: Option<RateLimit>,
pub input_tokens: Option<RateLimit>,
pub output_tokens: Option<RateLimit>,
}
impl RateLimitInfo {
fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
// Check if any rate limit headers exist
let has_rate_limit_headers = headers
.keys()
.any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
if !has_rate_limit_headers {
return Self {
retry_after: None,
requests: None,
tokens: None,
input_tokens: None,
output_tokens: None,
};
}
Self {
retry_after: parse_retry_after(headers),
requests: RateLimit::from_headers("requests", headers).ok(),
tokens: RateLimit::from_headers("tokens", headers).ok(),
input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
}
}
}
/// Parses the Retry-After header value as an integer number of seconds (Anthropic always uses
/// seconds). Note that other services might specify an HTTP date or some other format for this
/// header. Returns `None` if the header is not present or cannot be parsed.
pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
headers
.get("retry-after")
.and_then(|v| v.to_str().ok())
.and_then(|v| v.parse::<u64>().ok())
.map(Duration::from_secs)
}
fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
Ok(headers
.get(key)
.with_context(|| format!("missing header `{key}`"))?
.to_str()?)
}
pub async fn stream_completion_with_rate_limit_info(
client: &dyn HttpClient,
api_url: &str,
project_id: &str,
location_id: &str,
access_token: &str,
request: Request,
) -> Result<
(
BoxStream<'static, Result<Event, AnthropicError>>,
Option<RateLimitInfo>,
),
AnthropicError,
> {
let model_id = request.model.clone();
let request = StreamingRequest {
base: request,
stream: true,
};
// `api_url` may include a scheme (the default constant does); strip it so a
// regional prefix can be applied to the bare host.
let host = api_url.trim_start_matches("https://");
let endpoint = if location_id == "global" {
format!("https://{host}")
} else {
format!("https://{location_id}-{host}")
};
let uri = format!(
"{endpoint}/v1/projects/{project_id}/locations/{location_id}/publishers/anthropic/models/{model_id}:streamRawPredict"
);
// Vertex AI authenticates with an OAuth2 bearer token obtained from ADC rather than an API key.
let request_builder = HttpRequest::builder()
.method(Method::POST)
.uri(uri)
.header("Authorization", format!("Bearer {}", access_token))
.header("Content-Type", "application/json");
let serialized_request =
serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
let request = request_builder
.body(AsyncBody::from(serialized_request))
.map_err(AnthropicError::BuildRequestBody)?;
let mut response = client
.send(request)
.await
.map_err(AnthropicError::HttpSend)?;
let rate_limits = RateLimitInfo::from_headers(response.headers());
if response.status().is_success() {
let reader = BufReader::new(response.into_body());
let stream = reader
.lines()
.filter_map(|line| async move {
match line {
Ok(line) => {
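// SSE frames arrive as "data: {json}"; lines without that prefix are skipped.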
let line = line.strip_prefix("data: ")?;
match serde_json::from_str(line) {
Ok(response) => Some(Ok(response)),
Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
}
}
Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
}
})
.boxed();
Ok((stream, Some(rate_limits)))
} else if response.status().as_u16() == 529 {
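// 529 is the non-standard status Anthropic uses to signal server overload.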
Err(AnthropicError::ServerOverloaded {
retry_after: rate_limits.retry_after,
})
} else if let Some(retry_after) = rate_limits.retry_after {
Err(AnthropicError::RateLimit { retry_after })
} else {
let mut body = String::new();
response
.body_mut()
.read_to_string(&mut body)
.await
.map_err(AnthropicError::ReadResponse)?;
match serde_json::from_str::<Event>(&body) {
Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
status_code: response.status(),
message: body,
}),
}
}
}
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
Ephemeral,
}
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
#[serde(rename = "type")]
pub cache_type: CacheControlType,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
pub role: Role,
pub content: Vec<RequestContent>,
}
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
User,
Assistant,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
#[serde(rename = "text")]
Text {
text: String,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
#[serde(rename = "thinking")]
Thinking {
thinking: String,
signature: String,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
#[serde(rename = "redacted_thinking")]
RedactedThinking { data: String },
#[serde(rename = "image")]
Image {
source: ImageSource,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
#[serde(rename = "tool_use")]
ToolUse {
id: String,
name: String,
input: serde_json::Value,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
#[serde(rename = "tool_result")]
ToolResult {
tool_use_id: String,
is_error: bool,
content: ToolResultContent,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
Plain(String),
Multipart(Vec<ToolResultPart>),
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
Text { text: String },
Image { source: ImageSource },
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
#[serde(rename = "text")]
Text { text: String },
#[serde(rename = "thinking")]
Thinking { thinking: String },
#[serde(rename = "redacted_thinking")]
RedactedThinking { data: String },
#[serde(rename = "tool_use")]
ToolUse {
id: String,
name: String,
input: serde_json::Value,
},
}
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
#[serde(rename = "type")]
pub source_type: String,
pub media_type: String,
pub data: String,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
pub name: String,
pub description: String,
pub input_schema: serde_json::Value,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
Auto,
Any,
Tool { name: String },
None,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
Enabled { budget_tokens: Option<u32> },
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
String(String),
Content(Vec<RequestContent>),
}
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
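/// Used to build the request URL; skipped during serialization because
/// Vertex AI addresses the model in the URL path, not the JSON body.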
#[serde(skip)]
pub model: String,
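/// Vertex-specific API version string, e.g. "vertex-2023-10-16".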
pub anthropic_version: String,
pub max_tokens: u64,
pub messages: Vec<Message>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub tools: Vec<Tool>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub thinking: Option<Thinking>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub tool_choice: Option<ToolChoice>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub system: Option<StringOrContents>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub metadata: Option<Metadata>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub stop_sequences: Vec<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub temperature: Option<f32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub top_k: Option<u32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub top_p: Option<f32>,
}
#[derive(Debug, Serialize, Deserialize)]
struct StreamingRequest {
#[serde(flatten)]
pub base: Request,
pub stream: bool,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
pub user_id: Option<String>,
}
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub input_tokens: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub output_tokens: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub cache_creation_input_tokens: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub cache_read_input_tokens: Option<u64>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
pub id: String,
#[serde(rename = "type")]
pub response_type: String,
pub role: Role,
pub content: Vec<ResponseContent>,
pub model: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub stop_reason: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub stop_sequence: Option<String>,
pub usage: Usage,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
#[serde(rename = "message_start")]
MessageStart { message: Response },
#[serde(rename = "content_block_start")]
ContentBlockStart {
index: usize,
content_block: ResponseContent,
},
#[serde(rename = "content_block_delta")]
ContentBlockDelta { index: usize, delta: ContentDelta },
#[serde(rename = "content_block_stop")]
ContentBlockStop { index: usize },
#[serde(rename = "message_delta")]
MessageDelta { delta: MessageDelta, usage: Usage },
#[serde(rename = "message_stop")]
MessageStop,
#[serde(rename = "ping")]
Ping,
#[serde(rename = "error")]
Error { error: ApiError },
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
#[serde(rename = "text_delta")]
TextDelta { text: String },
#[serde(rename = "thinking_delta")]
ThinkingDelta { thinking: String },
#[serde(rename = "signature_delta")]
SignatureDelta { signature: String },
#[serde(rename = "input_json_delta")]
InputJsonDelta { partial_json: String },
}
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
pub stop_reason: Option<String>,
pub stop_sequence: Option<String>,
}
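/// Extracts the offending token count from a "prompt is too long: N tokens ..." API error message.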
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
message
.strip_prefix("prompt is too long: ")?
.split_once(" tokens")?
.0
.parse()
.ok()
}
#[test]
fn test_match_window_exceeded() {
let error = ApiError {
error_type: "invalid_request_error".to_string(),
message: "prompt is too long: 220000 tokens > 200000".to_string(),
};
assert_eq!(error.match_window_exceeded(), Some(220_000));
let error = ApiError {
error_type: "invalid_request_error".to_string(),
message: "prompt is too long: 1234953 tokens".to_string(),
};
assert_eq!(error.match_window_exceeded(), Some(1234953));
let error = ApiError {
error_type: "invalid_request_error".to_string(),
message: "not a prompt length error".to_string(),
};
assert_eq!(error.match_window_exceeded(), None);
let error = ApiError {
error_type: "rate_limit_error".to_string(),
message: "prompt is too long: 12345 tokens".to_string(),
};
assert_eq!(error.match_window_exceeded(), None);
let error = ApiError {
error_type: "invalid_request_error".to_string(),
message: "prompt is too long: invalid tokens".to_string(),
};
assert_eq!(error.match_window_exceeded(), None);
}


@@ -0,0 +1,79 @@
**Task ID: TA001 - Integrate Anthropic Models via Google Vertex AI**
**Objective:**
To develop a new language model provider, `anthropic_vertex_ai`, that seamlessly integrates Anthropic's models (e.g., Claude) into the Zed editor via the Google Cloud Vertex AI platform.
**Background:**
While Zed has a direct integration with Anthropic's API, many users operate within the Google Cloud ecosystem. Vertex AI provides access to third-party models like Anthropic's through its own endpoint. This task involves creating a new provider that bridges the existing `anthropic` API logic with the authentication and endpoint requirements of Google Cloud.
This integration will not use explicit API keys. Instead, it will leverage Google's Application Default Credentials (ADC), a standard mechanism for authenticating GCP services, ensuring a secure and streamlined user experience. Configuration will be provided through `settings.json` to specify the required `project_id` and `location` for the Vertex AI endpoint.
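To make the intended authentication flow concrete, here is a minimal sketch of fetching an ADC bearer token with the `gcp_auth` crate (the crate named in Action 1.3 below). This snippet is not part of the diff and assumes the `gcp_auth` 0.11-style `AuthenticationManager` API:
```rust
use gcp_auth::AuthenticationManager;

/// Obtain a short-lived OAuth2 bearer token from Application Default
/// Credentials (gcloud user login, workload identity, or a service-account
/// key, discovered via the standard ADC search order).
async fn adc_access_token() -> anyhow::Result<String> {
    let manager = AuthenticationManager::new().await?;
    // Vertex AI accepts tokens scoped to cloud-platform.
    let token = manager
        .get_token(&["https://www.googleapis.com/auth/cloud-platform"])
        .await?;
    Ok(token.as_str().to_string())
}
```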
**Key Requirements:**
- **Authentication:** Must use Google Cloud's Application Default Credentials (ADC) for all API requests. The implementation should not handle manual tokens.
- **Configuration:** The provider must be configurable via `settings.json`, allowing the user to specify their Google Cloud `project_id` and `location`.
- **Endpoint Construction:** Must dynamically construct the correct Vertex AI endpoint URL for each request, in the format: `https://$LOCATION-aiplatform.googleapis.com/v1/projects/$PROJECT_ID/locations/$LOCATION/publishers/anthropic/models/$MODEL:streamRawPredict` (see the sketch after this list).
- **Payload Adaptation:** The JSON payload sent to the endpoint must be modified to:
- Include the mandatory field: `"anthropic_version": "vertex-2023-10-16"`.
- Exclude the `model` field, as it is specified in the URL.
- **Integration:** The new provider must be a first-class citizen within Zed, appearing in the model selection list and functioning identically to other integrated providers.
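The endpoint construction and payload adaptation can be illustrated with a small, self-contained sketch. The `Request` struct below is trimmed to three fields for brevity, and `vertex_endpoint` is a hypothetical helper; the real struct in this diff keeps the `model` field and marks it `#[serde(skip)]` so it is used for the URL but never serialized into the body:
```rust
use serde::Serialize;

#[derive(Serialize)]
struct Request {
    // Addressed in the URL path, so excluded from the JSON body.
    #[serde(skip)]
    model: String,
    // Mandatory for Vertex-hosted Anthropic models.
    anthropic_version: String,
    max_tokens: u64,
}

// Hypothetical helper mirroring the required URL format.
fn vertex_endpoint(location: &str, project_id: &str, model: &str) -> String {
    format!(
        "https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}\
         /locations/{location}/publishers/anthropic/models/{model}:streamRawPredict"
    )
}

fn main() -> anyhow::Result<()> {
    let request = Request {
        model: "claude-sonnet-4@20250514".into(),
        anthropic_version: "vertex-2023-10-16".into(),
        max_tokens: 8_192,
    };
    let url = vertex_endpoint("europe-west1", "your-gcp-project-id", &request.model);
    // `model` is absent from the serialized body thanks to `#[serde(skip)]`.
    println!("POST {url}\n{}", serde_json::to_string_pretty(&request)?);
    Ok(())
}
```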
**Implementation Plan:**
**Step 1: Foundational Analysis & Crate Setup**
* **Action 1.1: Analyze `google_vertex_ai` Crate:** Thoroughly examine `crates/google_vertex_ai/src/google_vertex_ai.rs` to understand its implementation of ADC-based authentication and how it reads settings like `project_id` and `location`. This will serve as the template for our authentication logic.
* **Action 1.2: Define Configuration Struct:** In a new file, `crates/anthropic_vertex_ai/src/lib.rs`, define the `AnthropicVertexAISettings` struct. This struct will deserialize the `project_id` and `location` from the user's `settings.json`.
* **Action 1.3: Update `Cargo.toml`:** Create/update the `Cargo.toml` file for the `anthropic_vertex_ai` crate. It should include dependencies from both `anthropic` (for serde structs) and `google_vertex` (for GCP-related dependencies like `gcp_auth`).
* **Action 1.4: Create `lib.rs`:** Ensure `crates/anthropic_vertex_ai/src/lib.rs` exists to house the `LanguageModelProvider` implementation and serve as the crate's entry point.
**Step 2: Adapt Core Anthropic Logic**
* **Action 2.1: Modify `Request` Struct:** In `crates/anthropic_vertex_ai/src/anthropic_vertex_ai.rs`, modify the main `Request` struct:
- Add a new field: `pub anthropic_version: &'static str`.
- Remove the existing `pub model: String` field.
* **Action 2.2: Refactor Completion Functions:** Refactor the `stream_completion_with_rate_limit_info` function to be more generic.
- It will now accept the fully-constructed Vertex AI endpoint URL as a parameter.
- It will accept an ADC-aware `HttpClient` instance instead of a simple API key.
- The logic for setting the `Authorization` header will be updated to use a `Bearer` token provided by the `HttpClient`.
**Step 3: Implement the `LanguageModelProvider`**
* **Action 3.1: Define Provider Struct:** In `crates/anthropic_vertex_ai/src/lib.rs`, define the main `AnthropicVertexAIProvider` struct. It will store the settings defined in Action 1.2.
* **Action 3.2: Implement `LanguageModelProvider` Trait:** Implement the `language_model::LanguageModelProvider` trait for `AnthropicVertexAIProvider`.
* **Action 3.3: Implement Core Logic:** The trait methods will contain the central logic (see the sketch after this list):
1. On initialization, the provider will create an `HttpClient` configured to use Google's ADC, following the pattern in the `google_vertex` crate.
2. For each completion request, it will dynamically construct the full, model-specific Vertex AI URL using the configured `project_id`, `location`, and the requested model name.
3. It will create an instance of the modified `Request` struct from `anthropic_vertex_ai.rs`, setting the `anthropic_version` field correctly.
4. Finally, it will call the refactored `stream_completion_with_rate_limit_info` function, passing the authenticated client and the constructed request.
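A condensed sketch of how point 4 above could invoke the streaming API, using the `stream_completion` signature this commit adds (`run_completion` and its parameter list are illustrative glue, not the actual provider code):
```rust
use anthropic_vertex_ai::{ANTHROPIC_API_URL, Request, stream_completion};
use futures::StreamExt;
use http_client::HttpClient;

async fn run_completion(
    client: &dyn HttpClient,
    project_id: &str,
    location_id: &str,
    access_token: &str, // obtained via ADC, as sketched in Step 1
    request: Request,   // carries `model` (for the URL) and `anthropic_version`
) -> anyhow::Result<()> {
    let mut events = stream_completion(
        client,
        ANTHROPIC_API_URL,
        project_id,
        location_id,
        access_token,
        request,
    )
    .await?;
    // Each item is one parsed SSE frame: message_start, content_block_delta,
    // message_stop, and so on.
    while let Some(event) = events.next().await {
        println!("{:?}", event?);
    }
    Ok(())
}
```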
**Step 4: Final Integration**
* **Action 4.1: Workspace Integration:** Add `anthropic_vertex_ai` to the main workspace `Cargo.toml` to link the new crate.
* **Action 4.2: Module Declaration:** Add `pub mod anthropic_vertex_ai;` to `crates/language_models/src/provider.rs` to make the module visible.
* **Action 4.3: Provider Registration:** In `crates/language_models/src/lib.rs`, update the central list of language model providers to include an instance of `AnthropicVertexAIProvider`.
**Verification Plan:**
* **Compile-Time Verification:** At each major step, ask the human to review the code for compilation errors and adherence to project standards.
* **Configuration Verification:** The implementation will be tested against a `settings.json` file configured as follows:
```json
"language_servers": {
"anthropic-vertex": {
"enabled": true,
"project_id": "your-gcp-project-id",
"location": "europe-west1"
}
},
"assistant": {
"default_model": {
"provider": "anthropic-vertex",
"name": "claude-sonnet-4@20250514"
}
}
```
* **Runtime Verification:**
1. Launch Zed with the above configuration.
2. Ensure the local environment is authenticated with GCP (e.g., via `gcloud auth application-default login`).
3. Open the assistant panel and confirm that `"anthropic-vertex/claude-sonnet-4@20250514"` is the selected model.
4. Send a test prompt to the assistant.
5. **Success Condition:** A valid, streamed response is received from the assistant, confirming that the entire chain—from configuration and authentication to request execution and response parsing—is working correctly.


@@ -33,6 +33,7 @@ fs.workspace = true
futures.workspace = true
google_ai = { workspace = true, features = ["schemars"] }
google_vertex_ai = { workspace = true, features = ["schemars"] }
anthropic_vertex_ai = { workspace = true, features = ["schemars"] }
gpui.workspace = true
gpui_tokio.workspace = true
http_client.workspace = true


@@ -10,6 +10,7 @@ mod settings;
pub mod ui;
use crate::provider::anthropic::AnthropicLanguageModelProvider;
use crate::provider::anthropic_vertex::AnthropicVertexLanguageModelProvider;
use crate::provider::bedrock::BedrockLanguageModelProvider;
use crate::provider::cloud::CloudLanguageModelProvider;
use crate::provider::copilot_chat::CopilotChatLanguageModelProvider;
@@ -72,6 +73,11 @@ fn register_language_model_providers(
GoogleVertexLanguageModelProvider::new(client.http_client(), cx),
cx,
);
registry.register_provider(
AnthropicVertexLanguageModelProvider::new(client.http_client(), cx),
cx,
);
registry.register_provider(
MistralLanguageModelProvider::new(client.http_client(), cx),
cx,


@@ -1,4 +1,5 @@
pub mod anthropic;
pub mod anthropic_vertex;
pub mod bedrock;
pub mod cloud;
pub mod copilot_chat;

File diff suppressed because it is too large.


@@ -7,6 +7,7 @@ use settings::{Settings, SettingsSources};
use crate::provider::{
self,
anthropic::AnthropicSettings,
anthropic_vertex::AnthropicVertexSettings,
bedrock::AmazonBedrockSettings,
cloud::{self, ZedDotDevSettings},
deepseek::DeepSeekSettings,
@@ -33,6 +34,7 @@ pub struct AllLanguageModelSettings {
pub deepseek: DeepSeekSettings,
pub google: GoogleSettings,
pub google_vertex: GoogleVertexSettings,
pub anthropic_vertex: AnthropicVertexSettings,
pub lmstudio: LmStudioSettings,
pub mistral: MistralSettings,
pub ollama: OllamaSettings,
@@ -50,6 +52,7 @@ pub struct AllLanguageModelSettingsContent {
pub deepseek: Option<DeepseekSettingsContent>,
pub google: Option<GoogleSettingsContent>,
pub google_vertex: Option<GoogleVertexSettingsContent>,
pub anthropic_vertex: Option<AnthropicVertexSettingsContent>,
pub lmstudio: Option<LmStudioSettingsContent>,
pub mistral: Option<MistralSettingsContent>,
pub ollama: Option<OllamaSettingsContent>,
@@ -126,6 +129,14 @@ pub struct GoogleVertexSettingsContent {
pub available_models: Option<Vec<provider::google_vertex::AvailableModel>>,
}
#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct AnthropicVertexSettingsContent {
pub api_url: Option<String>,
pub project_id: Option<String>,
pub location_id: Option<String>,
pub available_models: Option<Vec<provider::anthropic_vertex::AvailableModel>>,
}
#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct XAiSettingsContent {
pub api_url: Option<String>,
@@ -322,6 +333,29 @@ impl settings::Settings for AllLanguageModelSettings {
.as_ref()
.and_then(|s| s.location_id.clone()),
);
// Anthropic Vertex AI
merge(
&mut settings.anthropic_vertex.api_url,
value
.anthropic_vertex
.as_ref()
.and_then(|s| s.api_url.clone()),
);
merge(
&mut settings.anthropic_vertex.project_id,
value
.anthropic_vertex
.as_ref()
.and_then(|s| s.project_id.clone()),
);
merge(
&mut settings.anthropic_vertex.location_id,
value
.anthropic_vertex
.as_ref()
.and_then(|s| s.location_id.clone()),
);
}
Ok(settings)