PARTIAL feat: Integrate Anthropic models via Vertex AI
This commit introduces a new language model provider, `anthropic_vertex_ai`, that integrates Anthropic's models via the Google Cloud Vertex AI platform. The implementation uses Google Cloud's Application Default Credentials (ADC) for authentication and fetches `project_id` and `location_id` from the user's settings.
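
For reference, the provider can be configured with a `settings.json` entry along these lines (illustrative values; the top-level `language_models` key and the `anthropic_vertex` field follow the settings structs added in this change and are an assumption about the final wiring):

```json
{
  "language_models": {
    "anthropic_vertex": {
      "project_id": "your-gcp-project-id",
      "location_id": "europe-west1"
    }
  }
}
```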
parent 132f0dd36a
commit 59073836c7
9 changed files with 2014 additions and 0 deletions

Cargo.toml
@@ -285,6 +285,7 @@ git_ui = { path = "crates/git_ui" }
go_to_line = { path = "crates/go_to_line" }
google_ai = { path = "crates/google_ai" }
google_vertex_ai = { path = "crates/google_vertex_ai" }
anthropic_vertex_ai = { path = "crates/anthropic_vertex_ai" }
gpui = { path = "crates/gpui", default-features = false, features = [
    "http_client",
] }
crates/anthropic_vertex_ai/Cargo.toml (new file, 29 lines)
@@ -0,0 +1,29 @@
[package]
name = "anthropic_vertex_ai"
version = "0.1.0"
edition.workspace = true
publish.workspace = true
license = "GPL-3.0-or-later"

[features]
default = []
schemars = ["dep:schemars"]

[lints]
workspace = true

[lib]
path = "src/anthropic_vertex_ai.rs"

[dependencies]
anyhow.workspace = true
chrono.workspace = true
futures.workspace = true
http_client.workspace = true
schemars = { workspace = true, optional = true }
anthropic.workspace = true
serde.workspace = true
serde_json.workspace = true
strum.workspace = true
thiserror.workspace = true
workspace-hack.workspace = true
crates/anthropic_vertex_ai/src/anthropic_vertex_ai.rs (new file, 744 lines)
@ -0,0 +1,744 @@
|
|||
use std::time::Duration;
|
||||
|
||||
use anthropic::{AnthropicError, ApiError};
|
||||
use anyhow::{Context as _, Result, anyhow};
|
||||
use chrono::{DateTime, Utc};
|
||||
use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
|
||||
use http_client::http::{HeaderMap, HeaderValue};
|
||||
use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use strum::EnumIter;
|
||||
|
||||
#[derive(Clone, Debug, Default, Deserialize)]
|
||||
pub struct AnthropicVertexAISettings {
|
||||
pub project_id: Option<String>,
|
||||
pub location: Option<String>,
|
||||
}
|
||||
|
||||
pub const ANTHROPIC_API_URL: &str = "https://aiplatform.googleapis.com";
|
||||
|
||||
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
|
||||
pub struct AnthropicVertexModelCacheConfiguration {
|
||||
pub min_total_token: u64,
|
||||
pub should_speculate: bool,
|
||||
pub max_cache_anchors: usize,
|
||||
}

#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum ModelMode {
    #[default]
    Default,
    Thinking {
        budget_tokens: Option<u32>,
    },
}

#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4@20250514")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-thinking")]
    ClaudeOpus4Thinking,
    #[default]
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4@20250514")]
    ClaudeSonnet4,
    #[serde(rename = "claude-sonnet-4-thinking")]
    ClaudeSonnet4Thinking,
    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet@20250219")]
    Claude3_7Sonnet,
    #[serde(rename = "claude-3-7-sonnet-thinking")]
    Claude3_7SonnetThinking,
    #[serde(rename = "custom")]
    Custom {
        name: String,
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicVertexModelCacheConfiguration>,
        max_output_tokens: Option<u64>,
        default_temperature: Option<f32>,
        #[serde(default)]
        mode: ModelMode,
    },
}
impl Model {
    pub fn default_fast() -> Self {
        Self::ClaudeSonnet4
    }

    pub fn from_id(id: &str) -> Result<Self> {
        // Match the `-thinking` ids first: they share a prefix with the
        // corresponding non-thinking ids, so the order of checks matters.
        if id.starts_with("claude-opus-4-thinking") {
            return Ok(Self::ClaudeOpus4Thinking);
        }

        if id.starts_with("claude-opus-4") {
            return Ok(Self::ClaudeOpus4);
        }

        if id.starts_with("claude-sonnet-4-thinking") {
            return Ok(Self::ClaudeSonnet4Thinking);
        }

        if id.starts_with("claude-sonnet-4") {
            return Ok(Self::ClaudeSonnet4);
        }

        if id.starts_with("claude-3-7-sonnet-thinking") {
            return Ok(Self::Claude3_7SonnetThinking);
        }

        if id.starts_with("claude-3-7-sonnet") {
            return Ok(Self::Claude3_7Sonnet);
        }

        Err(anyhow!("invalid model ID: {id}"))
    }
    pub fn id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4",
            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking",
            Self::ClaudeSonnet4 => "claude-sonnet-4",
            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking",
            Self::Claude3_7Sonnet => "claude-3-7-sonnet",
            Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking",
            Self::Custom { name, .. } => name,
        }
    }

    /// The id of the model that should be used for making API requests.
    pub fn request_id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4@20250514",
            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4@20250514",
            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet@20250219",
            Self::Custom { name, .. } => name,
        }
    }
    pub fn display_name(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "Claude Opus 4",
            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
            Self::ClaudeSonnet4 => "Claude Sonnet 4",
            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
            Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
            Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
            Self::Custom {
                name, display_name, ..
            } => display_name.as_ref().unwrap_or(name),
        }
    }

    pub fn cache_configuration(&self) -> Option<AnthropicVertexModelCacheConfiguration> {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking => Some(AnthropicVertexModelCacheConfiguration {
                min_total_token: 2_048,
                should_speculate: true,
                max_cache_anchors: 4,
            }),
            Self::Custom {
                cache_configuration,
                ..
            } => cache_configuration.clone(),
        }
    }

    pub fn max_token_count(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking => 200_000,
            Self::Custom { max_tokens, .. } => *max_tokens,
        }
    }

    pub fn max_output_tokens(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking => 8_192,
            Self::Custom {
                max_output_tokens, ..
            } => max_output_tokens.unwrap_or(4_096),
        }
    }

    pub fn default_temperature(&self) -> f32 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking => 1.0,
            Self::Custom {
                default_temperature,
                ..
            } => default_temperature.unwrap_or(1.0),
        }
    }

    pub fn mode(&self) -> ModelMode {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeSonnet4 | Self::Claude3_7Sonnet => ModelMode::Default,
            Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7SonnetThinking => ModelMode::Thinking {
                budget_tokens: Some(4_096),
            },
            Self::Custom { mode, .. } => mode.clone(),
        }
    }

    pub fn tool_model_id(&self) -> &str {
        if let Self::Custom {
            tool_override: Some(tool_override),
            ..
        } = self
        {
            tool_override
        } else {
            self.request_id()
        }
    }
}
pub async fn complete(
    client: &dyn HttpClient,
    api_url: &str,
    request: Request,
) -> Result<Response, AnthropicError> {
    let uri = format!("https://{api_url}/v1/messages");
    let request_builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(uri)
        .header("Content-Type", "application/json");

    let serialized_request =
        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
    let request = request_builder
        .body(AsyncBody::from(serialized_request))
        .map_err(AnthropicError::BuildRequestBody)?;

    let mut response = client
        .send(request)
        .await
        .map_err(AnthropicError::HttpSend)?;
    let status_code = response.status();
    let mut body = String::new();
    response
        .body_mut()
        .read_to_string(&mut body)
        .await
        .map_err(AnthropicError::ReadResponse)?;

    if status_code.is_success() {
        Ok(serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)?)
    } else {
        Err(AnthropicError::HttpResponseError {
            status_code,
            message: body,
        })
    }
}
pub async fn stream_completion(
    client: &dyn HttpClient,
    api_url: &str,
    project_id: &str,
    location_id: &str,
    access_token: &str,
    request: Request,
) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
    stream_completion_with_rate_limit_info(
        client,
        api_url,
        project_id,
        location_id,
        access_token,
        request,
    )
    .await
    .map(|output| output.0)
}
/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    pub limit: usize,
    pub remaining: usize,
    pub reset: DateTime<Utc>,
}

impl RateLimit {
    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
        let limit =
            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
        let remaining = get_header(
            &format!("anthropic-ratelimit-{resource}-remaining"),
            headers,
        )?
        .parse()?;
        let reset = DateTime::parse_from_rfc3339(get_header(
            &format!("anthropic-ratelimit-{resource}-reset"),
            headers,
        )?)?
        .to_utc();

        Ok(Self {
            limit,
            remaining,
            reset,
        })
    }
}

/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    pub retry_after: Option<Duration>,
    pub requests: Option<RateLimit>,
    pub tokens: Option<RateLimit>,
    pub input_tokens: Option<RateLimit>,
    pub output_tokens: Option<RateLimit>,
}

impl RateLimitInfo {
    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
        // Check whether any rate-limit headers exist.
        let has_rate_limit_headers = headers
            .keys()
            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));

        if !has_rate_limit_headers {
            return Self {
                retry_after: None,
                requests: None,
                tokens: None,
                input_tokens: None,
                output_tokens: None,
            };
        }

        Self {
            retry_after: parse_retry_after(headers),
            requests: RateLimit::from_headers("requests", headers).ok(),
            tokens: RateLimit::from_headers("tokens", headers).ok(),
            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
        }
    }
}

/// Parses the Retry-After header value as an integer number of seconds (Anthropic always uses
/// seconds). Note that other services might specify an HTTP date or some other format for this
/// header. Returns `None` if the header is not present or cannot be parsed.
pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
    headers
        .get("retry-after")
        .and_then(|v| v.to_str().ok())
        .and_then(|v| v.parse::<u64>().ok())
        .map(Duration::from_secs)
}

fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
    Ok(headers
        .get(key)
        .with_context(|| format!("missing header `{key}`"))?
        .to_str()?)
}
pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    project_id: &str,
    location_id: &str,
    access_token: &str,
    request: Request,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    let model_id = request.model.clone();
    let request = StreamingRequest {
        base: request,
        stream: true,
    };

    // The global endpoint has no location prefix; regional endpoints prefix
    // the location onto the host, e.g. `europe-west1-aiplatform.googleapis.com`.
    let endpoint = if location_id == "global" {
        format!("https://{api_url}")
    } else {
        format!("https://{location_id}-{api_url}")
    };

    let uri = format!(
        "{endpoint}/v1/projects/{project_id}/locations/{location_id}/publishers/anthropic/models/{model_id}:streamRawPredict"
    );

    // Authenticate with a bearer token obtained via Application Default
    // Credentials, rather than an API key header.
    let request_builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(uri)
        .header("Authorization", format!("Bearer {access_token}"))
        .header("Content-Type", "application/json");

    let serialized_request =
        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
    let request = request_builder
        .body(AsyncBody::from(serialized_request))
        .map_err(AnthropicError::BuildRequestBody)?;

    let mut response = client
        .send(request)
        .await
        .map_err(AnthropicError::HttpSend)?;
    let rate_limits = RateLimitInfo::from_headers(response.headers());
    if response.status().is_success() {
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        let line = line.strip_prefix("data: ")?;
                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else if response.status().as_u16() == 529 {
        Err(AnthropicError::ServerOverloaded {
            retry_after: rate_limits.retry_after,
        })
    } else if let Some(retry_after) = rate_limits.retry_after {
        Err(AnthropicError::RateLimit { retry_after })
    } else {
        let mut body = String::new();
        response
            .body_mut()
            .read_to_string(&mut body)
            .await
            .map_err(AnthropicError::ReadResponse)?;

        match serde_json::from_str::<Event>(&body) {
            Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
            Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
                status_code: response.status(),
                message: body,
            }),
        }
    }
}
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}

#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}

#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    #[serde(rename = "text")]
    Text { text: String },
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}

#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    pub input_schema: serde_json::Value,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    Tool { name: String },
    None,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    Enabled { budget_tokens: Option<u32> },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    #[serde(skip)]
    pub model: String,
    pub anthropic_version: String,
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}

#[derive(Debug, Serialize, Deserialize)]
struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}

#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    #[serde(rename = "message_stop")]
    MessageStop,
    #[serde(rename = "ping")]
    Ping,
    #[serde(rename = "error")]
    Error { error: ApiError },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}

#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}

pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    message
        .strip_prefix("prompt is too long: ")?
        .split_once(" tokens")?
        .0
        .parse()
        .ok()
}

#[test]
fn test_match_window_exceeded() {
    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: 220000 tokens > 200000".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), Some(220_000));

    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: 1234953 tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), Some(1234953));

    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "not a prompt length error".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);

    let error = ApiError {
        error_type: "rate_limit_error".to_string(),
        message: "prompt is too long: 12345 tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);

    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: invalid tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);
}
crates/anthropic_vertex_ai/task.md (new file, 79 lines)
@@ -0,0 +1,79 @@
**Task ID: TA001 - Integrate Anthropic Models via Google Vertex AI**

**Objective:**
To develop a new language model provider, `anthropic_vertex_ai`, that integrates Anthropic's models (e.g., Claude) into the Zed editor via the Google Cloud Vertex AI platform.

**Background:**
While Zed has a direct integration with Anthropic's API, many users operate within the Google Cloud ecosystem. Vertex AI provides access to third-party models such as Anthropic's through its own endpoint. This task involves creating a new provider that bridges the existing `anthropic` API logic with the authentication and endpoint requirements of Google Cloud.

This integration will not use explicit API keys. Instead, it will leverage Google's Application Default Credentials (ADC), a standard mechanism for authenticating GCP services, ensuring a secure and streamlined user experience. Configuration will be provided through `settings.json` to specify the required `project_id` and `location` for the Vertex AI endpoint.
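
The token flow is roughly the following (a minimal sketch assuming the `gcp_auth` crate mentioned in Action 1.3 below; the function name, crate version, and error handling are illustrative, not the shipped implementation):

```rust
use gcp_auth::AuthenticationManager;

/// Fetches an OAuth2 access token via Application Default Credentials.
/// Credentials are resolved from the environment: `gcloud auth
/// application-default login`, a service-account key file, or the
/// GCE/GKE metadata server.
async fn fetch_access_token() -> anyhow::Result<String> {
    let manager = AuthenticationManager::new().await?;
    let scopes = &["https://www.googleapis.com/auth/cloud-platform"];
    let token = manager.get_token(scopes).await?;
    Ok(token.as_str().to_string())
}
```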

**Key Requirements:**
- **Authentication:** Must use Google Cloud's Application Default Credentials (ADC) for all API requests. The implementation should not handle manual tokens.
- **Configuration:** The provider must be configurable via `settings.json`, allowing the user to specify their Google Cloud `project_id` and `location`.
- **Endpoint Construction:** Must dynamically construct the correct Vertex AI endpoint URL for each request, in the format: `https://$LOCATION-aiplatform.googleapis.com/v1/projects/$PROJECT_ID/locations/$LOCATION/publishers/anthropic/models/$MODEL:streamRawPredict`.
- **Payload Adaptation:** The JSON payload sent to the endpoint must be modified to (see the example after this list):
  - Include the mandatory field: `"anthropic_version": "vertex-2023-10-16"`.
  - Exclude the `model` field, as it is specified in the URL.
- **Integration:** The new provider must be a first-class citizen within Zed, appearing in the model selection list and functioning identically to other integrated providers.
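
For concreteness, a streaming request body following these rules might look like this (illustrative values; note that there is no `model` field, and `stream` is contributed by the streaming wrapper):

```json
{
  "anthropic_version": "vertex-2023-10-16",
  "max_tokens": 8192,
  "messages": [
    { "role": "user", "content": [{ "type": "text", "text": "Hello, Claude." }] }
  ],
  "stream": true
}
```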

**Implementation Plan:**

**Step 1: Foundational Analysis & Crate Setup**

* **Action 1.1: Analyze `google_vertex` Crate:** Thoroughly examine `crates/google_vertex/src/google_vertex.rs` to understand its implementation of ADC-based authentication and how it reads settings like `project_id` and `location`. This will serve as the template for our authentication logic.
* **Action 1.2: Define Configuration Struct:** In a new file, `crates/anthropic_vertex_ai/src/lib.rs`, define the `AnthropicVertexAISettings` struct. This struct will deserialize the `project_id` and `location` from the user's `settings.json`.
* **Action 1.3: Update `Cargo.toml`:** Create/update the `Cargo.toml` file for the `anthropic_vertex_ai` crate. It should include dependencies from both `anthropic` (for serde structs) and `google_vertex` (for GCP-related dependencies like `gcp_auth`).
* **Action 1.4: Create `lib.rs`:** Ensure `crates/anthropic_vertex_ai/src/lib.rs` exists to house the `LanguageModelProvider` implementation and serve as the crate's entry point.

**Step 2: Adapt Core Anthropic Logic**

* **Action 2.1: Modify `Request` Struct:** In `crates/anthropic_vertex_ai/src/anthropic_vertex_ai.rs`, modify the main `Request` struct:
  - Add a new field: `pub anthropic_version: &'static str`.
  - Remove the existing `pub model: String` field.
* **Action 2.2: Refactor Completion Functions:** Refactor the `stream_completion_with_rate_limit_info` function to be more generic.
  - It will now accept the fully-constructed Vertex AI endpoint URL as a parameter.
  - It will accept an ADC-aware `HttpClient` instance instead of a simple API key.
  - The logic for setting the `Authorization` header will be updated to use a `Bearer` token provided by the `HttpClient`.

**Step 3: Implement the `LanguageModelProvider`**

* **Action 3.1: Define Provider Struct:** In `crates/anthropic_vertex_ai/src/lib.rs`, define the main `AnthropicVertexAIProvider` struct. It will store the settings defined in Action 1.2.
* **Action 3.2: Implement `LanguageModelProvider` Trait:** Implement the `language_model::LanguageModelProvider` trait for `AnthropicVertexAIProvider`.
* **Action 3.3: Implement Core Logic:** The trait methods will contain the central logic:
  1. On initialization, the provider will create an `HttpClient` configured to use Google's ADC, following the pattern in the `google_vertex` crate.
  2. For each completion request, it will dynamically construct the full, model-specific Vertex AI URL using the configured `project_id`, `location`, and the requested model name (see the sketch after this list).
  3. It will create an instance of the modified `Request` struct from `anthropic_vertex_ai.rs`, setting the `anthropic_version` field correctly.
  4. Finally, it will call the refactored `stream_completion_with_rate_limit_info` function, passing the authenticated client and the constructed request.
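
A minimal sketch of the URL construction described in item 2 (a hypothetical free-standing helper; the committed code inlines this logic in `stream_completion_with_rate_limit_info`):

```rust
/// Builds the Vertex AI endpoint for a model. The `global` location uses the
/// bare host; regional locations are prefixed onto it, e.g.
/// `europe-west1-aiplatform.googleapis.com`.
fn vertex_url(location: &str, project: &str, model: &str) -> String {
    let host = if location == "global" {
        "aiplatform.googleapis.com".to_string()
    } else {
        format!("{location}-aiplatform.googleapis.com")
    };
    format!(
        "https://{host}/v1/projects/{project}/locations/{location}/publishers/anthropic/models/{model}:streamRawPredict"
    )
}
```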

**Step 4: Final Integration**

* **Action 4.1: Workspace Integration:** Add `anthropic_vertex_ai` to the main workspace `Cargo.toml` to link the new crate.
* **Action 4.2: Module Declaration:** Add `pub mod anthropic_vertex;` to `crates/language_models/src/provider.rs` to make the module visible.
* **Action 4.3: Provider Registration:** In `crates/language_models/src/lib.rs`, update the central list of language model providers to include an instance of `AnthropicVertexAIProvider`.

**Verification Plan:**

* **Compile-Time Verification:** At each major step, ask the human to review the code for compilation errors and adherence to project standards.
* **Configuration Verification:** The implementation will be tested against a `settings.json` file configured as follows:
```json
"language_models": {
  "anthropic_vertex": {
    "project_id": "your-gcp-project-id",
    "location_id": "europe-west1"
  }
},
"assistant": {
  "default_model": {
    "provider": "anthropic-vertex",
    "name": "claude-sonnet-4@20250514"
  }
}
```

* **Runtime Verification:**
  1. Launch Zed with the above configuration.
  2. Ensure the local environment is authenticated with GCP (e.g., via `gcloud auth application-default login`).
  3. Open the assistant panel and confirm that `"anthropic-vertex/claude-sonnet-4@20250514"` is the selected model.
  4. Send a test prompt to the assistant.
  5. **Success Condition:** A valid, streamed response is received from the assistant, confirming that the entire chain—from configuration and authentication to request execution and response parsing—is working correctly.
crates/language_models/Cargo.toml
@@ -33,6 +33,7 @@ fs.workspace = true
futures.workspace = true
google_ai = { workspace = true, features = ["schemars"] }
google_vertex_ai = { workspace = true, features = ["schemars"] }
anthropic_vertex_ai = { workspace = true, features = ["schemars"] }
gpui.workspace = true
gpui_tokio.workspace = true
http_client.workspace = true
crates/language_models/src/lib.rs
@@ -10,6 +10,7 @@ mod settings;
pub mod ui;

use crate::provider::anthropic::AnthropicLanguageModelProvider;
use crate::provider::anthropic_vertex::AnthropicVertexLanguageModelProvider;
use crate::provider::bedrock::BedrockLanguageModelProvider;
use crate::provider::cloud::CloudLanguageModelProvider;
use crate::provider::copilot_chat::CopilotChatLanguageModelProvider;
@@ -72,6 +73,11 @@ fn register_language_model_providers(
        GoogleVertexLanguageModelProvider::new(client.http_client(), cx),
        cx,
    );
    registry.register_provider(
        AnthropicVertexLanguageModelProvider::new(client.http_client(), cx),
        cx,
    );
    registry.register_provider(
        MistralLanguageModelProvider::new(client.http_client(), cx),
        cx,
crates/language_models/src/provider.rs
@@ -1,4 +1,5 @@
pub mod anthropic;
pub mod anthropic_vertex;
pub mod bedrock;
pub mod cloud;
pub mod copilot_chat;
crates/language_models/src/provider/anthropic_vertex.rs (new file, 1119 lines)
File diff suppressed because it is too large.
crates/language_models/src/settings.rs
@@ -7,6 +7,7 @@ use settings::{Settings, SettingsSources};
use crate::provider::{
    self,
    anthropic::AnthropicSettings,
    anthropic_vertex::AnthropicVertexSettings,
    bedrock::AmazonBedrockSettings,
    cloud::{self, ZedDotDevSettings},
    deepseek::DeepSeekSettings,
@@ -33,6 +34,7 @@ pub struct AllLanguageModelSettings {
    pub deepseek: DeepSeekSettings,
    pub google: GoogleSettings,
    pub google_vertex: GoogleVertexSettings,
    pub anthropic_vertex: AnthropicVertexSettings,
    pub lmstudio: LmStudioSettings,
    pub mistral: MistralSettings,
    pub ollama: OllamaSettings,
@@ -50,6 +52,7 @@ pub struct AllLanguageModelSettingsContent {
    pub deepseek: Option<DeepseekSettingsContent>,
    pub google: Option<GoogleSettingsContent>,
    pub google_vertex: Option<GoogleVertexSettingsContent>,
    pub anthropic_vertex: Option<AnthropicVertexSettingsContent>,
    pub lmstudio: Option<LmStudioSettingsContent>,
    pub mistral: Option<MistralSettingsContent>,
    pub ollama: Option<OllamaSettingsContent>,
@@ -126,6 +129,14 @@ pub struct GoogleVertexSettingsContent {
    pub available_models: Option<Vec<provider::google_vertex::AvailableModel>>,
}

#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct AnthropicVertexSettingsContent {
    pub api_url: Option<String>,
    pub project_id: Option<String>,
    pub location_id: Option<String>,
    pub available_models: Option<Vec<provider::anthropic_vertex::AvailableModel>>,
}

#[derive(Default, Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
pub struct XAiSettingsContent {
    pub api_url: Option<String>,
@@ -322,6 +333,29 @@ impl settings::Settings for AllLanguageModelSettings {
                    .as_ref()
                    .and_then(|s| s.location_id.clone()),
            );

            // Anthropic Vertex AI
            merge(
                &mut settings.anthropic_vertex.api_url,
                value
                    .anthropic_vertex
                    .as_ref()
                    .and_then(|s| s.api_url.clone()),
            );
            merge(
                &mut settings.anthropic_vertex.project_id,
                value
                    .anthropic_vertex
                    .as_ref()
                    .and_then(|s| s.project_id.clone()),
            );
            merge(
                &mut settings.anthropic_vertex.location_id,
                value
                    .anthropic_vertex
                    .as_ref()
                    .and_then(|s| s.location_id.clone()),
            );
        }

        Ok(settings)