WIP and merge

Anthony 2025-06-27 18:38:25 -04:00
parent 97f4406ef6
commit 1bdde8b2e4
584 changed files with 33536 additions and 17400 deletions

View file

@@ -26,6 +26,7 @@ gpui.workspace = true
http_client.workspace = true
icons.workspace = true
image.workspace = true
+log.workspace = true
parking_lot.workspace = true
proto.workspace = true
schemars.workspace = true

View file

@@ -169,11 +169,11 @@ impl LanguageModel for FakeLanguageModel {
"fake".to_string()
}
-fn max_token_count(&self) -> usize {
+fn max_token_count(&self) -> u64 {
1000000
}
-fn count_tokens(&self, _: LanguageModelRequest, _: &App) -> BoxFuture<'static, Result<usize>> {
+fn count_tokens(&self, _: LanguageModelRequest, _: &App) -> BoxFuture<'static, Result<u64>> {
futures::future::ready(Ok(0)).boxed()
}
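Note on the usize-to-u64 migration in this file: token counts are now 64-bit everywhere, so budgets behave identically on 32-bit targets, where usize is only 32 bits. A minimal sketch of the arithmetic this enables; the helper is hypothetical, not part of this commit:

fn remaining_tokens(max_token_count: u64, tokens_used: u64) -> u64 {
    // Saturate rather than panic if recorded usage ever exceeds the budget.
    max_token_count.saturating_sub(tokens_used)
}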

View file

@@ -8,27 +8,24 @@ mod telemetry;
#[cfg(any(test, feature = "test-support"))]
pub mod fake_provider;
-use anyhow::{Context as _, Result};
+use anthropic::{AnthropicError, parse_prompt_too_long};
+use anyhow::Result;
use client::Client;
use futures::FutureExt;
use futures::{StreamExt, future::BoxFuture, stream::BoxStream};
use gpui::{AnyElement, AnyView, App, AsyncApp, SharedString, Task, Window};
-use http_client::http::{HeaderMap, HeaderValue};
+use http_client::http;
use icons::IconName;
use parking_lot::Mutex;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize, de::DeserializeOwned};
-use std::fmt;
use std::ops::{Add, Sub};
use std::str::FromStr as _;
use std::sync::Arc;
use std::time::Duration;
+use std::{fmt, io};
use thiserror::Error;
use util::serde::is_default;
-use zed_llm_client::{
-CompletionRequestStatus, MODEL_REQUESTS_USAGE_AMOUNT_HEADER_NAME,
-MODEL_REQUESTS_USAGE_LIMIT_HEADER_NAME, UsageLimit,
-};
+use zed_llm_client::CompletionRequestStatus;
pub use crate::model::*;
pub use crate::rate_limiter::*;
@@ -39,6 +36,10 @@ pub use crate::telemetry::*;
pub const ZED_CLOUD_PROVIDER_ID: &str = "zed.dev";
+/// If we get a rate limit error that doesn't tell us when we can retry,
+/// default to waiting this long before retrying.
+const DEFAULT_RATE_LIMIT_RETRY_AFTER: Duration = Duration::from_secs(4);
pub fn init(client: Arc<Client>, cx: &mut App) {
init_settings(cx);
RefreshLlmTokenListener::register(client.clone(), cx);
@@ -53,7 +54,7 @@ pub fn init_settings(cx: &mut App) {
pub struct LanguageModelCacheConfiguration {
pub max_cache_anchors: usize,
pub should_speculate: bool,
-pub min_total_token: usize,
+pub min_total_token: u64,
}
/// A completion event from a language model.
@@ -66,6 +67,9 @@ pub enum LanguageModelCompletionEvent {
text: String,
signature: Option<String>,
},
+RedactedThinking {
+data: String,
+},
ToolUse(LanguageModelToolUse),
StartMessage {
message_id: String,
@@ -75,8 +79,8 @@ pub enum LanguageModelCompletionEvent {
#[derive(Error, Debug)]
pub enum LanguageModelCompletionError {
#[error("rate limit exceeded, retry after {0:?}")]
RateLimit(Duration),
#[error("rate limit exceeded, retry after {retry_after:?}")]
RateLimitExceeded { retry_after: Duration },
#[error("received bad input JSON")]
BadInputJson {
id: LanguageModelToolUseId,
@@ -84,8 +88,78 @@ pub enum LanguageModelCompletionError {
raw_input: Arc<str>,
json_parse_error: String,
},
#[error("language model provider's API is overloaded")]
Overloaded,
#[error(transparent)]
Other(#[from] anyhow::Error),
#[error("invalid request format to language model provider's API")]
BadRequestFormat,
#[error("authentication error with language model provider's API")]
AuthenticationError,
#[error("permission error with language model provider's API")]
PermissionError,
#[error("language model provider API endpoint not found")]
ApiEndpointNotFound,
#[error("prompt too large for context window")]
PromptTooLarge { tokens: Option<u64> },
#[error("internal server error in language model provider's API")]
ApiInternalServerError,
#[error("I/O error reading response from language model provider's API: {0:?}")]
ApiReadResponseError(io::Error),
#[error("HTTP response error from language model provider's API: status {status} - {body:?}")]
HttpResponseError { status: u16, body: String },
#[error("error serializing request to language model provider API: {0}")]
SerializeRequest(serde_json::Error),
#[error("error building request body to language model provider API: {0}")]
BuildRequestBody(http::Error),
#[error("error sending HTTP request to language model provider API: {0}")]
HttpSend(anyhow::Error),
#[error("error deserializing language model provider API response: {0}")]
DeserializeResponse(serde_json::Error),
#[error("unexpected language model provider API response format: {0}")]
UnknownResponseFormat(String),
}
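These structured variants let callers pick a retry policy without string-matching on error messages. A hedged sketch of one such policy, assuming the crate-private DEFAULT_RATE_LIMIT_RETRY_AFTER defined above is in scope; the function is illustrative, not part of this commit:

fn retry_delay(error: &LanguageModelCompletionError) -> Option<std::time::Duration> {
    use LanguageModelCompletionError::*;
    match error {
        // The provider told us exactly how long to wait.
        RateLimitExceeded { retry_after } => Some(*retry_after),
        // Overload and internal server errors are transient; back off and retry.
        Overloaded | ApiInternalServerError => Some(DEFAULT_RATE_LIMIT_RETRY_AFTER),
        // Auth failures, malformed requests, oversized prompts, and the rest
        // will not succeed on retry.
        _ => None,
    }
}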
+impl From<AnthropicError> for LanguageModelCompletionError {
+fn from(error: AnthropicError) -> Self {
+match error {
+AnthropicError::SerializeRequest(error) => Self::SerializeRequest(error),
+AnthropicError::BuildRequestBody(error) => Self::BuildRequestBody(error),
+AnthropicError::HttpSend(error) => Self::HttpSend(error),
+AnthropicError::DeserializeResponse(error) => Self::DeserializeResponse(error),
+AnthropicError::ReadResponse(error) => Self::ApiReadResponseError(error),
+AnthropicError::HttpResponseError { status, body } => {
+Self::HttpResponseError { status, body }
+}
+AnthropicError::RateLimit { retry_after } => Self::RateLimitExceeded { retry_after },
+AnthropicError::ApiError(api_error) => api_error.into(),
+AnthropicError::UnexpectedResponseFormat(error) => Self::UnknownResponseFormat(error),
+}
+}
+}
+impl From<anthropic::ApiError> for LanguageModelCompletionError {
+fn from(error: anthropic::ApiError) -> Self {
+use anthropic::ApiErrorCode::*;
+match error.code() {
+Some(code) => match code {
+InvalidRequestError => LanguageModelCompletionError::BadRequestFormat,
+AuthenticationError => LanguageModelCompletionError::AuthenticationError,
+PermissionError => LanguageModelCompletionError::PermissionError,
+NotFoundError => LanguageModelCompletionError::ApiEndpointNotFound,
+RequestTooLarge => LanguageModelCompletionError::PromptTooLarge {
+tokens: parse_prompt_too_long(&error.message),
+},
+RateLimitError => LanguageModelCompletionError::RateLimitExceeded {
+retry_after: DEFAULT_RATE_LIMIT_RETRY_AFTER,
+},
+ApiError => LanguageModelCompletionError::ApiInternalServerError,
+OverloadedError => LanguageModelCompletionError::Overloaded,
+},
+None => LanguageModelCompletionError::Other(error.into()),
+}
+}
+}
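With these two From impls, Anthropic-specific provider code can normalize errors at the boundary in a single call: transport failures go through the first impl, and API error codes are routed through the second via the ApiError(api_error) => api_error.into() arm. A hedged sketch; the function is hypothetical:

fn normalize(result: Result<(), AnthropicError>) -> Result<(), LanguageModelCompletionError> {
    result.map_err(LanguageModelCompletionError::from)
}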
/// Indicates the format used to define the input schema for a language model tool.
@@ -106,46 +180,20 @@ pub enum StopReason {
Refusal,
}
-#[derive(Debug, Clone, Copy)]
-pub struct RequestUsage {
-pub limit: UsageLimit,
-pub amount: i32,
-}
-impl RequestUsage {
-pub fn from_headers(headers: &HeaderMap<HeaderValue>) -> Result<Self> {
-let limit = headers
-.get(MODEL_REQUESTS_USAGE_LIMIT_HEADER_NAME)
-.with_context(|| {
-format!("missing {MODEL_REQUESTS_USAGE_LIMIT_HEADER_NAME:?} header")
-})?;
-let limit = UsageLimit::from_str(limit.to_str()?)?;
-let amount = headers
-.get(MODEL_REQUESTS_USAGE_AMOUNT_HEADER_NAME)
-.with_context(|| {
-format!("missing {MODEL_REQUESTS_USAGE_AMOUNT_HEADER_NAME:?} header")
-})?;
-let amount = amount.to_str()?.parse::<i32>()?;
-Ok(Self { limit, amount })
-}
-}
#[derive(Debug, PartialEq, Clone, Copy, Serialize, Deserialize, Default)]
pub struct TokenUsage {
#[serde(default, skip_serializing_if = "is_default")]
-pub input_tokens: u32,
+pub input_tokens: u64,
#[serde(default, skip_serializing_if = "is_default")]
-pub output_tokens: u32,
+pub output_tokens: u64,
#[serde(default, skip_serializing_if = "is_default")]
-pub cache_creation_input_tokens: u32,
+pub cache_creation_input_tokens: u64,
#[serde(default, skip_serializing_if = "is_default")]
-pub cache_read_input_tokens: u32,
+pub cache_read_input_tokens: u64,
}
impl TokenUsage {
-pub fn total_tokens(&self) -> u32 {
+pub fn total_tokens(&self) -> u64 {
self.input_tokens
+ self.output_tokens
+ self.cache_read_input_tokens
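A short sketch of the widened TokenUsage from the hunk above; the JSON comment assumes serde_json, and skip_serializing_if = "is_default" drops zero-valued counters:

let usage = TokenUsage {
    input_tokens: 1_200,
    output_tokens: 80,
    ..Default::default()
};
// The counters are summed in u64, so this no longer overflows at u32::MAX.
assert_eq!(usage.total_tokens(), 1_280);
// Serializes as {"input_tokens":1200,"output_tokens":80}; the zeroed
// cache fields are omitted.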
@@ -254,8 +302,8 @@ pub trait LanguageModel: Send + Sync {
LanguageModelToolSchemaFormat::JsonSchema
}
-fn max_token_count(&self) -> usize;
-fn max_output_tokens(&self) -> Option<u32> {
+fn max_token_count(&self) -> u64;
+fn max_output_tokens(&self) -> Option<u64> {
None
}
@@ -263,7 +311,7 @@
&self,
request: LanguageModelRequest,
cx: &App,
-) -> BoxFuture<'static, Result<usize>>;
+) -> BoxFuture<'static, Result<u64>>;
fn stream_completion(
&self,
@@ -314,6 +362,7 @@ pub trait LanguageModel: Send + Sync {
Ok(LanguageModelCompletionEvent::StartMessage { .. }) => None,
Ok(LanguageModelCompletionEvent::Text(text)) => Some(Ok(text)),
Ok(LanguageModelCompletionEvent::Thinking { .. }) => None,
+Ok(LanguageModelCompletionEvent::RedactedThinking { .. }) => None,
Ok(LanguageModelCompletionEvent::Stop(_)) => None,
Ok(LanguageModelCompletionEvent::ToolUse(_)) => None,
Ok(LanguageModelCompletionEvent::UsageUpdate(token_usage)) => {
@@ -349,7 +398,34 @@
#[derive(Debug, Error)]
pub enum LanguageModelKnownError {
#[error("Context window limit exceeded ({tokens})")]
-ContextWindowLimitExceeded { tokens: usize },
+ContextWindowLimitExceeded { tokens: u64 },
#[error("Language model provider's API is currently overloaded")]
Overloaded,
#[error("Language model provider's API encountered an internal server error")]
ApiInternalServerError,
#[error("I/O error while reading response from language model provider's API: {0:?}")]
ReadResponseError(io::Error),
#[error("Error deserializing response from language model provider's API: {0:?}")]
DeserializeResponse(serde_json::Error),
#[error("Language model provider's API returned a response in an unknown format")]
UnknownResponseFormat(String),
#[error("Rate limit exceeded for language model provider's API; retry in {retry_after:?}")]
RateLimitExceeded { retry_after: Duration },
}
+impl LanguageModelKnownError {
+/// Attempts to map an HTTP response status code to a known error type.
+/// Returns None if the status code doesn't map to a specific known error.
+pub fn from_http_response(status: u16, _body: &str) -> Option<Self> {
+match status {
+429 => Some(Self::RateLimitExceeded {
+retry_after: DEFAULT_RATE_LIMIT_RETRY_AFTER,
+}),
+503 => Some(Self::Overloaded),
+500..=599 => Some(Self::ApiInternalServerError),
+_ => None,
+}
+}
+}
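Order matters in the match above: 429 and 503 are claimed before the catch-all 500..=599 arm, so an overloaded response is not misreported as a generic internal server error. A hedged caller sketch; the classify helper is hypothetical:

fn classify(status: u16, body: &str) -> anyhow::Error {
    match LanguageModelKnownError::from_http_response(status, body) {
        Some(known) => anyhow::Error::new(known),
        // Statuses with no specific mapping fall back to a generic error.
        None => anyhow::anyhow!("HTTP {status}: {body}"),
    }
}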
pub trait LanguageModelTool: 'static + DeserializeOwned + JsonSchema {

View file

@@ -5,6 +5,7 @@ use crate::{
use collections::BTreeMap;
use gpui::{App, Context, Entity, EventEmitter, Global, prelude::*};
use std::{str::FromStr, sync::Arc};
+use thiserror::Error;
use util::maybe;
pub fn init(cx: &mut App) {
@@ -16,6 +17,34 @@ struct GlobalLanguageModelRegistry(Entity<LanguageModelRegistry>);
impl Global for GlobalLanguageModelRegistry {}
+#[derive(Error)]
+pub enum ConfigurationError {
+#[error("Configure at least one LLM provider to start using the panel.")]
+NoProvider,
+#[error("LLM Provider is not configured or does not support the configured model.")]
+ModelNotFound,
+#[error("{} LLM provider is not configured.", .0.name().0)]
+ProviderNotAuthenticated(Arc<dyn LanguageModelProvider>),
+#[error("Using the {} LLM provider requires accepting the Terms of Service.", .0.name().0)]
+ProviderPendingTermsAcceptance(Arc<dyn LanguageModelProvider>),
+}
+impl std::fmt::Debug for ConfigurationError {
+fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+match self {
+Self::NoProvider => write!(f, "NoProvider"),
+Self::ModelNotFound => write!(f, "ModelNotFound"),
+Self::ProviderNotAuthenticated(provider) => {
+write!(f, "ProviderNotAuthenticated({})", provider.id())
+}
+Self::ProviderPendingTermsAcceptance(provider) => {
+write!(f, "ProviderPendingTermsAcceptance({})", provider.id())
+}
+}
+}
+}
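thiserror derives Display from the #[error(...)] strings, so UI code can surface these messages directly; the manual Debug impl exists because Arc<dyn LanguageModelProvider> has no Debug of its own. A hedged sketch; the helper is hypothetical:

fn configuration_message(error: &ConfigurationError) -> String {
    // e.g. "Configure at least one LLM provider to start using the panel."
    error.to_string()
}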
#[derive(Default)]
pub struct LanguageModelRegistry {
default_model: Option<ConfiguredModel>,
@@ -152,6 +181,36 @@ impl LanguageModelRegistry {
providers
}
+pub fn configuration_error(
+&self,
+model: Option<ConfiguredModel>,
+cx: &App,
+) -> Option<ConfigurationError> {
+let Some(model) = model else {
+if !self.has_authenticated_provider(cx) {
+return Some(ConfigurationError::NoProvider);
+}
+return Some(ConfigurationError::ModelNotFound);
+};
+if !model.provider.is_authenticated(cx) {
+return Some(ConfigurationError::ProviderNotAuthenticated(model.provider));
+}
+if model.provider.must_accept_terms(cx) {
+return Some(ConfigurationError::ProviderPendingTermsAcceptance(
+model.provider,
+));
+}
+None
+}
+/// Check that we have at least one provider that is authenticated.
+fn has_authenticated_provider(&self, cx: &App) -> bool {
+self.providers.values().any(|p| p.is_authenticated(cx))
+}
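A sketch of a typical call site, written as it might appear inside a function with cx: &App in scope; it assumes the registry's global accessor read_global and a default_model() getter returning Option<ConfiguredModel>, neither of which is shown in this diff:

let registry = LanguageModelRegistry::read_global(cx);
if let Some(error) = registry.configuration_error(registry.default_model(), cx) {
    // Display text comes straight from the #[error(...)] strings.
    log::warn!("configured model is not usable: {error}");
}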
pub fn available_models<'a>(
&'a self,
cx: &'a App,

View file

@@ -303,7 +303,7 @@ pub enum MessageContent {
text: String,
signature: Option<String>,
},
-RedactedThinking(Vec<u8>),
+RedactedThinking(String),
Image(LanguageModelImage),
ToolUse(LanguageModelToolUse),
ToolResult(LanguageModelToolResult),

View file

@@ -1,5 +1,5 @@
-use anthropic::{ANTHROPIC_API_URL, AnthropicError};
-use anyhow::{Context as _, Result, anyhow};
+use anthropic::ANTHROPIC_API_URL;
+use anyhow::{Context as _, anyhow};
use client::telemetry::Telemetry;
use gpui::BackgroundExecutor;
use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
@@ -20,13 +20,17 @@ pub fn report_assistant_event(
if let Some(telemetry) = telemetry.as_ref() {
telemetry.report_assistant_event(event.clone());
if telemetry.metrics_enabled() && event.model_provider == ANTHROPIC_PROVIDER_ID {
-executor
-.spawn(async move {
-report_anthropic_event(event, client, model_api_key)
-.await
-.log_err();
-})
-.detach();
+if let Some(api_key) = model_api_key {
+executor
+.spawn(async move {
+report_anthropic_event(event, client, api_key)
+.await
+.log_err();
+})
+.detach();
+} else {
+log::error!("Cannot send Anthropic telemetry because API key is missing");
+}
}
}
}
@@ -34,17 +38,8 @@
async fn report_anthropic_event(
event: AssistantEventData,
client: Arc<dyn HttpClient>,
-model_api_key: Option<String>,
-) -> Result<(), AnthropicError> {
-let api_key = match model_api_key {
-Some(key) => key,
-None => {
-return Err(AnthropicError::Other(anyhow!(
-"Anthropic API key is not set"
-)));
-}
-};
+api_key: String,
+) -> anyhow::Result<()> {
let uri = format!("{ANTHROPIC_API_URL}/v1/log/zed");
let request_builder = HttpRequest::builder()
.method(Method::POST)
@@ -72,19 +67,19 @@ async fn report_anthropic_event(
let request = request_builder
.body(AsyncBody::from(serialized_event.to_string()))
.context("failed to construct request body")?;
.context("Failed to construct Anthropic telemetry HTTP request body")?;
let response = client
.send(request)
.await
.context("failed to send request to Anthropic")?;
.context("Failed to send telemetry HTTP request to Anthropic")?;
if response.status().is_success() {
-return Ok(());
+Ok(())
+} else {
+Err(anyhow!(
+"Anthropic telemetry logging failed with HTTP status: {}",
+response.status()
+))
+}
return Err(AnthropicError::Other(anyhow!(
"Failed to log: {}",
response.status(),
)));
}