WIP and merge

Parent: 97f4406ef6
Commit: 1bdde8b2e4

584 changed files with 33536 additions and 17400 deletions

@@ -26,6 +26,7 @@ gpui.workspace = true
 http_client.workspace = true
+icons.workspace = true
 image.workspace = true
 log.workspace = true
 parking_lot.workspace = true
 proto.workspace = true
 schemars.workspace = true

@@ -169,11 +169,11 @@ impl LanguageModel for FakeLanguageModel {
         "fake".to_string()
     }

-    fn max_token_count(&self) -> usize {
+    fn max_token_count(&self) -> u64 {
         1000000
     }

-    fn count_tokens(&self, _: LanguageModelRequest, _: &App) -> BoxFuture<'static, Result<usize>> {
+    fn count_tokens(&self, _: LanguageModelRequest, _: &App) -> BoxFuture<'static, Result<u64>> {
         futures::future::ready(Ok(0)).boxed()
     }

@@ -8,27 +8,24 @@ mod telemetry;
 #[cfg(any(test, feature = "test-support"))]
 pub mod fake_provider;

-use anyhow::{Context as _, Result};
+use anthropic::{AnthropicError, parse_prompt_too_long};
+use anyhow::Result;
 use client::Client;
 use futures::FutureExt;
 use futures::{StreamExt, future::BoxFuture, stream::BoxStream};
 use gpui::{AnyElement, AnyView, App, AsyncApp, SharedString, Task, Window};
-use http_client::http::{HeaderMap, HeaderValue};
+use http_client::http;
+use icons::IconName;
 use parking_lot::Mutex;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize, de::DeserializeOwned};
-use std::fmt;
 use std::ops::{Add, Sub};
-use std::str::FromStr as _;
 use std::sync::Arc;
+use std::time::Duration;
+use std::{fmt, io};
 use thiserror::Error;
 use util::serde::is_default;
-use zed_llm_client::{
-    CompletionRequestStatus, MODEL_REQUESTS_USAGE_AMOUNT_HEADER_NAME,
-    MODEL_REQUESTS_USAGE_LIMIT_HEADER_NAME, UsageLimit,
-};
+use zed_llm_client::CompletionRequestStatus;

 pub use crate::model::*;
 pub use crate::rate_limiter::*;

@@ -39,6 +36,10 @@ pub use crate::telemetry::*;

 pub const ZED_CLOUD_PROVIDER_ID: &str = "zed.dev";

+/// If we get a rate limit error that doesn't tell us when we can retry,
+/// default to waiting this long before retrying.
+const DEFAULT_RATE_LIMIT_RETRY_AFTER: Duration = Duration::from_secs(4);
+
 pub fn init(client: Arc<Client>, cx: &mut App) {
     init_settings(cx);
     RefreshLlmTokenListener::register(client.clone(), cx);

@@ -53,7 +54,7 @@ pub fn init_settings(cx: &mut App) {
 pub struct LanguageModelCacheConfiguration {
     pub max_cache_anchors: usize,
     pub should_speculate: bool,
-    pub min_total_token: usize,
+    pub min_total_token: u64,
 }

 /// A completion event from a language model.

@@ -66,6 +67,9 @@ pub enum LanguageModelCompletionEvent {
         text: String,
         signature: Option<String>,
     },
+    RedactedThinking {
+        data: String,
+    },
     ToolUse(LanguageModelToolUse),
     StartMessage {
         message_id: String,

@@ -75,8 +79,8 @@

 #[derive(Error, Debug)]
 pub enum LanguageModelCompletionError {
-    #[error("rate limit exceeded, retry after {0:?}")]
-    RateLimit(Duration),
+    #[error("rate limit exceeded, retry after {retry_after:?}")]
+    RateLimitExceeded { retry_after: Duration },
     #[error("received bad input JSON")]
     BadInputJson {
         id: LanguageModelToolUseId,

@@ -84,8 +88,78 @@
         raw_input: Arc<str>,
         json_parse_error: String,
     },
     #[error("language model provider's API is overloaded")]
     Overloaded,
     #[error(transparent)]
     Other(#[from] anyhow::Error),
+    #[error("invalid request format to language model provider's API")]
+    BadRequestFormat,
+    #[error("authentication error with language model provider's API")]
+    AuthenticationError,
+    #[error("permission error with language model provider's API")]
+    PermissionError,
+    #[error("language model provider API endpoint not found")]
+    ApiEndpointNotFound,
+    #[error("prompt too large for context window")]
+    PromptTooLarge { tokens: Option<u64> },
+    #[error("internal server error in language model provider's API")]
+    ApiInternalServerError,
+    #[error("I/O error reading response from language model provider's API: {0:?}")]
+    ApiReadResponseError(io::Error),
+    #[error("HTTP response error from language model provider's API: status {status} - {body:?}")]
+    HttpResponseError { status: u16, body: String },
+    #[error("error serializing request to language model provider API: {0}")]
+    SerializeRequest(serde_json::Error),
+    #[error("error building request body to language model provider API: {0}")]
+    BuildRequestBody(http::Error),
+    #[error("error sending HTTP request to language model provider API: {0}")]
+    HttpSend(anyhow::Error),
+    #[error("error deserializing language model provider API response: {0}")]
+    DeserializeResponse(serde_json::Error),
+    #[error("unexpected language model provider API response format: {0}")]
+    UnknownResponseFormat(String),
 }
+
+impl From<AnthropicError> for LanguageModelCompletionError {
+    fn from(error: AnthropicError) -> Self {
+        match error {
+            AnthropicError::SerializeRequest(error) => Self::SerializeRequest(error),
+            AnthropicError::BuildRequestBody(error) => Self::BuildRequestBody(error),
+            AnthropicError::HttpSend(error) => Self::HttpSend(error),
+            AnthropicError::DeserializeResponse(error) => Self::DeserializeResponse(error),
+            AnthropicError::ReadResponse(error) => Self::ApiReadResponseError(error),
+            AnthropicError::HttpResponseError { status, body } => {
+                Self::HttpResponseError { status, body }
+            }
+            AnthropicError::RateLimit { retry_after } => Self::RateLimitExceeded { retry_after },
+            AnthropicError::ApiError(api_error) => api_error.into(),
+            AnthropicError::UnexpectedResponseFormat(error) => Self::UnknownResponseFormat(error),
+        }
+    }
+}
+
+impl From<anthropic::ApiError> for LanguageModelCompletionError {
+    fn from(error: anthropic::ApiError) -> Self {
+        use anthropic::ApiErrorCode::*;
+
+        match error.code() {
+            Some(code) => match code {
+                InvalidRequestError => LanguageModelCompletionError::BadRequestFormat,
+                AuthenticationError => LanguageModelCompletionError::AuthenticationError,
+                PermissionError => LanguageModelCompletionError::PermissionError,
+                NotFoundError => LanguageModelCompletionError::ApiEndpointNotFound,
+                RequestTooLarge => LanguageModelCompletionError::PromptTooLarge {
+                    tokens: parse_prompt_too_long(&error.message),
+                },
+                RateLimitError => LanguageModelCompletionError::RateLimitExceeded {
+                    retry_after: DEFAULT_RATE_LIMIT_RETRY_AFTER,
+                },
+                ApiError => LanguageModelCompletionError::ApiInternalServerError,
+                OverloadedError => LanguageModelCompletionError::Overloaded,
+            },
+            None => LanguageModelCompletionError::Other(error.into()),
+        }
+    }
+}

 /// Indicates the format used to define the input schema for a language model tool.

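These two From impls let provider code funnel every Anthropic failure into LanguageModelCompletionError with the ? operator. A minimal sketch of the resulting ergonomics, assuming the types from this diff are in scope (the normalize function itself is hypothetical, not part of the change):

// Hypothetical helper, not part of the diff: `?` applies the
// From<AnthropicError> impl automatically, so for example
// AnthropicError::RateLimit { retry_after } surfaces as
// LanguageModelCompletionError::RateLimitExceeded { retry_after }.
fn normalize(
    result: Result<String, AnthropicError>,
) -> Result<String, LanguageModelCompletionError> {
    Ok(result?)
}
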
@@ -106,46 +180,20 @@ pub enum StopReason {
     Refusal,
 }

-#[derive(Debug, Clone, Copy)]
-pub struct RequestUsage {
-    pub limit: UsageLimit,
-    pub amount: i32,
-}
-
-impl RequestUsage {
-    pub fn from_headers(headers: &HeaderMap<HeaderValue>) -> Result<Self> {
-        let limit = headers
-            .get(MODEL_REQUESTS_USAGE_LIMIT_HEADER_NAME)
-            .with_context(|| {
-                format!("missing {MODEL_REQUESTS_USAGE_LIMIT_HEADER_NAME:?} header")
-            })?;
-        let limit = UsageLimit::from_str(limit.to_str()?)?;
-
-        let amount = headers
-            .get(MODEL_REQUESTS_USAGE_AMOUNT_HEADER_NAME)
-            .with_context(|| {
-                format!("missing {MODEL_REQUESTS_USAGE_AMOUNT_HEADER_NAME:?} header")
-            })?;
-        let amount = amount.to_str()?.parse::<i32>()?;
-
-        Ok(Self { limit, amount })
-    }
-}
-
 #[derive(Debug, PartialEq, Clone, Copy, Serialize, Deserialize, Default)]
 pub struct TokenUsage {
     #[serde(default, skip_serializing_if = "is_default")]
-    pub input_tokens: u32,
+    pub input_tokens: u64,
     #[serde(default, skip_serializing_if = "is_default")]
-    pub output_tokens: u32,
+    pub output_tokens: u64,
     #[serde(default, skip_serializing_if = "is_default")]
-    pub cache_creation_input_tokens: u32,
+    pub cache_creation_input_tokens: u64,
     #[serde(default, skip_serializing_if = "is_default")]
-    pub cache_read_input_tokens: u32,
+    pub cache_read_input_tokens: u64,
 }

 impl TokenUsage {
-    pub fn total_tokens(&self) -> u32 {
+    pub fn total_tokens(&self) -> u64 {
         self.input_tokens
             + self.output_tokens
             + self.cache_read_input_tokens

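Aside on the serde attributes above: skip_serializing_if = "is_default" keeps zeroed counters out of the serialized payload. The util::serde::is_default helper is assumed to be the usual generic predicate, roughly:

// Sketch of the assumed util::serde::is_default helper: true when the
// value equals its type's Default, letting serde omit zero token counts.
pub fn is_default<T: Default + PartialEq>(value: &T) -> bool {
    *value == T::default()
}
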
@@ -254,8 +302,8 @@ pub trait LanguageModel: Send + Sync {
         LanguageModelToolSchemaFormat::JsonSchema
     }

-    fn max_token_count(&self) -> usize;
-    fn max_output_tokens(&self) -> Option<u32> {
+    fn max_token_count(&self) -> u64;
+    fn max_output_tokens(&self) -> Option<u64> {
         None
     }

@@ -263,7 +311,7 @@
         &self,
         request: LanguageModelRequest,
         cx: &App,
-    ) -> BoxFuture<'static, Result<usize>>;
+    ) -> BoxFuture<'static, Result<u64>>;

     fn stream_completion(
         &self,

@@ -314,6 +362,7 @@
             Ok(LanguageModelCompletionEvent::StartMessage { .. }) => None,
             Ok(LanguageModelCompletionEvent::Text(text)) => Some(Ok(text)),
             Ok(LanguageModelCompletionEvent::Thinking { .. }) => None,
+            Ok(LanguageModelCompletionEvent::RedactedThinking { .. }) => None,
             Ok(LanguageModelCompletionEvent::Stop(_)) => None,
             Ok(LanguageModelCompletionEvent::ToolUse(_)) => None,
             Ok(LanguageModelCompletionEvent::UsageUpdate(token_usage)) => {

@@ -349,7 +398,34 @@
 #[derive(Debug, Error)]
 pub enum LanguageModelKnownError {
     #[error("Context window limit exceeded ({tokens})")]
-    ContextWindowLimitExceeded { tokens: usize },
+    ContextWindowLimitExceeded { tokens: u64 },
+    #[error("Language model provider's API is currently overloaded")]
+    Overloaded,
+    #[error("Language model provider's API encountered an internal server error")]
+    ApiInternalServerError,
+    #[error("I/O error while reading response from language model provider's API: {0:?}")]
+    ReadResponseError(io::Error),
+    #[error("Error deserializing response from language model provider's API: {0:?}")]
+    DeserializeResponse(serde_json::Error),
+    #[error("Language model provider's API returned a response in an unknown format")]
+    UnknownResponseFormat(String),
+    #[error("Rate limit exceeded for language model provider's API; retry in {retry_after:?}")]
+    RateLimitExceeded { retry_after: Duration },
 }

+impl LanguageModelKnownError {
+    /// Attempts to map an HTTP response status code to a known error type.
+    /// Returns None if the status code doesn't map to a specific known error.
+    pub fn from_http_response(status: u16, _body: &str) -> Option<Self> {
+        match status {
+            429 => Some(Self::RateLimitExceeded {
+                retry_after: DEFAULT_RATE_LIMIT_RETRY_AFTER,
+            }),
+            503 => Some(Self::Overloaded),
+            500..=599 => Some(Self::ApiInternalServerError),
+            _ => None,
+        }
+    }
+}
+
 pub trait LanguageModelTool: 'static + DeserializeOwned + JsonSchema {

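A sketch of how a provider response handler might lean on from_http_response; the classify_failure wrapper and its anyhow fallback are illustrative, not part of the diff:

// Hypothetical call site for LanguageModelKnownError::from_http_response:
// map well-known HTTP statuses to typed errors, fall back to anyhow.
fn classify_failure(status: u16, body: &str) -> anyhow::Error {
    match LanguageModelKnownError::from_http_response(status, body) {
        // The thiserror derive provides std::error::Error, so `.into()` works.
        Some(known) => known.into(),
        None => anyhow::anyhow!("unexpected HTTP status {status}: {body}"),
    }
}
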
@@ -5,6 +5,7 @@ use crate::{
 use collections::BTreeMap;
 use gpui::{App, Context, Entity, EventEmitter, Global, prelude::*};
 use std::{str::FromStr, sync::Arc};
+use thiserror::Error;
 use util::maybe;

 pub fn init(cx: &mut App) {

@@ -16,6 +17,34 @@ struct GlobalLanguageModelRegistry(Entity<LanguageModelRegistry>);

 impl Global for GlobalLanguageModelRegistry {}

+#[derive(Error)]
+pub enum ConfigurationError {
+    #[error("Configure at least one LLM provider to start using the panel.")]
+    NoProvider,
+    #[error("LLM Provider is not configured or does not support the configured model.")]
+    ModelNotFound,
+    #[error("{} LLM provider is not configured.", .0.name().0)]
+    ProviderNotAuthenticated(Arc<dyn LanguageModelProvider>),
+    #[error("Using the {} LLM provider requires accepting the Terms of Service.",
+        .0.name().0)]
+    ProviderPendingTermsAcceptance(Arc<dyn LanguageModelProvider>),
+}
+
+impl std::fmt::Debug for ConfigurationError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::NoProvider => write!(f, "NoProvider"),
+            Self::ModelNotFound => write!(f, "ModelNotFound"),
+            Self::ProviderNotAuthenticated(provider) => {
+                write!(f, "ProviderNotAuthenticated({})", provider.id())
+            }
+            Self::ProviderPendingTermsAcceptance(provider) => {
+                write!(f, "ProviderPendingTermsAcceptance({})", provider.id())
+            }
+        }
+    }
+}
+
 #[derive(Default)]
 pub struct LanguageModelRegistry {
     default_model: Option<ConfiguredModel>,

@@ -152,6 +181,36 @@ impl LanguageModelRegistry {
         providers
     }

+    pub fn configuration_error(
+        &self,
+        model: Option<ConfiguredModel>,
+        cx: &App,
+    ) -> Option<ConfigurationError> {
+        let Some(model) = model else {
+            if !self.has_authenticated_provider(cx) {
+                return Some(ConfigurationError::NoProvider);
+            }
+            return Some(ConfigurationError::ModelNotFound);
+        };
+
+        if !model.provider.is_authenticated(cx) {
+            return Some(ConfigurationError::ProviderNotAuthenticated(model.provider));
+        }
+
+        if model.provider.must_accept_terms(cx) {
+            return Some(ConfigurationError::ProviderPendingTermsAcceptance(
+                model.provider,
+            ));
+        }
+
+        None
+    }
+
+    /// Check that we have at least one provider that is authenticated.
+    fn has_authenticated_provider(&self, cx: &App) -> bool {
+        self.providers.values().any(|p| p.is_authenticated(cx))
+    }
+
     pub fn available_models<'a>(
         &'a self,
         cx: &'a App,

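A sketch of how UI code might consume configuration_error to pick a banner before sending a request; the configuration_banner function and the default_model() accessor are assumptions for illustration:

// Hypothetical caller: surface the most specific configuration problem.
// `default_model()` is an assumed accessor returning Option<ConfiguredModel>.
fn configuration_banner(registry: &LanguageModelRegistry, cx: &App) -> Option<String> {
    let model = registry.default_model();
    registry
        .configuration_error(model, cx)
        // thiserror's #[error(...)] strings double as user-facing text here.
        .map(|err| err.to_string())
}
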
@@ -303,7 +303,7 @@ pub enum MessageContent {
         text: String,
         signature: Option<String>,
     },
-    RedactedThinking(Vec<u8>),
+    RedactedThinking(String),
     Image(LanguageModelImage),
     ToolUse(LanguageModelToolUse),
     ToolResult(LanguageModelToolResult),

@@ -1,5 +1,5 @@
-use anthropic::{ANTHROPIC_API_URL, AnthropicError};
-use anyhow::{Context as _, Result, anyhow};
+use anthropic::ANTHROPIC_API_URL;
+use anyhow::{Context as _, anyhow};
 use client::telemetry::Telemetry;
 use gpui::BackgroundExecutor;
 use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};

@@ -20,13 +20,17 @@ pub fn report_assistant_event(
     if let Some(telemetry) = telemetry.as_ref() {
         telemetry.report_assistant_event(event.clone());
         if telemetry.metrics_enabled() && event.model_provider == ANTHROPIC_PROVIDER_ID {
-            executor
-                .spawn(async move {
-                    report_anthropic_event(event, client, model_api_key)
-                        .await
-                        .log_err();
-                })
-                .detach();
+            if let Some(api_key) = model_api_key {
+                executor
+                    .spawn(async move {
+                        report_anthropic_event(event, client, api_key)
+                            .await
+                            .log_err();
+                    })
+                    .detach();
+            } else {
+                log::error!("Cannot send Anthropic telemetry because API key is missing");
+            }
         }
     }
 }

@@ -34,17 +38,8 @@ pub fn report_assistant_event(
 async fn report_anthropic_event(
     event: AssistantEventData,
     client: Arc<dyn HttpClient>,
-    model_api_key: Option<String>,
-) -> Result<(), AnthropicError> {
-    let api_key = match model_api_key {
-        Some(key) => key,
-        None => {
-            return Err(AnthropicError::Other(anyhow!(
-                "Anthropic API key is not set"
-            )));
-        }
-    };
-
+    api_key: String,
+) -> anyhow::Result<()> {
     let uri = format!("{ANTHROPIC_API_URL}/v1/log/zed");
     let request_builder = HttpRequest::builder()
         .method(Method::POST)

@@ -72,19 +67,19 @@ async fn report_anthropic_event(

     let request = request_builder
         .body(AsyncBody::from(serialized_event.to_string()))
-        .context("failed to construct request body")?;
+        .context("Failed to construct Anthropic telemetry HTTP request body")?;

     let response = client
         .send(request)
         .await
-        .context("failed to send request to Anthropic")?;
+        .context("Failed to send telemetry HTTP request to Anthropic")?;

     if response.status().is_success() {
-        return Ok(());
+        Ok(())
+    } else {
+        Err(anyhow!(
+            "Anthropic telemetry logging failed with HTTP status: {}",
+            response.status()
+        ))
     }
-
-    return Err(AnthropicError::Other(anyhow!(
-        "Failed to log: {}",
-        response.status(),
-    )));
 }