agent: Handle context window exceeded errors from Anthropic (#28688)
 Release Notes: - agent: Handle context window exceeded errors from Anthropic
This commit is contained in:
parent
4a57664c7f
commit
b45230784d
9 changed files with 190 additions and 28 deletions
|
@ -47,6 +47,7 @@ settings.workspace = true
|
|||
smol.workspace = true
|
||||
strum.workspace = true
|
||||
theme.workspace = true
|
||||
thiserror.workspace = true
|
||||
tiktoken-rs.workspace = true
|
||||
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
|
||||
ui.workspace = true
|
||||
|
|
|
@ -13,8 +13,9 @@ use gpui::{
|
|||
use http_client::HttpClient;
|
||||
use language_model::{
|
||||
AuthenticateError, LanguageModel, LanguageModelCacheConfiguration, LanguageModelId,
|
||||
LanguageModelName, LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
|
||||
LanguageModelProviderState, LanguageModelRequest, MessageContent, RateLimiter, Role,
|
||||
LanguageModelKnownError, LanguageModelName, LanguageModelProvider, LanguageModelProviderId,
|
||||
LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest, MessageContent,
|
||||
RateLimiter, Role,
|
||||
};
|
||||
use language_model::{LanguageModelCompletionEvent, LanguageModelToolUse, StopReason};
|
||||
use schemars::JsonSchema;
|
||||
|
@ -454,7 +455,12 @@ impl LanguageModel for AnthropicModel {
|
|||
);
|
||||
let request = self.stream_completion(request, cx);
|
||||
let future = self.request_limiter.stream(async move {
|
||||
let response = request.await.map_err(|err| anyhow!(err))?;
|
||||
let response = request
|
||||
.await
|
||||
.map_err(|err| match err.downcast::<AnthropicError>() {
|
||||
Ok(anthropic_err) => anthropic_err_to_anyhow(anthropic_err),
|
||||
Err(err) => anyhow!(err),
|
||||
})?;
|
||||
Ok(map_to_language_model_completion_events(response))
|
||||
});
|
||||
async move { Ok(future.await?.boxed()) }.boxed()
|
||||
|
@ -746,7 +752,7 @@ pub fn map_to_language_model_completion_events(
|
|||
_ => {}
|
||||
},
|
||||
Err(err) => {
|
||||
return Some((vec![Err(anyhow!(err))], state));
|
||||
return Some((vec![Err(anthropic_err_to_anyhow(err))], state));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -757,6 +763,16 @@ pub fn map_to_language_model_completion_events(
|
|||
.flat_map(futures::stream::iter)
|
||||
}
|
||||
|
||||
pub fn anthropic_err_to_anyhow(err: AnthropicError) -> anyhow::Error {
|
||||
if let AnthropicError::ApiError(api_err) = &err {
|
||||
if let Some(tokens) = api_err.match_window_exceeded() {
|
||||
return anyhow!(LanguageModelKnownError::ContextWindowLimitExceeded { tokens });
|
||||
}
|
||||
}
|
||||
|
||||
anyhow!(err)
|
||||
}
|
||||
|
||||
/// Updates usage data by preferring counts from `new`.
|
||||
fn update_usage(usage: &mut Usage, new: &Usage) {
|
||||
if let Some(input_tokens) = new.input_tokens {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use anthropic::{AnthropicError, AnthropicModelMode};
|
||||
use anthropic::{AnthropicError, AnthropicModelMode, parse_prompt_too_long};
|
||||
use anyhow::{Result, anyhow};
|
||||
use client::{
|
||||
Client, EXPIRED_LLM_TOKEN_HEADER_NAME, MAX_LLM_MONTHLY_SPEND_REACHED_HEADER_NAME,
|
||||
|
@ -14,7 +14,7 @@ use gpui::{AnyElement, AnyView, App, AsyncApp, Context, Entity, Subscription, Ta
|
|||
use http_client::{AsyncBody, HttpClient, Method, Response, StatusCode};
|
||||
use language_model::{
|
||||
AuthenticateError, CloudModel, LanguageModel, LanguageModelCacheConfiguration, LanguageModelId,
|
||||
LanguageModelName, LanguageModelProviderId, LanguageModelProviderName,
|
||||
LanguageModelKnownError, LanguageModelName, LanguageModelProviderId, LanguageModelProviderName,
|
||||
LanguageModelProviderState, LanguageModelProviderTosView, LanguageModelRequest,
|
||||
LanguageModelToolSchemaFormat, RateLimiter, ZED_CLOUD_PROVIDER_ID,
|
||||
};
|
||||
|
@ -33,6 +33,7 @@ use std::{
|
|||
time::Duration,
|
||||
};
|
||||
use strum::IntoEnumIterator;
|
||||
use thiserror::Error;
|
||||
use ui::{TintColor, prelude::*};
|
||||
|
||||
use crate::AllLanguageModelSettings;
|
||||
|
@ -575,14 +576,19 @@ impl CloudLanguageModel {
|
|||
} else {
|
||||
let mut body = String::new();
|
||||
response.body_mut().read_to_string(&mut body).await?;
|
||||
return Err(anyhow!(
|
||||
"cloud language model completion failed with status {status}: {body}",
|
||||
));
|
||||
return Err(anyhow!(ApiError { status, body }));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Error produced when the cloud language-model completion endpoint responds
/// with a non-success HTTP status; carries the status code and the raw
/// response body so callers can downcast and inspect it (e.g. to detect a
/// context-window-exceeded BAD_REQUEST).
#[derive(Debug, Error)]
#[error("cloud language model completion failed with status {status}: {body}")]
struct ApiError {
    // HTTP status returned by the completion request.
    status: StatusCode,
    // Raw response body text, read in full before constructing this error.
    body: String,
}
|
||||
|
||||
impl LanguageModel for CloudLanguageModel {
|
||||
fn id(&self) -> LanguageModelId {
|
||||
self.id.clone()
|
||||
|
@ -696,7 +702,23 @@ impl LanguageModel for CloudLanguageModel {
|
|||
)?)?,
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
.await
|
||||
.map_err(|err| match err.downcast::<ApiError>() {
|
||||
Ok(api_err) => {
|
||||
if api_err.status == StatusCode::BAD_REQUEST {
|
||||
if let Some(tokens) = parse_prompt_too_long(&api_err.body) {
|
||||
return anyhow!(
|
||||
LanguageModelKnownError::ContextWindowLimitExceeded {
|
||||
tokens
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
anyhow!(api_err)
|
||||
}
|
||||
Err(err) => anyhow!(err),
|
||||
})?;
|
||||
|
||||
Ok(
|
||||
crate::provider::anthropic::map_to_language_model_completion_events(
|
||||
Box::pin(response_lines(response).map_err(AnthropicError::Other)),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue