agent: Improve error handling and retry for zed-provided models (#33565)
* Updates to `zed_llm_client-0.8.5`, which adds support for `retry_after` when Anthropic provides it. * Distinguishes upstream provider errors and rate limits from errors that originate from Zed's servers. * Moves `LanguageModelCompletionError::BadInputJson` to `LanguageModelCompletionEvent::ToolUseJsonParseError`. While arguably this is an error case, the logic in thread is cleaner with this move. There is also precedent for inclusion of errors in the event type - `CompletionRequestStatus::Failed` is how cloud errors arrive. * Updates `PROVIDER_ID` / `PROVIDER_NAME` constants to use proper types instead of `&str`, since they can be constructed in a const fashion. * Removes use of `CLIENT_SUPPORTS_EXA_WEB_SEARCH_PROVIDER_HEADER_NAME` as the server no longer reads this header and just defaults to that behavior. Release notes for this are covered by #33275. Release Notes: - N/A --------- Co-authored-by: Richard Feldman <oss@rtfeldman.com> Co-authored-by: Richard <richard@zed.dev>
This commit is contained in:
parent
f022a13091
commit
d497f52e17
25 changed files with 656 additions and 479 deletions
|
@ -6,7 +6,7 @@ use anyhow::{Context as _, Result, anyhow};
|
|||
use chrono::{DateTime, Utc};
|
||||
use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
|
||||
use http_client::http::{self, HeaderMap, HeaderValue};
|
||||
use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
|
||||
use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use strum::{EnumIter, EnumString};
|
||||
use thiserror::Error;
|
||||
|
@ -356,7 +356,7 @@ pub async fn complete(
|
|||
.send(request)
|
||||
.await
|
||||
.map_err(AnthropicError::HttpSend)?;
|
||||
let status = response.status();
|
||||
let status_code = response.status();
|
||||
let mut body = String::new();
|
||||
response
|
||||
.body_mut()
|
||||
|
@ -364,12 +364,12 @@ pub async fn complete(
|
|||
.await
|
||||
.map_err(AnthropicError::ReadResponse)?;
|
||||
|
||||
if status.is_success() {
|
||||
if status_code.is_success() {
|
||||
Ok(serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)?)
|
||||
} else {
|
||||
Err(AnthropicError::HttpResponseError {
|
||||
status: status.as_u16(),
|
||||
body,
|
||||
status_code,
|
||||
message: body,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -444,11 +444,7 @@ impl RateLimitInfo {
|
|||
}
|
||||
|
||||
Self {
|
||||
retry_after: headers
|
||||
.get("retry-after")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.and_then(|v| v.parse::<u64>().ok())
|
||||
.map(Duration::from_secs),
|
||||
retry_after: parse_retry_after(headers),
|
||||
requests: RateLimit::from_headers("requests", headers).ok(),
|
||||
tokens: RateLimit::from_headers("tokens", headers).ok(),
|
||||
input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
|
||||
|
@ -457,6 +453,17 @@ impl RateLimitInfo {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parses the `retry-after` header as a whole number of seconds and returns it as a `Duration`
|
||||
/// (Anthropic always uses seconds). Other services might send an HTTP date or some other format
|
||||
/// here; such values are not handled. Returns `None` if the header is absent, is not readable as a string, or does not parse as a `u64`.
|
||||
pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
|
||||
headers
|
||||
.get("retry-after")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.and_then(|v| v.parse::<u64>().ok())
|
||||
.map(Duration::from_secs)
|
||||
}
|
||||
|
||||
fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
|
||||
Ok(headers
|
||||
.get(key)
|
||||
|
@ -520,6 +527,10 @@ pub async fn stream_completion_with_rate_limit_info(
|
|||
})
|
||||
.boxed();
|
||||
Ok((stream, Some(rate_limits)))
|
||||
} else if response.status().as_u16() == 529 {
|
||||
Err(AnthropicError::ServerOverloaded {
|
||||
retry_after: rate_limits.retry_after,
|
||||
})
|
||||
} else if let Some(retry_after) = rate_limits.retry_after {
|
||||
Err(AnthropicError::RateLimit { retry_after })
|
||||
} else {
|
||||
|
@ -532,10 +543,9 @@ pub async fn stream_completion_with_rate_limit_info(
|
|||
|
||||
match serde_json::from_str::<Event>(&body) {
|
||||
Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
|
||||
Ok(_) => Err(AnthropicError::UnexpectedResponseFormat(body)),
|
||||
Err(_) => Err(AnthropicError::HttpResponseError {
|
||||
status: response.status().as_u16(),
|
||||
body: body,
|
||||
Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
|
||||
status_code: response.status(),
|
||||
message: body,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
@ -801,16 +811,19 @@ pub enum AnthropicError {
|
|||
ReadResponse(io::Error),
|
||||
|
||||
/// HTTP error response from the API
|
||||
HttpResponseError { status: u16, body: String },
|
||||
HttpResponseError {
|
||||
status_code: StatusCode,
|
||||
message: String,
|
||||
},
|
||||
|
||||
/// Rate limit exceeded
|
||||
RateLimit { retry_after: Duration },
|
||||
|
||||
/// Server overloaded
|
||||
ServerOverloaded { retry_after: Option<Duration> },
|
||||
|
||||
/// API returned an error response
|
||||
ApiError(ApiError),
|
||||
|
||||
/// Unexpected response format
|
||||
UnexpectedResponseFormat(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Error)]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue