agent: Improve error handling and retry for zed-provided models (#33565)

* Updates to `zed_llm_client-0.8.5` which adds support for `retry_after`
when anthropic provides it.

* Distinguishes upstream provider errors and rate limits from errors
that originate from zed's servers

* Moves `LanguageModelCompletionError::BadInputJson` to
`LanguageModelCompletionEvent::ToolUseJsonParseError`. While arguably
this is an error case, the logic in thread is cleaner with this move.
There is also precedent for inclusion of errors in the event type -
`CompletionRequestStatus::Failed` is how cloud errors arrive.

* Updates `PROVIDER_ID` / `PROVIDER_NAME` constants to use proper types
instead of `&str`, since they can be constructed in a const fashion.

* Removes use of `CLIENT_SUPPORTS_EXA_WEB_SEARCH_PROVIDER_HEADER_NAME`
as the server no longer reads this header and just defaults to that
behavior.

Release notes for this is covered by #33275

Release Notes:

- N/A

---------

Co-authored-by: Richard Feldman <oss@rtfeldman.com>
Co-authored-by: Richard <richard@zed.dev>
This commit is contained in:
Michael Sloan 2025-06-30 21:01:32 -06:00 committed by GitHub
parent f022a13091
commit d497f52e17
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 656 additions and 479 deletions

View file

@ -6,7 +6,7 @@ use anyhow::{Context as _, Result, anyhow};
use chrono::{DateTime, Utc};
use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
use http_client::http::{self, HeaderMap, HeaderValue};
use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
use serde::{Deserialize, Serialize};
use strum::{EnumIter, EnumString};
use thiserror::Error;
@ -356,7 +356,7 @@ pub async fn complete(
.send(request)
.await
.map_err(AnthropicError::HttpSend)?;
let status = response.status();
let status_code = response.status();
let mut body = String::new();
response
.body_mut()
@ -364,12 +364,12 @@ pub async fn complete(
.await
.map_err(AnthropicError::ReadResponse)?;
if status.is_success() {
if status_code.is_success() {
Ok(serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)?)
} else {
Err(AnthropicError::HttpResponseError {
status: status.as_u16(),
body,
status_code,
message: body,
})
}
}
@ -444,11 +444,7 @@ impl RateLimitInfo {
}
Self {
retry_after: headers
.get("retry-after")
.and_then(|v| v.to_str().ok())
.and_then(|v| v.parse::<u64>().ok())
.map(Duration::from_secs),
retry_after: parse_retry_after(headers),
requests: RateLimit::from_headers("requests", headers).ok(),
tokens: RateLimit::from_headers("tokens", headers).ok(),
input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
@ -457,6 +453,17 @@ impl RateLimitInfo {
}
}
/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
/// seconds). Note that other services might specify an HTTP date or some other format for this
/// header. Returns `None` if the header is not present or cannot be parsed.
pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
headers
.get("retry-after")
.and_then(|v| v.to_str().ok())
.and_then(|v| v.parse::<u64>().ok())
.map(Duration::from_secs)
}
fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
Ok(headers
.get(key)
@ -520,6 +527,10 @@ pub async fn stream_completion_with_rate_limit_info(
})
.boxed();
Ok((stream, Some(rate_limits)))
} else if response.status().as_u16() == 529 {
Err(AnthropicError::ServerOverloaded {
retry_after: rate_limits.retry_after,
})
} else if let Some(retry_after) = rate_limits.retry_after {
Err(AnthropicError::RateLimit { retry_after })
} else {
@ -532,10 +543,9 @@ pub async fn stream_completion_with_rate_limit_info(
match serde_json::from_str::<Event>(&body) {
Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
Ok(_) => Err(AnthropicError::UnexpectedResponseFormat(body)),
Err(_) => Err(AnthropicError::HttpResponseError {
status: response.status().as_u16(),
body: body,
Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
status_code: response.status(),
message: body,
}),
}
}
@ -801,16 +811,19 @@ pub enum AnthropicError {
ReadResponse(io::Error),
/// HTTP error response from the API
HttpResponseError { status: u16, body: String },
HttpResponseError {
status_code: StatusCode,
message: String,
},
/// Rate limit exceeded
RateLimit { retry_after: Duration },
/// Server overloaded
ServerOverloaded { retry_after: Option<Duration> },
/// API returned an error response
ApiError(ApiError),
/// Unexpected response format
UnexpectedResponseFormat(String),
}
#[derive(Debug, Serialize, Deserialize, Error)]