agent: Improve error handling and retry for zed-provided models (#33565)

* Updates to `zed_llm_client-0.8.5`, which adds support for `retry_after` when Anthropic provides it. * Distinguishes upstream provider errors and rate limits from errors that originate from Zed's servers. * Moves `LanguageModelCompletionError::BadInputJson` to `LanguageModelCompletionEvent::ToolUseJsonParseError`. While arguably this is an error case, the logic in thread is cleaner with this move. There is also precedent for inclusion of errors in the event type - `CompletionRequestStatus::Failed` is how cloud errors arrive. * Updates `PROVIDER_ID` / `PROVIDER_NAME` constants to use proper types instead of `&str`, since they can be constructed in a const fashion. * Removes use of `CLIENT_SUPPORTS_EXA_WEB_SEARCH_PROVIDER_HEADER_NAME` as the server no longer reads this header and just defaults to that behavior. Release notes for this are covered by #33275. Release Notes: - N/A --------- Co-authored-by: Richard Feldman <oss@rtfeldman.com> Co-authored-by: Richard <richard@zed.dev>
2025-06-30 21:01:32 -06:00 · 2025-06-30 21:01:32 -06:00 · d497f52e17
commit d497f52e17
parent f022a13091
25 changed files with 656 additions and 479 deletions
--- a/crates/assistant_tools/src/edit_agent/evals.rs
+++ b/crates/assistant_tools/src/edit_agent/evals.rs
@ -29,6 +29,7 @@ use std::{
    path::Path,
    str::FromStr,
    sync::mpsc,
+    time::Duration,
 };
 use util::path;

@ -1658,12 +1659,14 @@ async fn retry_on_rate_limit<R>(mut request: impl AsyncFnMut() -> Result<R>) ->
        match request().await {
            Ok(result) => return Ok(result),
            Err(err) => match err.downcast::<LanguageModelCompletionError>() {
-                Ok(err) => match err {
-                    LanguageModelCompletionError::RateLimitExceeded { retry_after } => {
+                Ok(err) => match &err {
+                    LanguageModelCompletionError::RateLimitExceeded { retry_after, .. }
+                    | LanguageModelCompletionError::ServerOverloaded { retry_after, .. } => {
+                        let retry_after = retry_after.unwrap_or(Duration::from_secs(5));
                        // Wait for the duration supplied, with some jitter to avoid all requests being made at the same time.
                        let jitter = retry_after.mul_f64(rand::thread_rng().gen_range(0.0..1.0));
                        eprintln!(
-                            "Attempt #{attempt}: Rate limit exceeded. Retry after {retry_after:?} + jitter of {jitter:?}"
+                            "Attempt #{attempt}: {err}. Retry after {retry_after:?} + jitter of {jitter:?}"
                        );
                        Timer::after(retry_after + jitter).await;
                        continue;