diff --git a/crates/assistant_tools/src/edit_agent/evals.rs b/crates/assistant_tools/src/edit_agent/evals.rs index eda7eee0e3..9a8e762455 100644 --- a/crates/assistant_tools/src/edit_agent/evals.rs +++ b/crates/assistant_tools/src/edit_agent/evals.rs @@ -1658,23 +1658,24 @@ impl EditAgentTest { } async fn retry_on_rate_limit(mut request: impl AsyncFnMut() -> Result) -> Result { + const MAX_RETRIES: usize = 20; let mut attempt = 0; + loop { attempt += 1; - match request().await { - Ok(result) => return Ok(result), - Err(err) => match err.downcast::() { - Ok(err) => match &err { + let response = request().await; + + if attempt >= MAX_RETRIES { + return response; + } + + let retry_delay = match &response { + Ok(_) => None, + Err(err) => match err.downcast_ref::() { + Some(err) => match &err { LanguageModelCompletionError::RateLimitExceeded { retry_after, .. } | LanguageModelCompletionError::ServerOverloaded { retry_after, .. } => { - let retry_after = retry_after.unwrap_or(Duration::from_secs(5)); - // Wait for the duration supplied, with some jitter to avoid all requests being made at the same time. - let jitter = retry_after.mul_f64(rand::thread_rng().gen_range(0.0..1.0)); - eprintln!( - "Attempt #{attempt}: {err}. Retry after {retry_after:?} + jitter of {jitter:?}" - ); - Timer::after(retry_after + jitter).await; - continue; + Some(retry_after.unwrap_or(Duration::from_secs(5))) } LanguageModelCompletionError::UpstreamProviderError { status, @@ -1687,23 +1688,31 @@ async fn retry_on_rate_limit(mut request: impl AsyncFnMut() -> Result) -> StatusCode::TOO_MANY_REQUESTS | StatusCode::SERVICE_UNAVAILABLE ) || status.as_u16() == 529; - if !should_retry { - return Err(err.into()); + if should_retry { + // Use server-provided retry_after if available, otherwise use default + Some(retry_after.unwrap_or(Duration::from_secs(5))) + } else { + None } - - // Use server-provided retry_after if available, otherwise use default - let retry_after = retry_after.unwrap_or(Duration::from_secs(5)); - let jitter = retry_after.mul_f64(rand::thread_rng().gen_range(0.0..1.0)); - eprintln!( - "Attempt #{attempt}: {err}. Retry after {retry_after:?} + jitter of {jitter:?}" - ); - Timer::after(retry_after + jitter).await; - continue; } - _ => return Err(err.into()), + LanguageModelCompletionError::ApiReadResponseError { .. } + | LanguageModelCompletionError::ApiInternalServerError { .. } + | LanguageModelCompletionError::HttpSend { .. } => { + // Exponential backoff for transient I/O and internal server errors + Some(Duration::from_secs(2_u64.pow((attempt - 1) as u32).min(30))) + } + _ => None, }, - Err(err) => return Err(err), + _ => None, }, + }; + + if let Some(retry_after) = retry_delay { + let jitter = retry_after.mul_f64(rand::thread_rng().gen_range(0.0..1.0)); + eprintln!("Attempt #{attempt}: Retry after {retry_after:?} + jitter of {jitter:?}"); + Timer::after(retry_after + jitter).await; + } else { + return response; } } }