eval: retry in more scenarios

This commit is contained in:
Ben Brandt 2025-07-31 11:21:05 +02:00
parent 47af878ebb
commit a2ce038352
No known key found for this signature in database
GPG key ID: D4618C5D3B500571

View file

@@ -1663,47 +1663,68 @@ async fn retry_on_rate_limit<R>(mut request: impl AsyncFnMut() -> Result<R>) ->
attempt += 1; attempt += 1;
match request().await { match request().await {
Ok(result) => return Ok(result), Ok(result) => return Ok(result),
Err(err) => match err.downcast::<LanguageModelCompletionError>() { Err(err) => {
Ok(err) => match &err { if attempt > 20 {
LanguageModelCompletionError::RateLimitExceeded { retry_after, .. } return Err(err);
| LanguageModelCompletionError::ServerOverloaded { retry_after, .. } => { }
let retry_after = retry_after.unwrap_or(Duration::from_secs(5));
// Wait for the duration supplied, with some jitter to avoid all requests being made at the same time.
let jitter = retry_after.mul_f64(rand::thread_rng().gen_range(0.0..1.0));
eprintln!(
"Attempt #{attempt}: {err}. Retry after {retry_after:?} + jitter of {jitter:?}"
);
Timer::after(retry_after + jitter).await;
continue;
}
LanguageModelCompletionError::UpstreamProviderError {
status,
retry_after,
..
} => {
// Only retry for specific status codes
let should_retry = matches!(
*status,
StatusCode::TOO_MANY_REQUESTS | StatusCode::SERVICE_UNAVAILABLE
) || status.as_u16() == 529;
if !should_retry { match err.downcast::<LanguageModelCompletionError>() {
return Err(err.into()); Ok(err) => match &err {
LanguageModelCompletionError::RateLimitExceeded { retry_after, .. }
| LanguageModelCompletionError::ServerOverloaded { retry_after, .. } => {
let retry_after = retry_after.unwrap_or(Duration::from_secs(5));
// Wait for the duration supplied, with some jitter to avoid all requests being made at the same time.
let jitter =
retry_after.mul_f64(rand::thread_rng().gen_range(0.0..1.0));
eprintln!(
"Attempt #{attempt}: {err}. Retry after {retry_after:?} + jitter of {jitter:?}"
);
Timer::after(retry_after + jitter).await;
continue;
} }
LanguageModelCompletionError::UpstreamProviderError {
status,
retry_after,
..
} => {
// Only retry for specific status codes
let should_retry = matches!(
*status,
StatusCode::TOO_MANY_REQUESTS | StatusCode::SERVICE_UNAVAILABLE
) || status.as_u16() == 529;
// Use server-provided retry_after if available, otherwise use default if !should_retry {
let retry_after = retry_after.unwrap_or(Duration::from_secs(5)); return Err(err.into());
let jitter = retry_after.mul_f64(rand::thread_rng().gen_range(0.0..1.0)); }
eprintln!(
"Attempt #{attempt}: {err}. Retry after {retry_after:?} + jitter of {jitter:?}" // Use server-provided retry_after if available, otherwise use default
); let retry_after = retry_after.unwrap_or(Duration::from_secs(5));
Timer::after(retry_after + jitter).await; let jitter =
continue; retry_after.mul_f64(rand::thread_rng().gen_range(0.0..1.0));
} eprintln!(
_ => return Err(err.into()), "Attempt #{attempt}: {err}. Retry after {retry_after:?} + jitter of {jitter:?}"
}, );
Err(err) => return Err(err), Timer::after(retry_after + jitter).await;
}, continue;
}
LanguageModelCompletionError::ApiInternalServerError { .. }
| LanguageModelCompletionError::ApiReadResponseError { .. }
| LanguageModelCompletionError::DeserializeResponse { .. }
| LanguageModelCompletionError::HttpSend { .. } => {
let retry_after = Duration::from_secs(attempt);
let jitter =
retry_after.mul_f64(rand::thread_rng().gen_range(0.0..1.0));
eprintln!(
"Attempt #{attempt}: {err}. Retry after {retry_after:?} + jitter of {jitter:?}"
);
Timer::after(retry_after + jitter).await;
continue;
}
_ => return Err(err.into()),
},
Err(err) => return Err(err),
}
}
} }
} }
} }