eval: retry in more scenarios
This commit is contained in:
parent
47af878ebb
commit
a2ce038352
1 changed file with 59 additions and 38 deletions
|
@@ -1663,47 +1663,68 @@ async fn retry_on_rate_limit<R>(mut request: impl AsyncFnMut() -> Result<R>) ->
|
||||||
attempt += 1;
|
attempt += 1;
|
||||||
match request().await {
|
match request().await {
|
||||||
Ok(result) => return Ok(result),
|
Ok(result) => return Ok(result),
|
||||||
Err(err) => match err.downcast::<LanguageModelCompletionError>() {
|
Err(err) => {
|
||||||
Ok(err) => match &err {
|
if attempt > 20 {
|
||||||
LanguageModelCompletionError::RateLimitExceeded { retry_after, .. }
|
return Err(err);
|
||||||
| LanguageModelCompletionError::ServerOverloaded { retry_after, .. } => {
|
}
|
||||||
let retry_after = retry_after.unwrap_or(Duration::from_secs(5));
|
|
||||||
// Wait for the duration supplied, with some jitter to avoid all requests being made at the same time.
|
|
||||||
let jitter = retry_after.mul_f64(rand::thread_rng().gen_range(0.0..1.0));
|
|
||||||
eprintln!(
|
|
||||||
"Attempt #{attempt}: {err}. Retry after {retry_after:?} + jitter of {jitter:?}"
|
|
||||||
);
|
|
||||||
Timer::after(retry_after + jitter).await;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
LanguageModelCompletionError::UpstreamProviderError {
|
|
||||||
status,
|
|
||||||
retry_after,
|
|
||||||
..
|
|
||||||
} => {
|
|
||||||
// Only retry for specific status codes
|
|
||||||
let should_retry = matches!(
|
|
||||||
*status,
|
|
||||||
StatusCode::TOO_MANY_REQUESTS | StatusCode::SERVICE_UNAVAILABLE
|
|
||||||
) || status.as_u16() == 529;
|
|
||||||
|
|
||||||
if !should_retry {
|
match err.downcast::<LanguageModelCompletionError>() {
|
||||||
return Err(err.into());
|
Ok(err) => match &err {
|
||||||
|
LanguageModelCompletionError::RateLimitExceeded { retry_after, .. }
|
||||||
|
| LanguageModelCompletionError::ServerOverloaded { retry_after, .. } => {
|
||||||
|
let retry_after = retry_after.unwrap_or(Duration::from_secs(5));
|
||||||
|
// Wait for the duration supplied, with some jitter to avoid all requests being made at the same time.
|
||||||
|
let jitter =
|
||||||
|
retry_after.mul_f64(rand::thread_rng().gen_range(0.0..1.0));
|
||||||
|
eprintln!(
|
||||||
|
"Attempt #{attempt}: {err}. Retry after {retry_after:?} + jitter of {jitter:?}"
|
||||||
|
);
|
||||||
|
Timer::after(retry_after + jitter).await;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
LanguageModelCompletionError::UpstreamProviderError {
|
||||||
|
status,
|
||||||
|
retry_after,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
// Only retry for specific status codes
|
||||||
|
let should_retry = matches!(
|
||||||
|
*status,
|
||||||
|
StatusCode::TOO_MANY_REQUESTS | StatusCode::SERVICE_UNAVAILABLE
|
||||||
|
) || status.as_u16() == 529;
|
||||||
|
|
||||||
// Use server-provided retry_after if available, otherwise use default
|
if !should_retry {
|
||||||
let retry_after = retry_after.unwrap_or(Duration::from_secs(5));
|
return Err(err.into());
|
||||||
let jitter = retry_after.mul_f64(rand::thread_rng().gen_range(0.0..1.0));
|
}
|
||||||
eprintln!(
|
|
||||||
"Attempt #{attempt}: {err}. Retry after {retry_after:?} + jitter of {jitter:?}"
|
// Use server-provided retry_after if available, otherwise use default
|
||||||
);
|
let retry_after = retry_after.unwrap_or(Duration::from_secs(5));
|
||||||
Timer::after(retry_after + jitter).await;
|
let jitter =
|
||||||
continue;
|
retry_after.mul_f64(rand::thread_rng().gen_range(0.0..1.0));
|
||||||
}
|
eprintln!(
|
||||||
_ => return Err(err.into()),
|
"Attempt #{attempt}: {err}. Retry after {retry_after:?} + jitter of {jitter:?}"
|
||||||
},
|
);
|
||||||
Err(err) => return Err(err),
|
Timer::after(retry_after + jitter).await;
|
||||||
},
|
continue;
|
||||||
|
}
|
||||||
|
LanguageModelCompletionError::ApiInternalServerError { .. }
|
||||||
|
| LanguageModelCompletionError::ApiReadResponseError { .. }
|
||||||
|
| LanguageModelCompletionError::DeserializeResponse { .. }
|
||||||
|
| LanguageModelCompletionError::HttpSend { .. } => {
|
||||||
|
let retry_after = Duration::from_secs(attempt);
|
||||||
|
let jitter =
|
||||||
|
retry_after.mul_f64(rand::thread_rng().gen_range(0.0..1.0));
|
||||||
|
eprintln!(
|
||||||
|
"Attempt #{attempt}: {err}. Retry after {retry_after:?} + jitter of {jitter:?}"
|
||||||
|
);
|
||||||
|
Timer::after(retry_after + jitter).await;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
_ => return Err(err.into()),
|
||||||
|
},
|
||||||
|
Err(err) => return Err(err),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue