More resilient eval (#32257)

Bubbles up rate limit information so that we can retry after a certain
duration if needed higher up in the stack.

Also caps the number of concurrent evals running at once to also help.

Release Notes:

- N/A
This commit is contained in:
Ben Brandt 2025-06-09 20:07:22 +02:00 committed by GitHub
parent fa54fa80d0
commit e4bd115a63
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 147 additions and 56 deletions

View file

@ -527,6 +527,7 @@ impl LanguageModel for BedrockModel {
'static,
Result<
BoxStream<'static, Result<LanguageModelCompletionEvent, LanguageModelCompletionError>>,
LanguageModelCompletionError,
>,
> {
let Ok(region) = cx.read_entity(&self.state, |state, _cx| {
@ -539,16 +540,13 @@ impl LanguageModel for BedrockModel {
.or(settings_region)
.unwrap_or(String::from("us-east-1"))
}) else {
return async move {
anyhow::bail!("App State Dropped");
}
.boxed();
return async move { Err(anyhow::anyhow!("App State Dropped").into()) }.boxed();
};
let model_id = match self.model.cross_region_inference_id(&region) {
Ok(s) => s,
Err(e) => {
return async move { Err(e) }.boxed();
return async move { Err(e.into()) }.boxed();
}
};
@ -560,7 +558,7 @@ impl LanguageModel for BedrockModel {
self.model.mode(),
) {
Ok(request) => request,
Err(err) => return futures::future::ready(Err(err)).boxed(),
Err(err) => return futures::future::ready(Err(err.into())).boxed(),
};
let owned_handle = self.handler.clone();