More resilient eval (#32257)

Bubbles up rate limit information so that we can retry after a certain
duration if needed higher up in the stack.

Also caps the number of concurrent evals running at once to also help.

Release Notes:

- N/A
This commit is contained in:
Ben Brandt 2025-06-09 20:07:22 +02:00 committed by GitHub
parent fa54fa80d0
commit e4bd115a63
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 147 additions and 56 deletions

View file

@ -807,6 +807,7 @@ impl LanguageModel for CloudLanguageModel {
'static,
Result<
BoxStream<'static, Result<LanguageModelCompletionEvent, LanguageModelCompletionError>>,
LanguageModelCompletionError,
>,
> {
let thread_id = request.thread_id.clone();
@ -848,7 +849,8 @@ impl LanguageModel for CloudLanguageModel {
mode,
provider: zed_llm_client::LanguageModelProvider::Anthropic,
model: request.model.clone(),
provider_request: serde_json::to_value(&request)?,
provider_request: serde_json::to_value(&request)
.map_err(|e| anyhow!(e))?,
},
)
.await
@ -884,7 +886,7 @@ impl LanguageModel for CloudLanguageModel {
let client = self.client.clone();
let model = match open_ai::Model::from_id(&self.model.id.0) {
Ok(model) => model,
Err(err) => return async move { Err(anyhow!(err)) }.boxed(),
Err(err) => return async move { Err(anyhow!(err).into()) }.boxed(),
};
let request = into_open_ai(request, &model, None);
let llm_api_token = self.llm_api_token.clone();
@ -905,7 +907,8 @@ impl LanguageModel for CloudLanguageModel {
mode,
provider: zed_llm_client::LanguageModelProvider::OpenAi,
model: request.model.clone(),
provider_request: serde_json::to_value(&request)?,
provider_request: serde_json::to_value(&request)
.map_err(|e| anyhow!(e))?,
},
)
.await?;
@ -944,7 +947,8 @@ impl LanguageModel for CloudLanguageModel {
mode,
provider: zed_llm_client::LanguageModelProvider::Google,
model: request.model.model_id.clone(),
provider_request: serde_json::to_value(&request)?,
provider_request: serde_json::to_value(&request)
.map_err(|e| anyhow!(e))?,
},
)
.await?;