More resilient eval (#32257)

Bubbles up rate-limit information so that callers higher up in the stack can retry after the indicated duration when needed.
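
As a rough sketch of the idea, assuming a hypothetical `RateLimitExceeded { retry_after }` variant and a tokio runtime (the variant shape, field names, and helper below are illustrative guesses, not the actual Zed definitions):

use std::time::Duration;

// Hypothetical, pared-down shape of the completion error; the variant
// and field names here are assumptions, not the real Zed definitions.
#[derive(Debug)]
enum LanguageModelCompletionError {
    RateLimitExceeded { retry_after: Duration },
    Other(anyhow::Error),
}

// Retry an async operation, honoring the retry-after duration that the
// error bubbles up from the provider's rate-limit response.
async fn with_rate_limit_retries<T, F, Fut>(
    mut attempt: F,
    max_retries: usize,
) -> Result<T, LanguageModelCompletionError>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, LanguageModelCompletionError>>,
{
    for _ in 0..max_retries {
        match attempt().await {
            Err(LanguageModelCompletionError::RateLimitExceeded { retry_after }) => {
                // Wait out the duration the provider asked for, then try again.
                tokio::time::sleep(retry_after).await;
            }
            result => return result,
        }
    }
    attempt().await
}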

Also caps the number of evals running concurrently, which further reduces rate limiting.
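
And a sketch of the concurrency cap using a tokio semaphore; `Eval` and `run_evals_capped` are made-up names for illustration, and the actual eval runner may wire this differently:

use std::sync::Arc;
use tokio::sync::Semaphore;

// Placeholder for whatever unit of work a single eval run is.
struct Eval {
    name: String,
}

impl Eval {
    async fn run(self) {
        // Stand-in for the actual eval work.
        println!("running {}", self.name);
    }
}

async fn run_evals_capped(evals: Vec<Eval>, max_concurrent: usize) {
    let semaphore = Arc::new(Semaphore::new(max_concurrent));
    let mut handles = Vec::new();
    for eval in evals {
        let semaphore = semaphore.clone();
        handles.push(tokio::spawn(async move {
            // Each task waits here for a permit, so at most
            // `max_concurrent` evals execute at the same time.
            let _permit = semaphore.acquire_owned().await.expect("semaphore closed");
            eval.run().await;
        }));
    }
    for handle in handles {
        let _ = handle.await;
    }
}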

Release Notes:

- N/A
Ben Brandt 2025-06-09 20:07:22 +02:00 committed by GitHub
commit e4bd115a63 (parent fa54fa80d0)
22 changed files with 147 additions and 56 deletions


@@ -265,13 +265,15 @@ impl LanguageModel for CopilotChatLanguageModel {
         'static,
         Result<
             BoxStream<'static, Result<LanguageModelCompletionEvent, LanguageModelCompletionError>>,
             LanguageModelCompletionError,
         >,
     > {
         if let Some(message) = request.messages.last() {
             if message.contents_empty() {
                 const EMPTY_PROMPT_MSG: &str =
                     "Empty prompts aren't allowed. Please provide a non-empty prompt.";
-                return futures::future::ready(Err(anyhow::anyhow!(EMPTY_PROMPT_MSG))).boxed();
+                return futures::future::ready(Err(anyhow::anyhow!(EMPTY_PROMPT_MSG).into()))
+                    .boxed();
             }

             // Copilot Chat has a restriction that the final message must be from the user.
@@ -279,13 +281,13 @@ impl LanguageModel for CopilotChatLanguageModel {
             // and provide a more helpful error message.
             if !matches!(message.role, Role::User) {
                 const USER_ROLE_MSG: &str = "The final message must be from the user. To provide a system prompt, you must provide the system prompt followed by a user prompt.";
-                return futures::future::ready(Err(anyhow::anyhow!(USER_ROLE_MSG))).boxed();
+                return futures::future::ready(Err(anyhow::anyhow!(USER_ROLE_MSG).into())).boxed();
             }
         }

         let copilot_request = match into_copilot_chat(&self.model, request) {
             Ok(request) => request,
-            Err(err) => return futures::future::ready(Err(err)).boxed(),
+            Err(err) => return futures::future::ready(Err(err.into())).boxed(),
         };

         let is_streaming = copilot_request.stream;
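
The `.into()` calls in these hunks appear because the error type of the returned futures changed from `anyhow::Error` to `LanguageModelCompletionError`, so ad-hoc `anyhow!` errors now need a conversion. A minimal sketch of what makes that compile, reusing the hypothetical enum shape from the description above (the real impl in the Zed codebase may map errors more precisely):

// Same hypothetical enum shape as in the earlier sketch.
#[derive(Debug)]
enum LanguageModelCompletionError {
    RateLimitExceeded { retry_after: std::time::Duration },
    Other(anyhow::Error),
}

impl From<anyhow::Error> for LanguageModelCompletionError {
    fn from(error: anyhow::Error) -> Self {
        // Ad-hoc `anyhow!` errors with no more specific mapping land in
        // the catch-all variant.
        LanguageModelCompletionError::Other(error)
    }
}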