More resilient eval (#32257)

Bubbles up rate limit information so that callers higher up in the stack
can retry after a certain duration if needed.

Also caps the number of evals running concurrently, which helps as well.

Release Notes:

- N/A
This commit is contained in:
Ben Brandt 2025-06-09 20:07:22 +02:00 committed by GitHub
parent fa54fa80d0
commit e4bd115a63
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 147 additions and 56 deletions

View file

@ -22,6 +22,7 @@ use std::fmt;
use std::ops::{Add, Sub};
use std::str::FromStr as _;
use std::sync::Arc;
use std::time::Duration;
use thiserror::Error;
use util::serde::is_default;
use zed_llm_client::{
@ -74,6 +75,8 @@ pub enum LanguageModelCompletionEvent {
#[derive(Error, Debug)]
pub enum LanguageModelCompletionError {
#[error("rate limit exceeded, retry after {0:?}")]
RateLimit(Duration),
#[error("received bad input JSON")]
BadInputJson {
id: LanguageModelToolUseId,
@ -270,6 +273,7 @@ pub trait LanguageModel: Send + Sync {
'static,
Result<
BoxStream<'static, Result<LanguageModelCompletionEvent, LanguageModelCompletionError>>,
LanguageModelCompletionError,
>,
>;
@ -277,7 +281,7 @@ pub trait LanguageModel: Send + Sync {
&self,
request: LanguageModelRequest,
cx: &AsyncApp,
) -> BoxFuture<'static, Result<LanguageModelTextStream>> {
) -> BoxFuture<'static, Result<LanguageModelTextStream, LanguageModelCompletionError>> {
let future = self.stream_completion(request, cx);
async move {