More resilient eval (#32257)

Bubbles up rate limit information so that code higher up in the stack can retry after a certain duration if needed. Also caps the number of evals running concurrently, which helps as well. Release Notes: - N/A
2025-06-09 20:07:22 +02:00 · 2025-06-09 20:07:22 +02:00 · e4bd115a63
commit e4bd115a63
parent fa54fa80d0
22 changed files with 147 additions and 56 deletions
--- a/crates/agent/src/buffer_codegen.rs
+++ b/crates/agent/src/buffer_codegen.rs
@@ -386,8 +386,10 @@ impl CodegenAlternative {
                async { Ok(LanguageModelTextStream::default()) }.boxed_local()
            } else {
                let request = self.build_request(&model, user_prompt, cx)?;
-                cx.spawn(async move |_, cx| model.stream_completion_text(request.await, &cx).await)
-                    .boxed_local()
+                cx.spawn(async move |_, cx| {
+                    Ok(model.stream_completion_text(request.await, &cx).await?)
+                })
+                .boxed_local()
            };
        self.handle_stream(telemetry_id, provider_id.to_string(), api_key, stream, cx);
        Ok(())