More resilient eval (#32257)
Bubbles up rate limit information so that, higher up in the stack, we can retry after the indicated duration when needed. Also caps the number of evals running concurrently, which further reduces rate-limit pressure.

Release Notes:

- N/A
parent fa54fa80d0
commit e4bd115a63

22 changed files with 147 additions and 56 deletions
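Purely as an illustration of the "bubbles up rate limit information" part of the message above: the provider-side changes live in files not shown in the hunks below, and every name in this sketch is assumed, but the essential idea is to turn a 429 response's Retry-After value into the Duration that travels up with the error.

    use std::time::Duration;

    // Hypothetical sketch only (names assumed): turn a 429 response's Retry-After
    // header, in its seconds form, into the wait duration that gets attached to
    // the rate-limit error and bubbled up to callers.
    fn retry_after_seconds(header_value: &str) -> Option<Duration> {
        header_value.trim().parse::<u64>().ok().map(Duration::from_secs)
    }

    fn main() {
        assert_eq!(retry_after_seconds("30"), Some(Duration::from_secs(30)));
        assert_eq!(retry_after_seconds("soon"), None);
    }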
@@ -11,7 +11,7 @@ use client::{Client, UserStore};
 use collections::HashMap;
 use fs::FakeFs;
 use futures::{FutureExt, future::LocalBoxFuture};
-use gpui::{AppContext, TestAppContext};
+use gpui::{AppContext, TestAppContext, Timer};
 use indoc::{formatdoc, indoc};
 use language_model::{
     LanguageModelRegistry, LanguageModelRequestTool, LanguageModelToolResult,
@@ -1255,9 +1255,12 @@ impl EvalAssertion {
             }],
             ..Default::default()
         };
-        let mut response = judge
-            .stream_completion_text(request, &cx.to_async())
-            .await?;
+        let mut response = retry_on_rate_limit(async || {
+            Ok(judge
+                .stream_completion_text(request.clone(), &cx.to_async())
+                .await?)
+        })
+        .await?;
         let mut output = String::new();
         while let Some(chunk) = response.stream.next().await {
             let chunk = chunk?;
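A note on the `request.clone()` above: the closure handed to `retry_on_rate_limit` is an `AsyncFnMut`, so it may run more than once, and anything the underlying call consumes has to be rebuilt or cloned on every attempt. A minimal standalone sketch of that property, using `smol` as the executor (the helper name here is made up):

    use smol::block_on;

    // Calls an async closure twice, which works because AsyncFnMut closures can
    // be invoked repeatedly (requires Rust 1.85+ for async closures).
    async fn call_twice<R>(mut f: impl AsyncFnMut() -> R) -> (R, R) {
        let first = f().await;
        let second = f().await;
        (first, second)
    }

    fn main() {
        block_on(async {
            let request = String::from("prompt");
            // Because the closure can run again, it clones `request` on each
            // attempt instead of moving it out.
            let (a, b) = call_twice(async || request.clone()).await;
            assert_eq!(a, b);
        });
    }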
@@ -1308,10 +1311,17 @@ fn eval(
     run_eval(eval.clone(), tx.clone());

     let executor = gpui::background_executor();
+    let semaphore = Arc::new(smol::lock::Semaphore::new(32));
     for _ in 1..iterations {
         let eval = eval.clone();
         let tx = tx.clone();
-        executor.spawn(async move { run_eval(eval, tx) }).detach();
+        let semaphore = semaphore.clone();
+        executor
+            .spawn(async move {
+                let _guard = semaphore.acquire().await;
+                run_eval(eval, tx)
+            })
+            .detach();
     }
     drop(tx);

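With the semaphore in place, every iteration is still spawned up front, but only 32 of them can be inside `run_eval` at any moment; the guard is released when the task finishes. A self-contained sketch of the same pattern, using plain `smol` tasks instead of gpui's background executor and a smaller cap, purely for illustration:

    use std::sync::Arc;

    fn main() {
        // Cap of 4 here just for the example; the eval runner uses 32.
        let semaphore = Arc::new(smol::lock::Semaphore::new(4));

        smol::block_on(async {
            let mut tasks = Vec::new();
            for i in 0..16 {
                let semaphore = semaphore.clone();
                tasks.push(smol::spawn(async move {
                    // Held for the duration of the work; released when dropped.
                    let _guard = semaphore.acquire().await;
                    println!("running eval {i}");
                }));
            }
            for task in tasks {
                task.await;
            }
        });
    }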
@@ -1577,21 +1587,31 @@ impl EditAgentTest {
             if let Some(input_content) = eval.input_content.as_deref() {
                 buffer.update(cx, |buffer, cx| buffer.set_text(input_content, cx));
             }
-            let (edit_output, _) = self.agent.edit(
-                buffer.clone(),
-                eval.edit_file_input.display_description,
-                &conversation,
-                &mut cx.to_async(),
-            );
-            edit_output.await?
+            retry_on_rate_limit(async || {
+                self.agent
+                    .edit(
+                        buffer.clone(),
+                        eval.edit_file_input.display_description.clone(),
+                        &conversation,
+                        &mut cx.to_async(),
+                    )
+                    .0
+                    .await
+            })
+            .await?
         } else {
-            let (edit_output, _) = self.agent.overwrite(
-                buffer.clone(),
-                eval.edit_file_input.display_description,
-                &conversation,
-                &mut cx.to_async(),
-            );
-            edit_output.await?
+            retry_on_rate_limit(async || {
+                self.agent
+                    .overwrite(
+                        buffer.clone(),
+                        eval.edit_file_input.display_description.clone(),
+                        &conversation,
+                        &mut cx.to_async(),
+                    )
+                    .0
+                    .await
+            })
+            .await?
         };

         let buffer_text = buffer.read_with(cx, |buffer, _| buffer.text());
@@ -1613,6 +1633,26 @@ impl EditAgentTest {
     }
 }

+async fn retry_on_rate_limit<R>(mut request: impl AsyncFnMut() -> Result<R>) -> Result<R> {
+    loop {
+        match request().await {
+            Ok(result) => return Ok(result),
+            Err(err) => match err.downcast::<LanguageModelCompletionError>() {
+                Ok(err) => match err {
+                    LanguageModelCompletionError::RateLimit(duration) => {
+                        // Wait until after we are allowed to try again
+                        eprintln!("Rate limit exceeded. Waiting for {duration:?}...",);
+                        Timer::after(duration).await;
+                        continue;
+                    }
+                    _ => return Err(err.into()),
+                },
+                Err(err) => return Err(err),
+            },
+        }
+    }
+}
+
 #[derive(Clone, Debug, Eq, PartialEq, Hash)]
 struct EvalAssertionOutcome {
     score: usize,
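For reference, here is a self-contained analogue of `retry_on_rate_limit` that runs outside the Zed codebase. It swaps `LanguageModelCompletionError` for a made-up `Backoff` error and gpui's `Timer` for `smol::Timer`, but the mechanics are the same: downcast the `anyhow` error, sleep for the advertised duration, and try again; anything else is returned to the caller.

    use std::time::Duration;

    use anyhow::Result;

    // Stand-in for LanguageModelCompletionError::RateLimit(duration); name assumed.
    #[derive(Debug)]
    struct Backoff(Duration);

    impl std::fmt::Display for Backoff {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            write!(f, "rate limited, retry after {:?}", self.0)
        }
    }

    impl std::error::Error for Backoff {}

    async fn retry_on_backoff<R>(mut request: impl AsyncFnMut() -> Result<R>) -> Result<R> {
        loop {
            match request().await {
                Ok(result) => return Ok(result),
                Err(err) => match err.downcast::<Backoff>() {
                    // Rate-limited: wait for the duration carried by the error, then retry.
                    Ok(Backoff(duration)) => {
                        eprintln!("Rate limit exceeded. Waiting for {duration:?}...");
                        smol::Timer::after(duration).await;
                    }
                    // Any other error is bubbled up unchanged.
                    Err(err) => return Err(err),
                },
            }
        }
    }

    fn main() -> Result<()> {
        smol::block_on(async {
            let mut attempts = 0;
            let value = retry_on_backoff(async || {
                attempts += 1;
                if attempts < 3 {
                    Err(anyhow::Error::from(Backoff(Duration::from_millis(10))))
                } else {
                    Ok(attempts)
                }
            })
            .await?;
            assert_eq!(value, 3);
            Ok(())
        })
    }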