From 04c68dc0cf0f933f6b441f33ffa739b33334c72f Mon Sep 17 00:00:00 2001
From: Richard Feldman
Date: Wed, 30 Apr 2025 15:21:19 -0400
Subject: [PATCH] Make the default repetitions be 8, and concurrency 4 (#29576)

This is based on having observed that there is a lot of variation
between runs on `n=1` and `n=3`.

* With `n=8` two runs on the same branch give answers that seem close enough to be reasonably consistent.
* With higher concurrency, trying to run this many repetitions seems to lead language servers to time out a lot, causing evals to fail.

Release Notes:

- N/A
---
 .github/workflows/eval.yml | 2 +-
 crates/eval/src/eval.rs    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
index f614dde9d9..8e36a62f3d 100644
--- a/.github/workflows/eval.yml
+++ b/.github/workflows/eval.yml
@@ -69,7 +69,7 @@ jobs:
         run: cargo build --package=eval
 
       - name: Run eval
-        run: cargo run --package=eval -- --repetitions=3 --concurrency=1
+        run: cargo run --package=eval -- --repetitions=8 --concurrency=1
 
       # Even the Linux runner is not stateful, in theory there is no need to do this cleanup.
       # But, to avoid potential issues in the future if we choose to use a stateful Linux runner and forget to add code
diff --git a/crates/eval/src/eval.rs b/crates/eval/src/eval.rs
index 482e94c902..df3ed691ae 100644
--- a/crates/eval/src/eval.rs
+++ b/crates/eval/src/eval.rs
@@ -52,10 +52,10 @@ struct Args {
     #[arg(long, value_delimiter = ',', default_value = "rs,ts")]
     languages: Vec<String>,
     /// How many times to run each example.
-    #[arg(long, default_value = "1")]
+    #[arg(long, default_value = "8")]
     repetitions: usize,
     /// Maximum number of examples to run concurrently.
-    #[arg(long, default_value = "10")]
+    #[arg(long, default_value = "4")]
     concurrency: usize,
 }
 