From 04c68dc0cf0f933f6b441f33ffa739b33334c72f Mon Sep 17 00:00:00 2001
From: Richard Feldman <oss@rtfeldman.com>
Date: Wed, 30 Apr 2025 15:21:19 -0400
Subject: [PATCH] Make the default repetitions be 8, and concurrency 4 (#29576)

This is based on having observed that there is a lot of run-to-run
variation on the same branch at both `n=1` and `n=3`.

* With `n=8` two runs on the same branch give answers that seem close
enough to be reasonably consistent.
* With higher concurrency, trying to run this many repetitions seems to
lead language servers to time out a lot, causing evals to fail.

Release Notes:

- N/A
---
 .github/workflows/eval.yml | 2 +-
 crates/eval/src/eval.rs    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
index f614dde9d9..8e36a62f3d 100644
--- a/.github/workflows/eval.yml
+++ b/.github/workflows/eval.yml
@@ -69,7 +69,7 @@ jobs:
         run: cargo build --package=eval
 
       - name: Run eval
-        run: cargo run --package=eval -- --repetitions=3 --concurrency=1
+        run: cargo run --package=eval -- --repetitions=8 --concurrency=1
 
       # Even the Linux runner is not stateful, in theory there is no need to do this cleanup.
       # But, to avoid potential issues in the future if we choose to use a stateful Linux runner and forget to add code
diff --git a/crates/eval/src/eval.rs b/crates/eval/src/eval.rs
index 482e94c902..df3ed691ae 100644
--- a/crates/eval/src/eval.rs
+++ b/crates/eval/src/eval.rs
@@ -52,10 +52,10 @@ struct Args {
     #[arg(long, value_delimiter = ',', default_value = "rs,ts")]
     languages: Vec<String>,
     /// How many times to run each example.
-    #[arg(long, default_value = "1")]
+    #[arg(long, default_value = "8")]
     repetitions: usize,
     /// Maximum number of examples to run concurrently.
-    #[arg(long, default_value = "10")]
+    #[arg(long, default_value = "4")]
     concurrency: usize,
 }