eval: Count execution errors as failures (#30712)

- Evals that returned an error (e.g., an LLM API format mismatch) were silently skipped in the aggregated results. They are now counted as failures (0% success score).
- Setting the `VERBOSE` environment variable to any non-empty value disables string truncation.

Release Notes:

- N/A
2025-05-14 20:44:19 +03:00 · 2025-05-14 20:44:19 +03:00 · 6420df3975
commit 6420df3975
parent 83498ebf2b
2 changed files with 39 additions and 24 deletions
--- a/crates/eval/src/assertions.rs
+++ b/crates/eval/src/assertions.rs
@@ -28,6 +28,17 @@ impl AssertionsReport {
        }
    }

+    pub fn error(msg: String) -> Self {
+        let assert = RanAssertion {
+            id: "no-unhandled-errors".into(),
+            result: Err(msg),
+        };
+        AssertionsReport {
+            ran: vec![assert],
+            max: Some(1),
+        }
+    }
+
    pub fn is_empty(&self) -> bool {
        self.ran.is_empty()
    }
@@ -145,7 +156,9 @@ pub fn print_table_divider() {
 }

 fn truncate(assertion: &str, max_width: usize) -> String {
-    if assertion.len() <= max_width {
+    let is_verbose = std::env::var("VERBOSE").is_ok_and(|v| !v.is_empty());
+
+    if assertion.len() <= max_width || is_verbose {
        assertion.to_string()
    } else {
        let mut end_ix = max_width - 1;