eval: Count execution errors as failures (#30712)

- Evals returning an error (e.g., LLM API format mismatch) were silently
skipped in the aggregated results. Now we count them as a failure (0%
success score).

- Setting the `VERBOSE` environment variable to a non-empty value
disables string truncation.

Release Notes:

- N/A
This commit is contained in:
Oleksiy Syvokon 2025-05-14 20:44:19 +03:00 committed by GitHub
parent 83498ebf2b
commit 6420df3975
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 39 additions and 24 deletions

View file

@ -28,6 +28,17 @@ impl AssertionsReport {
}
}
/// Builds a report that records a single failed assertion.
///
/// The assertion has the id `"no-unhandled-errors"` and carries `msg` as its
/// error payload; `max` is set to `Some(1)`.
pub fn error(msg: String) -> Self {
    AssertionsReport {
        ran: vec![RanAssertion {
            id: "no-unhandled-errors".into(),
            result: Err(msg),
        }],
        max: Some(1),
    }
}
/// Returns `true` when no assertions have been recorded in `ran`.
pub fn is_empty(&self) -> bool {
self.ran.is_empty()
}
@ -145,7 +156,9 @@ pub fn print_table_divider() {
}
fn truncate(assertion: &str, max_width: usize) -> String {
if assertion.len() <= max_width {
let is_verbose = std::env::var("VERBOSE").is_ok_and(|v| !v.is_empty());
if assertion.len() <= max_width || is_verbose {
assertion.to_string()
} else {
let mut end_ix = max_width - 1;