eval: Count execution errors as failures (#30712)
- Evals returning an error (e.g., an LLM API format mismatch) were silently skipped in the aggregated results. They are now counted as failures (0% success score).
- Setting the `VERBOSE` environment variable to a non-empty value disables string truncation.

Release Notes:

- N/A
This commit is contained in:
parent
83498ebf2b
commit
6420df3975
2 changed files with 39 additions and 24 deletions
|
@ -28,6 +28,17 @@ impl AssertionsReport {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn error(msg: String) -> Self {
|
||||
let assert = RanAssertion {
|
||||
id: "no-unhandled-errors".into(),
|
||||
result: Err(msg),
|
||||
};
|
||||
AssertionsReport {
|
||||
ran: vec![assert],
|
||||
max: Some(1),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.ran.is_empty()
|
||||
}
|
||||
|
@ -145,7 +156,9 @@ pub fn print_table_divider() {
|
|||
}
|
||||
|
||||
fn truncate(assertion: &str, max_width: usize) -> String {
|
||||
if assertion.len() <= max_width {
|
||||
let is_verbose = std::env::var("VERBOSE").is_ok_and(|v| !v.is_empty());
|
||||
|
||||
if assertion.len() <= max_width || is_verbose {
|
||||
assertion.to_string()
|
||||
} else {
|
||||
let mut end_ix = max_width - 1;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue