Add new editing eval scenario and improve it substantially (#29997)

This improves the score on the new eval scenario by ~80% (from `0.29` to
`0.525`) without decreasing performance in the other evals.

Release Notes:

- Improved the performance of the `edit_file` tool.
This commit is contained in:
Antonio Scandurra 2025-05-06 14:22:42 +02:00 committed by GitHub
parent 6e9f8f997e
commit 07e6e49583
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 1811 additions and 7 deletions

View file

@ -652,14 +652,16 @@ impl EditAgent {
}
fn fuzzy_eq(left: &str, right: &str) -> bool {
const THRESHOLD: f64 = 0.8;
let min_levenshtein = left.len().abs_diff(right.len());
let min_normalized_levenshtein =
1. - (min_levenshtein as f32 / cmp::max(left.len(), right.len()) as f32);
if min_normalized_levenshtein < 0.8 {
1. - (min_levenshtein as f64 / cmp::max(left.len(), right.len()) as f64);
if min_normalized_levenshtein < THRESHOLD {
return false;
}
strsim::normalized_levenshtein(left, right) >= 0.8
strsim::normalized_levenshtein(left, right) >= THRESHOLD
}
#[derive(Copy, Clone, Debug)]