evals: Switch disable_cursor_blinking to deterministic asserts (#34398)

Release Notes:

- N/A
This commit is contained in:
Oleksiy Syvokon 2025-07-14 14:26:15 +03:00 committed by GitHub
parent 00ec243771
commit 2edf85f054
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 137 additions and 11 deletions

View file

@ -365,17 +365,23 @@ fn eval_disable_cursor_blinking() {
// Model | Pass rate
// ============================================
//
// claude-3.7-sonnet | 0.99 (2025-06-14)
// claude-sonnet-4 | 0.85 (2025-06-14)
// gemini-2.5-pro-preview-latest | 0.97 (2025-06-16)
// gemini-2.5-flash-preview-04-17 |
// gpt-4.1 |
// claude-3.7-sonnet | 0.59 (2025-07-14)
// claude-sonnet-4 | 0.81 (2025-07-14)
// gemini-2.5-pro | 0.95 (2025-07-14)
// gemini-2.5-flash-preview-04-17 | 0.78 (2025-07-14)
// gpt-4.1 | 0.00 (2025-07-14) (follows edit_description too literally)
let input_file_path = "root/editor.rs";
let input_file_content = include_str!("evals/fixtures/disable_cursor_blinking/before.rs");
let edit_description = "Comment out the call to `BlinkManager::enable`";
let possible_diffs = vec![
include_str!("evals/fixtures/disable_cursor_blinking/possible-01.diff"),
include_str!("evals/fixtures/disable_cursor_blinking/possible-02.diff"),
include_str!("evals/fixtures/disable_cursor_blinking/possible-03.diff"),
include_str!("evals/fixtures/disable_cursor_blinking/possible-04.diff"),
];
eval(
100,
0.95,
0.51,
0.05,
EvalInput::from_conversation(
vec![
@ -433,11 +439,7 @@ fn eval_disable_cursor_blinking() {
),
],
Some(input_file_content.into()),
EvalAssertion::judge_diff(indoc! {"
- Calls to BlinkManager in `observe_window_activation` were commented out
- The call to `blink_manager.enable` above the call to show_cursor_names was commented out
- All the edits have valid indentation
"}),
EvalAssertion::assert_diff_any(possible_diffs),
),
);
}

View file

@ -0,0 +1,28 @@
--- before.rs 2025-07-07 11:37:48.434629001 +0300
+++ expected.rs 2025-07-14 10:33:53.346906775 +0300
@@ -1780,11 +1780,11 @@
cx.observe_window_activation(window, |editor, window, cx| {
let active = window.is_window_active();
editor.blink_manager.update(cx, |blink_manager, cx| {
- if active {
- blink_manager.enable(cx);
- } else {
- blink_manager.disable(cx);
- }
+ // if active {
+ // blink_manager.enable(cx);
+ // } else {
+ // blink_manager.disable(cx);
+ // }
});
}),
],
@@ -18463,7 +18463,7 @@
}
self.blink_manager.update(cx, |blink_manager, cx| {
- blink_manager.enable(cx);
+ // blink_manager.enable(cx);
});
self.show_cursor_names(window, cx);
self.buffer.update(cx, |buffer, cx| {

View file

@ -0,0 +1,29 @@
@@ -1778,13 +1778,13 @@
cx.observe_global_in::<SettingsStore>(window, Self::settings_changed),
observe_buffer_font_size_adjustment(cx, |_, cx| cx.notify()),
cx.observe_window_activation(window, |editor, window, cx| {
- let active = window.is_window_active();
+ // let active = window.is_window_active();
editor.blink_manager.update(cx, |blink_manager, cx| {
- if active {
- blink_manager.enable(cx);
- } else {
- blink_manager.disable(cx);
- }
+ // if active {
+ // blink_manager.enable(cx);
+ // } else {
+ // blink_manager.disable(cx);
+ // }
});
}),
],
@@ -18463,7 +18463,7 @@
}
self.blink_manager.update(cx, |blink_manager, cx| {
- blink_manager.enable(cx);
+ // blink_manager.enable(cx);
});
self.show_cursor_names(window, cx);
self.buffer.update(cx, |buffer, cx| {

View file

@ -0,0 +1,34 @@
@@ -1774,17 +1774,17 @@
cx.observe(&buffer, Self::on_buffer_changed),
cx.subscribe_in(&buffer, window, Self::on_buffer_event),
cx.observe_in(&display_map, window, Self::on_display_map_changed),
- cx.observe(&blink_manager, |_, _, cx| cx.notify()),
+ // cx.observe(&blink_manager, |_, _, cx| cx.notify()),
cx.observe_global_in::<SettingsStore>(window, Self::settings_changed),
observe_buffer_font_size_adjustment(cx, |_, cx| cx.notify()),
cx.observe_window_activation(window, |editor, window, cx| {
- let active = window.is_window_active();
+ // let active = window.is_window_active();
editor.blink_manager.update(cx, |blink_manager, cx| {
- if active {
- blink_manager.enable(cx);
- } else {
- blink_manager.disable(cx);
- }
+ // if active {
+ // blink_manager.enable(cx);
+ // } else {
+ // blink_manager.disable(cx);
+ // }
});
}),
],
@@ -18463,7 +18463,7 @@
}
self.blink_manager.update(cx, |blink_manager, cx| {
- blink_manager.enable(cx);
+ // blink_manager.enable(cx);
});
self.show_cursor_names(window, cx);
self.buffer.update(cx, |buffer, cx| {

View file

@ -0,0 +1,33 @@
@@ -1774,17 +1774,17 @@
cx.observe(&buffer, Self::on_buffer_changed),
cx.subscribe_in(&buffer, window, Self::on_buffer_event),
cx.observe_in(&display_map, window, Self::on_display_map_changed),
- cx.observe(&blink_manager, |_, _, cx| cx.notify()),
+ // cx.observe(&blink_manager, |_, _, cx| cx.notify()),
cx.observe_global_in::<SettingsStore>(window, Self::settings_changed),
observe_buffer_font_size_adjustment(cx, |_, cx| cx.notify()),
cx.observe_window_activation(window, |editor, window, cx| {
let active = window.is_window_active();
editor.blink_manager.update(cx, |blink_manager, cx| {
- if active {
- blink_manager.enable(cx);
- } else {
- blink_manager.disable(cx);
- }
+ // if active {
+ // blink_manager.enable(cx);
+ // } else {
+ // blink_manager.disable(cx);
+ // }
});
}),
],
@@ -18463,7 +18463,7 @@
}
self.blink_manager.update(cx, |blink_manager, cx| {
- blink_manager.enable(cx);
+ // blink_manager.enable(cx);
});
self.show_cursor_names(window, cx);
self.buffer.update(cx, |buffer, cx| {