diff --git a/crates/assistant_tools/src/edit_agent/evals.rs b/crates/assistant_tools/src/edit_agent/evals.rs index d2ee03f08f..c7af7dc64e 100644 --- a/crates/assistant_tools/src/edit_agent/evals.rs +++ b/crates/assistant_tools/src/edit_agent/evals.rs @@ -365,17 +365,23 @@ fn eval_disable_cursor_blinking() { // Model | Pass rate // ============================================ // - // claude-3.7-sonnet | 0.99 (2025-06-14) - // claude-sonnet-4 | 0.85 (2025-06-14) - // gemini-2.5-pro-preview-latest | 0.97 (2025-06-16) - // gemini-2.5-flash-preview-04-17 | - // gpt-4.1 | + // claude-3.7-sonnet | 0.59 (2025-07-14) + // claude-sonnet-4 | 0.81 (2025-07-14) + // gemini-2.5-pro | 0.95 (2025-07-14) + // gemini-2.5-flash-preview-04-17 | 0.78 (2025-07-14) + // gpt-4.1 | 0.00 (2025-07-14) (follows edit_description too literally) let input_file_path = "root/editor.rs"; let input_file_content = include_str!("evals/fixtures/disable_cursor_blinking/before.rs"); let edit_description = "Comment out the call to `BlinkManager::enable`"; + let possible_diffs = vec![ + include_str!("evals/fixtures/disable_cursor_blinking/possible-01.diff"), + include_str!("evals/fixtures/disable_cursor_blinking/possible-02.diff"), + include_str!("evals/fixtures/disable_cursor_blinking/possible-03.diff"), + include_str!("evals/fixtures/disable_cursor_blinking/possible-04.diff"), + ]; eval( 100, - 0.95, + 0.51, 0.05, EvalInput::from_conversation( vec![ @@ -433,11 +439,7 @@ fn eval_disable_cursor_blinking() { ), ], Some(input_file_content.into()), - EvalAssertion::judge_diff(indoc! 
{" - - Calls to BlinkManager in `observe_window_activation` were commented out - - The call to `blink_manager.enable` above the call to show_cursor_names was commented out - - All the edits have valid indentation - "}), + EvalAssertion::assert_diff_any(possible_diffs), ), ); } diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-01.diff b/crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-01.diff new file mode 100644 index 0000000000..1a38a1967f --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-01.diff @@ -0,0 +1,28 @@ +--- before.rs 2025-07-07 11:37:48.434629001 +0300 ++++ expected.rs 2025-07-14 10:33:53.346906775 +0300 +@@ -1780,11 +1780,11 @@ + cx.observe_window_activation(window, |editor, window, cx| { + let active = window.is_window_active(); + editor.blink_manager.update(cx, |blink_manager, cx| { +- if active { +- blink_manager.enable(cx); +- } else { +- blink_manager.disable(cx); +- } ++ // if active { ++ // blink_manager.enable(cx); ++ // } else { ++ // blink_manager.disable(cx); ++ // } + }); + }), + ], +@@ -18463,7 +18463,7 @@ + } + + self.blink_manager.update(cx, |blink_manager, cx| { +- blink_manager.enable(cx); ++ // blink_manager.enable(cx); + }); + self.show_cursor_names(window, cx); + self.buffer.update(cx, |buffer, cx| { diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-02.diff b/crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-02.diff new file mode 100644 index 0000000000..b484cce48f --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-02.diff @@ -0,0 +1,29 @@ +@@ -1778,13 +1778,13 @@ + cx.observe_global_in::<SettingsStore>(window, Self::settings_changed), + observe_buffer_font_size_adjustment(cx, |_, cx| cx.notify()), + cx.observe_window_activation(window, |editor, window, cx| { +- let 
active = window.is_window_active(); ++ // let active = window.is_window_active(); + editor.blink_manager.update(cx, |blink_manager, cx| { +- if active { +- blink_manager.enable(cx); +- } else { +- blink_manager.disable(cx); +- } ++ // if active { ++ // blink_manager.enable(cx); ++ // } else { ++ // blink_manager.disable(cx); ++ // } + }); + }), + ], +@@ -18463,7 +18463,7 @@ + } + + self.blink_manager.update(cx, |blink_manager, cx| { +- blink_manager.enable(cx); ++ // blink_manager.enable(cx); + }); + self.show_cursor_names(window, cx); + self.buffer.update(cx, |buffer, cx| { diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-03.diff b/crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-03.diff new file mode 100644 index 0000000000..431e34e48a --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-03.diff @@ -0,0 +1,34 @@ +@@ -1774,17 +1774,17 @@ + cx.observe(&buffer, Self::on_buffer_changed), + cx.subscribe_in(&buffer, window, Self::on_buffer_event), + cx.observe_in(&display_map, window, Self::on_display_map_changed), +- cx.observe(&blink_manager, |_, _, cx| cx.notify()), ++ // cx.observe(&blink_manager, |_, _, cx| cx.notify()), + cx.observe_global_in::<SettingsStore>(window, Self::settings_changed), + observe_buffer_font_size_adjustment(cx, |_, cx| cx.notify()), + cx.observe_window_activation(window, |editor, window, cx| { +- let active = window.is_window_active(); ++ // let active = window.is_window_active(); + editor.blink_manager.update(cx, |blink_manager, cx| { +- if active { +- blink_manager.enable(cx); +- } else { +- blink_manager.disable(cx); +- } ++ // if active { ++ // blink_manager.enable(cx); ++ // } else { ++ // blink_manager.disable(cx); ++ // } + }); + }), + ], +@@ -18463,7 +18463,7 @@ + } + + self.blink_manager.update(cx, |blink_manager, cx| { +- blink_manager.enable(cx); ++ // blink_manager.enable(cx); + }); + 
self.show_cursor_names(window, cx); + self.buffer.update(cx, |buffer, cx| { diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-04.diff b/crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-04.diff new file mode 100644 index 0000000000..64a6b85dd3 --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/evals/fixtures/disable_cursor_blinking/possible-04.diff @@ -0,0 +1,33 @@ +@@ -1774,17 +1774,17 @@ + cx.observe(&buffer, Self::on_buffer_changed), + cx.subscribe_in(&buffer, window, Self::on_buffer_event), + cx.observe_in(&display_map, window, Self::on_display_map_changed), +- cx.observe(&blink_manager, |_, _, cx| cx.notify()), ++ // cx.observe(&blink_manager, |_, _, cx| cx.notify()), + cx.observe_global_in::<SettingsStore>(window, Self::settings_changed), + observe_buffer_font_size_adjustment(cx, |_, cx| cx.notify()), + cx.observe_window_activation(window, |editor, window, cx| { + let active = window.is_window_active(); + editor.blink_manager.update(cx, |blink_manager, cx| { +- if active { +- blink_manager.enable(cx); +- } else { +- blink_manager.disable(cx); +- } ++ // if active { ++ // blink_manager.enable(cx); ++ // } else { ++ // blink_manager.disable(cx); ++ // } + }); + }), + ], +@@ -18463,7 +18463,7 @@ + } + + self.blink_manager.update(cx, |blink_manager, cx| { +- blink_manager.enable(cx); ++ // blink_manager.enable(cx); + }); + self.show_cursor_names(window, cx); + self.buffer.update(cx, |buffer, cx| {