From d566864891f5497f02d2002b4967bba5e56b2d1a Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 29 Apr 2025 22:13:13 -0400 Subject: [PATCH] Make code block eval resilient to indentation (#29633) This reduces spurious failures in the eval. Release Notes: - N/A --- crates/assistant_tools/src/edit_file_tool.rs | 16 ----------- .../eval/src/examples/code_block_citations.rs | 28 ++++++++++++++++--- crates/markdown/src/path_range.rs | 11 +++++++- 3 files changed, 34 insertions(+), 21 deletions(-) diff --git a/crates/assistant_tools/src/edit_file_tool.rs b/crates/assistant_tools/src/edit_file_tool.rs index 9a766cf7a2..fc88db2351 100644 --- a/crates/assistant_tools/src/edit_file_tool.rs +++ b/crates/assistant_tools/src/edit_file_tool.rs @@ -174,7 +174,6 @@ impl Tool for EditFileTool { "The `old_string` and `new_string` are identical, so no changes would be made." )); } - let old_string = input.old_string.clone(); let result = cx .background_spawn(async move { @@ -214,21 +213,6 @@ impl Tool for EditFileTool { input.path.display() ) } else { - let old_string_with_buffer = format!( - "old_string:\n\n{}\n\n-------file-------\n\n{}", - &old_string, - buffer.text() - ); - let path = { - use std::collections::hash_map::DefaultHasher; - use std::hash::{Hash, Hasher}; - - let mut hasher = DefaultHasher::new(); - old_string_with_buffer.hash(&mut hasher); - - PathBuf::from(format!("failed_tool_{}.txt", hasher.finish())) - }; - std::fs::write(path, old_string_with_buffer).unwrap(); anyhow!("Failed to match the provided `old_string`") } })?; diff --git a/crates/eval/src/examples/code_block_citations.rs b/crates/eval/src/examples/code_block_citations.rs index 595d430841..4a4d566a26 100644 --- a/crates/eval/src/examples/code_block_citations.rs +++ b/crates/eval/src/examples/code_block_citations.rs @@ -101,8 +101,13 @@ impl Example for CodeBlockCitations { let content = &text[(citation.len() + 1)..content_len - (citation.len() + 1)]; + // deindent (trim the start of each line) because sometimes the model + // chooses to deindent its code snippets for the sake of readability, + // which in markdown is not only reasonable but usually desirable. cx.assert( - buffer_text.contains(&content), + deindent(&buffer_text) + .trim() + .contains(deindent(&content).trim()), "Code block content was found in file", ) .ok(); @@ -129,10 +134,16 @@ impl Example for CodeBlockCitations { .to_string(); } + // deindent (trim the start of each line) because sometimes the model + // chooses to deindent its code snippets for the sake of readability, + // which in markdown is not only reasonable but usually desirable. cx.assert_eq( - snippet.as_str(), - content, - "Code block snippet was at specified line/col", + deindent(snippet.as_str()).trim(), + deindent(content).trim(), + format!( + "Code block was at {:?}-{:?}", + range.start, range.end + ), ) .ok(); } @@ -189,3 +200,12 @@ impl Example for CodeBlockCitations { ] } } + +fn deindent(as_str: impl AsRef) -> String { + as_str + .as_ref() + .lines() + .map(|line| line.trim_start()) + .collect::>() + .join("\n") +} diff --git a/crates/markdown/src/path_range.rs b/crates/markdown/src/path_range.rs index 0fb7f4cd23..19cfda0d9d 100644 --- a/crates/markdown/src/path_range.rs +++ b/crates/markdown/src/path_range.rs @@ -6,12 +6,21 @@ pub struct PathWithRange { pub range: Option>, } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, PartialEq, Eq)] pub struct LineCol { pub line: u32, pub col: Option, } +impl std::fmt::Debug for LineCol { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.col { + Some(col) => write!(f, "L{}:{}", self.line, col), + None => write!(f, "L{}", self.line), + } + } +} + impl LineCol { pub fn new(str: impl AsRef) -> Option { let str = str.as_ref();