From 3884de937bf608c3f9e28cdbebc43ce0af4dc7c9 Mon Sep 17 00:00:00 2001 From: Oleksiy Syvokon Date: Thu, 5 Jun 2025 13:36:55 +0300 Subject: [PATCH] assistant: Partial fix for HTML entities in tools params (#32148) This problem seems to be specific to Opus 4. Eval shows improvement from 89% to 97%. Closes: https://github.com/zed-industries/zed/issues/32060 Release Notes: - N/A Co-authored-by: Ben Brandt --- assets/prompts/assistant_system_prompt.hbs | 3 +- crates/assistant_tools/src/assistant_tools.rs | 2 +- .../src/grep_tool/description.md | 1 + .../src/examples/grep_params_escapement.rs | 59 +++++++++++++++++++ crates/eval/src/examples/mod.rs | 2 + 5 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 crates/eval/src/examples/grep_params_escapement.rs diff --git a/assets/prompts/assistant_system_prompt.hbs b/assets/prompts/assistant_system_prompt.hbs index 0aaceb156a..a155dea19d 100644 --- a/assets/prompts/assistant_system_prompt.hbs +++ b/assets/prompts/assistant_system_prompt.hbs @@ -17,13 +17,13 @@ You are a highly skilled software engineer with extensive knowledge in many prog 4. Use only the tools that are currently available. 5. DO NOT use a tool that is not available just because it appears in the conversation. This means the user turned it off. 6. NEVER run commands that don't terminate on their own such as web servers (like `npm run start`, `npm run dev`, `python -m http.server`, etc) or file watchers. +7. Avoid HTML entity escaping - use plain characters instead. ## Searching and Reading If you are unsure how to fulfill the user's request, gather more information with tool calls and/or clarifying questions. {{! TODO: If there are files, we should mention it but otherwise omit that fact }} -{{#if has_tools}} If appropriate, use tool calls to explore the current project, which contains the following root directories: {{#each worktrees}} @@ -38,7 +38,6 @@ If appropriate, use tool calls to explore the current project, which contains th - As you learn about the structure of the project, use that information to scope `grep` searches to targeted subtrees of the project. - The user might specify a partial file path. If you don't know the full path, use `find_path` (not `grep`) before you read the file. {{/if}} -{{/if}} {{else}} You are being tasked with providing a response, but you have no ability to use tools or to read or write any aspect of the user's system (other than any context the user might have provided to you). diff --git a/crates/assistant_tools/src/assistant_tools.rs b/crates/assistant_tools/src/assistant_tools.rs index de57e8ebb3..83312a07b6 100644 --- a/crates/assistant_tools/src/assistant_tools.rs +++ b/crates/assistant_tools/src/assistant_tools.rs @@ -37,13 +37,13 @@ use crate::diagnostics_tool::DiagnosticsTool; use crate::edit_file_tool::EditFileTool; use crate::fetch_tool::FetchTool; use crate::find_path_tool::FindPathTool; -use crate::grep_tool::GrepTool; use crate::list_directory_tool::ListDirectoryTool; use crate::now_tool::NowTool; use crate::thinking_tool::ThinkingTool; pub use edit_file_tool::{EditFileMode, EditFileToolInput}; pub use find_path_tool::FindPathToolInput; +pub use grep_tool::{GrepTool, GrepToolInput}; pub use open_tool::OpenTool; pub use read_file_tool::{ReadFileTool, ReadFileToolInput}; pub use terminal_tool::TerminalTool; diff --git a/crates/assistant_tools/src/grep_tool/description.md b/crates/assistant_tools/src/grep_tool/description.md index 33983e66dd..e3c0b43f31 100644 --- a/crates/assistant_tools/src/grep_tool/description.md +++ b/crates/assistant_tools/src/grep_tool/description.md @@ -6,3 +6,4 @@ Searches the contents of files in the project with a regular expression - Never use this tool to search for paths. Only search file contents with this tool. - Use this tool when you need to find files containing specific patterns - Results are paginated with 20 matches per page. Use the optional 'offset' parameter to request subsequent pages. +- DO NOT use HTML entities solely to escape characters in the tool parameters. diff --git a/crates/eval/src/examples/grep_params_escapement.rs b/crates/eval/src/examples/grep_params_escapement.rs new file mode 100644 index 0000000000..0532698ba2 --- /dev/null +++ b/crates/eval/src/examples/grep_params_escapement.rs @@ -0,0 +1,59 @@ +use agent_settings::AgentProfileId; +use anyhow::Result; +use assistant_tools::GrepToolInput; +use async_trait::async_trait; + +use crate::example::{Example, ExampleContext, ExampleMetadata}; + +pub struct GrepParamsEscapementExample; + +/* + +This eval checks that the model doesn't use HTML escapement for characters like `<` and +`>` in tool parameters. + + original +system_prompt change +tool description + claude-opus-4 89% 92% 97%+ + claude-sonnet-4 100% + gpt-4.1-mini 100% + gemini-2.5-pro 98% + +*/ + +#[async_trait(?Send)] +impl Example for GrepParamsEscapementExample { + fn meta(&self) -> ExampleMetadata { + ExampleMetadata { + name: "grep_params_escapement".to_string(), + url: "https://github.com/octocat/hello-world".to_string(), + revision: "7fd1a60b01f91b314f59955a4e4d4e80d8edf11d".to_string(), + language_server: None, + max_assertions: Some(1), + profile_id: AgentProfileId::default(), + existing_thread_json: None, + max_turns: Some(2), + } + } + + async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> { + // cx.push_user_message("How does the precedence/specificity work with Keymap contexts? I am seeing that `MessageEditor > Editor` is lower precendence than `Editor` which is surprising to me, but might be how it works"); + cx.push_user_message("Search for files containing the characters `>` or `<`"); + let response = cx.run_turns(2).await?; + let grep_input = response + .find_tool_call("grep") + .and_then(|tool_use| tool_use.parse_input::().ok()); + + cx.assert_some(grep_input.as_ref(), "`grep` tool should be called")?; + + cx.assert( + !contains_html_entities(&grep_input.unwrap().regex), + "Tool parameters should not be escaped", + ) + } +} + +fn contains_html_entities(pattern: &str) -> bool { + regex::Regex::new(r"&[a-zA-Z]+;|&#[0-9]+;|&#x[0-9a-fA-F]+;") + .unwrap() + .is_match(pattern) +} diff --git a/crates/eval/src/examples/mod.rs b/crates/eval/src/examples/mod.rs index 5968ee2fd0..26c4c95e50 100644 --- a/crates/eval/src/examples/mod.rs +++ b/crates/eval/src/examples/mod.rs @@ -16,6 +16,7 @@ mod add_arg_to_trait_method; mod code_block_citations; mod comment_translation; mod file_search; +mod grep_params_escapement; mod overwrite_file; mod planets; @@ -27,6 +28,7 @@ pub fn all(examples_dir: &Path) -> Vec> { Rc::new(planets::Planets), Rc::new(comment_translation::CommentTranslation), Rc::new(overwrite_file::FileOverwriteExample), + Rc::new(grep_params_escapement::GrepParamsEscapementExample), ]; for example_path in list_declarative_examples(examples_dir).unwrap() {