agent: Improve initial file search quality (#29317)

This PR significantly improves the quality of the initial file search
that occurs when the model doesn't yet know the full path to a file it
needs to read/edit.

Previously, the assertions in the `file_search` example often failed on
main because the model attempted to guess full file paths. On this
branch, it reliably calls `find_path` (previously `path_search`) before
reading files.

After getting the model to find paths first, I noticed it would try
using `grep` instead of `path_search`. This motivated renaming
`path_search` to `find_path` (continuing the analogy to unix commands)
and adding system prompt instructions about proper tool selection.

Note: I know the Unix command is just called `find`, but that seemed too
general for a tool name.

In my eval runs, the `file_search` example improved from 40% ± 10% to
98% ± 2%. The only assertion I still see fail occasionally is "glob
starts with `**` or project". We can probably add some instructions to
address that.

Release Notes:

- N/A
This commit is contained in:
Agus Zubiaga 2025-04-23 21:24:41 -03:00 committed by GitHub
parent 2124b7ea99
commit 8b5835de17
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 107 additions and 28 deletions

View file

@ -9,12 +9,12 @@ mod delete_path_tool;
mod diagnostics_tool;
mod edit_file_tool;
mod fetch_tool;
mod find_path_tool;
mod grep_tool;
mod list_directory_tool;
mod move_path_tool;
mod now_tool;
mod open_tool;
mod path_search_tool;
mod read_file_tool;
mod rename_tool;
mod replace;
@ -45,11 +45,11 @@ use crate::delete_path_tool::DeletePathTool;
use crate::diagnostics_tool::DiagnosticsTool;
use crate::edit_file_tool::EditFileTool;
use crate::fetch_tool::FetchTool;
use crate::find_path_tool::FindPathTool;
use crate::grep_tool::GrepTool;
use crate::list_directory_tool::ListDirectoryTool;
use crate::now_tool::NowTool;
use crate::open_tool::OpenTool;
use crate::path_search_tool::PathSearchTool;
use crate::read_file_tool::ReadFileTool;
use crate::rename_tool::RenameTool;
use crate::symbol_info_tool::SymbolInfoTool;
@ -58,7 +58,7 @@ use crate::thinking_tool::ThinkingTool;
pub use create_file_tool::CreateFileToolInput;
pub use edit_file_tool::EditFileToolInput;
pub use path_search_tool::PathSearchToolInput;
pub use find_path_tool::FindPathToolInput;
pub use read_file_tool::ReadFileToolInput;
pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {
@ -81,7 +81,7 @@ pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {
registry.register_tool(OpenTool);
registry.register_tool(CodeSymbolsTool);
registry.register_tool(ContentsTool);
registry.register_tool(PathSearchTool);
registry.register_tool(FindPathTool);
registry.register_tool(ReadFileTool);
registry.register_tool(GrepTool);
registry.register_tool(RenameTool);

View file

@ -97,7 +97,7 @@ pub struct BatchToolInput {
/// }
/// },
/// {
/// "name": "path_search",
/// "name": "find_path",
/// "input": {
/// "glob": "**/*test*.rs"
/// }

View file

@ -12,7 +12,7 @@ use util::paths::PathMatcher;
use worktree::Snapshot;
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
pub struct PathSearchToolInput {
pub struct FindPathToolInput {
/// The glob to match against every path in the project.
///
/// <example>
@ -34,11 +34,11 @@ pub struct PathSearchToolInput {
const RESULTS_PER_PAGE: usize = 50;
pub struct PathSearchTool;
pub struct FindPathTool;
impl Tool for PathSearchTool {
impl Tool for FindPathTool {
fn name(&self) -> String {
"path_search".into()
"find_path".into()
}
fn needs_confirmation(&self, _: &serde_json::Value, _: &App) -> bool {
@ -46,7 +46,7 @@ impl Tool for PathSearchTool {
}
fn description(&self) -> String {
include_str!("./path_search_tool/description.md").into()
include_str!("./find_path_tool/description.md").into()
}
fn icon(&self) -> IconName {
@ -54,11 +54,11 @@ impl Tool for PathSearchTool {
}
fn input_schema(&self, format: LanguageModelToolSchemaFormat) -> Result<serde_json::Value> {
json_schema_for::<PathSearchToolInput>(format)
json_schema_for::<FindPathToolInput>(format)
}
fn ui_text(&self, input: &serde_json::Value) -> String {
match serde_json::from_value::<PathSearchToolInput>(input.clone()) {
match serde_json::from_value::<FindPathToolInput>(input.clone()) {
Ok(input) => format!("Find paths matching “`{}`”", input.glob),
Err(_) => "Search paths".to_string(),
}
@ -73,7 +73,7 @@ impl Tool for PathSearchTool {
_window: Option<AnyWindowHandle>,
cx: &mut App,
) -> ToolResult {
let (offset, glob) = match serde_json::from_value::<PathSearchToolInput>(input) {
let (offset, glob) = match serde_json::from_value::<FindPathToolInput>(input) {
Ok(input) => (input.offset, input.glob),
Err(err) => return Task::ready(Err(anyhow!(err))).into(),
};
@ -144,7 +144,7 @@ mod test {
use util::path;
#[gpui::test]
async fn test_path_search_tool(cx: &mut TestAppContext) {
async fn test_find_path_tool(cx: &mut TestAppContext) {
init_test(cx);
let fs = FakeFs::new(cx.executor());

View file

@ -1,4 +1,4 @@
Fast file pattern matching tool that works with any codebase size
Fast file path pattern matching tool that works with any codebase size
- Supports glob patterns like "**/*.js" or "src/**/*.ts"
- Returns matching file paths sorted alphabetically

View file

@ -20,6 +20,8 @@ use util::paths::PathMatcher;
pub struct GrepToolInput {
/// A regex pattern to search for in the entire project. Note that the regex
/// will be parsed by the Rust `regex` crate.
///
/// Do NOT specify a path here! This will only be matched against the code **content**.
pub regex: String,
/// A glob pattern for the paths of files to include in the search.

View file

@ -1 +1 @@
Lists files and directories in a given path. Prefer the `grep` or `path_search` tools when searching the codebase.
Lists files and directories in a given path. Prefer the `grep` or `find_path` tools when searching the codebase.