Add contents_tool (#28738)

This is a combination of the "read file" and "list directory contents" tools as part of a push to reduce our quantity of builtin tools by combining some of them. The functionality is all there for this tool, although there's room for improvement on the visuals side: it currently always shows the same icon and always says "Read" - so you can't tell at a glance when it's reading a directory vs an individual file. Changing this will require a change to the `Tool` trait, which can be in a separate PR. (FYI @danilo-leal!) <img width="606" alt="Screenshot 2025-04-14 at 11 56 27 PM" src="https://github.com/user-attachments/assets/bded72af-6476-4469-97c6-2f344629b0e4" /> Release Notes: - Added `contents` tool
2025-04-15 00:54:25 -04:00 · 2025-04-15 00:54:25 -04:00 · b794919842
commit b794919842
parent fc1252b0cd
4 changed files with 253 additions and 0 deletions
--- a/assets/settings/default.json
+++ b/assets/settings/default.json
@ -644,6 +644,7 @@
        // We don't know which of the context server tools are safe for the "Ask" profile, so we don't enable them by default.
        // "enable_all_context_servers": true,
        "tools": {
+          "contents": true,
          "diagnostics": true,
          "fetch": true,
          "list_directory": false,
@ -662,6 +663,7 @@
          "batch_tool": true,
          "code_actions": true,
          "code_symbols": true,
+          "contents": true,
          "copy_path": false,
          "create_file": true,
          "delete_path": false,
--- a/crates/assistant_tools/src/assistant_tools.rs
+++ b/crates/assistant_tools/src/assistant_tools.rs
@ -1,6 +1,7 @@
 mod batch_tool;
 mod code_action_tool;
 mod code_symbols_tool;
+mod contents_tool;
 mod copy_path_tool;
 mod create_directory_tool;
 mod create_file_tool;
@ -33,6 +34,7 @@ use move_path_tool::MovePathTool;
 use crate::batch_tool::BatchTool;
 use crate::code_action_tool::CodeActionTool;
 use crate::code_symbols_tool::CodeSymbolsTool;
+use crate::contents_tool::ContentsTool;
 use crate::create_directory_tool::CreateDirectoryTool;
 use crate::create_file_tool::CreateFileTool;
 use crate::delete_path_tool::DeletePathTool;
@ -69,6 +71,7 @@ pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {
    registry.register_tool(NowTool);
    registry.register_tool(OpenTool);
    registry.register_tool(CodeSymbolsTool);
+    registry.register_tool(ContentsTool);
    registry.register_tool(PathSearchTool);
    registry.register_tool(ReadFileTool);
    registry.register_tool(RegexSearchTool);
--- a/crates/assistant_tools/src/contents_tool.rs
+++ b/crates/assistant_tools/src/contents_tool.rs
@ -0,0 +1,239 @@
+use std::sync::Arc;
+
+use crate::{code_symbols_tool::file_outline, schema::json_schema_for};
+use anyhow::{Result, anyhow};
+use assistant_tool::{ActionLog, Tool};
+use gpui::{App, Entity, Task};
+use itertools::Itertools;
+use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
+use project::Project;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use std::{fmt::Write, path::Path};
+use ui::IconName;
+use util::markdown::MarkdownString;
+
+/// Size limit for returning a whole file in a single call.
+///
+/// If the model requests to read a file whose size exceeds this, then
+/// the tool will return the file's symbol outline instead of its contents,
+/// and suggest trying again using line ranges from the outline.
+///
+/// Only consulted when the request specifies neither `start` nor `end`;
+/// it is compared against the length of the buffer's text.
+const MAX_FILE_SIZE_TO_READ: usize = 16384;
+
+/// Fallback bound on a directory listing.
+///
+/// If the model requests to list the entries in a directory with more
+/// entries than this, then the tool will return a subset of the entries
+/// and suggest trying again.
+///
+/// Only used when the request does not specify `end`; an explicit `end`
+/// is taken as given.
+const MAX_DIR_ENTRIES: usize = 1024;
+
+// Arguments accepted by `ContentsTool`, deserialized from the JSON value the
+// model supplies for a tool call.
+//
+// NOTE(review): this type derives `JsonSchema` and is passed to `json_schema_for`,
+// so the `///` comments on the fields below presumably end up as `description`
+// strings in the schema shown to the model. Treat them as model-facing text
+// rather than internal notes, and confirm before rewording them. Plain `//`
+// comments are used for this note for the same reason.
+#[derive(Debug, Serialize, Deserialize, JsonSchema)]
+pub struct ContentsToolInput {
+    /// The relative path of the file or directory to access.
+    ///
+    /// This path should never be absolute, and the first component
+    /// of the path should always be a root directory in a project.
+    ///
+    /// <example>
+    /// If the project has the following root directories:
+    ///
+    /// - directory1
+    /// - directory2
+    ///
+    /// If you want to access `file.txt` in `directory1`, you should use the path `directory1/file.txt`.
+    /// If you want to list contents in the directory `directory2/subfolder`, you should use the path `directory2/subfolder`.
+    /// </example>
+    pub path: String,
+
+    /// Optional position (1-based index) to start reading on, if you want to read a subset of the contents.
+    /// When reading a file, this refers to a line number in the file (e.g. 1 is the first line).
+    /// When reading a directory, this refers to the number of the directory entry (e.g. 1 is the first entry).
+    ///
+    /// Defaults to 1.
+    pub start: Option<u32>,
+
+    /// Optional position (1-based index) to end reading on, if you want to read a subset of the contents.
+    /// When reading a file, this refers to a line number in the file (e.g. 1 is the first line).
+    /// When reading a directory, this refers to the number of the directory entry (e.g. 1 is the first entry).
+    ///
+    /// Defaults to reading until the end of the file or directory.
+    pub end: Option<u32>,
+}
+
+/// Tool that returns the contents of a project path: the text of a file, or
+/// the entries of a directory.
+pub struct ContentsTool;
+
+impl Tool for ContentsTool {
+    /// Identifier under which the tool is registered and enabled in settings.
+    fn name(&self) -> String {
+        "contents".into()
+    }
+
+    /// This tool never asks for confirmation before running.
+    fn needs_confirmation(&self, _: &serde_json::Value, _: &App) -> bool {
+        false
+    }
+
+    /// Model-facing description, embedded from `contents_tool/description.md`.
+    fn description(&self) -> String {
+        include_str!("./contents_tool/description.md").into()
+    }
+
+    /// Icon shown for this tool; the same icon is used for files and directories.
+    fn icon(&self) -> IconName {
+        IconName::FileSearch
+    }
+
+    /// JSON schema for [`ContentsToolInput`] in the requested schema format.
+    fn input_schema(&self, format: LanguageModelToolSchemaFormat) -> Result<serde_json::Value> {
+        json_schema_for::<ContentsToolInput>(format)
+    }
+
+    /// Short label describing a tool call.
+    ///
+    /// Falls back to a generic label when `input` does not deserialize into
+    /// [`ContentsToolInput`]. Every combination of `start`/`end` is reflected
+    /// in the label; a missing `start` is shown as 1, which is its default.
+    fn ui_text(&self, input: &serde_json::Value) -> String {
+        let Ok(input) = serde_json::from_value::<ContentsToolInput>(input.clone()) else {
+            return "Read file or directory".to_string();
+        };
+
+        let path = MarkdownString::inline_code(&input.path);
+
+        match (input.start, input.end) {
+            (Some(start), Some(end)) => format!("Read {path} (lines {start}-{end})"),
+            (Some(start), None) => format!("Read {path} (from line {start})"),
+            (None, Some(end)) => format!("Read {path} (lines 1-{end})"),
+            (None, None) => format!("Read {path}"),
+        }
+    }
+
+    /// Executes the tool call.
+    ///
+    /// Behavior by kind of path:
+    ///
+    /// - `.`, `./`, `*` or an empty path: lists the project's root directories.
+    /// - Directory: lists its child entries, one per line, prefixed with the
+    ///   worktree root name. `start`/`end` select a 1-based, inclusive range of
+    ///   entries; without `end`, at most `MAX_DIR_ENTRIES` entries are listed
+    ///   beginning at `start`. A note reports how many entries were left out.
+    /// - File with `start` and/or `end`: returns that 1-based, inclusive range
+    ///   of lines (always at least one line is requested).
+    /// - File without a range: returns the whole text if it is no longer than
+    ///   `MAX_FILE_SIZE_TO_READ`, otherwise a symbol outline plus a hint to
+    ///   retry with a line range.
+    ///
+    /// Returns an error task if `input` does not deserialize, if the path
+    /// cannot be resolved in the project, or if opening the buffer fails.
+    fn run(
+        self: Arc<Self>,
+        input: serde_json::Value,
+        _messages: &[LanguageModelRequestMessage],
+        project: Entity<Project>,
+        action_log: Entity<ActionLog>,
+        cx: &mut App,
+    ) -> Task<Result<String>> {
+        let input = match serde_json::from_value::<ContentsToolInput>(input) {
+            Ok(input) => input,
+            Err(err) => return Task::ready(Err(anyhow!(err))),
+        };
+
+        // Sometimes models will return these even though we tell it to give a path and not a glob.
+        // When this happens, just list the root worktree directories.
+        if matches!(input.path.as_str(), "." | "" | "./" | "*") {
+            // NOTE(review): this prints the root entry's `path`; presumably that is
+            // relative to the worktree, so confirm it yields the root directory's
+            // name (the directory branch below uses `root_name()` instead).
+            let output = project
+                .read(cx)
+                .worktrees(cx)
+                .filter_map(|worktree| {
+                    worktree.read(cx).root_entry().and_then(|entry| {
+                        if entry.is_dir() {
+                            entry.path.to_str()
+                        } else {
+                            None
+                        }
+                    })
+                })
+                .collect::<Vec<_>>()
+                .join("\n");
+
+            return Task::ready(Ok(output));
+        }
+
+        let Some(project_path) = project.read(cx).find_project_path(&input.path, cx) else {
+            return Task::ready(Err(anyhow!("Path {} not found in project", &input.path)));
+        };
+
+        let Some(worktree) = project
+            .read(cx)
+            .worktree_for_id(project_path.worktree_id, cx)
+        else {
+            return Task::ready(Err(anyhow!("Worktree not found")));
+        };
+        let worktree = worktree.read(cx);
+
+        let Some(entry) = worktree.entry_for_path(&project_path.path) else {
+            return Task::ready(Err(anyhow!("Path not found: {}", input.path)));
+        };
+
+        // If it's a directory, list its contents
+        if entry.is_dir() {
+            // Convert the 1-based, inclusive request into 0-based, inclusive indices.
+            // A `start` of 0 is treated like 1.
+            let start_index = input
+                .start
+                .map(|position| (position as usize).saturating_sub(1))
+                .unwrap_or(0);
+            // Without an explicit `end`, show a window of at most MAX_DIR_ENTRIES
+            // entries beginning at `start`, so paging past the first window works.
+            let end_index = match input.end {
+                Some(position) => (position as usize).saturating_sub(1),
+                None => start_index.saturating_add(MAX_DIR_ENTRIES - 1),
+            };
+
+            let mut output = String::new();
+            let mut skipped = 0usize;
+
+            for (index, child) in worktree.child_entries(&project_path.path).enumerate() {
+                if (start_index..=end_index).contains(&index) {
+                    // Writing into a `String` cannot fail.
+                    writeln!(
+                        output,
+                        "{}",
+                        Path::new(worktree.root_name()).join(&child.path).display(),
+                    )
+                    .ok();
+                } else {
+                    skipped += 1;
+                }
+            }
+
+            if output.is_empty() {
+                if skipped == 0 {
+                    output.push_str(&input.path);
+                    output.push_str(" is empty.");
+                } else {
+                    // The directory has entries, just none inside the requested range.
+                    output.push_str("No entries in the requested range.");
+                }
+            }
+
+            if skipped > 0 {
+                write!(
+                    output,
+                    "\n\nNote: Skipped {skipped} entries. Adjust start and end to see other entries.",
+                ).ok();
+            }
+
+            Task::ready(Ok(output))
+        } else {
+            // It's a file, so read its contents
+            let file_path = input.path.clone();
+            cx.spawn(async move |cx| {
+                let buffer = cx
+                    .update(|cx| {
+                        project.update(cx, |project, cx| project.open_buffer(project_path, cx))
+                    })?
+                    .await?;
+
+                if input.start.is_some() || input.end.is_some() {
+                    let result = buffer.read_with(cx, |buffer, _cx| {
+                        let text = buffer.text();
+                        // Line numbers are 1-based; clamp so a `start` of 0 cannot underflow.
+                        let start = input.start.unwrap_or(1).max(1);
+                        let lines = text.split('\n').skip((start - 1) as usize);
+                        if let Some(end) = input.end {
+                            // `end` is inclusive; an `end` before `start` still yields one line.
+                            let count = end.saturating_sub(start).saturating_add(1);
+                            Itertools::intersperse(lines.take(count as usize), "\n").collect()
+                        } else {
+                            Itertools::intersperse(lines, "\n").collect()
+                        }
+                    })?;
+
+                    action_log.update(cx, |log, cx| {
+                        log.buffer_read(buffer, cx);
+                    })?;
+
+                    Ok(result)
+                } else {
+                    // No line ranges specified, so check file size to see if it's too big.
+                    // The text is fetched once and reused as the result when it fits.
+                    let text = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
+
+                    if text.len() <= MAX_FILE_SIZE_TO_READ {
+                        action_log.update(cx, |log, cx| {
+                            log.buffer_read(buffer, cx);
+                        })?;
+
+                        Ok(text)
+                    } else {
+                        // File is too big, so return its outline and a suggestion to
+                        // read again with a line number range specified.
+                        let outline = file_outline(project, file_path, action_log, None, 0, cx).await?;
+
+                        Ok(format!("This file was too big to read all at once. Here is an outline of its symbols:\n\n{outline}\n\nUsing the line numbers in this outline, you can call this tool again while specifying the start and end fields to see the implementations of symbols in the outline."))
+                    }
+                }
+            })
+        }
+    }
+}
--- a/crates/assistant_tools/src/contents_tool/description.md
+++ b/crates/assistant_tools/src/contents_tool/description.md
@ -0,0 +1,9 @@
+Reads the contents of a path on the filesystem.
+
+If the path is a directory, this lists all files and directories within that path.
+If the path is a file, this returns the file's contents.
+
+When reading a file, if the file is too big and no line range is specified, an outline of the file's code symbols is listed instead, which can be used to request specific line ranges in a subsequent call.
+
+Similarly, if a directory has too many entries to show at once, a subset of entries will be shown,
+and subsequent requests can use the `start` and `end` entry positions to get other subsets.