If file is too big, provide the outline and suggest a follow-up tool (#28158)

<img width="622" alt="Screenshot 2025-04-05 at 5 48 14 PM" src="https://github.com/user-attachments/assets/24b9c7d4-d3e2-4929-bca8-79db5b4e5748" />

Release Notes:

- The `read_files` tool now reads only the symbol outline of files above a certain size, to conserve context window space. It then suggests that the agent call `read_files` again with the relevant line ranges it saw in the outline.
2025-04-05 18:52:52 -04:00 · 2025-04-05 18:52:52 -04:00 · a2fbe82c42
commit a2fbe82c42
parent 57d8c99473
3 changed files with 49 additions and 22 deletions
--- a/crates/assistant_tools/src/read_file_tool.rs
+++ b/crates/assistant_tools/src/read_file_tool.rs
@ -1,6 +1,6 @@
-use std::path::Path;
 use std::sync::Arc;

+use crate::code_symbols_tool::file_outline;
 use crate::schema::json_schema_for;
 use anyhow::{Result, anyhow};
 use assistant_tool::{ActionLog, Tool};
@ -13,6 +13,11 @@ use serde::{Deserialize, Serialize};
 use ui::IconName;
 use util::markdown::MarkdownString;

+/// If the model requests to read a file whose size exceeds this, then
+/// the tool will return the file's symbol outline instead of its contents,
+/// and suggest trying again using line ranges from the outline.
+const MAX_FILE_SIZE_TO_READ: usize = 4096;
+
 #[derive(Debug, Serialize, Deserialize, JsonSchema)]
 pub struct ReadFileToolInput {
    /// The relative path of the file to read.
@ -26,10 +31,10 @@ pub struct ReadFileToolInput {
    /// - directory1
    /// - directory2
    ///
-    /// If you wanna access `file.txt` in `directory1`, you should use the path `directory1/file.txt`.
-    /// If you wanna access `file.txt` in `directory2`, you should use the path `directory2/file.txt`.
+    /// If you want to access `file.txt` in `directory1`, you should use the path `directory1/file.txt`.
+    /// If you want to access `file.txt` in `directory2`, you should use the path `directory2/file.txt`.
    /// </example>
-    pub path: Arc<Path>,
+    pub path: String,

    /// Optional line number to start reading on (1-based index)
    #[serde(default)]
@ -66,7 +71,7 @@ impl Tool for ReadFileTool {
    fn ui_text(&self, input: &serde_json::Value) -> String {
        match serde_json::from_value::<ReadFileToolInput>(input.clone()) {
            Ok(input) => {
-                let path = MarkdownString::inline_code(&input.path.display().to_string());
+                let path = MarkdownString::inline_code(&input.path);
                match (input.start_line, input.end_line) {
                    (Some(start), None) => format!("Read file {path} (from line {start})"),
                    (Some(start), Some(end)) => format!("Read file {path} (lines {start}-{end})"),
@ -91,12 +96,10 @@ impl Tool for ReadFileTool {
        };

        let Some(project_path) = project.read(cx).find_project_path(&input.path, cx) else {
-            return Task::ready(Err(anyhow!(
-                "Path {} not found in project",
-                &input.path.display()
-            )));
+            return Task::ready(Err(anyhow!("Path {} not found in project", &input.path,)));
        };

+        let file_path = input.path.clone();
        cx.spawn(async move |cx| {
            let buffer = cx
                .update(|cx| {
@ -104,27 +107,46 @@ impl Tool for ReadFileTool {
                })?
                .await?;

-            let result = buffer.read_with(cx, |buffer, _cx| {
-                let text = buffer.text();
-                if input.start_line.is_some() || input.end_line.is_some() {
+            // Check if specific line ranges are provided
+            if input.start_line.is_some() || input.end_line.is_some() {
+                let result = buffer.read_with(cx, |buffer, _cx| {
+                    let text = buffer.text();
                    let start = input.start_line.unwrap_or(1);
                    let lines = text.split('\n').skip(start - 1);
                    if let Some(end) = input.end_line {
-                        let count = end.saturating_sub(start);
+                        let count = end.saturating_sub(start).max(1); // Ensure at least 1 line
                        Itertools::intersperse(lines.take(count), "\n").collect()
                    } else {
                        Itertools::intersperse(lines, "\n").collect()
                    }
+                })?;
+
+                action_log.update(cx, |log, cx| {
+                    log.buffer_read(buffer, cx);
+                })?;
+
+                Ok(result)
+            } else {
+                // No line ranges specified, so check file size to see if it's too big.
+                let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
+
+                if file_size <= MAX_FILE_SIZE_TO_READ {
+                    // File is small enough, so return its contents.
+                    let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
+
+                    action_log.update(cx, |log, cx| {
+                        log.buffer_read(buffer, cx);
+                    })?;
+
+                    Ok(result)
                } else {
-                    text
+                    // File is too big, so return its symbol outline instead of its
+                    // contents, with a suggestion to read again with line numbers.
+                    let outline = file_outline(project, file_path, action_log, None, 0, cx).await?;
+
+                    Ok(format!("This file was too big to read all at once. Here is an outline of its symbols:\n\n{outline}\n\nUsing the line numbers in this outline, you can call this tool again while specifying the start_line and end_line fields to see the implementations of symbols in the outline."))
                }
-            })?;
-
-            action_log.update(cx, |log, cx| {
-                log.buffer_read(buffer, cx);
-            })?;
-
-            anyhow::Ok(result)
+            }
        })
    }
 }