If file is too big, provide the outline and suggest a follow-up tool (#28158)

<img width="622" alt="Screenshot 2025-04-05 at 5 48 14 PM"
src="https://github.com/user-attachments/assets/24b9c7d4-d3e2-4929-bca8-79db5b4e5748"
/>

Release Notes:

- The `read_files` tool now reads only the symbol outline files above a
certain size, to conserve context window space. Then it suggests that
the agent call `read_files` again with the relevant line ranges it saw
in the outline.
This commit is contained in:
Richard Feldman 2025-04-05 18:52:52 -04:00 committed by GitHub
parent 57d8c99473
commit a2fbe82c42
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 49 additions and 22 deletions

View file

@ -1,6 +1,6 @@
use std::path::Path;
use std::sync::Arc;
use crate::code_symbols_tool::file_outline;
use crate::schema::json_schema_for;
use anyhow::{Result, anyhow};
use assistant_tool::{ActionLog, Tool};
@ -13,6 +13,11 @@ use serde::{Deserialize, Serialize};
use ui::IconName;
use util::markdown::MarkdownString;
/// If the model requests to read a file whose size exceeds this, then
/// the tool will return an error along with the model's symbol outline,
/// and suggest trying again using line ranges from the outline.
const MAX_FILE_SIZE_TO_READ: usize = 4096;
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
pub struct ReadFileToolInput {
/// The relative path of the file to read.
@ -26,10 +31,10 @@ pub struct ReadFileToolInput {
/// - directory1
/// - directory2
///
/// If you wanna access `file.txt` in `directory1`, you should use the path `directory1/file.txt`.
/// If you wanna access `file.txt` in `directory2`, you should use the path `directory2/file.txt`.
/// If you want to access `file.txt` in `directory1`, you should use the path `directory1/file.txt`.
/// If you want to access `file.txt` in `directory2`, you should use the path `directory2/file.txt`.
/// </example>
pub path: Arc<Path>,
pub path: String,
/// Optional line number to start reading on (1-based index)
#[serde(default)]
@ -66,7 +71,7 @@ impl Tool for ReadFileTool {
fn ui_text(&self, input: &serde_json::Value) -> String {
match serde_json::from_value::<ReadFileToolInput>(input.clone()) {
Ok(input) => {
let path = MarkdownString::inline_code(&input.path.display().to_string());
let path = MarkdownString::inline_code(&input.path);
match (input.start_line, input.end_line) {
(Some(start), None) => format!("Read file {path} (from line {start})"),
(Some(start), Some(end)) => format!("Read file {path} (lines {start}-{end})"),
@ -91,12 +96,10 @@ impl Tool for ReadFileTool {
};
let Some(project_path) = project.read(cx).find_project_path(&input.path, cx) else {
return Task::ready(Err(anyhow!(
"Path {} not found in project",
&input.path.display()
)));
return Task::ready(Err(anyhow!("Path {} not found in project", &input.path,)));
};
let file_path = input.path.clone();
cx.spawn(async move |cx| {
let buffer = cx
.update(|cx| {
@ -104,27 +107,46 @@ impl Tool for ReadFileTool {
})?
.await?;
let result = buffer.read_with(cx, |buffer, _cx| {
let text = buffer.text();
if input.start_line.is_some() || input.end_line.is_some() {
// Check if specific line ranges are provided
if input.start_line.is_some() || input.end_line.is_some() {
let result = buffer.read_with(cx, |buffer, _cx| {
let text = buffer.text();
let start = input.start_line.unwrap_or(1);
let lines = text.split('\n').skip(start - 1);
if let Some(end) = input.end_line {
let count = end.saturating_sub(start);
let count = end.saturating_sub(start).max(1); // Ensure at least 1 line
Itertools::intersperse(lines.take(count), "\n").collect()
} else {
Itertools::intersperse(lines, "\n").collect()
}
})?;
action_log.update(cx, |log, cx| {
log.buffer_read(buffer, cx);
})?;
Ok(result)
} else {
// No line ranges specified, so check file size to see if it's too big.
let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
if file_size <= MAX_FILE_SIZE_TO_READ {
// File is small enough, so return its contents.
let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
action_log.update(cx, |log, cx| {
log.buffer_read(buffer, cx);
})?;
Ok(result)
} else {
text
// File is too big, so return an error with the outline
// and a suggestion to read again with line numbers.
let outline = file_outline(project, file_path, action_log, None, 0, cx).await?;
Ok(format!("This file was too big to read all at once. Here is an outline of its symbols:\n\n{outline}\n\nUsing the line numbers in this outline, you can call this tool again while specifying the start_line and end_line fields to see the implementations of symbols in the outline."))
}
})?;
action_log.update(cx, |log, cx| {
log.buffer_read(buffer, cx);
})?;
anyhow::Ok(result)
}
})
}
}