Add contents_tool (#28738)
This is a combination of the "read file" and "list directory contents" tools as part of a push to reduce our quantity of builtin tools by combining some of them. The functionality is all there for this tool, although there's room for improvement on the visuals side: it currently always shows the same icon and always says "Read" - so you can't tell at a glance when it's reading a directory vs an individual file. Changing this will require a change to the `Tool` trait, which can be in a separate PR. (FYI @danilo-leal!) <img width="606" alt="Screenshot 2025-04-14 at 11 56 27 PM" src="https://github.com/user-attachments/assets/bded72af-6476-4469-97c6-2f344629b0e4" /> Release Notes: - Added `contents` tool
This commit is contained in:
parent
fc1252b0cd
commit
b794919842
4 changed files with 253 additions and 0 deletions
239
crates/assistant_tools/src/contents_tool.rs
Normal file
239
crates/assistant_tools/src/contents_tool.rs
Normal file
|
@ -0,0 +1,239 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use crate::{code_symbols_tool::file_outline, schema::json_schema_for};
|
||||
use anyhow::{Result, anyhow};
|
||||
use assistant_tool::{ActionLog, Tool};
|
||||
use gpui::{App, Entity, Task};
|
||||
use itertools::Itertools;
|
||||
use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
|
||||
use project::Project;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{fmt::Write, path::Path};
|
||||
use ui::IconName;
|
||||
use util::markdown::MarkdownString;
|
||||
|
||||
/// If the model requests to read a file whose size exceeds this, then
|
||||
/// the tool will return the file's symbol outline instead of its contents,
|
||||
/// and suggest trying again using line ranges from the outline.
|
||||
const MAX_FILE_SIZE_TO_READ: usize = 16384;
|
||||
|
||||
/// If the model requests to list the entries in a directory with more
|
||||
/// entries than this, then the tool will return a subset of the entries
|
||||
/// and suggest trying again.
|
||||
const MAX_DIR_ENTRIES: usize = 1024;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
|
||||
pub struct ContentsToolInput {
|
||||
/// The relative path of the file or directory to access.
|
||||
///
|
||||
/// This path should never be absolute, and the first component
|
||||
/// of the path should always be a root directory in a project.
|
||||
///
|
||||
/// <example>
|
||||
/// If the project has the following root directories:
|
||||
///
|
||||
/// - directory1
|
||||
/// - directory2
|
||||
///
|
||||
/// If you want to access `file.txt` in `directory1`, you should use the path `directory1/file.txt`.
|
||||
/// If you want to list contents in the directory `directory2/subfolder`, you should use the path `directory2/subfolder`.
|
||||
/// </example>
|
||||
pub path: String,
|
||||
|
||||
/// Optional position (1-based index) to start reading on, if you want to read a subset of the contents.
|
||||
/// When reading a file, this refers to a line number in the file (e.g. 1 is the first line).
|
||||
/// When reading a directory, this refers to the number of the directory entry (e.g. 1 is the first entry).
|
||||
///
|
||||
/// Defaults to 1.
|
||||
pub start: Option<u32>,
|
||||
|
||||
/// Optional position (1-based index) to end reading on, if you want to read a subset of the contents.
|
||||
/// When reading a file, this refers to a line number in the file (e.g. 1 is the first line).
|
||||
/// When reading a directory, this refers to the number of the directory entry (e.g. 1 is the first entry).
|
||||
///
|
||||
/// Defaults to reading until the end of the file or directory.
|
||||
pub end: Option<u32>,
|
||||
}
|
||||
|
||||
pub struct ContentsTool;
|
||||
|
||||
impl Tool for ContentsTool {
|
||||
fn name(&self) -> String {
|
||||
"contents".into()
|
||||
}
|
||||
|
||||
fn needs_confirmation(&self, _: &serde_json::Value, _: &App) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn description(&self) -> String {
|
||||
include_str!("./contents_tool/description.md").into()
|
||||
}
|
||||
|
||||
fn icon(&self) -> IconName {
|
||||
IconName::FileSearch
|
||||
}
|
||||
|
||||
fn input_schema(&self, format: LanguageModelToolSchemaFormat) -> Result<serde_json::Value> {
|
||||
json_schema_for::<ContentsToolInput>(format)
|
||||
}
|
||||
|
||||
fn ui_text(&self, input: &serde_json::Value) -> String {
|
||||
match serde_json::from_value::<ContentsToolInput>(input.clone()) {
|
||||
Ok(input) => {
|
||||
let path = MarkdownString::inline_code(&input.path);
|
||||
|
||||
match (input.start, input.end) {
|
||||
(Some(start), None) => format!("Read {path} (from line {start})"),
|
||||
(Some(start), Some(end)) => {
|
||||
format!("Read {path} (lines {start}-{end})")
|
||||
}
|
||||
_ => format!("Read {path}"),
|
||||
}
|
||||
}
|
||||
Err(_) => "Read file or directory".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn run(
|
||||
self: Arc<Self>,
|
||||
input: serde_json::Value,
|
||||
_messages: &[LanguageModelRequestMessage],
|
||||
project: Entity<Project>,
|
||||
action_log: Entity<ActionLog>,
|
||||
cx: &mut App,
|
||||
) -> Task<Result<String>> {
|
||||
let input = match serde_json::from_value::<ContentsToolInput>(input) {
|
||||
Ok(input) => input,
|
||||
Err(err) => return Task::ready(Err(anyhow!(err))),
|
||||
};
|
||||
|
||||
// Sometimes models will return these even though we tell it to give a path and not a glob.
|
||||
// When this happens, just list the root worktree directories.
|
||||
if matches!(input.path.as_str(), "." | "" | "./" | "*") {
|
||||
let output = project
|
||||
.read(cx)
|
||||
.worktrees(cx)
|
||||
.filter_map(|worktree| {
|
||||
worktree.read(cx).root_entry().and_then(|entry| {
|
||||
if entry.is_dir() {
|
||||
entry.path.to_str()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
return Task::ready(Ok(output));
|
||||
}
|
||||
|
||||
let Some(project_path) = project.read(cx).find_project_path(&input.path, cx) else {
|
||||
return Task::ready(Err(anyhow!("Path {} not found in project", &input.path)));
|
||||
};
|
||||
|
||||
let Some(worktree) = project
|
||||
.read(cx)
|
||||
.worktree_for_id(project_path.worktree_id, cx)
|
||||
else {
|
||||
return Task::ready(Err(anyhow!("Worktree not found")));
|
||||
};
|
||||
let worktree = worktree.read(cx);
|
||||
|
||||
let Some(entry) = worktree.entry_for_path(&project_path.path) else {
|
||||
return Task::ready(Err(anyhow!("Path not found: {}", input.path)));
|
||||
};
|
||||
|
||||
// If it's a directory, list its contents
|
||||
if entry.is_dir() {
|
||||
let mut output = String::new();
|
||||
let start_index = input
|
||||
.start
|
||||
.map(|line| (line as usize).saturating_sub(1))
|
||||
.unwrap_or(0);
|
||||
let end_index = input
|
||||
.end
|
||||
.map(|line| (line as usize).saturating_sub(1))
|
||||
.unwrap_or(MAX_DIR_ENTRIES);
|
||||
let mut skipped = 0;
|
||||
|
||||
for (index, entry) in worktree.child_entries(&project_path.path).enumerate() {
|
||||
if index >= start_index && index <= end_index {
|
||||
writeln!(
|
||||
output,
|
||||
"{}",
|
||||
Path::new(worktree.root_name()).join(&entry.path).display(),
|
||||
)
|
||||
.unwrap();
|
||||
} else {
|
||||
skipped += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if output.is_empty() {
|
||||
output.push_str(&input.path);
|
||||
output.push_str(" is empty.");
|
||||
}
|
||||
|
||||
if skipped > 0 {
|
||||
write!(
|
||||
output,
|
||||
"\n\nNote: Skipped {skipped} entries. Adjust start and end to see other entries.",
|
||||
).ok();
|
||||
}
|
||||
|
||||
Task::ready(Ok(output))
|
||||
} else {
|
||||
// It's a file, so read its contents
|
||||
let file_path = input.path.clone();
|
||||
cx.spawn(async move |cx| {
|
||||
let buffer = cx
|
||||
.update(|cx| {
|
||||
project.update(cx, |project, cx| project.open_buffer(project_path, cx))
|
||||
})?
|
||||
.await?;
|
||||
|
||||
if input.start.is_some() || input.end.is_some() {
|
||||
let result = buffer.read_with(cx, |buffer, _cx| {
|
||||
let text = buffer.text();
|
||||
let start = input.start.unwrap_or(1);
|
||||
let lines = text.split('\n').skip(start as usize - 1);
|
||||
if let Some(end) = input.end {
|
||||
let count = end.saturating_sub(start).max(1); // Ensure at least 1 line
|
||||
Itertools::intersperse(lines.take(count as usize), "\n").collect()
|
||||
} else {
|
||||
Itertools::intersperse(lines, "\n").collect()
|
||||
}
|
||||
})?;
|
||||
|
||||
action_log.update(cx, |log, cx| {
|
||||
log.buffer_read(buffer, cx);
|
||||
})?;
|
||||
|
||||
Ok(result)
|
||||
} else {
|
||||
// No line ranges specified, so check file size to see if it's too big.
|
||||
let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
|
||||
|
||||
if file_size <= MAX_FILE_SIZE_TO_READ {
|
||||
let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
|
||||
|
||||
action_log.update(cx, |log, cx| {
|
||||
log.buffer_read(buffer, cx);
|
||||
})?;
|
||||
|
||||
Ok(result)
|
||||
} else {
|
||||
// File is too big, so return its outline and a suggestion to
|
||||
// read again with a line number range specified.
|
||||
let outline = file_outline(project, file_path, action_log, None, 0, cx).await?;
|
||||
|
||||
Ok(format!("This file was too big to read all at once. Here is an outline of its symbols:\n\n{outline}\n\nUsing the line numbers in this outline, you can call this tool again while specifying the start and end fields to see the implementations of symbols in the outline."))
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue