use crate::schema::json_schema_for; use anyhow::{Context as _, Result, anyhow}; use assistant_tool::{ActionLog, Tool, ToolResult}; use assistant_tool::{ToolResultContent, outline}; use gpui::{AnyWindowHandle, App, Entity, Task}; use project::{ImageItem, image_store}; use assistant_tool::ToolResultOutput; use indoc::formatdoc; use itertools::Itertools; use language::{Anchor, Point}; use language_model::{ LanguageModel, LanguageModelImage, LanguageModelRequest, LanguageModelToolSchemaFormat, }; use project::{AgentLocation, Project}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::sync::Arc; use ui::IconName; use util::markdown::MarkdownInlineCode; /// If the model requests to read a file whose size exceeds this, then #[derive(Debug, Serialize, Deserialize, JsonSchema)] pub struct ReadFileToolInput { /// The relative path of the file to read. /// /// This path should never be absolute, and the first component /// of the path should always be a root directory in a project. /// /// /// If the project has the following root directories: /// /// - directory1 /// - directory2 /// /// If you want to access `file.txt` in `directory1`, you should use the path `directory1/file.txt`. /// If you want to access `file.txt` in `directory2`, you should use the path `directory2/file.txt`. /// pub path: String, /// Optional line number to start reading on (1-based index) #[serde(default)] pub start_line: Option, /// Optional line number to end reading on (1-based index, inclusive) #[serde(default)] pub end_line: Option, } pub struct ReadFileTool; impl Tool for ReadFileTool { fn name(&self) -> String { "read_file".into() } fn needs_confirmation(&self, _: &serde_json::Value, _: &App) -> bool { false } fn description(&self) -> String { include_str!("./read_file_tool/description.md").into() } fn icon(&self) -> IconName { IconName::FileSearch } fn input_schema(&self, format: LanguageModelToolSchemaFormat) -> Result { json_schema_for::(format) } fn ui_text(&self, input: &serde_json::Value) -> String { match serde_json::from_value::(input.clone()) { Ok(input) => { let path = MarkdownInlineCode(&input.path); match (input.start_line, input.end_line) { (Some(start), None) => format!("Read file {path} (from line {start})"), (Some(start), Some(end)) => format!("Read file {path} (lines {start}-{end})"), _ => format!("Read file {path}"), } } Err(_) => "Read file".to_string(), } } fn run( self: Arc, input: serde_json::Value, _request: Arc, project: Entity, action_log: Entity, model: Arc, _window: Option, cx: &mut App, ) -> ToolResult { let input = match serde_json::from_value::(input) { Ok(input) => input, Err(err) => return Task::ready(Err(anyhow!(err))).into(), }; let Some(project_path) = project.read(cx).find_project_path(&input.path, cx) else { return Task::ready(Err(anyhow!("Path {} not found in project", &input.path))).into(); }; let file_path = input.path.clone(); if image_store::is_image_file(&project, &project_path, cx) { if !model.supports_images() { return Task::ready(Err(anyhow!( "Attempted to read an image, but Zed doesn't currently sending images to {}.", model.name().0 ))) .into(); } let task = cx.spawn(async move |cx| -> Result { let image_entity: Entity = cx .update(|cx| { project.update(cx, |project, cx| { project.open_image(project_path.clone(), cx) }) })? .await?; let image = image_entity.read_with(cx, |image_item, _| Arc::clone(&image_item.image))?; let language_model_image = cx .update(|cx| LanguageModelImage::from_image(image, cx))? .await .context("processing image")?; Ok(ToolResultOutput { content: ToolResultContent::Image(language_model_image), output: None, }) }); return task.into(); } cx.spawn(async move |cx| { let buffer = cx .update(|cx| { project.update(cx, |project, cx| project.open_buffer(project_path, cx)) })? .await?; if buffer.read_with(cx, |buffer, _| { buffer .file() .as_ref() .map_or(true, |file| !file.disk_state().exists()) })? { anyhow::bail!("{file_path} not found"); } project.update(cx, |project, cx| { project.set_agent_location( Some(AgentLocation { buffer: buffer.downgrade(), position: Anchor::MIN, }), cx, ); })?; // Check if specific line ranges are provided if input.start_line.is_some() || input.end_line.is_some() { let mut anchor = None; let result = buffer.read_with(cx, |buffer, _cx| { let text = buffer.text(); // .max(1) because despite instructions to be 1-indexed, sometimes the model passes 0. let start = input.start_line.unwrap_or(1).max(1); let start_row = start - 1; if start_row <= buffer.max_point().row { let column = buffer.line_indent_for_row(start_row).raw_len(); anchor = Some(buffer.anchor_before(Point::new(start_row, column))); } let lines = text.split('\n').skip(start_row as usize); if let Some(end) = input.end_line { let count = end.saturating_sub(start).saturating_add(1); // Ensure at least 1 line Itertools::intersperse(lines.take(count as usize), "\n") .collect::() .into() } else { Itertools::intersperse(lines, "\n") .collect::() .into() } })?; action_log.update(cx, |log, cx| { log.buffer_read(buffer.clone(), cx); })?; if let Some(anchor) = anchor { project.update(cx, |project, cx| { project.set_agent_location( Some(AgentLocation { buffer: buffer.downgrade(), position: anchor, }), cx, ); })?; } Ok(result) } else { // No line ranges specified, so check file size to see if it's too big. let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?; if file_size <= outline::AUTO_OUTLINE_SIZE { // File is small enough, so return its contents. let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?; action_log.update(cx, |log, cx| { log.buffer_read(buffer, cx); })?; Ok(result.into()) } else { // File is too big, so return the outline // and a suggestion to read again with line numbers. let outline = outline::file_outline(project, file_path, action_log, None, cx).await?; Ok(formatdoc! {" This file was too big to read all at once. Here is an outline of its symbols: {outline} Using the line numbers in this outline, you can call this tool again while specifying the start_line and end_line fields to see the implementations of symbols in the outline." } .into()) } } }) .into() } } #[cfg(test)] mod test { use super::*; use gpui::{AppContext, TestAppContext}; use language::{Language, LanguageConfig, LanguageMatcher}; use language_model::fake_provider::FakeLanguageModel; use project::{FakeFs, Project}; use serde_json::json; use settings::SettingsStore; use util::path; #[gpui::test] async fn test_read_nonexistent_file(cx: &mut TestAppContext) { init_test(cx); let fs = FakeFs::new(cx.executor()); fs.insert_tree("/root", json!({})).await; let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await; let action_log = cx.new(|_| ActionLog::new(project.clone())); let model = Arc::new(FakeLanguageModel::default()); let result = cx .update(|cx| { let input = json!({ "path": "root/nonexistent_file.txt" }); Arc::new(ReadFileTool) .run( input, Arc::default(), project.clone(), action_log, model, None, cx, ) .output }) .await; assert_eq!( result.unwrap_err().to_string(), "root/nonexistent_file.txt not found" ); } #[gpui::test] async fn test_read_small_file(cx: &mut TestAppContext) { init_test(cx); let fs = FakeFs::new(cx.executor()); fs.insert_tree( "/root", json!({ "small_file.txt": "This is a small file content" }), ) .await; let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await; let action_log = cx.new(|_| ActionLog::new(project.clone())); let model = Arc::new(FakeLanguageModel::default()); let result = cx .update(|cx| { let input = json!({ "path": "root/small_file.txt" }); Arc::new(ReadFileTool) .run( input, Arc::default(), project.clone(), action_log, model, None, cx, ) .output }) .await; assert_eq!( result.unwrap().content.as_str(), Some("This is a small file content") ); } #[gpui::test] async fn test_read_large_file(cx: &mut TestAppContext) { init_test(cx); let fs = FakeFs::new(cx.executor()); fs.insert_tree( "/root", json!({ "large_file.rs": (0..1000).map(|i| format!("struct Test{} {{\n a: u32,\n b: usize,\n}}", i)).collect::>().join("\n") }), ) .await; let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await; let language_registry = project.read_with(cx, |project, _| project.languages().clone()); language_registry.add(Arc::new(rust_lang())); let action_log = cx.new(|_| ActionLog::new(project.clone())); let model = Arc::new(FakeLanguageModel::default()); let result = cx .update(|cx| { let input = json!({ "path": "root/large_file.rs" }); Arc::new(ReadFileTool) .run( input, Arc::default(), project.clone(), action_log.clone(), model.clone(), None, cx, ) .output }) .await; let content = result.unwrap(); let content = content.as_str().unwrap(); assert_eq!( content.lines().skip(4).take(6).collect::>(), vec![ "struct Test0 [L1-4]", " a [L2]", " b [L3]", "struct Test1 [L5-8]", " a [L6]", " b [L7]", ] ); let result = cx .update(|cx| { let input = json!({ "path": "root/large_file.rs", "offset": 1 }); Arc::new(ReadFileTool) .run( input, Arc::default(), project.clone(), action_log, model, None, cx, ) .output }) .await; let content = result.unwrap(); let expected_content = (0..1000) .flat_map(|i| { vec![ format!("struct Test{} [L{}-{}]", i, i * 4 + 1, i * 4 + 4), format!(" a [L{}]", i * 4 + 2), format!(" b [L{}]", i * 4 + 3), ] }) .collect::>(); pretty_assertions::assert_eq!( content .as_str() .unwrap() .lines() .skip(4) .take(expected_content.len()) .collect::>(), expected_content ); } #[gpui::test] async fn test_read_file_with_line_range(cx: &mut TestAppContext) { init_test(cx); let fs = FakeFs::new(cx.executor()); fs.insert_tree( "/root", json!({ "multiline.txt": "Line 1\nLine 2\nLine 3\nLine 4\nLine 5" }), ) .await; let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await; let action_log = cx.new(|_| ActionLog::new(project.clone())); let model = Arc::new(FakeLanguageModel::default()); let result = cx .update(|cx| { let input = json!({ "path": "root/multiline.txt", "start_line": 2, "end_line": 4 }); Arc::new(ReadFileTool) .run( input, Arc::default(), project.clone(), action_log, model, None, cx, ) .output }) .await; assert_eq!( result.unwrap().content.as_str(), Some("Line 2\nLine 3\nLine 4") ); } #[gpui::test] async fn test_read_file_line_range_edge_cases(cx: &mut TestAppContext) { init_test(cx); let fs = FakeFs::new(cx.executor()); fs.insert_tree( "/root", json!({ "multiline.txt": "Line 1\nLine 2\nLine 3\nLine 4\nLine 5" }), ) .await; let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await; let action_log = cx.new(|_| ActionLog::new(project.clone())); let model = Arc::new(FakeLanguageModel::default()); // start_line of 0 should be treated as 1 let result = cx .update(|cx| { let input = json!({ "path": "root/multiline.txt", "start_line": 0, "end_line": 2 }); Arc::new(ReadFileTool) .run( input, Arc::default(), project.clone(), action_log.clone(), model.clone(), None, cx, ) .output }) .await; assert_eq!(result.unwrap().content.as_str(), Some("Line 1\nLine 2")); // end_line of 0 should result in at least 1 line let result = cx .update(|cx| { let input = json!({ "path": "root/multiline.txt", "start_line": 1, "end_line": 0 }); Arc::new(ReadFileTool) .run( input, Arc::default(), project.clone(), action_log.clone(), model.clone(), None, cx, ) .output }) .await; assert_eq!(result.unwrap().content.as_str(), Some("Line 1")); // when start_line > end_line, should still return at least 1 line let result = cx .update(|cx| { let input = json!({ "path": "root/multiline.txt", "start_line": 3, "end_line": 2 }); Arc::new(ReadFileTool) .run( input, Arc::default(), project.clone(), action_log, model, None, cx, ) .output }) .await; assert_eq!(result.unwrap().content.as_str(), Some("Line 3")); } fn init_test(cx: &mut TestAppContext) { cx.update(|cx| { let settings_store = SettingsStore::test(cx); cx.set_global(settings_store); language::init(cx); Project::init_settings(cx); }); } fn rust_lang() -> Language { Language::new( LanguageConfig { name: "Rust".into(), matcher: LanguageMatcher { path_suffixes: vec!["rs".to_string()], ..Default::default() }, ..Default::default() }, Some(tree_sitter_rust::LANGUAGE.into()), ) .with_outline_query( r#" (line_comment) @annotation (struct_item "struct" @context name: (_) @name) @item (enum_item "enum" @context name: (_) @name) @item (enum_variant name: (_) @name) @item (field_declaration name: (_) @name) @item (impl_item "impl" @context trait: (_)? @name "for"? @context type: (_) @name body: (_ "{" (_)* "}")) @item (function_item "fn" @context name: (_) @name) @item (mod_item "mod" @context name: (_) @name) @item "#, ) .unwrap() } }