Use outline (#29687)

## Before

![Screenshot 2025-04-30 at 10 56
36 AM](https://github.com/user-attachments/assets/3a435f4c-ad45-4f26-a847-2d5c9d03648e)

## After

![Screenshot 2025-04-30 at 10 55
27 AM](https://github.com/user-attachments/assets/cc3a8144-b6fe-4a15-8a47-b2487ce4f66e)

Release Notes:

- Context picker and `@`-mentions now work with very large files.
This commit is contained in:
Richard Feldman 2025-04-30 14:00:00 -04:00 committed by GitHub
parent 8173534ad5
commit 50f705e779
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 301 additions and 142 deletions

1
Cargo.lock generated
View file

@ -690,6 +690,7 @@ dependencies = [
"pretty_assertions", "pretty_assertions",
"project", "project",
"rand 0.8.5", "rand 0.8.5",
"regex",
"serde", "serde",
"serde_json", "serde_json",
"settings", "settings",

View file

@ -3,11 +3,12 @@ use std::hash::{Hash, Hasher};
use std::path::PathBuf; use std::path::PathBuf;
use std::{ops::Range, path::Path, sync::Arc}; use std::{ops::Range, path::Path, sync::Arc};
use assistant_tool::outline;
use collections::HashSet; use collections::HashSet;
use futures::future; use futures::future;
use futures::{FutureExt, future::Shared}; use futures::{FutureExt, future::Shared};
use gpui::{App, AppContext as _, Entity, SharedString, Task}; use gpui::{App, AppContext as _, Entity, SharedString, Task};
use language::Buffer; use language::{Buffer, ParseStatus};
use language_model::{LanguageModelImage, LanguageModelRequestMessage, MessageContent}; use language_model::{LanguageModelImage, LanguageModelRequestMessage, MessageContent};
use project::{Project, ProjectEntryId, ProjectPath, Worktree}; use project::{Project, ProjectEntryId, ProjectPath, Worktree};
use prompt_store::{PromptStore, UserPromptId}; use prompt_store::{PromptStore, UserPromptId};
@ -152,6 +153,7 @@ pub struct FileContext {
pub handle: FileContextHandle, pub handle: FileContextHandle,
pub full_path: Arc<Path>, pub full_path: Arc<Path>,
pub text: SharedString, pub text: SharedString,
pub is_outline: bool,
} }
impl FileContextHandle { impl FileContextHandle {
@ -177,14 +179,51 @@ impl FileContextHandle {
log::error!("file context missing path"); log::error!("file context missing path");
return Task::ready(None); return Task::ready(None);
}; };
let full_path = file.full_path(cx); let full_path: Arc<Path> = file.full_path(cx).into();
let rope = buffer_ref.as_rope().clone(); let rope = buffer_ref.as_rope().clone();
let buffer = self.buffer.clone(); let buffer = self.buffer.clone();
cx.background_spawn(async move {
cx.spawn(async move |cx| {
// For large files, use outline instead of full content
if rope.len() > outline::AUTO_OUTLINE_SIZE {
// Wait until the buffer has been fully parsed, so we can read its outline
if let Ok(mut parse_status) =
buffer.read_with(cx, |buffer, _| buffer.parse_status())
{
while *parse_status.borrow() != ParseStatus::Idle {
parse_status.changed().await.log_err();
}
if let Ok(snapshot) = buffer.read_with(cx, |buffer, _| buffer.snapshot()) {
if let Some(outline) = snapshot.outline(None) {
let items = outline
.items
.into_iter()
.map(|item| item.to_point(&snapshot));
if let Ok(outline_text) =
outline::render_outline(items, None, 0, usize::MAX).await
{
let context = AgentContext::File(FileContext { let context = AgentContext::File(FileContext {
handle: self, handle: self,
full_path: full_path.into(), full_path,
text: outline_text.into(),
is_outline: true,
});
return Some((context, vec![buffer]));
}
}
}
}
}
// Fallback to full content if we couldn't build an outline
// (or didn't need to because the file was small enough)
let context = AgentContext::File(FileContext {
handle: self,
full_path,
text: rope.to_string().into(), text: rope.to_string().into(),
is_outline: false,
}); });
Some((context, vec![buffer])) Some((context, vec![buffer]))
}) })
@ -996,3 +1035,115 @@ impl Hash for AgentContextKey {
} }
} }
} }
#[cfg(test)]
mod tests {
    use super::*;
    use gpui::TestAppContext;
    use project::{FakeFs, Project};
    use serde_json::json;
    use settings::SettingsStore;
    use util::path;

    /// Installs a test settings store as a global and initializes the language
    /// and project settings that the tests below rely on.
    fn init_test_settings(cx: &mut TestAppContext) {
        cx.update(|app| {
            let store = SettingsStore::test(app);
            app.set_global(store);
            language::init(app);
            Project::init_settings(app);
        });
    }

    /// Builds a project rooted at `/test` on a fake filesystem populated with `files`.
    async fn create_test_project(
        cx: &mut TestAppContext,
        files: serde_json::Value,
    ) -> Entity<Project> {
        let fake_fs = FakeFs::new(cx.background_executor.clone());
        fake_fs.insert_tree(path!("/test"), files).await;
        Project::test(fake_fs, [path!("/test").as_ref()], cx).await
    }

    /// Writes `content` to `file.txt` in a fresh test project, opens it as a buffer,
    /// loads it through `load_context`, and returns the resulting file context.
    async fn file_context_for(content: String, cx: &mut TestAppContext) -> FileContext {
        let project = create_test_project(
            cx,
            json!({
                "file.txt": content,
            }),
        )
        .await;

        // Resolve the file inside the project and open it as a buffer.
        let project_path = project
            .read_with(cx, |project, cx| project.find_project_path("file.txt", cx))
            .unwrap();
        let open_task = project.update(cx, |project, cx| project.open_buffer(project_path, cx));
        let buffer = open_task.await.unwrap();

        let handle = AgentContextHandle::File(FileContextHandle {
            buffer: buffer.clone(),
            context_id: ContextId::zero(),
        });

        let load_result = cx
            .update(|cx| load_context(vec![handle], &project, &None, cx))
            .await;

        // Only file contexts are of interest here; take the first one.
        load_result
            .loaded_context
            .contexts
            .into_iter()
            .find_map(|loaded| match loaded {
                AgentContext::File(file_context) => Some(file_context),
                _ => None,
            })
            .expect("Should have found a file context")
    }

    #[gpui::test]
    async fn test_large_file_uses_outline(cx: &mut TestAppContext) {
        init_test_settings(cx);

        // Repeat a short line until the text is well past AUTO_OUTLINE_SIZE.
        const LINE: &str = "Line with some text\n";
        let repetitions = 2 * (outline::AUTO_OUTLINE_SIZE / LINE.len());
        let large_text = LINE.repeat(repetitions);
        let original_len = large_text.len();
        assert!(original_len > outline::AUTO_OUTLINE_SIZE);

        let loaded = file_context_for(large_text, cx).await;

        assert!(loaded.is_outline, "Large file should use outline format");
        assert!(
            loaded.text.len() < original_len,
            "Outline should be smaller than original content"
        );
    }

    #[gpui::test]
    async fn test_small_file_uses_full_content(cx: &mut TestAppContext) {
        init_test_settings(cx);

        let small_text = "This is a small file.\n";
        assert!(small_text.len() < outline::AUTO_OUTLINE_SIZE);

        let loaded = file_context_for(small_text.to_string(), cx).await;

        assert!(!loaded.is_outline, "Small files should not get an outline");
        assert_eq!(loaded.text, small_text);
    }
}

View file

@ -24,6 +24,7 @@ language.workspace = true
language_model.workspace = true language_model.workspace = true
parking_lot.workspace = true parking_lot.workspace = true
project.workspace = true project.workspace = true
regex.workspace = true
serde.workspace = true serde.workspace = true
serde_json.workspace = true serde_json.workspace = true
text.workspace = true text.workspace = true

View file

@ -1,4 +1,5 @@
mod action_log; mod action_log;
pub mod outline;
mod tool_registry; mod tool_registry;
mod tool_schema; mod tool_schema;
mod tool_working_set; mod tool_working_set;

View file

@ -0,0 +1,132 @@
use crate::ActionLog;
use anyhow::{Result, anyhow};
use gpui::{AsyncApp, Entity};
use language::{OutlineItem, ParseStatus};
use project::Project;
use regex::Regex;
use std::fmt::Write;
use text::Point;
/// Size threshold, in bytes of file text. For files larger than this, rather than reading
/// them in full (or including their full text in context), we automatically provide the
/// file's symbol outline, with line numbers.
pub const AUTO_OUTLINE_SIZE: usize = 16384;
/// Opens `path` in `project`, waits for the buffer to finish parsing, and returns
/// its symbol outline rendered as text.
///
/// The opened buffer is registered with `action_log`. When `regex` is provided it is
/// forwarded to [`render_outline`] to filter the outline items. The whole outline is
/// rendered as a single page (offset `0`, no page-size limit).
///
/// # Errors
///
/// Returns an error if `path` cannot be resolved in the project, if the buffer cannot
/// be opened, if an entity or the parse-status channel is no longer available, or if
/// the buffer snapshot has no outline.
pub async fn file_outline(
    project: Entity<Project>,
    path: String,
    action_log: Entity<ActionLog>,
    regex: Option<Regex>,
    cx: &mut AsyncApp,
) -> anyhow::Result<String> {
    let project_path = project.read_with(cx, |project, cx| {
        project
            .find_project_path(&path, cx)
            .ok_or_else(|| anyhow!("Path {path} not found in project"))
    })??;
    let open_task = project.update(cx, |project, cx| project.open_buffer(project_path, cx))?;
    let buffer = open_task.await?;

    action_log.update(cx, |log, cx| {
        log.track_buffer(buffer.clone(), cx);
    })?;

    // Wait until the buffer has been fully parsed, so that we can read its outline.
    let mut parse_status = buffer.read_with(cx, |buffer, _| buffer.parse_status())?;
    loop {
        let is_idle = *parse_status.borrow() == ParseStatus::Idle;
        if is_idle {
            break;
        }
        parse_status.changed().await?;
    }

    let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?;
    let outline = match snapshot.outline(None) {
        Some(outline) => outline,
        None => return Err(anyhow!("No outline information available for this file.")),
    };

    // Resolve each item's range to points so line numbers can be rendered.
    let point_items = outline
        .items
        .into_iter()
        .map(|item| item.to_point(&snapshot));
    render_outline(point_items, regex, 0, usize::MAX).await
}
pub async fn render_outline(
items: impl IntoIterator<Item = OutlineItem<Point>>,
regex: Option<Regex>,
offset: usize,
results_per_page: usize,
) -> Result<String> {
let mut items = items.into_iter().skip(offset);
let entries = items
.by_ref()
.filter(|item| {
regex
.as_ref()
.is_none_or(|regex| regex.is_match(&item.text))
})
.take(results_per_page)
.collect::<Vec<_>>();
let has_more = items.next().is_some();
let mut output = String::new();
let entries_rendered = render_entries(&mut output, entries);
// Calculate pagination information
let page_start = offset + 1;
let page_end = offset + entries_rendered;
let total_symbols = if has_more {
format!("more than {}", page_end)
} else {
page_end.to_string()
};
// Add pagination information
if has_more {
writeln!(&mut output, "\nShowing symbols {page_start}-{page_end} (there were more symbols found; use offset: {page_end} to see next page)",
)
} else {
writeln!(
&mut output,
"\nShowing symbols {page_start}-{page_end} (total symbols: {total_symbols})",
)
}
.ok();
Ok(output)
}
/// Appends one line per outline item to `output` and returns how many items were written.
///
/// Each line consists of one space of indentation per depth level, the item's text,
/// and its 1-based line span formatted as ` [L<line>]` for a single line or
/// ` [L<start>-<end>]` otherwise.
fn render_entries(
    output: &mut String,
    items: impl IntoIterator<Item = OutlineItem<Point>>,
) -> usize {
    let mut rendered = 0;
    items.into_iter().for_each(|entry| {
        // One space of indentation per nesting level.
        output.extend((0..entry.depth).map(|_| ' '));
        output.push_str(&entry.text);

        // Convert rows to 1-based line numbers for display.
        let first_line = entry.range.start.row + 1;
        let last_line = entry.range.end.row + 1;
        if first_line != last_line {
            writeln!(output, " [L{}-{}]", first_line, last_line).ok();
        } else {
            writeln!(output, " [L{}]", first_line).ok();
        }

        rendered += 1;
    });
    rendered
}

View file

@ -4,10 +4,10 @@ use std::sync::Arc;
use crate::schema::json_schema_for; use crate::schema::json_schema_for;
use anyhow::{Result, anyhow}; use anyhow::{Result, anyhow};
use assistant_tool::outline;
use assistant_tool::{ActionLog, Tool, ToolResult}; use assistant_tool::{ActionLog, Tool, ToolResult};
use collections::IndexMap; use collections::IndexMap;
use gpui::{AnyWindowHandle, App, AsyncApp, Entity, Task}; use gpui::{AnyWindowHandle, App, AsyncApp, Entity, Task};
use language::{OutlineItem, ParseStatus, Point};
use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat}; use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
use project::{Project, Symbol}; use project::{Project, Symbol};
use regex::{Regex, RegexBuilder}; use regex::{Regex, RegexBuilder};
@ -148,59 +148,13 @@ impl Tool for CodeSymbolsTool {
}; };
cx.spawn(async move |cx| match input.path { cx.spawn(async move |cx| match input.path {
Some(path) => file_outline(project, path, action_log, regex, cx).await, Some(path) => outline::file_outline(project, path, action_log, regex, cx).await,
None => project_symbols(project, regex, input.offset, cx).await, None => project_symbols(project, regex, input.offset, cx).await,
}) })
.into() .into()
} }
} }
/// Opens `path` in `project`, waits for the buffer to finish parsing, and returns
/// its symbol outline rendered as text.
///
/// The opened buffer is registered with `action_log`. When `regex` is provided it is
/// forwarded to `render_outline` to filter the outline items. The whole outline is
/// rendered as a single page (offset `0`, no page-size limit).
///
/// # Errors
///
/// Returns an error if `path` cannot be resolved in the project, if the buffer cannot
/// be opened, if an entity or the parse-status channel is no longer available, or if
/// the buffer snapshot has no outline.
pub async fn file_outline(
    project: Entity<Project>,
    path: String,
    action_log: Entity<ActionLog>,
    regex: Option<Regex>,
    cx: &mut AsyncApp,
) -> anyhow::Result<String> {
    let project_path = project.read_with(cx, |project, cx| {
        project
            .find_project_path(&path, cx)
            .ok_or_else(|| anyhow!("Path {path} not found in project"))
    })??;
    let open_task = project.update(cx, |project, cx| project.open_buffer(project_path, cx))?;
    let buffer = open_task.await?;

    action_log.update(cx, |log, cx| {
        log.track_buffer(buffer.clone(), cx);
    })?;

    // Wait until the buffer has been fully parsed, so that we can read its outline.
    let mut parse_status = buffer.read_with(cx, |buffer, _| buffer.parse_status())?;
    loop {
        let is_idle = *parse_status.borrow() == ParseStatus::Idle;
        if is_idle {
            break;
        }
        parse_status.changed().await?;
    }

    let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?;
    let outline = match snapshot.outline(None) {
        Some(outline) => outline,
        None => return Err(anyhow!("No outline information available for this file.")),
    };

    // Resolve each item's range to points so line numbers can be rendered.
    let point_items = outline
        .items
        .into_iter()
        .map(|item| item.to_point(&snapshot));
    render_outline(point_items, regex, 0, usize::MAX).await
}
async fn project_symbols( async fn project_symbols(
project: Entity<Project>, project: Entity<Project>,
regex: Option<Regex>, regex: Option<Regex>,
@ -291,77 +245,3 @@ async fn project_symbols(
output output
}) })
} }
async fn render_outline(
items: impl IntoIterator<Item = OutlineItem<Point>>,
regex: Option<Regex>,
offset: usize,
results_per_page: usize,
) -> Result<String> {
let mut items = items.into_iter().skip(offset);
let entries = items
.by_ref()
.filter(|item| {
regex
.as_ref()
.is_none_or(|regex| regex.is_match(&item.text))
})
.take(results_per_page)
.collect::<Vec<_>>();
let has_more = items.next().is_some();
let mut output = String::new();
let entries_rendered = render_entries(&mut output, entries);
// Calculate pagination information
let page_start = offset + 1;
let page_end = offset + entries_rendered;
let total_symbols = if has_more {
format!("more than {}", page_end)
} else {
page_end.to_string()
};
// Add pagination information
if has_more {
writeln!(&mut output, "\nShowing symbols {page_start}-{page_end} (there were more symbols found; use offset: {page_end} to see next page)",
)
} else {
writeln!(
&mut output,
"\nShowing symbols {page_start}-{page_end} (total symbols: {total_symbols})",
)
}
.ok();
Ok(output)
}
/// Appends one line per outline item to `output` and returns how many items were written.
///
/// Each line consists of one space of indentation per depth level, the item's text,
/// and its 1-based line span formatted as ` [L<line>]` for a single line or
/// ` [L<start>-<end>]` otherwise.
fn render_entries(
    output: &mut String,
    items: impl IntoIterator<Item = OutlineItem<Point>>,
) -> usize {
    let mut rendered = 0;
    items.into_iter().for_each(|entry| {
        // One space of indentation per nesting level.
        output.extend((0..entry.depth).map(|_| ' '));
        output.push_str(&entry.text);

        // Convert rows to 1-based line numbers for display.
        let first_line = entry.range.start.row + 1;
        let last_line = entry.range.end.row + 1;
        if first_line != last_line {
            writeln!(output, " [L{}-{}]", first_line, last_line).ok();
        } else {
            writeln!(output, " [L{}]", first_line).ok();
        }

        rendered += 1;
    });
    rendered
}

View file

@ -1,8 +1,8 @@
use std::sync::Arc; use std::sync::Arc;
use crate::{code_symbols_tool::file_outline, schema::json_schema_for}; use crate::schema::json_schema_for;
use anyhow::{Result, anyhow}; use anyhow::{Result, anyhow};
use assistant_tool::{ActionLog, Tool, ToolResult}; use assistant_tool::{ActionLog, Tool, ToolResult, outline};
use gpui::{AnyWindowHandle, App, Entity, Task}; use gpui::{AnyWindowHandle, App, Entity, Task};
use itertools::Itertools; use itertools::Itertools;
use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat}; use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
@ -14,10 +14,6 @@ use ui::IconName;
use util::markdown::MarkdownInlineCode; use util::markdown::MarkdownInlineCode;
/// If the model requests to read a file whose size exceeds this, then /// If the model requests to read a file whose size exceeds this, then
/// the tool will return the file's symbol outline instead of its contents,
/// and suggest trying again using line ranges from the outline.
const MAX_FILE_SIZE_TO_READ: usize = 16384;
/// If the model requests to list the entries in a directory with more /// If the model requests to list the entries in a directory with more
/// entries than this, then the tool will return a subset of the entries /// entries than this, then the tool will return a subset of the entries
/// and suggest trying again. /// and suggest trying again.
@ -218,7 +214,7 @@ impl Tool for ContentsTool {
// No line ranges specified, so check file size to see if it's too big. // No line ranges specified, so check file size to see if it's too big.
let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?; let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
if file_size <= MAX_FILE_SIZE_TO_READ { if file_size <= outline::AUTO_OUTLINE_SIZE {
let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?; let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
action_log.update(cx, |log, cx| { action_log.update(cx, |log, cx| {
@ -229,7 +225,7 @@ impl Tool for ContentsTool {
} else { } else {
// File is too big, so return its outline and a suggestion to // File is too big, so return its outline and a suggestion to
// read again with a line number range specified. // read again with a line number range specified.
let outline = file_outline(project, file_path, action_log, None, cx).await?; let outline = outline::file_outline(project, file_path, action_log, None, cx).await?;
Ok(format!("This file was too big to read all at once. Here is an outline of its symbols:\n\n{outline}\n\nUsing the line numbers in this outline, you can call this tool again while specifying the start and end fields to see the implementations of symbols in the outline.")) Ok(format!("This file was too big to read all at once. Here is an outline of its symbols:\n\n{outline}\n\nUsing the line numbers in this outline, you can call this tool again while specifying the start and end fields to see the implementations of symbols in the outline."))
} }

View file

@ -1,5 +1,6 @@
use crate::{code_symbols_tool::file_outline, schema::json_schema_for}; use crate::schema::json_schema_for;
use anyhow::{Result, anyhow}; use anyhow::{Result, anyhow};
use assistant_tool::outline;
use assistant_tool::{ActionLog, Tool, ToolResult}; use assistant_tool::{ActionLog, Tool, ToolResult};
use gpui::{AnyWindowHandle, App, Entity, Task}; use gpui::{AnyWindowHandle, App, Entity, Task};
@ -14,10 +15,6 @@ use ui::IconName;
use util::markdown::MarkdownInlineCode; use util::markdown::MarkdownInlineCode;
/// If the model requests to read a file whose size exceeds this, then /// If the model requests to read a file whose size exceeds this, then
/// the tool will return an error along with the model's symbol outline,
/// and suggest trying again using line ranges from the outline.
const MAX_FILE_SIZE_TO_READ: usize = 16384;
#[derive(Debug, Serialize, Deserialize, JsonSchema)] #[derive(Debug, Serialize, Deserialize, JsonSchema)]
pub struct ReadFileToolInput { pub struct ReadFileToolInput {
/// The relative path of the file to read. /// The relative path of the file to read.
@ -144,7 +141,7 @@ impl Tool for ReadFileTool {
// No line ranges specified, so check file size to see if it's too big. // No line ranges specified, so check file size to see if it's too big.
let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?; let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
if file_size <= MAX_FILE_SIZE_TO_READ { if file_size <= outline::AUTO_OUTLINE_SIZE {
// File is small enough, so return its contents. // File is small enough, so return its contents.
let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?; let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
@ -154,9 +151,9 @@ impl Tool for ReadFileTool {
Ok(result) Ok(result)
} else { } else {
// File is too big, so return an error with the outline // File is too big, so return the outline
// and a suggestion to read again with line numbers. // and a suggestion to read again with line numbers.
let outline = file_outline(project, file_path, action_log, None, cx).await?; let outline = outline::file_outline(project, file_path, action_log, None, cx).await?;
Ok(formatdoc! {" Ok(formatdoc! {"
This file was too big to read all at once. Here is an outline of its symbols: This file was too big to read all at once. Here is an outline of its symbols: