Use tree-sitter when returning symbols to the model for a given file (#28352)
This also increases the threshold for when we return an outline during `read_file`. Release Notes: - Fixed an issue that caused the agent to fail reading large files if the LSP hadn't started yet.
This commit is contained in:
parent
ca8f6e8a3f
commit
97641c3298
6 changed files with 46 additions and 218 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -746,7 +746,6 @@ dependencies = [
|
||||||
"itertools 0.14.0",
|
"itertools 0.14.0",
|
||||||
"language",
|
"language",
|
||||||
"language_model",
|
"language_model",
|
||||||
"lsp",
|
|
||||||
"open",
|
"open",
|
||||||
"project",
|
"project",
|
||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
|
|
|
@ -23,7 +23,6 @@ http_client.workspace = true
|
||||||
itertools.workspace = true
|
itertools.workspace = true
|
||||||
language.workspace = true
|
language.workspace = true
|
||||||
language_model.workspace = true
|
language_model.workspace = true
|
||||||
lsp.workspace = true
|
|
||||||
project.workspace = true
|
project.workspace = true
|
||||||
regex.workspace = true
|
regex.workspace = true
|
||||||
schemars.workspace = true
|
schemars.workspace = true
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
mod bash_tool;
|
mod bash_tool;
|
||||||
mod batch_tool;
|
mod batch_tool;
|
||||||
mod code_symbol_iter;
|
|
||||||
mod code_symbols_tool;
|
mod code_symbols_tool;
|
||||||
mod copy_path_tool;
|
mod copy_path_tool;
|
||||||
mod create_directory_tool;
|
mod create_directory_tool;
|
||||||
|
|
|
@ -1,88 +0,0 @@
|
||||||
use project::DocumentSymbol;
|
|
||||||
use regex::Regex;
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct Entry {
|
|
||||||
pub name: String,
|
|
||||||
pub kind: lsp::SymbolKind,
|
|
||||||
pub depth: u32,
|
|
||||||
pub start_line: usize,
|
|
||||||
pub end_line: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// An iterator that filters document symbols based on a regex pattern.
|
|
||||||
/// This iterator recursively traverses the document symbol tree, incrementing depth for child symbols.
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct CodeSymbolIterator<'a> {
|
|
||||||
symbols: &'a [DocumentSymbol],
|
|
||||||
regex: Option<Regex>,
|
|
||||||
// Stack of (symbol, depth) pairs to process
|
|
||||||
pending_symbols: Vec<(&'a DocumentSymbol, u32)>,
|
|
||||||
current_index: usize,
|
|
||||||
current_depth: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> CodeSymbolIterator<'a> {
|
|
||||||
pub fn new(symbols: &'a [DocumentSymbol], regex: Option<Regex>) -> Self {
|
|
||||||
Self {
|
|
||||||
symbols,
|
|
||||||
regex,
|
|
||||||
pending_symbols: Vec::new(),
|
|
||||||
current_index: 0,
|
|
||||||
current_depth: 0,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Iterator for CodeSymbolIterator<'_> {
|
|
||||||
type Item = Entry;
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
if let Some((symbol, depth)) = self.pending_symbols.pop() {
|
|
||||||
for child in symbol.children.iter().rev() {
|
|
||||||
self.pending_symbols.push((child, depth + 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
return Some(Entry {
|
|
||||||
name: symbol.name.clone(),
|
|
||||||
kind: symbol.kind,
|
|
||||||
depth,
|
|
||||||
start_line: symbol.range.start.0.row as usize,
|
|
||||||
end_line: symbol.range.end.0.row as usize,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
while self.current_index < self.symbols.len() {
|
|
||||||
let regex = self.regex.as_ref();
|
|
||||||
let symbol = &self.symbols[self.current_index];
|
|
||||||
self.current_index += 1;
|
|
||||||
|
|
||||||
if regex.is_none_or(|regex| regex.is_match(&symbol.name)) {
|
|
||||||
// Push in reverse order to maintain traversal order
|
|
||||||
for child in symbol.children.iter().rev() {
|
|
||||||
self.pending_symbols.push((child, self.current_depth + 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
return Some(Entry {
|
|
||||||
name: symbol.name.clone(),
|
|
||||||
kind: symbol.kind,
|
|
||||||
depth: self.current_depth,
|
|
||||||
start_line: symbol.range.start.0.row as usize,
|
|
||||||
end_line: symbol.range.end.0.row as usize,
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
// Even if parent doesn't match, push children to check them later
|
|
||||||
for child in symbol.children.iter().rev() {
|
|
||||||
self.pending_symbols.push((child, self.current_depth + 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if any pending children match our criteria
|
|
||||||
if let Some(result) = self.next() {
|
|
||||||
return Some(result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,24 +1,21 @@
|
||||||
use std::fmt::{self, Write};
|
use std::fmt::Write;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use crate::schema::json_schema_for;
|
||||||
use anyhow::{Result, anyhow};
|
use anyhow::{Result, anyhow};
|
||||||
use assistant_tool::{ActionLog, Tool};
|
use assistant_tool::{ActionLog, Tool};
|
||||||
use collections::IndexMap;
|
use collections::IndexMap;
|
||||||
use gpui::{App, AsyncApp, Entity, Task};
|
use gpui::{App, AsyncApp, Entity, Task};
|
||||||
use language::{CodeLabel, Language, LanguageRegistry};
|
use language::{OutlineItem, ParseStatus, Point};
|
||||||
use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
|
use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
|
||||||
use lsp::SymbolKind;
|
use project::{Project, Symbol};
|
||||||
use project::{DocumentSymbol, Project, Symbol};
|
|
||||||
use regex::{Regex, RegexBuilder};
|
use regex::{Regex, RegexBuilder};
|
||||||
use schemars::JsonSchema;
|
use schemars::JsonSchema;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use ui::IconName;
|
use ui::IconName;
|
||||||
use util::markdown::MarkdownString;
|
use util::markdown::MarkdownString;
|
||||||
|
|
||||||
use crate::code_symbol_iter::{CodeSymbolIterator, Entry};
|
|
||||||
use crate::schema::json_schema_for;
|
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
|
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
|
||||||
pub struct CodeSymbolsInput {
|
pub struct CodeSymbolsInput {
|
||||||
/// The relative path of the source code file to read and get the symbols for.
|
/// The relative path of the source code file to read and get the symbols for.
|
||||||
|
@ -180,24 +177,28 @@ pub async fn file_outline(
|
||||||
action_log.buffer_read(buffer.clone(), cx);
|
action_log.buffer_read(buffer.clone(), cx);
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
let symbols = project
|
// Wait until the buffer has been fully parsed, so that we can read its outline.
|
||||||
.update(cx, |project, cx| project.document_symbols(&buffer, cx))?
|
let mut parse_status = buffer.read_with(cx, |buffer, _| buffer.parse_status())?;
|
||||||
.await?;
|
while parse_status
|
||||||
|
.recv()
|
||||||
|
.await
|
||||||
|
.map_or(false, |status| status != ParseStatus::Idle)
|
||||||
|
{}
|
||||||
|
|
||||||
if symbols.is_empty() {
|
let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?;
|
||||||
return Err(
|
let Some(outline) = snapshot.outline(None) else {
|
||||||
if buffer.read_with(cx, |buffer, _| buffer.snapshot().is_empty())? {
|
return Err(anyhow!("No outline information available for this file."));
|
||||||
anyhow!("This file is empty.")
|
};
|
||||||
} else {
|
|
||||||
anyhow!("No outline information available for this file.")
|
|
||||||
},
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
let language = buffer.read_with(cx, |buffer, _| buffer.language().cloned())?;
|
render_outline(
|
||||||
let language_registry = project.read_with(cx, |project, _| project.languages().clone())?;
|
outline
|
||||||
|
.items
|
||||||
render_outline(&symbols, language, language_registry, regex, offset).await
|
.into_iter()
|
||||||
|
.map(|item| item.to_point(&snapshot)),
|
||||||
|
regex,
|
||||||
|
offset,
|
||||||
|
)
|
||||||
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn project_symbols(
|
async fn project_symbols(
|
||||||
|
@ -292,61 +293,27 @@ async fn project_symbols(
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn render_outline(
|
async fn render_outline(
|
||||||
symbols: &[DocumentSymbol],
|
items: impl IntoIterator<Item = OutlineItem<Point>>,
|
||||||
language: Option<Arc<Language>>,
|
|
||||||
registry: Arc<LanguageRegistry>,
|
|
||||||
regex: Option<Regex>,
|
regex: Option<Regex>,
|
||||||
offset: u32,
|
offset: u32,
|
||||||
) -> Result<String> {
|
) -> Result<String> {
|
||||||
const RESULTS_PER_PAGE_USIZE: usize = RESULTS_PER_PAGE as usize;
|
const RESULTS_PER_PAGE_USIZE: usize = RESULTS_PER_PAGE as usize;
|
||||||
let entries = CodeSymbolIterator::new(symbols, regex.clone())
|
|
||||||
.skip(offset as usize)
|
|
||||||
// Take 1 more than RESULTS_PER_PAGE so we can tell if there are more results.
|
|
||||||
.take(RESULTS_PER_PAGE_USIZE.saturating_add(1))
|
|
||||||
.collect::<Vec<Entry>>();
|
|
||||||
let has_more = entries.len() > RESULTS_PER_PAGE_USIZE;
|
|
||||||
|
|
||||||
// Get language-specific labels, if available
|
let mut items = items.into_iter().skip(offset as usize);
|
||||||
let labels = match &language {
|
|
||||||
Some(lang) => {
|
|
||||||
let entries_for_labels: Vec<(String, SymbolKind)> = entries
|
|
||||||
.iter()
|
|
||||||
.take(RESULTS_PER_PAGE_USIZE)
|
|
||||||
.map(|entry| (entry.name.clone(), entry.kind))
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let lang_name = lang.name();
|
let entries = items
|
||||||
if let Some(lsp_adapter) = registry.lsp_adapters(&lang_name).first().cloned() {
|
.by_ref()
|
||||||
lsp_adapter
|
.filter(|item| {
|
||||||
.labels_for_symbols(&entries_for_labels, lang)
|
regex
|
||||||
.await
|
.as_ref()
|
||||||
.ok()
|
.is_none_or(|regex| regex.is_match(&item.text))
|
||||||
} else {
|
})
|
||||||
None
|
.take(RESULTS_PER_PAGE_USIZE)
|
||||||
}
|
.collect::<Vec<_>>();
|
||||||
}
|
let has_more = items.next().is_some();
|
||||||
None => None,
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut output = String::new();
|
let mut output = String::new();
|
||||||
|
let entries_rendered = render_entries(&mut output, entries);
|
||||||
let entries_rendered = match &labels {
|
|
||||||
Some(label_list) => render_entries(
|
|
||||||
&mut output,
|
|
||||||
entries
|
|
||||||
.into_iter()
|
|
||||||
.take(RESULTS_PER_PAGE_USIZE)
|
|
||||||
.zip(label_list.iter())
|
|
||||||
.map(|(entry, label)| (entry, label.as_ref())),
|
|
||||||
),
|
|
||||||
None => render_entries(
|
|
||||||
&mut output,
|
|
||||||
entries
|
|
||||||
.into_iter()
|
|
||||||
.take(RESULTS_PER_PAGE_USIZE)
|
|
||||||
.map(|entry| (entry, None)),
|
|
||||||
),
|
|
||||||
};
|
|
||||||
|
|
||||||
// Calculate pagination information
|
// Calculate pagination information
|
||||||
let page_start = offset + 1;
|
let page_start = offset + 1;
|
||||||
|
@ -372,31 +339,19 @@ async fn render_outline(
|
||||||
Ok(output)
|
Ok(output)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn render_entries<'a>(
|
fn render_entries(output: &mut String, items: impl IntoIterator<Item = OutlineItem<Point>>) -> u32 {
|
||||||
output: &mut String,
|
|
||||||
entries: impl IntoIterator<Item = (Entry, Option<&'a CodeLabel>)>,
|
|
||||||
) -> u32 {
|
|
||||||
let mut entries_rendered = 0;
|
let mut entries_rendered = 0;
|
||||||
|
|
||||||
for (entry, label) in entries {
|
for item in items {
|
||||||
// Indent based on depth ("" for level 0, " " for level 1, etc.)
|
// Indent based on depth ("" for level 0, " " for level 1, etc.)
|
||||||
for _ in 0..entry.depth {
|
for _ in 0..item.depth {
|
||||||
output.push_str(" ");
|
output.push(' ');
|
||||||
}
|
|
||||||
|
|
||||||
match label {
|
|
||||||
Some(label) => {
|
|
||||||
output.push_str(label.text());
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
write_symbol_kind(output, entry.kind).ok();
|
|
||||||
output.push_str(&entry.name);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
output.push_str(&item.text);
|
||||||
|
|
||||||
// Add position information - convert to 1-based line numbers for display
|
// Add position information - convert to 1-based line numbers for display
|
||||||
let start_line = entry.start_line + 1;
|
let start_line = item.range.start.row + 1;
|
||||||
let end_line = entry.end_line + 1;
|
let end_line = item.range.end.row + 1;
|
||||||
|
|
||||||
if start_line == end_line {
|
if start_line == end_line {
|
||||||
writeln!(output, " [L{}]", start_line).ok();
|
writeln!(output, " [L{}]", start_line).ok();
|
||||||
|
@ -408,38 +363,3 @@ fn render_entries<'a>(
|
||||||
|
|
||||||
entries_rendered
|
entries_rendered
|
||||||
}
|
}
|
||||||
|
|
||||||
// We may not have a language server adapter to have language-specific
|
|
||||||
// ways to translate SymbolKnd into a string. In that situation,
|
|
||||||
// fall back on some reasonable default strings to render.
|
|
||||||
fn write_symbol_kind(buf: &mut String, kind: SymbolKind) -> Result<(), fmt::Error> {
|
|
||||||
match kind {
|
|
||||||
SymbolKind::FILE => write!(buf, "file "),
|
|
||||||
SymbolKind::MODULE => write!(buf, "module "),
|
|
||||||
SymbolKind::NAMESPACE => write!(buf, "namespace "),
|
|
||||||
SymbolKind::PACKAGE => write!(buf, "package "),
|
|
||||||
SymbolKind::CLASS => write!(buf, "class "),
|
|
||||||
SymbolKind::METHOD => write!(buf, "method "),
|
|
||||||
SymbolKind::PROPERTY => write!(buf, "property "),
|
|
||||||
SymbolKind::FIELD => write!(buf, "field "),
|
|
||||||
SymbolKind::CONSTRUCTOR => write!(buf, "constructor "),
|
|
||||||
SymbolKind::ENUM => write!(buf, "enum "),
|
|
||||||
SymbolKind::INTERFACE => write!(buf, "interface "),
|
|
||||||
SymbolKind::FUNCTION => write!(buf, "function "),
|
|
||||||
SymbolKind::VARIABLE => write!(buf, "variable "),
|
|
||||||
SymbolKind::CONSTANT => write!(buf, "constant "),
|
|
||||||
SymbolKind::STRING => write!(buf, "string "),
|
|
||||||
SymbolKind::NUMBER => write!(buf, "number "),
|
|
||||||
SymbolKind::BOOLEAN => write!(buf, "boolean "),
|
|
||||||
SymbolKind::ARRAY => write!(buf, "array "),
|
|
||||||
SymbolKind::OBJECT => write!(buf, "object "),
|
|
||||||
SymbolKind::KEY => write!(buf, "key "),
|
|
||||||
SymbolKind::NULL => write!(buf, "null "),
|
|
||||||
SymbolKind::ENUM_MEMBER => write!(buf, "enum member "),
|
|
||||||
SymbolKind::STRUCT => write!(buf, "struct "),
|
|
||||||
SymbolKind::EVENT => write!(buf, "event "),
|
|
||||||
SymbolKind::OPERATOR => write!(buf, "operator "),
|
|
||||||
SymbolKind::TYPE_PARAMETER => write!(buf, "type parameter "),
|
|
||||||
_ => Ok(()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use crate::code_symbols_tool::file_outline;
|
use crate::{code_symbols_tool::file_outline, schema::json_schema_for};
|
||||||
use crate::schema::json_schema_for;
|
|
||||||
use anyhow::{Result, anyhow};
|
use anyhow::{Result, anyhow};
|
||||||
use assistant_tool::{ActionLog, Tool};
|
use assistant_tool::{ActionLog, Tool};
|
||||||
use gpui::{App, Entity, Task};
|
use gpui::{App, Entity, Task};
|
||||||
|
@ -16,7 +15,7 @@ use util::markdown::MarkdownString;
|
||||||
/// If the model requests to read a file whose size exceeds this, then
|
/// If the model requests to read a file whose size exceeds this, then
|
||||||
/// the tool will return an error along with the model's symbol outline,
|
/// the tool will return an error along with the model's symbol outline,
|
||||||
/// and suggest trying again using line ranges from the outline.
|
/// and suggest trying again using line ranges from the outline.
|
||||||
const MAX_FILE_SIZE_TO_READ: usize = 4096;
|
const MAX_FILE_SIZE_TO_READ: usize = 16384;
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
|
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
|
||||||
pub struct ReadFileToolInput {
|
pub struct ReadFileToolInput {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue