Expand diagnostic excerpts using heuristics on syntactic information from TreeSitter (#21942)

This is quite experimental and untested in languages other than Rust.
It's written to attempt to do something sensible in many languages. Due
to its experimental nature, just releasing to staff, and so not
including it in release notes. Future release note might be "Improved
diagnostic excerpts by using syntactic info to determine the context
lines to show."

Release Notes:

- N/A
This commit is contained in:
Michael Sloan 2024-12-20 15:42:18 -07:00 committed by GitHub
parent ca9cee85e1
commit 9b2bc458e3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 284 additions and 43 deletions

2
Cargo.lock generated
View file

@ -3682,6 +3682,7 @@ dependencies = [
"ctor", "ctor",
"editor", "editor",
"env_logger 0.11.5", "env_logger 0.11.5",
"feature_flags",
"gpui", "gpui",
"language", "language",
"log", "log",
@ -7697,6 +7698,7 @@ dependencies = [
"sum_tree", "sum_tree",
"text", "text",
"theme", "theme",
"tree-sitter",
"util", "util",
] ]

View file

@ -18,6 +18,7 @@ collections.workspace = true
ctor.workspace = true ctor.workspace = true
editor.workspace = true editor.workspace = true
env_logger.workspace = true env_logger.workspace = true
feature_flags.workspace = true
gpui.workspace = true gpui.workspace = true
language.workspace = true language.workspace = true
log.workspace = true log.workspace = true

View file

@ -14,6 +14,7 @@ use editor::{
scroll::Autoscroll, scroll::Autoscroll,
Editor, EditorEvent, ExcerptId, ExcerptRange, MultiBuffer, ToOffset, Editor, EditorEvent, ExcerptId, ExcerptRange, MultiBuffer, ToOffset,
}; };
use feature_flags::FeatureFlagAppExt;
use gpui::{ use gpui::{
actions, div, svg, AnyElement, AnyView, AppContext, Context, EventEmitter, FocusHandle, actions, div, svg, AnyElement, AnyView, AppContext, Context, EventEmitter, FocusHandle,
FocusableView, Global, HighlightStyle, InteractiveElement, IntoElement, Model, ParentElement, FocusableView, Global, HighlightStyle, InteractiveElement, IntoElement, Model, ParentElement,
@ -21,7 +22,8 @@ use gpui::{
WeakView, WindowContext, WeakView, WindowContext,
}; };
use language::{ use language::{
Bias, Buffer, Diagnostic, DiagnosticEntry, DiagnosticSeverity, Point, Selection, SelectionGoal, Bias, Buffer, BufferRow, BufferSnapshot, Diagnostic, DiagnosticEntry, DiagnosticSeverity,
Point, Selection, SelectionGoal, ToTreeSitterPoint,
}; };
use lsp::LanguageServerId; use lsp::LanguageServerId;
use project::{DiagnosticSummary, Project, ProjectPath}; use project::{DiagnosticSummary, Project, ProjectPath};
@ -29,9 +31,10 @@ use project_diagnostics_settings::ProjectDiagnosticsSettings;
use settings::Settings; use settings::Settings;
use std::{ use std::{
any::{Any, TypeId}, any::{Any, TypeId},
cmp,
cmp::Ordering, cmp::Ordering,
mem, mem,
ops::Range, ops::{Range, RangeInclusive},
sync::Arc, sync::Arc,
time::Duration, time::Duration,
}; };
@ -422,31 +425,28 @@ impl ProjectDiagnosticsEditor {
blocks: Default::default(), blocks: Default::default(),
block_count: 0, block_count: 0,
}; };
let mut pending_range: Option<(Range<Point>, usize)> = None; let mut pending_range: Option<(Range<Point>, Range<Point>, usize)> = None;
let mut is_first_excerpt_for_group = true; let mut is_first_excerpt_for_group = true;
for (ix, entry) in group.entries.iter().map(Some).chain([None]).enumerate() { for (ix, entry) in group.entries.iter().map(Some).chain([None]).enumerate() {
let resolved_entry = entry.map(|e| e.resolve::<Point>(&snapshot)); let resolved_entry = entry.map(|e| e.resolve::<Point>(&snapshot));
if let Some((range, start_ix)) = &mut pending_range { let expanded_range = resolved_entry.as_ref().map(|entry| {
if let Some(entry) = resolved_entry.as_ref() { context_range_for_entry(entry, self.context, &snapshot, cx)
if entry.range.start.row <= range.end.row + 1 + self.context * 2 { });
range.end = range.end.max(entry.range.end); if let Some((range, context_range, start_ix)) = &mut pending_range {
if let Some(expanded_range) = expanded_range.clone() {
// If the entries are overlapping or next to each-other, merge them into one excerpt.
if context_range.end.row + 1 >= expanded_range.start.row {
context_range.end = context_range.end.max(expanded_range.end);
continue; continue;
} }
} }
let excerpt_start =
Point::new(range.start.row.saturating_sub(self.context), 0);
let excerpt_end = snapshot.clip_point(
Point::new(range.end.row + self.context, u32::MAX),
Bias::Left,
);
let excerpt_id = excerpts let excerpt_id = excerpts
.insert_excerpts_after( .insert_excerpts_after(
prev_excerpt_id, prev_excerpt_id,
buffer.clone(), buffer.clone(),
[ExcerptRange { [ExcerptRange {
context: excerpt_start..excerpt_end, context: context_range.clone(),
primary: Some(range.clone()), primary: Some(range.clone()),
}], }],
cx, cx,
@ -503,8 +503,9 @@ impl ProjectDiagnosticsEditor {
pending_range.take(); pending_range.take();
} }
if let Some(entry) = resolved_entry { if let Some(entry) = resolved_entry.as_ref() {
pending_range = Some((entry.range.clone(), ix)); let range = entry.range.clone();
pending_range = Some((range, expanded_range.unwrap(), ix));
} }
} }
@ -923,3 +924,169 @@ fn compare_diagnostics(
}) })
.then_with(|| old.diagnostic.message.cmp(&new.diagnostic.message)) .then_with(|| old.diagnostic.message.cmp(&new.diagnostic.message))
} }
const DIAGNOSTIC_EXPANSION_ROW_LIMIT: u32 = 32;
fn context_range_for_entry(
entry: &DiagnosticEntry<Point>,
context: u32,
snapshot: &BufferSnapshot,
cx: &AppContext,
) -> Range<Point> {
if cx.is_staff() {
if let Some(rows) = heuristic_syntactic_expand(
entry.range.clone(),
DIAGNOSTIC_EXPANSION_ROW_LIMIT,
snapshot,
cx,
) {
return Range {
start: Point::new(*rows.start(), 0),
end: snapshot.clip_point(Point::new(*rows.end(), u32::MAX), Bias::Left),
};
}
}
Range {
start: Point::new(entry.range.start.row.saturating_sub(context), 0),
end: snapshot.clip_point(
Point::new(entry.range.end.row + context, u32::MAX),
Bias::Left,
),
}
}
/// Expands the input range using syntax information from TreeSitter. This expansion will be limited
/// to the specified `max_row_count`.
///
/// If there is a containing outline item that is less than `max_row_count`, it will be returned.
/// Otherwise fairly arbitrary heuristics are applied to attempt to return a logical block of code.
fn heuristic_syntactic_expand<'a>(
input_range: Range<Point>,
max_row_count: u32,
snapshot: &'a BufferSnapshot,
cx: &'a AppContext,
) -> Option<RangeInclusive<BufferRow>> {
let input_row_count = input_range.end.row - input_range.start.row;
if input_row_count > max_row_count {
return None;
}
// If the outline node contains the diagnostic and is small enough, just use that.
let outline_range = snapshot.outline_range_containing(input_range.clone());
if let Some(outline_range) = outline_range.clone() {
// Remove blank lines from start and end
if let Some(start_row) = (outline_range.start.row..outline_range.end.row)
.find(|row| !snapshot.line_indent_for_row(*row).is_line_blank())
{
if let Some(end_row) = (outline_range.start.row..outline_range.end.row + 1)
.rev()
.find(|row| !snapshot.line_indent_for_row(*row).is_line_blank())
{
let row_count = end_row.saturating_sub(start_row);
if row_count <= max_row_count {
return Some(RangeInclusive::new(
outline_range.start.row,
outline_range.end.row,
));
}
}
}
}
let mut node = snapshot.syntax_ancestor(input_range.clone())?;
loop {
let node_start = Point::from_ts_point(node.start_position());
let node_end = Point::from_ts_point(node.end_position());
let node_range = node_start..node_end;
let row_count = node_end.row - node_start.row + 1;
// Stop if we've exceeded the row count or reached an outline node. Then, find the interval
// of node children which contains the query range. For example, this allows just returning
// the header of a declaration rather than the entire declaration.
if row_count > max_row_count || outline_range == Some(node_range.clone()) {
let mut cursor = node.walk();
let mut included_child_start = None;
let mut included_child_end = None;
let mut previous_end = node_start;
if cursor.goto_first_child() {
loop {
let child_node = cursor.node();
let child_range = previous_end..Point::from_ts_point(child_node.end_position());
if included_child_start.is_none() && child_range.contains(&input_range.start) {
included_child_start = Some(child_range.start);
}
if child_range.contains(&input_range.end) {
included_child_end = Some(child_range.end);
}
previous_end = child_range.end;
if !cursor.goto_next_sibling() {
break;
}
}
}
let end = included_child_end.unwrap_or(node_range.end);
if let Some(start) = included_child_start {
let row_count = end.row - start.row;
if row_count < max_row_count {
return Some(RangeInclusive::new(start.row, end.row));
}
}
log::info!(
"Expanding to ancestor started on {} node exceeding row limit of {max_row_count}.",
node.grammar_name()
);
return None;
}
let node_name = node.grammar_name();
let node_row_range = RangeInclusive::new(node_range.start.row, node_range.end.row);
if node_name.ends_with("block") {
return Some(node_row_range);
} else if node_name.ends_with("statement") || node_name.ends_with("declaration") {
// Expand to the nearest dedent or blank line for statements and declarations.
let tab_size = snapshot.settings_at(node_range.start, cx).tab_size.get();
let indent_level = snapshot
.line_indent_for_row(node_range.start.row)
.len(tab_size);
let rows_remaining = max_row_count.saturating_sub(row_count);
let Some(start_row) = (node_range.start.row.saturating_sub(rows_remaining)
..node_range.start.row)
.rev()
.find(|row| is_line_blank_or_indented_less(indent_level, *row, tab_size, snapshot))
else {
return Some(node_row_range);
};
let rows_remaining = max_row_count.saturating_sub(node_range.end.row - start_row);
let Some(end_row) = (node_range.end.row + 1
..cmp::min(
node_range.end.row + rows_remaining + 1,
snapshot.row_count(),
))
.find(|row| is_line_blank_or_indented_less(indent_level, *row, tab_size, snapshot))
else {
return Some(node_row_range);
};
return Some(RangeInclusive::new(start_row, end_row));
}
// TODO: doing this instead of walking a cursor as that doesn't work - why?
let Some(parent) = node.parent() else {
log::info!(
"Expanding to ancestor reached the top node, so using default context line count.",
);
return None;
};
node = parent;
}
}
fn is_line_blank_or_indented_less(
indent_level: u32,
row: u32,
tab_size: u32,
snapshot: &BufferSnapshot,
) -> bool {
let line_indent = snapshot.line_indent_for_row(row);
line_indent.is_line_blank() || line_indent.len(tab_size) < indent_level
}

View file

@ -8774,9 +8774,10 @@ impl Editor {
.map(|selection| { .map(|selection| {
let old_range = selection.start..selection.end; let old_range = selection.start..selection.end;
let mut new_range = old_range.clone(); let mut new_range = old_range.clone();
while let Some(containing_range) = let mut new_node = None;
buffer.range_for_syntax_ancestor(new_range.clone()) while let Some((node, containing_range)) = buffer.syntax_ancestor(new_range.clone())
{ {
new_node = Some(node);
new_range = containing_range; new_range = containing_range;
if !display_map.intersects_fold(new_range.start) if !display_map.intersects_fold(new_range.start)
&& !display_map.intersects_fold(new_range.end) && !display_map.intersects_fold(new_range.end)
@ -8785,6 +8786,17 @@ impl Editor {
} }
} }
if let Some(node) = new_node {
// Log the ancestor, to support using this action as a way to explore TreeSitter
// nodes. Parent and grandparent are also logged because this operation will not
// visit nodes that have the same range as their parent.
log::info!("Node: {node:?}");
let parent = node.parent();
log::info!("Parent: {parent:?}");
let grandparent = parent.and_then(|x| x.parent());
log::info!("Grandparent: {grandparent:?}");
}
selected_larger_node |= new_range != old_range; selected_larger_node |= new_range != old_range;
Selection { Selection {
id: selection.id, id: selection.id,

View file

@ -429,7 +429,7 @@ fn show_hover(
}) })
.or_else(|| { .or_else(|| {
let snapshot = &snapshot.buffer_snapshot; let snapshot = &snapshot.buffer_snapshot;
let offset_range = snapshot.range_for_syntax_ancestor(anchor..anchor)?; let offset_range = snapshot.syntax_ancestor(anchor..anchor)?.1;
Some( Some(
snapshot.anchor_before(offset_range.start) snapshot.anchor_before(offset_range.start)
..snapshot.anchor_after(offset_range.end), ..snapshot.anchor_after(offset_range.end),

View file

@ -68,7 +68,7 @@ pub use text::{
use theme::SyntaxTheme; use theme::SyntaxTheme;
#[cfg(any(test, feature = "test-support"))] #[cfg(any(test, feature = "test-support"))]
use util::RandomCharIter; use util::RandomCharIter;
use util::{debug_panic, RangeExt}; use util::{debug_panic, maybe, RangeExt};
#[cfg(any(test, feature = "test-support"))] #[cfg(any(test, feature = "test-support"))]
pub use {tree_sitter_rust, tree_sitter_typescript}; pub use {tree_sitter_rust, tree_sitter_typescript};
@ -2923,10 +2923,13 @@ impl BufferSnapshot {
(start..end, word_kind) (start..end, word_kind)
} }
/// Returns the range for the closes syntax node enclosing the given range. /// Returns the closest syntax node enclosing the given range.
pub fn range_for_syntax_ancestor<T: ToOffset>(&self, range: Range<T>) -> Option<Range<usize>> { pub fn syntax_ancestor<'a, T: ToOffset>(
&'a self,
range: Range<T>,
) -> Option<tree_sitter::Node<'a>> {
let range = range.start.to_offset(self)..range.end.to_offset(self); let range = range.start.to_offset(self)..range.end.to_offset(self);
let mut result: Option<Range<usize>> = None; let mut result: Option<tree_sitter::Node<'a>> = None;
'outer: for layer in self 'outer: for layer in self
.syntax .syntax
.layers_for_range(range.clone(), &self.text, true) .layers_for_range(range.clone(), &self.text, true)
@ -2956,7 +2959,7 @@ impl BufferSnapshot {
} }
let left_node = cursor.node(); let left_node = cursor.node();
let mut layer_result = left_node.byte_range(); let mut layer_result = left_node;
// For an empty range, try to find another node immediately to the right of the range. // For an empty range, try to find another node immediately to the right of the range.
if left_node.end_byte() == range.start { if left_node.end_byte() == range.start {
@ -2979,13 +2982,13 @@ impl BufferSnapshot {
// If both nodes are the same in that regard, favor the right one. // If both nodes are the same in that regard, favor the right one.
if let Some(right_node) = right_node { if let Some(right_node) = right_node {
if right_node.is_named() || !left_node.is_named() { if right_node.is_named() || !left_node.is_named() {
layer_result = right_node.byte_range(); layer_result = right_node;
} }
} }
} }
if let Some(previous_result) = &result { if let Some(previous_result) = &result {
if previous_result.len() < layer_result.len() { if previous_result.byte_range().len() < layer_result.byte_range().len() {
continue; continue;
} }
} }
@ -3028,6 +3031,48 @@ impl BufferSnapshot {
Some(items) Some(items)
} }
pub fn outline_range_containing<T: ToOffset>(&self, range: Range<T>) -> Option<Range<Point>> {
let range = range.to_offset(self);
let mut matches = self.syntax.matches(range.clone(), &self.text, |grammar| {
grammar.outline_config.as_ref().map(|c| &c.query)
});
let configs = matches
.grammars()
.iter()
.map(|g| g.outline_config.as_ref().unwrap())
.collect::<Vec<_>>();
while let Some(mat) = matches.peek() {
let config = &configs[mat.grammar_index];
let containing_item_node = maybe!({
let item_node = mat.captures.iter().find_map(|cap| {
if cap.index == config.item_capture_ix {
Some(cap.node)
} else {
None
}
})?;
let item_byte_range = item_node.byte_range();
if item_byte_range.end < range.start || item_byte_range.start > range.end {
None
} else {
Some(item_node)
}
});
if let Some(item_node) = containing_item_node {
return Some(
Point::from_ts_point(item_node.start_position())
..Point::from_ts_point(item_node.end_position()),
);
}
matches.advance();
}
None
}
pub fn outline_items_containing<T: ToOffset>( pub fn outline_items_containing<T: ToOffset>(
&self, &self,
range: Range<T>, range: Range<T>,

View file

@ -1104,20 +1104,32 @@ fn test_range_for_syntax_ancestor(cx: &mut AppContext) {
let snapshot = buffer.snapshot(); let snapshot = buffer.snapshot();
assert_eq!( assert_eq!(
snapshot.range_for_syntax_ancestor(empty_range_at(text, "|")), snapshot
Some(range_of(text, "|")) .syntax_ancestor(empty_range_at(text, "|"))
.unwrap()
.byte_range(),
range_of(text, "|")
); );
assert_eq!( assert_eq!(
snapshot.range_for_syntax_ancestor(range_of(text, "|")), snapshot
Some(range_of(text, "|c|")) .syntax_ancestor(range_of(text, "|"))
.unwrap()
.byte_range(),
range_of(text, "|c|")
); );
assert_eq!( assert_eq!(
snapshot.range_for_syntax_ancestor(range_of(text, "|c|")), snapshot
Some(range_of(text, "|c| {}")) .syntax_ancestor(range_of(text, "|c|"))
.unwrap()
.byte_range(),
range_of(text, "|c| {}")
); );
assert_eq!( assert_eq!(
snapshot.range_for_syntax_ancestor(range_of(text, "|c| {}")), snapshot
Some(range_of(text, "(|c| {})")) .syntax_ancestor(range_of(text, "|c| {}"))
.unwrap()
.byte_range(),
range_of(text, "(|c| {})")
); );
buffer buffer

View file

@ -78,7 +78,7 @@ pub use language_registry::{
}; };
pub use lsp::LanguageServerId; pub use lsp::LanguageServerId;
pub use outline::*; pub use outline::*;
pub use syntax_map::{OwnedSyntaxLayer, SyntaxLayer, TreeSitterOptions}; pub use syntax_map::{OwnedSyntaxLayer, SyntaxLayer, ToTreeSitterPoint, TreeSitterOptions};
pub use text::{AnchorRangeExt, LineEnding}; pub use text::{AnchorRangeExt, LineEnding};
pub use tree_sitter::{Node, Parser, Tree, TreeCursor}; pub use tree_sitter::{Node, Parser, Tree, TreeCursor};

View file

@ -1845,7 +1845,7 @@ impl Drop for QueryCursorHandle {
} }
} }
pub(crate) trait ToTreeSitterPoint { pub trait ToTreeSitterPoint {
fn to_ts_point(self) -> tree_sitter::Point; fn to_ts_point(self) -> tree_sitter::Point;
fn from_ts_point(point: tree_sitter::Point) -> Self; fn from_ts_point(point: tree_sitter::Point) -> Self;
} }

View file

@ -39,6 +39,7 @@ smallvec.workspace = true
sum_tree.workspace = true sum_tree.workspace = true
text.workspace = true text.workspace = true
theme.workspace = true theme.workspace = true
tree-sitter.workspace = true
util.workspace = true util.workspace = true
[dev-dependencies] [dev-dependencies]

View file

@ -3920,15 +3920,16 @@ impl MultiBufferSnapshot {
}) })
} }
pub fn range_for_syntax_ancestor<T: ToOffset>(&self, range: Range<T>) -> Option<Range<usize>> { pub fn syntax_ancestor<T: ToOffset>(
&self,
range: Range<T>,
) -> Option<(tree_sitter::Node, Range<usize>)> {
let range = range.start.to_offset(self)..range.end.to_offset(self); let range = range.start.to_offset(self)..range.end.to_offset(self);
let excerpt = self.excerpt_containing(range.clone())?; let excerpt = self.excerpt_containing(range.clone())?;
let node = excerpt
let ancestor_buffer_range = excerpt
.buffer() .buffer()
.range_for_syntax_ancestor(excerpt.map_range_to_buffer(range))?; .syntax_ancestor(excerpt.map_range_to_buffer(range))?;
Some((node, excerpt.map_range_from_buffer(node.byte_range())))
Some(excerpt.map_range_from_buffer(ancestor_buffer_range))
} }
pub fn outline(&self, theme: Option<&SyntaxTheme>) -> Option<Outline<Anchor>> { pub fn outline(&self, theme: Option<&SyntaxTheme>) -> Option<Outline<Anchor>> {