From 9b2bc458e393066c21b2256b2f38ec31b1a8905a Mon Sep 17 00:00:00 2001 From: Michael Sloan Date: Fri, 20 Dec 2024 15:42:18 -0700 Subject: [PATCH] Expand diagnostic excerpts using heuristics on syntactic information from TreeSitter (#21942) This is quite experimental and untested in languages other than Rust. It's written to attempt to do something sensible in many languages. Due to its experimental nature, just releasing to staff, and so not including it in release notes. Future release note might be "Improved diagnostic excerpts by using syntactic info to determine the context lines to show." Release Notes: - N/A --- Cargo.lock | 2 + crates/diagnostics/Cargo.toml | 1 + crates/diagnostics/src/diagnostics.rs | 201 ++++++++++++++++++++++-- crates/editor/src/editor.rs | 16 +- crates/editor/src/hover_popover.rs | 2 +- crates/language/src/buffer.rs | 59 ++++++- crates/language/src/buffer_tests.rs | 28 +++- crates/language/src/language.rs | 2 +- crates/language/src/syntax_map.rs | 2 +- crates/multi_buffer/Cargo.toml | 1 + crates/multi_buffer/src/multi_buffer.rs | 13 +- 11 files changed, 284 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 03fa792d90..7bc9f96f95 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3682,6 +3682,7 @@ dependencies = [ "ctor", "editor", "env_logger 0.11.5", + "feature_flags", "gpui", "language", "log", @@ -7697,6 +7698,7 @@ dependencies = [ "sum_tree", "text", "theme", + "tree-sitter", "util", ] diff --git a/crates/diagnostics/Cargo.toml b/crates/diagnostics/Cargo.toml index b851a2733f..f15792cdbe 100644 --- a/crates/diagnostics/Cargo.toml +++ b/crates/diagnostics/Cargo.toml @@ -18,6 +18,7 @@ collections.workspace = true ctor.workspace = true editor.workspace = true env_logger.workspace = true +feature_flags.workspace = true gpui.workspace = true language.workspace = true log.workspace = true diff --git a/crates/diagnostics/src/diagnostics.rs b/crates/diagnostics/src/diagnostics.rs index 13cb113acb..d22bfa6ee1 100644 --- a/crates/diagnostics/src/diagnostics.rs +++ b/crates/diagnostics/src/diagnostics.rs @@ -14,6 +14,7 @@ use editor::{ scroll::Autoscroll, Editor, EditorEvent, ExcerptId, ExcerptRange, MultiBuffer, ToOffset, }; +use feature_flags::FeatureFlagAppExt; use gpui::{ actions, div, svg, AnyElement, AnyView, AppContext, Context, EventEmitter, FocusHandle, FocusableView, Global, HighlightStyle, InteractiveElement, IntoElement, Model, ParentElement, @@ -21,7 +22,8 @@ use gpui::{ WeakView, WindowContext, }; use language::{ - Bias, Buffer, Diagnostic, DiagnosticEntry, DiagnosticSeverity, Point, Selection, SelectionGoal, + Bias, Buffer, BufferRow, BufferSnapshot, Diagnostic, DiagnosticEntry, DiagnosticSeverity, + Point, Selection, SelectionGoal, ToTreeSitterPoint, }; use lsp::LanguageServerId; use project::{DiagnosticSummary, Project, ProjectPath}; @@ -29,9 +31,10 @@ use project_diagnostics_settings::ProjectDiagnosticsSettings; use settings::Settings; use std::{ any::{Any, TypeId}, + cmp, cmp::Ordering, mem, - ops::Range, + ops::{Range, RangeInclusive}, sync::Arc, time::Duration, }; @@ -422,31 +425,28 @@ impl ProjectDiagnosticsEditor { blocks: Default::default(), block_count: 0, }; - let mut pending_range: Option<(Range, usize)> = None; + let mut pending_range: Option<(Range, Range, usize)> = None; let mut is_first_excerpt_for_group = true; for (ix, entry) in group.entries.iter().map(Some).chain([None]).enumerate() { let resolved_entry = entry.map(|e| e.resolve::(&snapshot)); - if let Some((range, start_ix)) = &mut pending_range { - if let Some(entry) = resolved_entry.as_ref() { - if entry.range.start.row <= range.end.row + 1 + self.context * 2 { - range.end = range.end.max(entry.range.end); + let expanded_range = resolved_entry.as_ref().map(|entry| { + context_range_for_entry(entry, self.context, &snapshot, cx) + }); + if let Some((range, context_range, start_ix)) = &mut pending_range { + if let Some(expanded_range) = expanded_range.clone() { + // If the entries are overlapping or next to each-other, merge them into one excerpt. + if context_range.end.row + 1 >= expanded_range.start.row { + context_range.end = context_range.end.max(expanded_range.end); continue; } } - let excerpt_start = - Point::new(range.start.row.saturating_sub(self.context), 0); - let excerpt_end = snapshot.clip_point( - Point::new(range.end.row + self.context, u32::MAX), - Bias::Left, - ); - let excerpt_id = excerpts .insert_excerpts_after( prev_excerpt_id, buffer.clone(), [ExcerptRange { - context: excerpt_start..excerpt_end, + context: context_range.clone(), primary: Some(range.clone()), }], cx, @@ -503,8 +503,9 @@ impl ProjectDiagnosticsEditor { pending_range.take(); } - if let Some(entry) = resolved_entry { - pending_range = Some((entry.range.clone(), ix)); + if let Some(entry) = resolved_entry.as_ref() { + let range = entry.range.clone(); + pending_range = Some((range, expanded_range.unwrap(), ix)); } } @@ -923,3 +924,169 @@ fn compare_diagnostics( }) .then_with(|| old.diagnostic.message.cmp(&new.diagnostic.message)) } + +const DIAGNOSTIC_EXPANSION_ROW_LIMIT: u32 = 32; + +fn context_range_for_entry( + entry: &DiagnosticEntry, + context: u32, + snapshot: &BufferSnapshot, + cx: &AppContext, +) -> Range { + if cx.is_staff() { + if let Some(rows) = heuristic_syntactic_expand( + entry.range.clone(), + DIAGNOSTIC_EXPANSION_ROW_LIMIT, + snapshot, + cx, + ) { + return Range { + start: Point::new(*rows.start(), 0), + end: snapshot.clip_point(Point::new(*rows.end(), u32::MAX), Bias::Left), + }; + } + } + Range { + start: Point::new(entry.range.start.row.saturating_sub(context), 0), + end: snapshot.clip_point( + Point::new(entry.range.end.row + context, u32::MAX), + Bias::Left, + ), + } +} + +/// Expands the input range using syntax information from TreeSitter. This expansion will be limited +/// to the specified `max_row_count`. +/// +/// If there is a containing outline item that is less than `max_row_count`, it will be returned. +/// Otherwise fairly arbitrary heuristics are applied to attempt to return a logical block of code. +fn heuristic_syntactic_expand<'a>( + input_range: Range, + max_row_count: u32, + snapshot: &'a BufferSnapshot, + cx: &'a AppContext, +) -> Option> { + let input_row_count = input_range.end.row - input_range.start.row; + if input_row_count > max_row_count { + return None; + } + + // If the outline node contains the diagnostic and is small enough, just use that. + let outline_range = snapshot.outline_range_containing(input_range.clone()); + if let Some(outline_range) = outline_range.clone() { + // Remove blank lines from start and end + if let Some(start_row) = (outline_range.start.row..outline_range.end.row) + .find(|row| !snapshot.line_indent_for_row(*row).is_line_blank()) + { + if let Some(end_row) = (outline_range.start.row..outline_range.end.row + 1) + .rev() + .find(|row| !snapshot.line_indent_for_row(*row).is_line_blank()) + { + let row_count = end_row.saturating_sub(start_row); + if row_count <= max_row_count { + return Some(RangeInclusive::new( + outline_range.start.row, + outline_range.end.row, + )); + } + } + } + } + + let mut node = snapshot.syntax_ancestor(input_range.clone())?; + loop { + let node_start = Point::from_ts_point(node.start_position()); + let node_end = Point::from_ts_point(node.end_position()); + let node_range = node_start..node_end; + let row_count = node_end.row - node_start.row + 1; + + // Stop if we've exceeded the row count or reached an outline node. Then, find the interval + // of node children which contains the query range. For example, this allows just returning + // the header of a declaration rather than the entire declaration. + if row_count > max_row_count || outline_range == Some(node_range.clone()) { + let mut cursor = node.walk(); + let mut included_child_start = None; + let mut included_child_end = None; + let mut previous_end = node_start; + if cursor.goto_first_child() { + loop { + let child_node = cursor.node(); + let child_range = previous_end..Point::from_ts_point(child_node.end_position()); + if included_child_start.is_none() && child_range.contains(&input_range.start) { + included_child_start = Some(child_range.start); + } + if child_range.contains(&input_range.end) { + included_child_end = Some(child_range.end); + } + previous_end = child_range.end; + if !cursor.goto_next_sibling() { + break; + } + } + } + let end = included_child_end.unwrap_or(node_range.end); + if let Some(start) = included_child_start { + let row_count = end.row - start.row; + if row_count < max_row_count { + return Some(RangeInclusive::new(start.row, end.row)); + } + } + + log::info!( + "Expanding to ancestor started on {} node exceeding row limit of {max_row_count}.", + node.grammar_name() + ); + return None; + } + + let node_name = node.grammar_name(); + let node_row_range = RangeInclusive::new(node_range.start.row, node_range.end.row); + if node_name.ends_with("block") { + return Some(node_row_range); + } else if node_name.ends_with("statement") || node_name.ends_with("declaration") { + // Expand to the nearest dedent or blank line for statements and declarations. + let tab_size = snapshot.settings_at(node_range.start, cx).tab_size.get(); + let indent_level = snapshot + .line_indent_for_row(node_range.start.row) + .len(tab_size); + let rows_remaining = max_row_count.saturating_sub(row_count); + let Some(start_row) = (node_range.start.row.saturating_sub(rows_remaining) + ..node_range.start.row) + .rev() + .find(|row| is_line_blank_or_indented_less(indent_level, *row, tab_size, snapshot)) + else { + return Some(node_row_range); + }; + let rows_remaining = max_row_count.saturating_sub(node_range.end.row - start_row); + let Some(end_row) = (node_range.end.row + 1 + ..cmp::min( + node_range.end.row + rows_remaining + 1, + snapshot.row_count(), + )) + .find(|row| is_line_blank_or_indented_less(indent_level, *row, tab_size, snapshot)) + else { + return Some(node_row_range); + }; + return Some(RangeInclusive::new(start_row, end_row)); + } + + // TODO: doing this instead of walking a cursor as that doesn't work - why? + let Some(parent) = node.parent() else { + log::info!( + "Expanding to ancestor reached the top node, so using default context line count.", + ); + return None; + }; + node = parent; + } +} + +fn is_line_blank_or_indented_less( + indent_level: u32, + row: u32, + tab_size: u32, + snapshot: &BufferSnapshot, +) -> bool { + let line_indent = snapshot.line_indent_for_row(row); + line_indent.is_line_blank() || line_indent.len(tab_size) < indent_level +} diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index 36959e055c..20040bd58c 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -8774,9 +8774,10 @@ impl Editor { .map(|selection| { let old_range = selection.start..selection.end; let mut new_range = old_range.clone(); - while let Some(containing_range) = - buffer.range_for_syntax_ancestor(new_range.clone()) + let mut new_node = None; + while let Some((node, containing_range)) = buffer.syntax_ancestor(new_range.clone()) { + new_node = Some(node); new_range = containing_range; if !display_map.intersects_fold(new_range.start) && !display_map.intersects_fold(new_range.end) @@ -8785,6 +8786,17 @@ impl Editor { } } + if let Some(node) = new_node { + // Log the ancestor, to support using this action as a way to explore TreeSitter + // nodes. Parent and grandparent are also logged because this operation will not + // visit nodes that have the same range as their parent. + log::info!("Node: {node:?}"); + let parent = node.parent(); + log::info!("Parent: {parent:?}"); + let grandparent = parent.and_then(|x| x.parent()); + log::info!("Grandparent: {grandparent:?}"); + } + selected_larger_node |= new_range != old_range; Selection { id: selection.id, diff --git a/crates/editor/src/hover_popover.rs b/crates/editor/src/hover_popover.rs index 23dce30ca6..fe3668b2fe 100644 --- a/crates/editor/src/hover_popover.rs +++ b/crates/editor/src/hover_popover.rs @@ -429,7 +429,7 @@ fn show_hover( }) .or_else(|| { let snapshot = &snapshot.buffer_snapshot; - let offset_range = snapshot.range_for_syntax_ancestor(anchor..anchor)?; + let offset_range = snapshot.syntax_ancestor(anchor..anchor)?.1; Some( snapshot.anchor_before(offset_range.start) ..snapshot.anchor_after(offset_range.end), diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 925e648d9f..a55396e1e3 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -68,7 +68,7 @@ pub use text::{ use theme::SyntaxTheme; #[cfg(any(test, feature = "test-support"))] use util::RandomCharIter; -use util::{debug_panic, RangeExt}; +use util::{debug_panic, maybe, RangeExt}; #[cfg(any(test, feature = "test-support"))] pub use {tree_sitter_rust, tree_sitter_typescript}; @@ -2923,10 +2923,13 @@ impl BufferSnapshot { (start..end, word_kind) } - /// Returns the range for the closes syntax node enclosing the given range. - pub fn range_for_syntax_ancestor(&self, range: Range) -> Option> { + /// Returns the closest syntax node enclosing the given range. + pub fn syntax_ancestor<'a, T: ToOffset>( + &'a self, + range: Range, + ) -> Option> { let range = range.start.to_offset(self)..range.end.to_offset(self); - let mut result: Option> = None; + let mut result: Option> = None; 'outer: for layer in self .syntax .layers_for_range(range.clone(), &self.text, true) @@ -2956,7 +2959,7 @@ impl BufferSnapshot { } let left_node = cursor.node(); - let mut layer_result = left_node.byte_range(); + let mut layer_result = left_node; // For an empty range, try to find another node immediately to the right of the range. if left_node.end_byte() == range.start { @@ -2979,13 +2982,13 @@ impl BufferSnapshot { // If both nodes are the same in that regard, favor the right one. if let Some(right_node) = right_node { if right_node.is_named() || !left_node.is_named() { - layer_result = right_node.byte_range(); + layer_result = right_node; } } } if let Some(previous_result) = &result { - if previous_result.len() < layer_result.len() { + if previous_result.byte_range().len() < layer_result.byte_range().len() { continue; } } @@ -3028,6 +3031,48 @@ impl BufferSnapshot { Some(items) } + pub fn outline_range_containing(&self, range: Range) -> Option> { + let range = range.to_offset(self); + let mut matches = self.syntax.matches(range.clone(), &self.text, |grammar| { + grammar.outline_config.as_ref().map(|c| &c.query) + }); + let configs = matches + .grammars() + .iter() + .map(|g| g.outline_config.as_ref().unwrap()) + .collect::>(); + + while let Some(mat) = matches.peek() { + let config = &configs[mat.grammar_index]; + let containing_item_node = maybe!({ + let item_node = mat.captures.iter().find_map(|cap| { + if cap.index == config.item_capture_ix { + Some(cap.node) + } else { + None + } + })?; + + let item_byte_range = item_node.byte_range(); + if item_byte_range.end < range.start || item_byte_range.start > range.end { + None + } else { + Some(item_node) + } + }); + + if let Some(item_node) = containing_item_node { + return Some( + Point::from_ts_point(item_node.start_position()) + ..Point::from_ts_point(item_node.end_position()), + ); + } + + matches.advance(); + } + None + } + pub fn outline_items_containing( &self, range: Range, diff --git a/crates/language/src/buffer_tests.rs b/crates/language/src/buffer_tests.rs index a1d1a57f13..4eab416965 100644 --- a/crates/language/src/buffer_tests.rs +++ b/crates/language/src/buffer_tests.rs @@ -1104,20 +1104,32 @@ fn test_range_for_syntax_ancestor(cx: &mut AppContext) { let snapshot = buffer.snapshot(); assert_eq!( - snapshot.range_for_syntax_ancestor(empty_range_at(text, "|")), - Some(range_of(text, "|")) + snapshot + .syntax_ancestor(empty_range_at(text, "|")) + .unwrap() + .byte_range(), + range_of(text, "|") ); assert_eq!( - snapshot.range_for_syntax_ancestor(range_of(text, "|")), - Some(range_of(text, "|c|")) + snapshot + .syntax_ancestor(range_of(text, "|")) + .unwrap() + .byte_range(), + range_of(text, "|c|") ); assert_eq!( - snapshot.range_for_syntax_ancestor(range_of(text, "|c|")), - Some(range_of(text, "|c| {}")) + snapshot + .syntax_ancestor(range_of(text, "|c|")) + .unwrap() + .byte_range(), + range_of(text, "|c| {}") ); assert_eq!( - snapshot.range_for_syntax_ancestor(range_of(text, "|c| {}")), - Some(range_of(text, "(|c| {})")) + snapshot + .syntax_ancestor(range_of(text, "|c| {}")) + .unwrap() + .byte_range(), + range_of(text, "(|c| {})") ); buffer diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 9832c50482..9e006f72ad 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -78,7 +78,7 @@ pub use language_registry::{ }; pub use lsp::LanguageServerId; pub use outline::*; -pub use syntax_map::{OwnedSyntaxLayer, SyntaxLayer, TreeSitterOptions}; +pub use syntax_map::{OwnedSyntaxLayer, SyntaxLayer, ToTreeSitterPoint, TreeSitterOptions}; pub use text::{AnchorRangeExt, LineEnding}; pub use tree_sitter::{Node, Parser, Tree, TreeCursor}; diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 76c6dc75e3..f51eeb9688 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -1845,7 +1845,7 @@ impl Drop for QueryCursorHandle { } } -pub(crate) trait ToTreeSitterPoint { +pub trait ToTreeSitterPoint { fn to_ts_point(self) -> tree_sitter::Point; fn from_ts_point(point: tree_sitter::Point) -> Self; } diff --git a/crates/multi_buffer/Cargo.toml b/crates/multi_buffer/Cargo.toml index 444fe3c75c..42cacc34b1 100644 --- a/crates/multi_buffer/Cargo.toml +++ b/crates/multi_buffer/Cargo.toml @@ -39,6 +39,7 @@ smallvec.workspace = true sum_tree.workspace = true text.workspace = true theme.workspace = true +tree-sitter.workspace = true util.workspace = true [dev-dependencies] diff --git a/crates/multi_buffer/src/multi_buffer.rs b/crates/multi_buffer/src/multi_buffer.rs index 1253e82ebc..2330d12ee4 100644 --- a/crates/multi_buffer/src/multi_buffer.rs +++ b/crates/multi_buffer/src/multi_buffer.rs @@ -3920,15 +3920,16 @@ impl MultiBufferSnapshot { }) } - pub fn range_for_syntax_ancestor(&self, range: Range) -> Option> { + pub fn syntax_ancestor( + &self, + range: Range, + ) -> Option<(tree_sitter::Node, Range)> { let range = range.start.to_offset(self)..range.end.to_offset(self); let excerpt = self.excerpt_containing(range.clone())?; - - let ancestor_buffer_range = excerpt + let node = excerpt .buffer() - .range_for_syntax_ancestor(excerpt.map_range_to_buffer(range))?; - - Some(excerpt.map_range_from_buffer(ancestor_buffer_range)) + .syntax_ancestor(excerpt.map_range_to_buffer(range))?; + Some((node, excerpt.map_range_from_buffer(node.byte_range()))) } pub fn outline(&self, theme: Option<&SyntaxTheme>) -> Option> {