Add textobjects queries (#20924)

Co-Authored-By: Max <max@zed.dev>

Release Notes:

- vim: Added motions `[[`, `[]`, `]]`, `][` for navigating by section,
`[m`, `]m`, `[M`, `]M` for navigating by method, and `[*`, `]*`, `[/`,
`]/` for comments. These currently only work for languages built in to
Zed, as they are powered by new tree-sitter queries.
- vim: Added new text objects: `ic`, `ac` for inside/around classes,
`if`,`af` for functions/methods, and `g c` for comments. These currently
only work for languages built in to Zed, as they are powered by new
tree-sitter queries.

---------

Co-authored-by: Max <max@zed.dev>
This commit is contained in:
Conrad Irwin 2024-12-03 09:37:01 -08:00 committed by GitHub
parent c443307c19
commit 75c9dc179b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 1205 additions and 26 deletions

View file

@ -14,7 +14,8 @@ use crate::{
SyntaxMapMatches, SyntaxSnapshot, ToTreeSitterPoint,
},
task_context::RunnableRange,
LanguageScope, Outline, OutlineConfig, RunnableCapture, RunnableTag,
LanguageScope, Outline, OutlineConfig, RunnableCapture, RunnableTag, TextObject,
TreeSitterOptions,
};
use anyhow::{anyhow, Context, Result};
use async_watch as watch;
@ -3412,6 +3413,72 @@ impl BufferSnapshot {
})
}
pub fn text_object_ranges<T: ToOffset>(
&self,
range: Range<T>,
options: TreeSitterOptions,
) -> impl Iterator<Item = (Range<usize>, TextObject)> + '_ {
let range = range.start.to_offset(self).saturating_sub(1)
..self.len().min(range.end.to_offset(self) + 1);
let mut matches =
self.syntax
.matches_with_options(range.clone(), &self.text, options, |grammar| {
grammar.text_object_config.as_ref().map(|c| &c.query)
});
let configs = matches
.grammars()
.iter()
.map(|grammar| grammar.text_object_config.as_ref())
.collect::<Vec<_>>();
let mut captures = Vec::<(Range<usize>, TextObject)>::new();
iter::from_fn(move || loop {
while let Some(capture) = captures.pop() {
if capture.0.overlaps(&range) {
return Some(capture);
}
}
let mat = matches.peek()?;
let Some(config) = configs[mat.grammar_index].as_ref() else {
matches.advance();
continue;
};
for capture in mat.captures {
let Some(ix) = config
.text_objects_by_capture_ix
.binary_search_by_key(&capture.index, |e| e.0)
.ok()
else {
continue;
};
let text_object = config.text_objects_by_capture_ix[ix].1;
let byte_range = capture.node.byte_range();
let mut found = false;
for (range, existing) in captures.iter_mut() {
if existing == &text_object {
range.start = range.start.min(byte_range.start);
range.end = range.end.max(byte_range.end);
found = true;
break;
}
}
if !found {
captures.push((byte_range, text_object));
}
}
matches.advance();
})
}
/// Returns enclosing bracket ranges containing the given range
pub fn enclosing_bracket_ranges<T: ToOffset>(
&self,

View file

@ -20,6 +20,7 @@ use std::{
sync::LazyLock,
time::{Duration, Instant},
};
use syntax_map::TreeSitterOptions;
use text::network::Network;
use text::{BufferId, LineEnding, LineIndent};
use text::{Point, ToPoint};
@ -915,6 +916,39 @@ async fn test_symbols_containing(cx: &mut gpui::TestAppContext) {
}
}
#[gpui::test]
fn test_text_objects(cx: &mut AppContext) {
let (text, ranges) = marked_text_ranges(
indoc! {r#"
impl Hello {
fn say() -> u8 { return /* ˇhi */ 1 }
}"#
},
false,
);
let buffer =
cx.new_model(|cx| Buffer::local(text.clone(), cx).with_language(Arc::new(rust_lang()), cx));
let snapshot = buffer.update(cx, |buffer, _| buffer.snapshot());
let matches = snapshot
.text_object_ranges(ranges[0].clone(), TreeSitterOptions::default())
.map(|(range, text_object)| (&text[range], text_object))
.collect::<Vec<_>>();
assert_eq!(
matches,
&[
("/* hi */", TextObject::AroundComment),
("return /* hi */ 1", TextObject::InsideFunction),
(
"fn say() -> u8 { return /* hi */ 1 }",
TextObject::AroundFunction
),
],
)
}
#[gpui::test]
fn test_enclosing_bracket_ranges(cx: &mut AppContext) {
let mut assert = |selection_text, range_markers| {
@ -3182,6 +3216,20 @@ fn rust_lang() -> Language {
"#,
)
.unwrap()
.with_text_object_query(
r#"
(function_item
body: (_
"{"
(_)* @function.inside
"}" )) @function.around
(line_comment)+ @comment.around
(block_comment) @comment.around
"#,
)
.unwrap()
.with_outline_query(
r#"
(line_comment) @annotation

View file

@ -78,7 +78,7 @@ pub use language_registry::{
};
pub use lsp::LanguageServerId;
pub use outline::*;
pub use syntax_map::{OwnedSyntaxLayer, SyntaxLayer};
pub use syntax_map::{OwnedSyntaxLayer, SyntaxLayer, TreeSitterOptions};
pub use text::{AnchorRangeExt, LineEnding};
pub use tree_sitter::{Node, Parser, Tree, TreeCursor};
@ -848,6 +848,7 @@ pub struct Grammar {
pub(crate) runnable_config: Option<RunnableConfig>,
pub(crate) indents_config: Option<IndentConfig>,
pub outline_config: Option<OutlineConfig>,
pub text_object_config: Option<TextObjectConfig>,
pub embedding_config: Option<EmbeddingConfig>,
pub(crate) injection_config: Option<InjectionConfig>,
pub(crate) override_config: Option<OverrideConfig>,
@ -873,6 +874,44 @@ pub struct OutlineConfig {
pub annotation_capture_ix: Option<u32>,
}
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum TextObject {
InsideFunction,
AroundFunction,
InsideClass,
AroundClass,
InsideComment,
AroundComment,
}
impl TextObject {
pub fn from_capture_name(name: &str) -> Option<TextObject> {
match name {
"function.inside" => Some(TextObject::InsideFunction),
"function.around" => Some(TextObject::AroundFunction),
"class.inside" => Some(TextObject::InsideClass),
"class.around" => Some(TextObject::AroundClass),
"comment.inside" => Some(TextObject::InsideComment),
"comment.around" => Some(TextObject::AroundComment),
_ => None,
}
}
pub fn around(&self) -> Option<Self> {
match self {
TextObject::InsideFunction => Some(TextObject::AroundFunction),
TextObject::InsideClass => Some(TextObject::AroundClass),
TextObject::InsideComment => Some(TextObject::AroundComment),
_ => None,
}
}
}
pub struct TextObjectConfig {
pub query: Query,
pub text_objects_by_capture_ix: Vec<(u32, TextObject)>,
}
#[derive(Debug)]
pub struct EmbeddingConfig {
pub query: Query,
@ -950,6 +989,7 @@ impl Language {
highlights_query: None,
brackets_config: None,
outline_config: None,
text_object_config: None,
embedding_config: None,
indents_config: None,
injection_config: None,
@ -1020,7 +1060,12 @@ impl Language {
if let Some(query) = queries.runnables {
self = self
.with_runnable_query(query.as_ref())
.context("Error loading tests query")?;
.context("Error loading runnables query")?;
}
if let Some(query) = queries.text_objects {
self = self
.with_text_object_query(query.as_ref())
.context("Error loading textobject query")?;
}
Ok(self)
}
@ -1097,6 +1142,26 @@ impl Language {
Ok(self)
}
pub fn with_text_object_query(mut self, source: &str) -> Result<Self> {
let grammar = self
.grammar_mut()
.ok_or_else(|| anyhow!("cannot mutate grammar"))?;
let query = Query::new(&grammar.ts_language, source)?;
let mut text_objects_by_capture_ix = Vec::new();
for (ix, name) in query.capture_names().iter().enumerate() {
if let Some(text_object) = TextObject::from_capture_name(name) {
text_objects_by_capture_ix.push((ix as u32, text_object));
}
}
grammar.text_object_config = Some(TextObjectConfig {
query,
text_objects_by_capture_ix,
});
Ok(self)
}
pub fn with_embedding_query(mut self, source: &str) -> Result<Self> {
let grammar = self
.grammar_mut()

View file

@ -181,6 +181,7 @@ pub const QUERY_FILENAME_PREFIXES: &[(
("overrides", |q| &mut q.overrides),
("redactions", |q| &mut q.redactions),
("runnables", |q| &mut q.runnables),
("textobjects", |q| &mut q.text_objects),
];
/// Tree-sitter language queries for a given language.
@ -195,6 +196,7 @@ pub struct LanguageQueries {
pub overrides: Option<Cow<'static, str>>,
pub redactions: Option<Cow<'static, str>>,
pub runnables: Option<Cow<'static, str>>,
pub text_objects: Option<Cow<'static, str>>,
}
#[derive(Clone, Default)]

View file

@ -814,6 +814,23 @@ impl SyntaxSnapshot {
buffer.as_rope(),
self.layers_for_range(range, buffer, true),
query,
TreeSitterOptions::default(),
)
}
pub fn matches_with_options<'a>(
&'a self,
range: Range<usize>,
buffer: &'a BufferSnapshot,
options: TreeSitterOptions,
query: fn(&Grammar) -> Option<&Query>,
) -> SyntaxMapMatches<'a> {
SyntaxMapMatches::new(
range.clone(),
buffer.as_rope(),
self.layers_for_range(range, buffer, true),
query,
options,
)
}
@ -1001,12 +1018,25 @@ impl<'a> SyntaxMapCaptures<'a> {
}
}
#[derive(Default)]
pub struct TreeSitterOptions {
max_start_depth: Option<u32>,
}
impl TreeSitterOptions {
pub fn max_start_depth(max_start_depth: u32) -> Self {
Self {
max_start_depth: Some(max_start_depth),
}
}
}
impl<'a> SyntaxMapMatches<'a> {
fn new(
range: Range<usize>,
text: &'a Rope,
layers: impl Iterator<Item = SyntaxLayer<'a>>,
query: fn(&Grammar) -> Option<&Query>,
options: TreeSitterOptions,
) -> Self {
let mut result = Self::default();
for layer in layers {
@ -1027,6 +1057,7 @@ impl<'a> SyntaxMapMatches<'a> {
query_cursor.deref_mut(),
)
};
cursor.set_max_start_depth(options.max_start_depth);
cursor.set_byte_range(range.clone());
let matches = cursor.matches(query, layer.node(), TextProvider(text));