From d27cebd97718783df3e940391f70b2c7eaf06d11 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Tue, 22 Aug 2023 10:35:20 +0200 Subject: [PATCH] Z 2819 (#2872) This PR adds new config option to language config called `word_boundaries` that controls which characters should be recognised as word boundary for a given language. This will improve our UX for languages such as PHP and Tailwind. Release Notes: - Improved completions for PHP [#1820](https://github.com/zed-industries/community/issues/1820) --------- Co-authored-by: Julia Risley --- crates/editor/src/editor.rs | 1 - crates/editor/src/items.rs | 29 +++++++++++++---------- crates/editor/src/movement.rs | 30 +++++++++++++++++++----- crates/editor/src/multi_buffer.rs | 11 +++++---- crates/language/src/buffer.rs | 25 +++++++++++++------- crates/language/src/language.rs | 5 +++- crates/project/src/project.rs | 2 +- crates/project/src/search.rs | 27 ++++++++++++++++----- crates/vim/src/motion.rs | 18 ++++++++------ crates/vim/src/normal/change.rs | 9 ++++--- crates/vim/src/object.rs | 21 ++++++++++------- crates/zed/src/languages/php/config.toml | 1 + 12 files changed, 120 insertions(+), 59 deletions(-) diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index a38145f48c..cbc7a7cd42 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -2667,7 +2667,6 @@ impl Editor { false }); } - fn completion_query(buffer: &MultiBufferSnapshot, position: impl ToOffset) -> Option { let offset = position.to_offset(buffer); let (word_range, kind) = buffer.surrounding_word(offset); diff --git a/crates/editor/src/items.rs b/crates/editor/src/items.rs index b99977a60e..4a2b03bbdf 100644 --- a/crates/editor/src/items.rs +++ b/crates/editor/src/items.rs @@ -1028,7 +1028,7 @@ impl SearchableItem for Editor { if let Some((_, _, excerpt_buffer)) = buffer.as_singleton() { ranges.extend( query - .search(excerpt_buffer.as_rope()) + .search(excerpt_buffer, None) .await .into_iter() .map(|range| { @@ -1038,17 +1038,22 @@ impl SearchableItem for Editor { } else { for excerpt in buffer.excerpt_boundaries_in_range(0..buffer.len()) { let excerpt_range = excerpt.range.context.to_offset(&excerpt.buffer); - let rope = excerpt.buffer.as_rope().slice(excerpt_range.clone()); - ranges.extend(query.search(&rope).await.into_iter().map(|range| { - let start = excerpt - .buffer - .anchor_after(excerpt_range.start + range.start); - let end = excerpt - .buffer - .anchor_before(excerpt_range.start + range.end); - buffer.anchor_in_excerpt(excerpt.id.clone(), start) - ..buffer.anchor_in_excerpt(excerpt.id.clone(), end) - })); + ranges.extend( + query + .search(&excerpt.buffer, Some(excerpt_range.clone())) + .await + .into_iter() + .map(|range| { + let start = excerpt + .buffer + .anchor_after(excerpt_range.start + range.start); + let end = excerpt + .buffer + .anchor_before(excerpt_range.start + range.end); + buffer.anchor_in_excerpt(excerpt.id.clone(), start) + ..buffer.anchor_in_excerpt(excerpt.id.clone(), end) + }), + ); } } ranges diff --git a/crates/editor/src/movement.rs b/crates/editor/src/movement.rs index 4eec92c8eb..6b3032b2a3 100644 --- a/crates/editor/src/movement.rs +++ b/crates/editor/src/movement.rs @@ -176,14 +176,21 @@ pub fn line_end( } pub fn previous_word_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { + let raw_point = point.to_point(map); + let language = map.buffer_snapshot.language_at(raw_point); + find_preceding_boundary(map, point, |left, right| { - (char_kind(left) != char_kind(right) && !right.is_whitespace()) || left == '\n' + (char_kind(language, left) != char_kind(language, right) && !right.is_whitespace()) + || left == '\n' }) } pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { + let raw_point = point.to_point(map); + let language = map.buffer_snapshot.language_at(raw_point); find_preceding_boundary(map, point, |left, right| { - let is_word_start = char_kind(left) != char_kind(right) && !right.is_whitespace(); + let is_word_start = + char_kind(language, left) != char_kind(language, right) && !right.is_whitespace(); let is_subword_start = left == '_' && right != '_' || left.is_lowercase() && right.is_uppercase(); is_word_start || is_subword_start || left == '\n' @@ -191,14 +198,20 @@ pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> Dis } pub fn next_word_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { + let raw_point = point.to_point(map); + let language = map.buffer_snapshot.language_at(raw_point); find_boundary(map, point, |left, right| { - (char_kind(left) != char_kind(right) && !left.is_whitespace()) || right == '\n' + (char_kind(language, left) != char_kind(language, right) && !left.is_whitespace()) + || right == '\n' }) } pub fn next_subword_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { + let raw_point = point.to_point(map); + let language = map.buffer_snapshot.language_at(raw_point); find_boundary(map, point, |left, right| { - let is_word_end = (char_kind(left) != char_kind(right)) && !left.is_whitespace(); + let is_word_end = + (char_kind(language, left) != char_kind(language, right)) && !left.is_whitespace(); let is_subword_end = left != '_' && right == '_' || left.is_lowercase() && right.is_uppercase(); is_word_end || is_subword_end || right == '\n' @@ -385,10 +398,15 @@ pub fn find_boundary_in_line( } pub fn is_inside_word(map: &DisplaySnapshot, point: DisplayPoint) -> bool { + let raw_point = point.to_point(map); + let language = map.buffer_snapshot.language_at(raw_point); let ix = map.clip_point(point, Bias::Left).to_offset(map, Bias::Left); let text = &map.buffer_snapshot; - let next_char_kind = text.chars_at(ix).next().map(char_kind); - let prev_char_kind = text.reversed_chars_at(ix).next().map(char_kind); + let next_char_kind = text.chars_at(ix).next().map(|c| char_kind(language, c)); + let prev_char_kind = text + .reversed_chars_at(ix) + .next() + .map(|c| char_kind(language, c)); prev_char_kind.zip(next_char_kind) == Some((CharKind::Word, CharKind::Word)) } diff --git a/crates/editor/src/multi_buffer.rs b/crates/editor/src/multi_buffer.rs index 8417c411f2..d4061f25dc 100644 --- a/crates/editor/src/multi_buffer.rs +++ b/crates/editor/src/multi_buffer.rs @@ -1865,13 +1865,16 @@ impl MultiBufferSnapshot { let mut end = start; let mut next_chars = self.chars_at(start).peekable(); let mut prev_chars = self.reversed_chars_at(start).peekable(); + + let language = self.language_at(start); + let kind = |c| char_kind(language, c); let word_kind = cmp::max( - prev_chars.peek().copied().map(char_kind), - next_chars.peek().copied().map(char_kind), + prev_chars.peek().copied().map(kind), + next_chars.peek().copied().map(kind), ); for ch in prev_chars { - if Some(char_kind(ch)) == word_kind && ch != '\n' { + if Some(kind(ch)) == word_kind && ch != '\n' { start -= ch.len_utf8(); } else { break; @@ -1879,7 +1882,7 @@ impl MultiBufferSnapshot { } for ch in next_chars { - if Some(char_kind(ch)) == word_kind && ch != '\n' { + if Some(kind(ch)) == word_kind && ch != '\n' { end += ch.len_utf8(); } else { break; diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index e6ad3469ea..d032e8e025 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -2192,13 +2192,16 @@ impl BufferSnapshot { let mut end = start; let mut next_chars = self.chars_at(start).peekable(); let mut prev_chars = self.reversed_chars_at(start).peekable(); + + let language = self.language_at(start); + let kind = |c| char_kind(language, c); let word_kind = cmp::max( - prev_chars.peek().copied().map(char_kind), - next_chars.peek().copied().map(char_kind), + prev_chars.peek().copied().map(kind), + next_chars.peek().copied().map(kind), ); for ch in prev_chars { - if Some(char_kind(ch)) == word_kind && ch != '\n' { + if Some(kind(ch)) == word_kind && ch != '\n' { start -= ch.len_utf8(); } else { break; @@ -2206,7 +2209,7 @@ impl BufferSnapshot { } for ch in next_chars { - if Some(char_kind(ch)) == word_kind && ch != '\n' { + if Some(kind(ch)) == word_kind && ch != '\n' { end += ch.len_utf8(); } else { break; @@ -3003,14 +3006,18 @@ pub fn contiguous_ranges( }) } -pub fn char_kind(c: char) -> CharKind { +pub fn char_kind(language: Option<&Arc>, c: char) -> CharKind { if c.is_whitespace() { - CharKind::Whitespace + return CharKind::Whitespace; } else if c.is_alphanumeric() || c == '_' { - CharKind::Word - } else { - CharKind::Punctuation + return CharKind::Word; } + if let Some(language) = language { + if language.config.word_characters.contains(&c) { + return CharKind::Word; + } + } + CharKind::Punctuation } /// Find all of the ranges of whitespace that occur at the ends of lines diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 223f5679ae..82245d67ca 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -11,7 +11,7 @@ mod buffer_tests; use anyhow::{anyhow, Context, Result}; use async_trait::async_trait; -use collections::HashMap; +use collections::{HashMap, HashSet}; use futures::{ channel::oneshot, future::{BoxFuture, Shared}, @@ -344,6 +344,8 @@ pub struct LanguageConfig { pub block_comment: Option<(Arc, Arc)>, #[serde(default)] pub overrides: HashMap, + #[serde(default)] + pub word_characters: HashSet, } #[derive(Debug, Default)] @@ -411,6 +413,7 @@ impl Default for LanguageConfig { block_comment: Default::default(), overrides: Default::default(), collapsed_placeholder: Default::default(), + word_characters: Default::default(), } } } diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs index adb067b403..b120baa951 100644 --- a/crates/project/src/project.rs +++ b/crates/project/src/project.rs @@ -5180,7 +5180,7 @@ impl Project { snapshot.file().map(|file| file.path().as_ref()), ) { query - .search(snapshot.as_rope()) + .search(&snapshot, None) .await .iter() .map(|range| { diff --git a/crates/project/src/search.rs b/crates/project/src/search.rs index 08ff803598..a3c6583052 100644 --- a/crates/project/src/search.rs +++ b/crates/project/src/search.rs @@ -3,7 +3,7 @@ use anyhow::{Context, Result}; use client::proto; use globset::{Glob, GlobMatcher}; use itertools::Itertools; -use language::{char_kind, Rope}; +use language::{char_kind, BufferSnapshot}; use regex::{Regex, RegexBuilder}; use smol::future::yield_now; use std::{ @@ -39,6 +39,7 @@ pub enum SearchQuery { case_sensitive: bool, inner: SearchInputs, }, + Regex { regex: Regex, @@ -214,12 +215,24 @@ impl SearchQuery { } } - pub async fn search(&self, rope: &Rope) -> Vec> { + pub async fn search( + &self, + buffer: &BufferSnapshot, + subrange: Option>, + ) -> Vec> { const YIELD_INTERVAL: usize = 20000; if self.as_str().is_empty() { return Default::default(); } + let language = buffer.language_at(0); + let rope = if let Some(range) = subrange { + buffer.as_rope().slice(range) + } else { + buffer.as_rope().clone() + }; + + let kind = |c| char_kind(language, c); let mut matches = Vec::new(); match self { @@ -236,10 +249,10 @@ impl SearchQuery { let mat = mat.unwrap(); if *whole_word { - let prev_kind = rope.reversed_chars_at(mat.start()).next().map(char_kind); - let start_kind = char_kind(rope.chars_at(mat.start()).next().unwrap()); - let end_kind = char_kind(rope.reversed_chars_at(mat.end()).next().unwrap()); - let next_kind = rope.chars_at(mat.end()).next().map(char_kind); + let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind); + let start_kind = kind(rope.chars_at(mat.start()).next().unwrap()); + let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap()); + let next_kind = rope.chars_at(mat.end()).next().map(kind); if Some(start_kind) == prev_kind || Some(end_kind) == next_kind { continue; } @@ -247,6 +260,7 @@ impl SearchQuery { matches.push(mat.start()..mat.end()) } } + Self::Regex { regex, multiline, .. } => { @@ -284,6 +298,7 @@ impl SearchQuery { } } } + matches } diff --git a/crates/vim/src/motion.rs b/crates/vim/src/motion.rs index 13c22afdaa..8cd29e5e9f 100644 --- a/crates/vim/src/motion.rs +++ b/crates/vim/src/motion.rs @@ -439,11 +439,12 @@ pub(crate) fn next_word_start( ignore_punctuation: bool, times: usize, ) -> DisplayPoint { + let language = map.buffer_snapshot.language_at(point.to_point(map)); for _ in 0..times { let mut crossed_newline = false; point = movement::find_boundary(map, point, |left, right| { - let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation); - let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation); + let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation); + let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation); let at_newline = right == '\n'; let found = (left_kind != right_kind && right_kind != CharKind::Whitespace) @@ -463,11 +464,12 @@ fn next_word_end( ignore_punctuation: bool, times: usize, ) -> DisplayPoint { + let language = map.buffer_snapshot.language_at(point.to_point(map)); for _ in 0..times { *point.column_mut() += 1; point = movement::find_boundary(map, point, |left, right| { - let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation); - let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation); + let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation); + let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation); left_kind != right_kind && left_kind != CharKind::Whitespace }); @@ -493,12 +495,13 @@ fn previous_word_start( ignore_punctuation: bool, times: usize, ) -> DisplayPoint { + let language = map.buffer_snapshot.language_at(point.to_point(map)); for _ in 0..times { // This works even though find_preceding_boundary is called for every character in the line containing // cursor because the newline is checked only once. point = movement::find_preceding_boundary(map, point, |left, right| { - let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation); - let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation); + let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation); + let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation); (left_kind != right_kind && !right.is_whitespace()) || left == '\n' }); @@ -508,6 +511,7 @@ fn previous_word_start( fn first_non_whitespace(map: &DisplaySnapshot, from: DisplayPoint) -> DisplayPoint { let mut last_point = DisplayPoint::new(from.row(), 0); + let language = map.buffer_snapshot.language_at(from.to_point(map)); for (ch, point) in map.chars_at(last_point) { if ch == '\n' { return from; @@ -515,7 +519,7 @@ fn first_non_whitespace(map: &DisplaySnapshot, from: DisplayPoint) -> DisplayPoi last_point = point; - if char_kind(ch) != CharKind::Whitespace { + if char_kind(language, ch) != CharKind::Whitespace { break; } } diff --git a/crates/vim/src/normal/change.rs b/crates/vim/src/normal/change.rs index d226c70410..50bc049a3a 100644 --- a/crates/vim/src/normal/change.rs +++ b/crates/vim/src/normal/change.rs @@ -82,16 +82,19 @@ fn expand_changed_word_selection( ignore_punctuation: bool, ) -> bool { if times.is_none() || times.unwrap() == 1 { + let language = map + .buffer_snapshot + .language_at(selection.start.to_point(map)); let in_word = map .chars_at(selection.head()) .next() - .map(|(c, _)| char_kind(c) != CharKind::Whitespace) + .map(|(c, _)| char_kind(language, c) != CharKind::Whitespace) .unwrap_or_default(); if in_word { selection.end = movement::find_boundary(map, selection.end, |left, right| { - let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation); - let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation); + let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation); + let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation); left_kind != right_kind && left_kind != CharKind::Whitespace }); diff --git a/crates/vim/src/object.rs b/crates/vim/src/object.rs index c203a89f72..dd922e7af6 100644 --- a/crates/vim/src/object.rs +++ b/crates/vim/src/object.rs @@ -177,17 +177,18 @@ fn in_word( ignore_punctuation: bool, ) -> Option> { // Use motion::right so that we consider the character under the cursor when looking for the start + let language = map.buffer_snapshot.language_at(relative_to.to_point(map)); let start = movement::find_preceding_boundary_in_line( map, right(map, relative_to, 1), |left, right| { - char_kind(left).coerce_punctuation(ignore_punctuation) - != char_kind(right).coerce_punctuation(ignore_punctuation) + char_kind(language, left).coerce_punctuation(ignore_punctuation) + != char_kind(language, right).coerce_punctuation(ignore_punctuation) }, ); let end = movement::find_boundary_in_line(map, relative_to, |left, right| { - char_kind(left).coerce_punctuation(ignore_punctuation) - != char_kind(right).coerce_punctuation(ignore_punctuation) + char_kind(language, left).coerce_punctuation(ignore_punctuation) + != char_kind(language, right).coerce_punctuation(ignore_punctuation) }); Some(start..end) @@ -210,10 +211,11 @@ fn around_word( relative_to: DisplayPoint, ignore_punctuation: bool, ) -> Option> { + let language = map.buffer_snapshot.language_at(relative_to.to_point(map)); let in_word = map .chars_at(relative_to) .next() - .map(|(c, _)| char_kind(c) != CharKind::Whitespace) + .map(|(c, _)| char_kind(language, c) != CharKind::Whitespace) .unwrap_or(false); if in_word { @@ -237,20 +239,21 @@ fn around_next_word( relative_to: DisplayPoint, ignore_punctuation: bool, ) -> Option> { + let language = map.buffer_snapshot.language_at(relative_to.to_point(map)); // Get the start of the word let start = movement::find_preceding_boundary_in_line( map, right(map, relative_to, 1), |left, right| { - char_kind(left).coerce_punctuation(ignore_punctuation) - != char_kind(right).coerce_punctuation(ignore_punctuation) + char_kind(language, left).coerce_punctuation(ignore_punctuation) + != char_kind(language, right).coerce_punctuation(ignore_punctuation) }, ); let mut word_found = false; let end = movement::find_boundary(map, relative_to, |left, right| { - let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation); - let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation); + let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation); + let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation); let found = (word_found && left_kind != right_kind) || right == '\n' && left == '\n'; diff --git a/crates/zed/src/languages/php/config.toml b/crates/zed/src/languages/php/config.toml index 19acb949e2..60dd233555 100644 --- a/crates/zed/src/languages/php/config.toml +++ b/crates/zed/src/languages/php/config.toml @@ -10,3 +10,4 @@ brackets = [ { start = "\"", end = "\"", close = true, newline = false, not_in = ["string"] }, ] collapsed_placeholder = "/* ... */" +word_characters = ["$"]