From d27cebd97718783df3e940391f70b2c7eaf06d11 Mon Sep 17 00:00:00 2001
From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com>
Date: Tue, 22 Aug 2023 10:35:20 +0200
Subject: [PATCH] Z 2819 (#2872)

This PR adds a new option to the language config, `word_characters`,
that lists extra characters to be treated as part of a word (in addition
to alphanumerics and `_`) for a given language. This will improve our UX
for languages such as PHP and Tailwind.

Release Notes:

- Improved completions for PHP
[#1820](https://github.com/zed-industries/community/issues/1820)

---------

Co-authored-by: Julia Risley <julia@zed.dev>
---
 crates/editor/src/editor.rs              |  1 -
 crates/editor/src/items.rs               | 29 +++++++++++++----------
 crates/editor/src/movement.rs            | 30 +++++++++++++++++++-----
 crates/editor/src/multi_buffer.rs        | 11 +++++----
 crates/language/src/buffer.rs            | 25 +++++++++++++-------
 crates/language/src/language.rs          |  5 +++-
 crates/project/src/project.rs            |  2 +-
 crates/project/src/search.rs             | 27 ++++++++++++++++-----
 crates/vim/src/motion.rs                 | 18 ++++++++------
 crates/vim/src/normal/change.rs          |  9 ++++---
 crates/vim/src/object.rs                 | 21 ++++++++++-------
 crates/zed/src/languages/php/config.toml |  1 +
 12 files changed, 120 insertions(+), 59 deletions(-)
diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs
index a38145f48c..cbc7a7cd42 100644
--- a/crates/editor/src/editor.rs
+++ b/crates/editor/src/editor.rs
@@ -2667,7 +2667,6 @@ impl Editor {
             false
         });
     }
-
     fn completion_query(buffer: &MultiBufferSnapshot, position: impl ToOffset) -> Option<String> {
         let offset = position.to_offset(buffer);
         let (word_range, kind) = buffer.surrounding_word(offset);
diff --git a/crates/editor/src/items.rs b/crates/editor/src/items.rs
index b99977a60e..4a2b03bbdf 100644
--- a/crates/editor/src/items.rs
+++ b/crates/editor/src/items.rs
@@ -1028,7 +1028,7 @@ impl SearchableItem for Editor {
             if let Some((_, _, excerpt_buffer)) = buffer.as_singleton() {
                 ranges.extend(
                     query
-                        .search(excerpt_buffer.as_rope())
+                        .search(excerpt_buffer, None)
                         .await
                         .into_iter()
                         .map(|range| {
@@ -1038,17 +1038,22 @@ impl SearchableItem for Editor {
             } else {
                 for excerpt in buffer.excerpt_boundaries_in_range(0..buffer.len()) {
                     let excerpt_range = excerpt.range.context.to_offset(&excerpt.buffer);
-                    let rope = excerpt.buffer.as_rope().slice(excerpt_range.clone());
-                    ranges.extend(query.search(&rope).await.into_iter().map(|range| {
-                        let start = excerpt
-                            .buffer
-                            .anchor_after(excerpt_range.start + range.start);
-                        let end = excerpt
-                            .buffer
-                            .anchor_before(excerpt_range.start + range.end);
-                        buffer.anchor_in_excerpt(excerpt.id.clone(), start)
-                            ..buffer.anchor_in_excerpt(excerpt.id.clone(), end)
-                    }));
+                    ranges.extend(
+                        query
+                            .search(&excerpt.buffer, Some(excerpt_range.clone()))
+                            .await
+                            .into_iter()
+                            .map(|range| {
+                                let start = excerpt
+                                    .buffer
+                                    .anchor_after(excerpt_range.start + range.start);
+                                let end = excerpt
+                                    .buffer
+                                    .anchor_before(excerpt_range.start + range.end);
+                                buffer.anchor_in_excerpt(excerpt.id.clone(), start)
+                                    ..buffer.anchor_in_excerpt(excerpt.id.clone(), end)
+                            }),
+                    );
                 }
             }
             ranges
diff --git a/crates/editor/src/movement.rs b/crates/editor/src/movement.rs
index 4eec92c8eb..6b3032b2a3 100644
--- a/crates/editor/src/movement.rs
+++ b/crates/editor/src/movement.rs
@@ -176,14 +176,21 @@ pub fn line_end(
 }
 
 pub fn previous_word_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
+    let raw_point = point.to_point(map);
+    let language = map.buffer_snapshot.language_at(raw_point);
+
     find_preceding_boundary(map, point, |left, right| {
-        (char_kind(left) != char_kind(right) && !right.is_whitespace()) || left == '\n'
+        (char_kind(language, left) != char_kind(language, right) && !right.is_whitespace())
+            || left == '\n'
     })
 }
 
 pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
+    let raw_point = point.to_point(map);
+    let language = map.buffer_snapshot.language_at(raw_point);
     find_preceding_boundary(map, point, |left, right| {
-        let is_word_start = char_kind(left) != char_kind(right) && !right.is_whitespace();
+        let is_word_start =
+            char_kind(language, left) != char_kind(language, right) && !right.is_whitespace();
         let is_subword_start =
             left == '_' && right != '_' || left.is_lowercase() && right.is_uppercase();
         is_word_start || is_subword_start || left == '\n'
@@ -191,14 +198,20 @@ pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> Dis
 }
 
 pub fn next_word_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
+    let raw_point = point.to_point(map);
+    let language = map.buffer_snapshot.language_at(raw_point);
     find_boundary(map, point, |left, right| {
-        (char_kind(left) != char_kind(right) && !left.is_whitespace()) || right == '\n'
+        (char_kind(language, left) != char_kind(language, right) && !left.is_whitespace())
+            || right == '\n'
     })
 }
 
 pub fn next_subword_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
+    let raw_point = point.to_point(map);
+    let language = map.buffer_snapshot.language_at(raw_point);
     find_boundary(map, point, |left, right| {
-        let is_word_end = (char_kind(left) != char_kind(right)) && !left.is_whitespace();
+        let is_word_end =
+            (char_kind(language, left) != char_kind(language, right)) && !left.is_whitespace();
         let is_subword_end =
             left != '_' && right == '_' || left.is_lowercase() && right.is_uppercase();
         is_word_end || is_subword_end || right == '\n'
@@ -385,10 +398,15 @@ pub fn find_boundary_in_line(
 }
 
 pub fn is_inside_word(map: &DisplaySnapshot, point: DisplayPoint) -> bool {
+    let raw_point = point.to_point(map);
+    let language = map.buffer_snapshot.language_at(raw_point);
     let ix = map.clip_point(point, Bias::Left).to_offset(map, Bias::Left);
     let text = &map.buffer_snapshot;
-    let next_char_kind = text.chars_at(ix).next().map(char_kind);
-    let prev_char_kind = text.reversed_chars_at(ix).next().map(char_kind);
+    let next_char_kind = text.chars_at(ix).next().map(|c| char_kind(language, c));
+    let prev_char_kind = text
+        .reversed_chars_at(ix)
+        .next()
+        .map(|c| char_kind(language, c));
     prev_char_kind.zip(next_char_kind) == Some((CharKind::Word, CharKind::Word))
 }
 
diff --git a/crates/editor/src/multi_buffer.rs b/crates/editor/src/multi_buffer.rs
index 8417c411f2..d4061f25dc 100644
--- a/crates/editor/src/multi_buffer.rs
+++ b/crates/editor/src/multi_buffer.rs
@@ -1865,13 +1865,16 @@ impl MultiBufferSnapshot {
         let mut end = start;
         let mut next_chars = self.chars_at(start).peekable();
         let mut prev_chars = self.reversed_chars_at(start).peekable();
+
+        let language = self.language_at(start);
+        let kind = |c| char_kind(language, c);
         let word_kind = cmp::max(
-            prev_chars.peek().copied().map(char_kind),
-            next_chars.peek().copied().map(char_kind),
+            prev_chars.peek().copied().map(kind),
+            next_chars.peek().copied().map(kind),
         );
 
         for ch in prev_chars {
-            if Some(char_kind(ch)) == word_kind && ch != '\n' {
+            if Some(kind(ch)) == word_kind && ch != '\n' {
                 start -= ch.len_utf8();
             } else {
                 break;
@@ -1879,7 +1882,7 @@ impl MultiBufferSnapshot {
         }
 
         for ch in next_chars {
-            if Some(char_kind(ch)) == word_kind && ch != '\n' {
+            if Some(kind(ch)) == word_kind && ch != '\n' {
                 end += ch.len_utf8();
             } else {
                 break;
diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs
index e6ad3469ea..d032e8e025 100644
--- a/crates/language/src/buffer.rs
+++ b/crates/language/src/buffer.rs
@@ -2192,13 +2192,16 @@ impl BufferSnapshot {
         let mut end = start;
         let mut next_chars = self.chars_at(start).peekable();
         let mut prev_chars = self.reversed_chars_at(start).peekable();
+
+        let language = self.language_at(start);
+        let kind = |c| char_kind(language, c);
         let word_kind = cmp::max(
-            prev_chars.peek().copied().map(char_kind),
-            next_chars.peek().copied().map(char_kind),
+            prev_chars.peek().copied().map(kind),
+            next_chars.peek().copied().map(kind),
         );
 
         for ch in prev_chars {
-            if Some(char_kind(ch)) == word_kind && ch != '\n' {
+            if Some(kind(ch)) == word_kind && ch != '\n' {
                 start -= ch.len_utf8();
             } else {
                 break;
@@ -2206,7 +2209,7 @@ impl BufferSnapshot {
         }
 
         for ch in next_chars {
-            if Some(char_kind(ch)) == word_kind && ch != '\n' {
+            if Some(kind(ch)) == word_kind && ch != '\n' {
                 end += ch.len_utf8();
             } else {
                 break;
@@ -3003,14 +3006,18 @@ pub fn contiguous_ranges(
     })
 }
 
-pub fn char_kind(c: char) -> CharKind {
+pub fn char_kind(language: Option<&Arc<Language>>, c: char) -> CharKind {
     if c.is_whitespace() {
-        CharKind::Whitespace
+        return CharKind::Whitespace;
     } else if c.is_alphanumeric() || c == '_' {
-        CharKind::Word
-    } else {
-        CharKind::Punctuation
+        return CharKind::Word;
     }
+    if let Some(language) = language {
+        if language.config.word_characters.contains(&c) {
+            return CharKind::Word;
+        }
+    }
+    CharKind::Punctuation
 }
 
 /// Find all of the ranges of whitespace that occur at the ends of lines
diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs
index 223f5679ae..82245d67ca 100644
--- a/crates/language/src/language.rs
+++ b/crates/language/src/language.rs
@@ -11,7 +11,7 @@ mod buffer_tests;
 
 use anyhow::{anyhow, Context, Result};
 use async_trait::async_trait;
-use collections::HashMap;
+use collections::{HashMap, HashSet};
 use futures::{
     channel::oneshot,
     future::{BoxFuture, Shared},
@@ -344,6 +344,8 @@ pub struct LanguageConfig {
     pub block_comment: Option<(Arc<str>, Arc<str>)>,
     #[serde(default)]
     pub overrides: HashMap<String, LanguageConfigOverride>,
+    #[serde(default)]
+    pub word_characters: HashSet<char>,
 }
 
 #[derive(Debug, Default)]
@@ -411,6 +413,7 @@ impl Default for LanguageConfig {
             block_comment: Default::default(),
             overrides: Default::default(),
             collapsed_placeholder: Default::default(),
+            word_characters: Default::default(),
         }
     }
 }
diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs
index adb067b403..b120baa951 100644
--- a/crates/project/src/project.rs
+++ b/crates/project/src/project.rs
@@ -5180,7 +5180,7 @@ impl Project {
                                         snapshot.file().map(|file| file.path().as_ref()),
                                     ) {
                                         query
-                                            .search(snapshot.as_rope())
+                                            .search(&snapshot, None)
                                             .await
                                             .iter()
                                             .map(|range| {
diff --git a/crates/project/src/search.rs b/crates/project/src/search.rs
index 08ff803598..a3c6583052 100644
--- a/crates/project/src/search.rs
+++ b/crates/project/src/search.rs
@@ -3,7 +3,7 @@ use anyhow::{Context, Result};
 use client::proto;
 use globset::{Glob, GlobMatcher};
 use itertools::Itertools;
-use language::{char_kind, Rope};
+use language::{char_kind, BufferSnapshot};
 use regex::{Regex, RegexBuilder};
 use smol::future::yield_now;
 use std::{
@@ -39,6 +39,7 @@ pub enum SearchQuery {
         case_sensitive: bool,
         inner: SearchInputs,
     },
+
     Regex {
         regex: Regex,
 
@@ -214,12 +215,24 @@ impl SearchQuery {
         }
     }
 
-    pub async fn search(&self, rope: &Rope) -> Vec<Range<usize>> {
+    pub async fn search(
+        &self,
+        buffer: &BufferSnapshot,
+        subrange: Option<Range<usize>>,
+    ) -> Vec<Range<usize>> {
         const YIELD_INTERVAL: usize = 20000;
 
         if self.as_str().is_empty() {
             return Default::default();
         }
+        let language = buffer.language_at(0);
+        let rope = if let Some(range) = subrange {
+            buffer.as_rope().slice(range)
+        } else {
+            buffer.as_rope().clone()
+        };
+
+        let kind = |c| char_kind(language, c);
 
         let mut matches = Vec::new();
         match self {
@@ -236,10 +249,10 @@ impl SearchQuery {
 
                     let mat = mat.unwrap();
                     if *whole_word {
-                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(char_kind);
-                        let start_kind = char_kind(rope.chars_at(mat.start()).next().unwrap());
-                        let end_kind = char_kind(rope.reversed_chars_at(mat.end()).next().unwrap());
-                        let next_kind = rope.chars_at(mat.end()).next().map(char_kind);
+                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
+                        let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
+                        let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
+                        let next_kind = rope.chars_at(mat.end()).next().map(kind);
                         if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
                             continue;
                         }
@@ -247,6 +260,7 @@ impl SearchQuery {
                     matches.push(mat.start()..mat.end())
                 }
             }
+
             Self::Regex {
                 regex, multiline, ..
             } => {
@@ -284,6 +298,7 @@ impl SearchQuery {
                 }
             }
         }
+
         matches
     }
 
diff --git a/crates/vim/src/motion.rs b/crates/vim/src/motion.rs
index 13c22afdaa..8cd29e5e9f 100644
--- a/crates/vim/src/motion.rs
+++ b/crates/vim/src/motion.rs
@@ -439,11 +439,12 @@ pub(crate) fn next_word_start(
     ignore_punctuation: bool,
     times: usize,
 ) -> DisplayPoint {
+    let language = map.buffer_snapshot.language_at(point.to_point(map));
     for _ in 0..times {
         let mut crossed_newline = false;
         point = movement::find_boundary(map, point, |left, right| {
-            let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation);
-            let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation);
+            let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
+            let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
             let at_newline = right == '\n';
 
             let found = (left_kind != right_kind && right_kind != CharKind::Whitespace)
@@ -463,11 +464,12 @@ fn next_word_end(
     ignore_punctuation: bool,
     times: usize,
 ) -> DisplayPoint {
+    let language = map.buffer_snapshot.language_at(point.to_point(map));
     for _ in 0..times {
         *point.column_mut() += 1;
         point = movement::find_boundary(map, point, |left, right| {
-            let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation);
-            let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation);
+            let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
+            let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
 
             left_kind != right_kind && left_kind != CharKind::Whitespace
         });
@@ -493,12 +495,13 @@ fn previous_word_start(
     ignore_punctuation: bool,
     times: usize,
 ) -> DisplayPoint {
+    let language = map.buffer_snapshot.language_at(point.to_point(map));
     for _ in 0..times {
         // This works even though find_preceding_boundary is called for every character in the line containing
         // cursor because the newline is checked only once.
         point = movement::find_preceding_boundary(map, point, |left, right| {
-            let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation);
-            let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation);
+            let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
+            let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
 
             (left_kind != right_kind && !right.is_whitespace()) || left == '\n'
         });
@@ -508,6 +511,7 @@ fn previous_word_start(
 
 fn first_non_whitespace(map: &DisplaySnapshot, from: DisplayPoint) -> DisplayPoint {
     let mut last_point = DisplayPoint::new(from.row(), 0);
+    let language = map.buffer_snapshot.language_at(from.to_point(map));
     for (ch, point) in map.chars_at(last_point) {
         if ch == '\n' {
             return from;
@@ -515,7 +519,7 @@ fn first_non_whitespace(map: &DisplaySnapshot, from: DisplayPoint) -> DisplayPoi
 
         last_point = point;
 
-        if char_kind(ch) != CharKind::Whitespace {
+        if char_kind(language, ch) != CharKind::Whitespace {
             break;
         }
     }
diff --git a/crates/vim/src/normal/change.rs b/crates/vim/src/normal/change.rs
index d226c70410..50bc049a3a 100644
--- a/crates/vim/src/normal/change.rs
+++ b/crates/vim/src/normal/change.rs
@@ -82,16 +82,19 @@ fn expand_changed_word_selection(
     ignore_punctuation: bool,
 ) -> bool {
     if times.is_none() || times.unwrap() == 1 {
+        let language = map
+            .buffer_snapshot
+            .language_at(selection.start.to_point(map));
         let in_word = map
             .chars_at(selection.head())
             .next()
-            .map(|(c, _)| char_kind(c) != CharKind::Whitespace)
+            .map(|(c, _)| char_kind(language, c) != CharKind::Whitespace)
             .unwrap_or_default();
 
         if in_word {
             selection.end = movement::find_boundary(map, selection.end, |left, right| {
-                let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation);
-                let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation);
+                let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
+                let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
 
                 left_kind != right_kind && left_kind != CharKind::Whitespace
             });
diff --git a/crates/vim/src/object.rs b/crates/vim/src/object.rs
index c203a89f72..dd922e7af6 100644
--- a/crates/vim/src/object.rs
+++ b/crates/vim/src/object.rs
@@ -177,17 +177,18 @@ fn in_word(
     ignore_punctuation: bool,
 ) -> Option<Range<DisplayPoint>> {
     // Use motion::right so that we consider the character under the cursor when looking for the start
+    let language = map.buffer_snapshot.language_at(relative_to.to_point(map));
     let start = movement::find_preceding_boundary_in_line(
         map,
         right(map, relative_to, 1),
         |left, right| {
-            char_kind(left).coerce_punctuation(ignore_punctuation)
-                != char_kind(right).coerce_punctuation(ignore_punctuation)
+            char_kind(language, left).coerce_punctuation(ignore_punctuation)
+                != char_kind(language, right).coerce_punctuation(ignore_punctuation)
         },
     );
     let end = movement::find_boundary_in_line(map, relative_to, |left, right| {
-        char_kind(left).coerce_punctuation(ignore_punctuation)
-            != char_kind(right).coerce_punctuation(ignore_punctuation)
+        char_kind(language, left).coerce_punctuation(ignore_punctuation)
+            != char_kind(language, right).coerce_punctuation(ignore_punctuation)
     });
 
     Some(start..end)
@@ -210,10 +211,11 @@ fn around_word(
     relative_to: DisplayPoint,
     ignore_punctuation: bool,
 ) -> Option<Range<DisplayPoint>> {
+    let language = map.buffer_snapshot.language_at(relative_to.to_point(map));
     let in_word = map
         .chars_at(relative_to)
         .next()
-        .map(|(c, _)| char_kind(c) != CharKind::Whitespace)
+        .map(|(c, _)| char_kind(language, c) != CharKind::Whitespace)
         .unwrap_or(false);
 
     if in_word {
@@ -237,20 +239,21 @@ fn around_next_word(
     relative_to: DisplayPoint,
     ignore_punctuation: bool,
 ) -> Option<Range<DisplayPoint>> {
+    let language = map.buffer_snapshot.language_at(relative_to.to_point(map));
     // Get the start of the word
     let start = movement::find_preceding_boundary_in_line(
         map,
         right(map, relative_to, 1),
         |left, right| {
-            char_kind(left).coerce_punctuation(ignore_punctuation)
-                != char_kind(right).coerce_punctuation(ignore_punctuation)
+            char_kind(language, left).coerce_punctuation(ignore_punctuation)
+                != char_kind(language, right).coerce_punctuation(ignore_punctuation)
         },
     );
 
     let mut word_found = false;
     let end = movement::find_boundary(map, relative_to, |left, right| {
-        let left_kind = char_kind(left).coerce_punctuation(ignore_punctuation);
-        let right_kind = char_kind(right).coerce_punctuation(ignore_punctuation);
+        let left_kind = char_kind(language, left).coerce_punctuation(ignore_punctuation);
+        let right_kind = char_kind(language, right).coerce_punctuation(ignore_punctuation);
 
         let found = (word_found && left_kind != right_kind) || right == '\n' && left == '\n';
 
diff --git a/crates/zed/src/languages/php/config.toml b/crates/zed/src/languages/php/config.toml
index 19acb949e2..60dd233555 100644
--- a/crates/zed/src/languages/php/config.toml
+++ b/crates/zed/src/languages/php/config.toml
@@ -10,3 +10,4 @@ brackets = [
     { start = "\"", end = "\"", close = true, newline = false, not_in = ["string"] },
 ]
 collapsed_placeholder = "/* ... */"
+word_characters = ["$"]