Merge remote-tracking branch 'origin/main' into ai-refactoring

Antonio Scandurra 2023-08-25 13:37:32 +02:00
commit 27c90f12f6
258 changed files with 25085 additions and 8057 deletions


@@ -359,6 +359,14 @@ impl Buffer {
)
}
pub fn remote(remote_id: u64, replica_id: ReplicaId, base_text: String) -> Self {
Self::build(
TextBuffer::new(replica_id, remote_id, base_text),
None,
None,
)
}
pub fn from_proto(
replica_id: ReplicaId,
message: proto::BufferState,
@@ -2165,27 +2173,46 @@ impl BufferSnapshot {
pub fn language_scope_at<D: ToOffset>(&self, position: D) -> Option<LanguageScope> {
let offset = position.to_offset(self);
let mut range = 0..self.len();
let mut scope = self.language.clone().map(|language| LanguageScope {
language,
override_id: None,
});
let mut scope = None;
let mut smallest_range: Option<Range<usize>> = None;
// Use the layer that has the smallest node intersecting the given point.
for layer in self.syntax.layers_for_range(offset..offset, &self.text) {
let mut cursor = layer.node().walk();
while cursor.goto_first_child_for_byte(offset).is_some() {}
let node_range = cursor.node().byte_range();
if node_range.to_inclusive().contains(&offset) && node_range.len() < range.len() {
range = node_range;
scope = Some(LanguageScope {
language: layer.language.clone(),
override_id: layer.override_id(offset, &self.text),
});
let mut range = None;
loop {
let child_range = cursor.node().byte_range();
if !child_range.to_inclusive().contains(&offset) {
break;
}
range = Some(child_range);
if cursor.goto_first_child_for_byte(offset).is_none() {
break;
}
}
if let Some(range) = range {
if smallest_range
.as_ref()
.map_or(true, |smallest_range| range.len() < smallest_range.len())
{
smallest_range = Some(range);
scope = Some(LanguageScope {
language: layer.language.clone(),
override_id: layer.override_id(offset, &self.text),
});
}
}
}
scope
scope.or_else(|| {
self.language.clone().map(|language| LanguageScope {
language,
override_id: None,
})
})
}
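The rewritten `language_scope_at` walks each syntax layer down to the deepest node containing the offset and keeps the layer whose containing range is smallest, falling back to the buffer's base language only when no layer claims the position. A minimal, self-contained sketch of that selection rule (the `Range<usize>` values stand in for per-layer node ranges; `smallest_containing` is illustrative, not part of the crate):

```rust
use std::ops::Range;

/// Pick the candidate whose range is smallest while still containing `offset`
/// (inclusive at the end, mirroring the `to_inclusive().contains(&offset)` check above).
fn smallest_containing(candidates: &[Range<usize>], offset: usize) -> Option<Range<usize>> {
    let mut smallest: Option<Range<usize>> = None;
    for range in candidates {
        let contains = range.start <= offset && offset <= range.end;
        if contains && smallest.as_ref().map_or(true, |s| range.len() < s.len()) {
            smallest = Some(range.clone());
        }
    }
    smallest
}

fn main() {
    // Three layers intersecting the same offset: the innermost (smallest) one wins.
    let layers = [0..100, 10..40, 20..25];
    assert_eq!(smallest_containing(&layers, 22), Some(20..25));
    // No layer contains the offset, so the caller falls back to the base language.
    assert_eq!(smallest_containing(&layers, 150), None);
}
```

Building the base-language fallback via `or_else` means an override-free scope is only constructed when no layer matched.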
pub fn surrounding_word<T: ToOffset>(&self, start: T) -> (Range<usize>, Option<CharKind>) {
@@ -2193,13 +2220,16 @@ impl BufferSnapshot {
let mut end = start;
let mut next_chars = self.chars_at(start).peekable();
let mut prev_chars = self.reversed_chars_at(start).peekable();
let language = self.language_at(start);
let kind = |c| char_kind(language, c);
let word_kind = cmp::max(
prev_chars.peek().copied().map(char_kind),
next_chars.peek().copied().map(char_kind),
prev_chars.peek().copied().map(kind),
next_chars.peek().copied().map(kind),
);
for ch in prev_chars {
if Some(char_kind(ch)) == word_kind && ch != '\n' {
if Some(kind(ch)) == word_kind && ch != '\n' {
start -= ch.len_utf8();
} else {
break;
@@ -2207,7 +2237,7 @@ impl BufferSnapshot {
}
for ch in next_chars {
if Some(char_kind(ch)) == word_kind && ch != '\n' {
if Some(kind(ch)) == word_kind && ch != '\n' {
end += ch.len_utf8();
} else {
break;
@@ -3004,14 +3034,18 @@ pub fn contiguous_ranges(
})
}
pub fn char_kind(c: char) -> CharKind {
pub fn char_kind(language: Option<&Arc<Language>>, c: char) -> CharKind {
if c.is_whitespace() {
CharKind::Whitespace
return CharKind::Whitespace;
} else if c.is_alphanumeric() || c == '_' {
CharKind::Word
} else {
CharKind::Punctuation
return CharKind::Word;
}
if let Some(language) = language {
if language.config.word_characters.contains(&c) {
return CharKind::Word;
}
}
CharKind::Punctuation
}
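`char_kind` now takes the language at the position so that per-language `word_characters` can promote extra characters to word status. A standalone sketch of the same precedence, with a plain `HashSet<char>` standing in for `language.config.word_characters` (the `'-'` entry is a hypothetical configuration, not one added by this commit):

```rust
use std::collections::HashSet;

#[derive(Debug, PartialEq)]
enum CharKind {
    Whitespace,
    Word,
    Punctuation,
}

// Same precedence as above: whitespace first, then built-in word characters,
// then language-specific extras, and everything else is punctuation.
fn classify(word_characters: &HashSet<char>, c: char) -> CharKind {
    if c.is_whitespace() {
        return CharKind::Whitespace;
    } else if c.is_alphanumeric() || c == '_' {
        return CharKind::Word;
    }
    if word_characters.contains(&c) {
        return CharKind::Word;
    }
    CharKind::Punctuation
}

fn main() {
    let extras: HashSet<char> = ['-'].into_iter().collect();
    assert_eq!(classify(&extras, '-'), CharKind::Word);
    assert_eq!(classify(&extras, '.'), CharKind::Punctuation);
    assert_eq!(classify(&extras, ' '), CharKind::Whitespace);
}
```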
/// Find all of the ranges of whitespace that occur at the ends of lines


@@ -1631,7 +1631,7 @@ fn test_autoindent_query_with_outdent_captures(cx: &mut AppContext) {
}
#[gpui::test]
fn test_language_scope_at(cx: &mut AppContext) {
fn test_language_scope_at_with_javascript(cx: &mut AppContext) {
init_settings(cx, |_| {});
cx.add_model(|cx| {
@@ -1718,6 +1718,73 @@ fn test_language_scope_at(cx: &mut AppContext) {
});
}
#[gpui::test]
fn test_language_scope_at_with_rust(cx: &mut AppContext) {
init_settings(cx, |_| {});
cx.add_model(|cx| {
let language = Language::new(
LanguageConfig {
name: "Rust".into(),
brackets: BracketPairConfig {
pairs: vec![
BracketPair {
start: "{".into(),
end: "}".into(),
close: true,
newline: false,
},
BracketPair {
start: "'".into(),
end: "'".into(),
close: true,
newline: false,
},
],
disabled_scopes_by_bracket_ix: vec![
Vec::new(), //
vec!["string".into()],
],
},
..Default::default()
},
Some(tree_sitter_rust::language()),
)
.with_override_query(
r#"
(string_literal) @string
"#,
)
.unwrap();
let text = r#"
const S: &'static str = "hello";
"#
.unindent();
let buffer = Buffer::new(0, text.clone(), cx).with_language(Arc::new(language), cx);
let snapshot = buffer.snapshot();
// By default, all brackets are enabled
let config = snapshot.language_scope_at(0).unwrap();
assert_eq!(
config.brackets().map(|e| e.1).collect::<Vec<_>>(),
&[true, true]
);
// Within a string, the quotation brackets are disabled.
let string_config = snapshot
.language_scope_at(text.find("ello").unwrap())
.unwrap();
assert_eq!(
string_config.brackets().map(|e| e.1).collect::<Vec<_>>(),
&[true, false]
);
buffer
});
}
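A hedged sketch of how a caller might consult these per-scope flags before auto-closing a typed bracket; `should_auto_close` is illustrative and assumes `brackets()` yields `(&BracketPair, bool)` pairs, as the assertions above suggest:

```rust
// Illustrative helper, not the editor's real auto-close path.
fn should_auto_close(snapshot: &BufferSnapshot, offset: usize, typed: &str) -> bool {
    snapshot.language_scope_at(offset).map_or(false, |scope| {
        scope
            .brackets()
            .any(|(pair, enabled)| enabled && pair.start == typed)
    })
}
```

With the buffer from this test, such a check would keep `{` enabled inside the string literal while declining to auto-close `'`.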
#[gpui::test]
fn test_language_scope_at_with_combined_injections(cx: &mut AppContext) {
init_settings(cx, |_| {});


@@ -11,7 +11,7 @@ mod buffer_tests;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use collections::HashMap;
use collections::{HashMap, HashSet};
use futures::{
channel::oneshot,
future::{BoxFuture, Shared},
@@ -344,6 +344,8 @@ pub struct LanguageConfig {
pub block_comment: Option<(Arc<str>, Arc<str>)>,
#[serde(default)]
pub overrides: HashMap<String, LanguageConfigOverride>,
#[serde(default)]
pub word_characters: HashSet<char>,
}
#[derive(Debug, Default)]
@@ -411,6 +413,7 @@ impl Default for LanguageConfig {
block_comment: Default::default(),
overrides: Default::default(),
collapsed_placeholder: Default::default(),
word_characters: Default::default(),
}
}
}
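The new field is opt-in: `#[serde(default)]` leaves existing language configs untouched, and only languages that want extra word characters need to populate the set. A small illustrative construction, assuming `LanguageConfig` from this crate is in scope (the name and characters are made up for the example):

```rust
// Illustrative only: treat '-' and '#' as word characters for some hypothetical
// language; every other field keeps its Default value.
let config = LanguageConfig {
    name: "ExampleLang".into(),
    word_characters: ['-', '#'].into_iter().collect(),
    ..Default::default()
};
assert!(config.word_characters.contains(&'-'));
```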


@@ -207,6 +207,7 @@ pub fn serialize_anchor(anchor: &Anchor) -> proto::Anchor {
}
}
// This behavior is currently copied in the collab database, for snapshotting channel notes
pub fn deserialize_operation(message: proto::Operation) -> Result<crate::Operation> {
Ok(
match message


@@ -72,7 +72,7 @@ pub struct SyntaxMapMatch<'a> {
struct SyntaxMapCapturesLayer<'a> {
depth: usize,
captures: QueryCaptures<'a, 'a, TextProvider<'a>>,
captures: QueryCaptures<'a, 'a, TextProvider<'a>, &'a [u8]>,
next_capture: Option<QueryCapture<'a>>,
grammar_index: usize,
_query_cursor: QueryCursorHandle,
@@ -83,7 +83,7 @@ struct SyntaxMapMatchesLayer<'a> {
next_pattern_index: usize,
next_captures: Vec<QueryCapture<'a>>,
has_next: bool,
matches: QueryMatches<'a, 'a, TextProvider<'a>>,
matches: QueryMatches<'a, 'a, TextProvider<'a>, &'a [u8]>,
grammar_index: usize,
_query_cursor: QueryCursorHandle,
}
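The extra `&'a [u8]` parameter tracks the newer tree-sitter API, where `TextProvider` is generic over the chunk type it yields rather than over a single lifetime. A minimal sketch of an implementation under that assumption (a real provider, like the crate's `ByteChunks`, streams rope chunks covering the node's byte range):

```rust
use tree_sitter::{Node, TextProvider};

// Minimal sketch: serve the whole buffer as a single chunk.
struct SingleChunk<'a>(&'a [u8]);

impl<'a> TextProvider<&'a [u8]> for SingleChunk<'a> {
    type I = std::iter::Once<&'a [u8]>;

    fn text(&mut self, _node: Node) -> Self::I {
        std::iter::once(self.0)
    }
}
```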
@@ -1279,7 +1279,9 @@ fn get_injections(
}
for (language, mut included_ranges) in combined_injection_ranges.drain() {
included_ranges.sort_unstable();
included_ranges.sort_unstable_by(|a, b| {
Ord::cmp(&a.start_byte, &b.start_byte).then_with(|| Ord::cmp(&a.end_byte, &b.end_byte))
});
queue.push(ParseStep {
depth,
language: ParseStepLanguage::Loaded { language },
@@ -1697,7 +1699,7 @@ impl std::fmt::Debug for SyntaxLayer {
}
}
impl<'a> tree_sitter::TextProvider<'a> for TextProvider<'a> {
impl<'a> tree_sitter::TextProvider<&'a [u8]> for TextProvider<'a> {
type I = ByteChunks<'a>;
fn text(&mut self, node: tree_sitter::Node) -> Self::I {