From a40ee74a1f45cc3ada75bff90ad5a71c8dbff6e3 Mon Sep 17 00:00:00 2001 From: Ben Kunkle Date: Fri, 6 Jun 2025 09:47:28 -0500 Subject: [PATCH] Improve handling of `injection.combined` injections in `SyntaxSnapshot::layers_for_range` (#32145) Closes #27596 The problem in this case was incorrect identification of which language (layer) contains the selection. Language layer selection incorrectly assumed that the deepest `SyntaxLayer` containing a range was the most specific. This worked for Markdown (base document + injected subtrees) but failed for PHP, where `injection.combined` injections are used to make HTML logically function as the base layer, despite being at a greater depth in the layer stack. This caused HTML to be incorrectly identified as the most specific language for PHP ranges. The solution is to track included sub-ranges for syntax layers and filter out layers that don't contain a sub-range covering the desired range. The top-level layer is never filtered to ensure gaps between sibling nodes always have a fallback language, as the top-level layer is likely more correct than the default language settings. Release Notes: - Fixed an issue in PHP where PHP language settings would be occasionally overridden by HTML language settings --- crates/language/src/buffer.rs | 21 ++++++++++++++++++++ crates/language/src/syntax_map.rs | 33 ++++++++++++++++++++++++++----- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 656ef5bfd5..93c46efd7f 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -1387,9 +1387,30 @@ impl Buffer { /// Returns the [`Language`] at the given location. 
pub fn language_at<D: ToOffset>(&self, position: D) -> Option<Arc<Language>> { let offset = position.to_offset(self); + let mut is_first = true; + let start_anchor = self.anchor_before(offset); + let end_anchor = self.anchor_after(offset); self.syntax_map .lock() .layers_for_range(offset..offset, &self.text, false) + .filter(|layer| { + if is_first { + is_first = false; + return true; + } + let any_sub_ranges_contain_range = layer + .included_sub_ranges + .map(|sub_ranges| { + sub_ranges.iter().any(|sub_range| { + let is_before_start = sub_range.end.cmp(&start_anchor, self).is_lt(); + let is_after_end = sub_range.start.cmp(&end_anchor, self).is_gt(); + !is_before_start && !is_after_end + }) + }) + .unwrap_or(true); + let result = any_sub_ranges_contain_range; + return result; + }) .last() .map(|info| info.language.clone()) .or_else(|| self.language.clone()) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 14d9611140..0d131301cc 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -94,6 +94,7 @@ enum SyntaxLayerContent { Parsed { tree: tree_sitter::Tree, language: Arc<Language>, + included_sub_ranges: Option<Vec<Range<Anchor>>>, }, Pending { language_name: Arc<str>, @@ -122,6 +123,7 @@ impl SyntaxLayerContent { pub struct SyntaxLayer<'a> { /// The language for this layer. 
pub language: &'a Arc<Language>, + pub included_sub_ranges: Option<&'a [Range<Anchor>]>, pub(crate) depth: usize, tree: &'a Tree, pub(crate) offset: (usize, tree_sitter::Point), @@ -621,7 +623,7 @@ impl SyntaxSnapshot { grammar, text.as_rope(), step_start_byte, - included_ranges, + &included_ranges, Some(old_tree.clone()), ); match result { @@ -674,7 +676,7 @@ impl SyntaxSnapshot { grammar, text.as_rope(), step_start_byte, - included_ranges, + &included_ranges, None, ); match result { @@ -717,7 +719,21 @@ impl SyntaxSnapshot { ); } - SyntaxLayerContent::Parsed { tree, language } + let included_sub_ranges: Option<Vec<Range<Anchor>>> = + (included_ranges.len() > 1).then_some( + included_ranges + .into_iter() + .map(|r| { + text.anchor_before(r.start_byte + step_start_byte) + ..text.anchor_after(r.end_byte + step_start_byte) + }) + .collect(), + ); + SyntaxLayerContent::Parsed { + tree, + language, + included_sub_ranges, + } } ParseStepLanguage::Pending { name } => SyntaxLayerContent::Pending { language_name: name, @@ -783,6 +799,7 @@ impl SyntaxSnapshot { [SyntaxLayer { language, tree, + included_sub_ranges: None, depth: 0, offset: (0, tree_sitter::Point::new(0, 0)), }] @@ -867,13 +884,19 @@ impl SyntaxSnapshot { iter::from_fn(move || { while let Some(layer) = cursor.item() { let mut info = None; - if let SyntaxLayerContent::Parsed { tree, language } = &layer.content { + if let SyntaxLayerContent::Parsed { + tree, + language, + included_sub_ranges, + } = &layer.content + { let layer_start_offset = layer.range.start.to_offset(buffer); let layer_start_point = layer.range.start.to_point(buffer).to_ts_point(); if include_hidden || !language.config.hidden { info = Some(SyntaxLayer { tree, language, + included_sub_ranges: included_sub_ranges.as_deref(), depth: layer.depth, offset: (layer_start_offset, layer_start_point), }); @@ -1231,7 +1254,7 @@ fn parse_text( grammar: &Grammar, text: &Rope, start_byte: usize, - ranges: Vec<tree_sitter::Range>, + ranges: &[tree_sitter::Range], old_tree: Option<Tree>, ) -> anyhow::Result<Tree> { 
with_parser(|parser| {