Improve handling of injection.combined injections in SyntaxSnapshot::layers_for_range (#32145)

Closes #27596

The problem in this case was incorrect identification of which language
(layer) contains the selection.

Language layer selection incorrectly assumed that the deepest
`SyntaxLayer` containing a range was the most specific. This worked for
Markdown (base document + injected subtrees) but failed for PHP, where
`injection.combined` injections are used to make HTML logically function
as the base layer, despite being at a greater depth in the layer stack.
This caused HTML to be incorrectly identified as the most specific
language for PHP ranges.

The solution is to track the included sub-ranges of each syntax layer and
filter out layers that have no sub-range covering the desired range. The
top-level layer is never filtered out, so that gaps between sibling nodes
always have a fallback language; in those gaps the top-level layer is
likely more correct than the default language settings.

Release Notes:

- Fixed an issue in PHP files where PHP language settings were
occasionally overridden by HTML language settings
This commit is contained in:
Ben Kunkle 2025-06-06 09:47:28 -05:00 committed by GitHub
parent 2e883be4b5
commit a40ee74a1f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 49 additions and 5 deletions

View file

@ -1387,9 +1387,30 @@ impl Buffer {
/// Returns the [`Language`] at the given location.
pub fn language_at<D: ToOffset>(&self, position: D) -> Option<Arc<Language>> {
let offset = position.to_offset(self);
let mut is_first = true;
let start_anchor = self.anchor_before(offset);
let end_anchor = self.anchor_after(offset);
self.syntax_map
.lock()
.layers_for_range(offset..offset, &self.text, false)
.filter(|layer| {
if is_first {
is_first = false;
return true;
}
let any_sub_ranges_contain_range = layer
.included_sub_ranges
.map(|sub_ranges| {
sub_ranges.iter().any(|sub_range| {
let is_before_start = sub_range.end.cmp(&start_anchor, self).is_lt();
let is_after_end = sub_range.start.cmp(&end_anchor, self).is_gt();
!is_before_start && !is_after_end
})
})
.unwrap_or(true);
let result = any_sub_ranges_contain_range;
return result;
})
.last()
.map(|info| info.language.clone())
.or_else(|| self.language.clone())

View file

@ -94,6 +94,7 @@ enum SyntaxLayerContent {
Parsed {
tree: tree_sitter::Tree,
language: Arc<Language>,
included_sub_ranges: Option<Vec<Range<Anchor>>>,
},
Pending {
language_name: Arc<str>,
@ -122,6 +123,7 @@ impl SyntaxLayerContent {
pub struct SyntaxLayer<'a> {
/// The language for this layer.
pub language: &'a Arc<Language>,
pub included_sub_ranges: Option<&'a [Range<Anchor>]>,
pub(crate) depth: usize,
tree: &'a Tree,
pub(crate) offset: (usize, tree_sitter::Point),
@ -621,7 +623,7 @@ impl SyntaxSnapshot {
grammar,
text.as_rope(),
step_start_byte,
included_ranges,
&included_ranges,
Some(old_tree.clone()),
);
match result {
@ -674,7 +676,7 @@ impl SyntaxSnapshot {
grammar,
text.as_rope(),
step_start_byte,
included_ranges,
&included_ranges,
None,
);
match result {
@ -717,7 +719,21 @@ impl SyntaxSnapshot {
);
}
SyntaxLayerContent::Parsed { tree, language }
let included_sub_ranges: Option<Vec<Range<Anchor>>> =
(included_ranges.len() > 1).then_some(
included_ranges
.into_iter()
.map(|r| {
text.anchor_before(r.start_byte + step_start_byte)
..text.anchor_after(r.end_byte + step_start_byte)
})
.collect(),
);
SyntaxLayerContent::Parsed {
tree,
language,
included_sub_ranges,
}
}
ParseStepLanguage::Pending { name } => SyntaxLayerContent::Pending {
language_name: name,
@ -783,6 +799,7 @@ impl SyntaxSnapshot {
[SyntaxLayer {
language,
tree,
included_sub_ranges: None,
depth: 0,
offset: (0, tree_sitter::Point::new(0, 0)),
}]
@ -867,13 +884,19 @@ impl SyntaxSnapshot {
iter::from_fn(move || {
while let Some(layer) = cursor.item() {
let mut info = None;
if let SyntaxLayerContent::Parsed { tree, language } = &layer.content {
if let SyntaxLayerContent::Parsed {
tree,
language,
included_sub_ranges,
} = &layer.content
{
let layer_start_offset = layer.range.start.to_offset(buffer);
let layer_start_point = layer.range.start.to_point(buffer).to_ts_point();
if include_hidden || !language.config.hidden {
info = Some(SyntaxLayer {
tree,
language,
included_sub_ranges: included_sub_ranges.as_deref(),
depth: layer.depth,
offset: (layer_start_offset, layer_start_point),
});
@ -1231,7 +1254,7 @@ fn parse_text(
grammar: &Grammar,
text: &Rope,
start_byte: usize,
ranges: Vec<tree_sitter::Range>,
ranges: &[tree_sitter::Range],
old_tree: Option<Tree>,
) -> anyhow::Result<Tree> {
with_parser(|parser| {