Add a test demonstrating ERB language loading bug (#32278)

Fixes https://github.com/zed-industries/zed/issues/12174

Release Notes:

- Fixed a bug where ERB files were not parsed correctly when the
languages were initially loaded.
This commit is contained in:
Max Brunsfeld 2025-06-10 21:03:42 -07:00 committed by GitHub
parent ad206a6a97
commit 72de3143c8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 145 additions and 18 deletions

View file

@ -698,6 +698,8 @@ codegen-units = 16
[profile.dev.package]
taffy = { opt-level = 3 }
cranelift-codegen = { opt-level = 3 }
cranelift-codegen-meta = { opt-level = 3 }
cranelift-codegen-shared = { opt-level = 3 }
resvg = { opt-level = 3 }
rustybuzz = { opt-level = 3 }
ttf-parser = { opt-level = 3 }

View file

@ -39,7 +39,7 @@ use util::{ResultExt, maybe, post_inc};
#[derive(
Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, JsonSchema,
)]
pub struct LanguageName(SharedString);
pub struct LanguageName(pub SharedString);
impl LanguageName {
pub fn new(s: &str) -> Self {
@ -1000,6 +1000,7 @@ impl LanguageRegistry {
txs.push(tx);
}
AvailableGrammar::Unloaded(wasm_path) => {
log::trace!("start loading grammar {name:?}");
let this = self.clone();
let wasm_path = wasm_path.clone();
*grammar = AvailableGrammar::Loading(wasm_path.clone(), vec![tx]);
@ -1025,6 +1026,7 @@ impl LanguageRegistry {
Err(error) => AvailableGrammar::LoadFailed(error.clone()),
};
log::trace!("finish loading grammar {name:?}");
let old_value = this.state.write().grammars.insert(name, value);
if let Some(AvailableGrammar::Loading(_, txs)) = old_value {
for tx in txs {

View file

@ -7,6 +7,7 @@ use crate::{
use anyhow::Context as _;
use collections::HashMap;
use futures::FutureExt;
use gpui::SharedString;
use std::{
borrow::Cow,
cmp::{self, Ordering, Reverse},
@ -183,6 +184,13 @@ enum ParseStepLanguage {
}
impl ParseStepLanguage {
fn name(&self) -> SharedString {
match self {
ParseStepLanguage::Loaded { language } => language.name().0,
ParseStepLanguage::Pending { name } => name.into(),
}
}
fn id(&self) -> Option<LanguageId> {
match self {
ParseStepLanguage::Loaded { language } => Some(language.id),
@ -415,7 +423,9 @@ impl SyntaxSnapshot {
.and_then(|language| language.ok())
.is_some()
{
resolved_injection_ranges.push(layer.range.to_offset(text));
let range = layer.range.to_offset(text);
log::trace!("reparse range {range:?} for language {language_name:?}");
resolved_injection_ranges.push(range);
}
cursor.next(text);
@ -442,7 +452,10 @@ impl SyntaxSnapshot {
invalidated_ranges: Vec<Range<usize>>,
registry: Option<&Arc<LanguageRegistry>>,
) {
log::trace!("reparse. invalidated ranges:{:?}", invalidated_ranges);
log::trace!(
"reparse. invalidated ranges:{:?}",
LogOffsetRanges(&invalidated_ranges, text),
);
let max_depth = self.layers.summary().max_depth;
let mut cursor = self.layers.cursor::<SyntaxLayerSummary>(text);
@ -470,6 +483,13 @@ impl SyntaxSnapshot {
loop {
let step = queue.pop();
let position = if let Some(step) = &step {
log::trace!(
"parse step depth:{}, range:{:?}, language:{} ({:?})",
step.depth,
LogAnchorRange(&step.range, text),
step.language.name(),
step.language.id(),
);
SyntaxLayerPosition {
depth: step.depth,
range: step.range.clone(),
@ -568,13 +588,13 @@ impl SyntaxSnapshot {
.to_ts_point();
}
if let Some((SyntaxLayerContent::Parsed { tree: old_tree, .. }, layer_start)) =
old_layer.map(|layer| (&layer.content, layer.range.start))
if let Some((SyntaxLayerContent::Parsed { tree: old_tree, .. }, layer_range)) =
old_layer.map(|layer| (&layer.content, layer.range.clone()))
{
log::trace!(
"existing layer. language:{}, start:{:?}, ranges:{:?}",
"existing layer. language:{}, range:{:?}, included_ranges:{:?}",
language.name(),
LogPoint(layer_start.to_point(text)),
LogAnchorRange(&layer_range, text),
LogIncludedRanges(&old_tree.included_ranges())
);
@ -613,7 +633,7 @@ impl SyntaxSnapshot {
}
log::trace!(
"update layer. language:{}, start:{:?}, included_ranges:{:?}",
"update layer. language:{}, range:{:?}, included_ranges:{:?}",
language.name(),
LogAnchorRange(&step.range, text),
LogIncludedRanges(&included_ranges),
@ -761,28 +781,36 @@ impl SyntaxSnapshot {
#[cfg(debug_assertions)]
fn check_invariants(&self, text: &BufferSnapshot) {
let mut max_depth = 0;
let mut prev_range: Option<Range<Anchor>> = None;
let mut prev_layer: Option<(Range<Anchor>, Option<LanguageId>)> = None;
for layer in self.layers.iter() {
match Ord::cmp(&layer.depth, &max_depth) {
Ordering::Less => {
panic!("layers out of order")
}
Ordering::Equal => {
if let Some(prev_range) = prev_range {
if let Some((prev_range, prev_language_id)) = prev_layer {
match layer.range.start.cmp(&prev_range.start, text) {
Ordering::Less => panic!("layers out of order"),
Ordering::Equal => {
assert!(layer.range.end.cmp(&prev_range.end, text).is_ge())
}
Ordering::Equal => match layer.range.end.cmp(&prev_range.end, text) {
Ordering::Less => panic!("layers out of order"),
Ordering::Equal => {
if layer.content.language_id() < prev_language_id {
panic!("layers out of order")
}
}
Ordering::Greater => {}
},
Ordering::Greater => {}
}
}
prev_layer = Some((layer.range.clone(), layer.content.language_id()));
}
Ordering::Greater => {
prev_layer = None;
}
Ordering::Greater => {}
}
max_depth = layer.depth;
prev_range = Some(layer.range.clone());
}
}
@ -1642,7 +1670,7 @@ impl Ord for ParseStep {
Ord::cmp(&other.depth, &self.depth)
.then_with(|| Ord::cmp(&range_b.start, &range_a.start))
.then_with(|| Ord::cmp(&range_a.end, &range_b.end))
.then_with(|| self.language.id().cmp(&other.language.id()))
.then_with(|| other.language.id().cmp(&self.language.id()))
}
}
@ -1888,6 +1916,7 @@ impl ToTreeSitterPoint for Point {
struct LogIncludedRanges<'a>(&'a [tree_sitter::Range]);
struct LogPoint(Point);
struct LogAnchorRange<'a>(&'a Range<Anchor>, &'a text::BufferSnapshot);
struct LogOffsetRanges<'a>(&'a [Range<usize>], &'a text::BufferSnapshot);
struct LogChangedRegions<'a>(&'a ChangeRegionSet, &'a text::BufferSnapshot);
impl fmt::Debug for LogIncludedRanges<'_> {
@ -1909,6 +1938,16 @@ impl fmt::Debug for LogAnchorRange<'_> {
}
}
impl fmt::Debug for LogOffsetRanges<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_list()
.entries(self.0.iter().map(|range| {
LogPoint(range.start.to_point(self.1))..LogPoint(range.end.to_point(self.1))
}))
.finish()
}
}
impl fmt::Debug for LogChangedRegions<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_list()

View file

@ -788,15 +788,99 @@ fn test_empty_combined_injections_inside_injections(cx: &mut App) {
"(template...",
// Markdown inline content
"(inline)",
// HTML within the ERB
"(document (text))",
// The ruby syntax tree should be empty, since there are
// no interpolations in the ERB template.
"(program)",
// HTML within the ERB
"(document (text))",
],
);
}
#[gpui::test]
fn test_syntax_map_languages_loading_with_erb(cx: &mut App) {
let text = r#"
<body>
<% if @one %>
<div class=one>
<% else %>
<div class=two>
<% end %>
</div>
</body>
"#
.unindent();
let registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone()));
let mut buffer = Buffer::new(0, BufferId::new(1).unwrap(), text);
let mut syntax_map = SyntaxMap::new(&buffer);
syntax_map.set_language_registry(registry.clone());
let language = Arc::new(erb_lang());
log::info!("parsing");
registry.add(language.clone());
syntax_map.reparse(language.clone(), &buffer);
log::info!("loading html");
registry.add(Arc::new(html_lang()));
syntax_map.reparse(language.clone(), &buffer);
log::info!("loading ruby");
registry.add(Arc::new(ruby_lang()));
syntax_map.reparse(language.clone(), &buffer);
assert_capture_ranges(
&syntax_map,
&buffer,
&["tag", "ivar"],
"
<«body»>
<% if «@one» %>
<«div» class=one>
<% else %>
<«div» class=two>
<% end %>
</«div»>
</«body»>
",
);
let text = r#"
<body>
<% if @one«_hundred» %>
<div class=one>
<% else %>
<div class=two>
<% end %>
</div>
</body>
"#
.unindent();
log::info!("editing");
buffer.edit_via_marked_text(&text);
syntax_map.interpolate(&buffer);
syntax_map.reparse(language.clone(), &buffer);
assert_capture_ranges(
&syntax_map,
&buffer,
&["tag", "ivar"],
"
<«body»>
<% if «@one_hundred» %>
<«div» class=one>
<% else %>
<«div» class=two>
<% end %>
</«div»>
</«body»>
",
);
}
#[gpui::test(iterations = 50)]
fn test_random_syntax_map_edits_rust_macros(rng: StdRng, cx: &mut App) {
let text = r#"