From 72de3143c8e6d109d41330244facf3f8e46a46c7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 10 Jun 2025 21:03:42 -0700 Subject: [PATCH] Add a test demonstrating ERB language loading bug (#32278) Fixes https://github.com/zed-industries/zed/issues/12174 Release Notes: - Fixed a bug where ERB files were not parsed correctly when the languages were initially loaded. --- Cargo.toml | 2 + crates/language/src/language_registry.rs | 4 +- crates/language/src/syntax_map.rs | 69 +++++++++++---- .../src/syntax_map/syntax_map_tests.rs | 88 ++++++++++++++++++- 4 files changed, 145 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5d866ee19f..4789e5278f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -698,6 +698,8 @@ codegen-units = 16 [profile.dev.package] taffy = { opt-level = 3 } cranelift-codegen = { opt-level = 3 } +cranelift-codegen-meta = { opt-level = 3 } +cranelift-codegen-shared = { opt-level = 3 } resvg = { opt-level = 3 } rustybuzz = { opt-level = 3 } ttf-parser = { opt-level = 3 } diff --git a/crates/language/src/language_registry.rs b/crates/language/src/language_registry.rs index a860a75fbb..6a515e0166 100644 --- a/crates/language/src/language_registry.rs +++ b/crates/language/src/language_registry.rs @@ -39,7 +39,7 @@ use util::{ResultExt, maybe, post_inc}; #[derive( Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, JsonSchema, )] -pub struct LanguageName(SharedString); +pub struct LanguageName(pub SharedString); impl LanguageName { pub fn new(s: &str) -> Self { @@ -1000,6 +1000,7 @@ impl LanguageRegistry { txs.push(tx); } AvailableGrammar::Unloaded(wasm_path) => { + log::trace!("start loading grammar {name:?}"); let this = self.clone(); let wasm_path = wasm_path.clone(); *grammar = AvailableGrammar::Loading(wasm_path.clone(), vec![tx]); @@ -1025,6 +1026,7 @@ impl LanguageRegistry { Err(error) => AvailableGrammar::LoadFailed(error.clone()), }; + log::trace!("finish loading grammar {name:?}"); let old_value = this.state.write().grammars.insert(name, value); if let Some(AvailableGrammar::Loading(_, txs)) = old_value { for tx in txs { diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index a61bc1c90f..2dd00bb741 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -7,6 +7,7 @@ use crate::{ use anyhow::Context as _; use collections::HashMap; use futures::FutureExt; +use gpui::SharedString; use std::{ borrow::Cow, cmp::{self, Ordering, Reverse}, @@ -183,6 +184,13 @@ enum ParseStepLanguage { } impl ParseStepLanguage { + fn name(&self) -> SharedString { + match self { + ParseStepLanguage::Loaded { language } => language.name().0, + ParseStepLanguage::Pending { name } => name.into(), + } + } + fn id(&self) -> Option { match self { ParseStepLanguage::Loaded { language } => Some(language.id), @@ -415,7 +423,9 @@ impl SyntaxSnapshot { .and_then(|language| language.ok()) .is_some() { - resolved_injection_ranges.push(layer.range.to_offset(text)); + let range = layer.range.to_offset(text); + log::trace!("reparse range {range:?} for language {language_name:?}"); + resolved_injection_ranges.push(range); } cursor.next(text); @@ -442,7 +452,10 @@ impl SyntaxSnapshot { invalidated_ranges: Vec>, registry: Option<&Arc>, ) { - log::trace!("reparse. invalidated ranges:{:?}", invalidated_ranges); + log::trace!( + "reparse. invalidated ranges:{:?}", + LogOffsetRanges(&invalidated_ranges, text), + ); let max_depth = self.layers.summary().max_depth; let mut cursor = self.layers.cursor::(text); @@ -470,6 +483,13 @@ impl SyntaxSnapshot { loop { let step = queue.pop(); let position = if let Some(step) = &step { + log::trace!( + "parse step depth:{}, range:{:?}, language:{} ({:?})", + step.depth, + LogAnchorRange(&step.range, text), + step.language.name(), + step.language.id(), + ); SyntaxLayerPosition { depth: step.depth, range: step.range.clone(), @@ -568,13 +588,13 @@ impl SyntaxSnapshot { .to_ts_point(); } - if let Some((SyntaxLayerContent::Parsed { tree: old_tree, .. }, layer_start)) = - old_layer.map(|layer| (&layer.content, layer.range.start)) + if let Some((SyntaxLayerContent::Parsed { tree: old_tree, .. }, layer_range)) = + old_layer.map(|layer| (&layer.content, layer.range.clone())) { log::trace!( - "existing layer. language:{}, start:{:?}, ranges:{:?}", + "existing layer. language:{}, range:{:?}, included_ranges:{:?}", language.name(), - LogPoint(layer_start.to_point(text)), + LogAnchorRange(&layer_range, text), LogIncludedRanges(&old_tree.included_ranges()) ); @@ -613,7 +633,7 @@ impl SyntaxSnapshot { } log::trace!( - "update layer. language:{}, start:{:?}, included_ranges:{:?}", + "update layer. language:{}, range:{:?}, included_ranges:{:?}", language.name(), LogAnchorRange(&step.range, text), LogIncludedRanges(&included_ranges), @@ -761,28 +781,36 @@ impl SyntaxSnapshot { #[cfg(debug_assertions)] fn check_invariants(&self, text: &BufferSnapshot) { let mut max_depth = 0; - let mut prev_range: Option> = None; + let mut prev_layer: Option<(Range, Option)> = None; for layer in self.layers.iter() { match Ord::cmp(&layer.depth, &max_depth) { Ordering::Less => { panic!("layers out of order") } Ordering::Equal => { - if let Some(prev_range) = prev_range { + if let Some((prev_range, prev_language_id)) = prev_layer { match layer.range.start.cmp(&prev_range.start, text) { Ordering::Less => panic!("layers out of order"), - Ordering::Equal => { - assert!(layer.range.end.cmp(&prev_range.end, text).is_ge()) - } + Ordering::Equal => match layer.range.end.cmp(&prev_range.end, text) { + Ordering::Less => panic!("layers out of order"), + Ordering::Equal => { + if layer.content.language_id() < prev_language_id { + panic!("layers out of order") + } + } + Ordering::Greater => {} + }, Ordering::Greater => {} } } + prev_layer = Some((layer.range.clone(), layer.content.language_id())); + } + Ordering::Greater => { + prev_layer = None; } - Ordering::Greater => {} } max_depth = layer.depth; - prev_range = Some(layer.range.clone()); } } @@ -1642,7 +1670,7 @@ impl Ord for ParseStep { Ord::cmp(&other.depth, &self.depth) .then_with(|| Ord::cmp(&range_b.start, &range_a.start)) .then_with(|| Ord::cmp(&range_a.end, &range_b.end)) - .then_with(|| self.language.id().cmp(&other.language.id())) + .then_with(|| other.language.id().cmp(&self.language.id())) } } @@ -1888,6 +1916,7 @@ impl ToTreeSitterPoint for Point { struct LogIncludedRanges<'a>(&'a [tree_sitter::Range]); struct LogPoint(Point); struct LogAnchorRange<'a>(&'a Range, &'a text::BufferSnapshot); +struct LogOffsetRanges<'a>(&'a [Range], &'a text::BufferSnapshot); struct LogChangedRegions<'a>(&'a ChangeRegionSet, &'a text::BufferSnapshot); impl fmt::Debug for LogIncludedRanges<'_> { @@ -1909,6 +1938,16 @@ impl fmt::Debug for LogAnchorRange<'_> { } } +impl fmt::Debug for LogOffsetRanges<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list() + .entries(self.0.iter().map(|range| { + LogPoint(range.start.to_point(self.1))..LogPoint(range.end.to_point(self.1)) + })) + .finish() + } +} + impl fmt::Debug for LogChangedRegions<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list() diff --git a/crates/language/src/syntax_map/syntax_map_tests.rs b/crates/language/src/syntax_map/syntax_map_tests.rs index a4b0be917e..d576c95cd5 100644 --- a/crates/language/src/syntax_map/syntax_map_tests.rs +++ b/crates/language/src/syntax_map/syntax_map_tests.rs @@ -788,15 +788,99 @@ fn test_empty_combined_injections_inside_injections(cx: &mut App) { "(template...", // Markdown inline content "(inline)", - // HTML within the ERB - "(document (text))", // The ruby syntax tree should be empty, since there are // no interpolations in the ERB template. "(program)", + // HTML within the ERB + "(document (text))", ], ); } +#[gpui::test] +fn test_syntax_map_languages_loading_with_erb(cx: &mut App) { + let text = r#" + + <% if @one %> +
+ <% else %> +
+ <% end %> +
+ + "# + .unindent(); + + let registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone())); + let mut buffer = Buffer::new(0, BufferId::new(1).unwrap(), text); + + let mut syntax_map = SyntaxMap::new(&buffer); + syntax_map.set_language_registry(registry.clone()); + + let language = Arc::new(erb_lang()); + + log::info!("parsing"); + registry.add(language.clone()); + syntax_map.reparse(language.clone(), &buffer); + + log::info!("loading html"); + registry.add(Arc::new(html_lang())); + syntax_map.reparse(language.clone(), &buffer); + + log::info!("loading ruby"); + registry.add(Arc::new(ruby_lang())); + syntax_map.reparse(language.clone(), &buffer); + + assert_capture_ranges( + &syntax_map, + &buffer, + &["tag", "ivar"], + " + <«body»> + <% if «@one» %> + <«div» class=one> + <% else %> + <«div» class=two> + <% end %> + + + ", + ); + + let text = r#" + + <% if @one«_hundred» %> +
+ <% else %> +
+ <% end %> +
+ + "# + .unindent(); + + log::info!("editing"); + buffer.edit_via_marked_text(&text); + syntax_map.interpolate(&buffer); + syntax_map.reparse(language.clone(), &buffer); + + assert_capture_ranges( + &syntax_map, + &buffer, + &["tag", "ivar"], + " + <«body»> + <% if «@one_hundred» %> + <«div» class=one> + <% else %> + <«div» class=two> + <% end %> + + + ", + ); +} + #[gpui::test(iterations = 50)] fn test_random_syntax_map_edits_rust_macros(rng: StdRng, cx: &mut App) { let text = r#"