From cb610f37f2dd0f6d449b1aa076e83a8fae808828 Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Fri, 20 Jan 2023 10:56:20 +0100 Subject: [PATCH] WIP: Search language injections also by file extension There are still a few things left: 1. Add a test to verify we can successfully locate a language by its extension. 2. Add a test to reproduce the bug where changing the fenced code block language won't reparse the block with the new language. 3. Reparse injections for which we couldn't find a language when the language registry changes. 4. Check why the markdown grammar considers the trailing triple backtick as part of `(code_fence_content)`, as opposed to being part of the outer markdown. --- Cargo.lock | 1 + crates/language/Cargo.toml | 3 ++- crates/language/src/language.rs | 15 +++++++++++++++ crates/language/src/syntax_map.rs | 27 +++++++++++++++++++++++++-- 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 174965952f..abdf8b8a55 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3153,6 +3153,7 @@ dependencies = [ "tree-sitter-html", "tree-sitter-javascript", "tree-sitter-json 0.19.0", + "tree-sitter-markdown", "tree-sitter-python", "tree-sitter-ruby", "tree-sitter-rust", diff --git a/crates/language/Cargo.toml b/crates/language/Cargo.toml index 62de0c4e44..64db58c847 100644 --- a/crates/language/Cargo.toml +++ b/crates/language/Cargo.toml @@ -66,12 +66,13 @@ util = { path = "../util", features = ["test-support"] } ctor = "0.1" env_logger = "0.9" rand = "0.8.3" +tree-sitter-embedded-template = "*" tree-sitter-html = "*" tree-sitter-javascript = "*" tree-sitter-json = "*" +tree-sitter-markdown = { git = "https://github.com/MDeiml/tree-sitter-markdown", rev = "330ecab87a3e3a7211ac69bbadc19eabecdb1cca" } tree-sitter-rust = "*" tree-sitter-python = "*" tree-sitter-typescript = "*" tree-sitter-ruby = "*" -tree-sitter-embedded-template = "*" unindent = "0.1.7" diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 
046076a48e..1ddd3e3939 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -476,6 +476,21 @@ impl LanguageRegistry { .cloned() } + pub fn language_for_extension(&self, extension: &str) -> Option> { + let extension = UniCase::new(extension); + self.languages + .read() + .iter() + .find(|language| { + language + .config + .path_suffixes + .iter() + .any(|suffix| UniCase::new(suffix) == extension) + }) + .cloned() + } + pub fn to_vec(&self) -> Vec> { self.languages.read().iter().cloned().collect() } diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 9ef4d82fd1..9707cf5471 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -1015,8 +1015,10 @@ fn get_injections( }); if let Some(language_name) = language_name { - if let Some(language) = language_registry.language_for_name(language_name.as_ref()) - { + let language = language_registry + .language_for_name(&language_name) + .or_else(|| language_registry.language_for_extension(&language_name)); + if let Some(language) = language { result = true; let range = text.anchor_before(content_range.start) ..text.anchor_after(content_range.end); @@ -2255,6 +2257,7 @@ mod tests { registry.add(Arc::new(ruby_lang())); registry.add(Arc::new(html_lang())); registry.add(Arc::new(erb_lang())); + registry.add(Arc::new(markdown_lang())); let language = registry.language_for_name(language_name).unwrap(); let mut buffer = Buffer::new(0, 0, Default::default()); @@ -2393,6 +2396,26 @@ mod tests { .unwrap() } + fn markdown_lang() -> Language { + Language::new( + LanguageConfig { + name: "Markdown".into(), + path_suffixes: vec!["md".into()], + ..Default::default() + }, + Some(tree_sitter_markdown::language()), + ) + .with_injection_query( + r#" + (fenced_code_block + (info_string + (language) @language) + (code_fence_content) @content) + "#, + ) + .unwrap() + } + fn range_for_text(buffer: &Buffer, text: &str) -> Range { let start = 
buffer.as_rope().to_string().find(text).unwrap(); start..start + text.len()