WIP: Search language injections also by file extension

There are still a few things left:

1. Add test to verify we can successfully locate a language by its extension
2. Add test to reproduce bug where changing the fenced code block language
   won't reparse the block with the new language
3. When the language registry changes, reparse the injections for which we
   previously couldn't find a language.
4. Check why the markdown grammar considers the trailing triple backtick as
   `(code_block_content)`, as opposed to being part of the outer markdown.
This commit is contained in:
Antonio Scandurra 2023-01-20 10:56:20 +01:00
parent 36e4dcef16
commit cb610f37f2
4 changed files with 43 additions and 3 deletions

1
Cargo.lock generated
View file

@ -3153,6 +3153,7 @@ dependencies = [
"tree-sitter-html",
"tree-sitter-javascript",
"tree-sitter-json 0.19.0",
"tree-sitter-markdown",
"tree-sitter-python",
"tree-sitter-ruby",
"tree-sitter-rust",

View file

@ -66,12 +66,13 @@ util = { path = "../util", features = ["test-support"] }
ctor = "0.1"
env_logger = "0.9"
rand = "0.8.3"
tree-sitter-embedded-template = "*"
tree-sitter-html = "*"
tree-sitter-javascript = "*"
tree-sitter-json = "*"
tree-sitter-markdown = { git = "https://github.com/MDeiml/tree-sitter-markdown", rev = "330ecab87a3e3a7211ac69bbadc19eabecdb1cca" }
tree-sitter-rust = "*"
tree-sitter-python = "*"
tree-sitter-typescript = "*"
tree-sitter-ruby = "*"
tree-sitter-embedded-template = "*"
unindent = "0.1.7"

View file

@ -476,6 +476,21 @@ impl LanguageRegistry {
.cloned()
}
/// Looks up a registered language by file extension.
///
/// Walks the registry in order and returns the first language whose
/// `config.path_suffixes` contains an entry equal to `extension`, where both
/// sides are wrapped in `UniCase` so the comparison ignores case. Returns
/// `None` when no registered language lists a matching suffix.
pub fn language_for_extension(&self, extension: &str) -> Option<Arc<Language>> {
    let wanted = UniCase::new(extension);
    let registered = self.languages.read();
    for candidate in registered.iter() {
        for suffix in candidate.config.path_suffixes.iter() {
            if UniCase::new(suffix) == wanted {
                // First match wins, mirroring registry order.
                return Some(candidate.clone());
            }
        }
    }
    None
}
/// Returns a freshly allocated `Vec` holding a clone of every
/// `Arc<Language>` found in `self.languages` at the time of the call.
pub fn to_vec(&self) -> Vec<Arc<Language>> {
    let registered = self.languages.read();
    registered.iter().map(Arc::clone).collect()
}

View file

@ -1015,8 +1015,10 @@ fn get_injections(
});
if let Some(language_name) = language_name {
if let Some(language) = language_registry.language_for_name(language_name.as_ref())
{
let language = language_registry
.language_for_name(&language_name)
.or_else(|| language_registry.language_for_extension(&language_name));
if let Some(language) = language {
result = true;
let range = text.anchor_before(content_range.start)
..text.anchor_after(content_range.end);
@ -2255,6 +2257,7 @@ mod tests {
registry.add(Arc::new(ruby_lang()));
registry.add(Arc::new(html_lang()));
registry.add(Arc::new(erb_lang()));
registry.add(Arc::new(markdown_lang()));
let language = registry.language_for_name(language_name).unwrap();
let mut buffer = Buffer::new(0, 0, Default::default());
@ -2393,6 +2396,26 @@ mod tests {
.unwrap()
}
/// Builds the Markdown language used by the tests in this module.
///
/// The language is named "Markdown", is associated with the `md` path suffix,
/// and uses the `tree_sitter_markdown` grammar. Its injection query captures a
/// fenced code block's info-string language as `@language` and the fence body
/// as `@content`. Panics if `with_injection_query` does not succeed, since the
/// result is unwrapped.
fn markdown_lang() -> Language {
    let config = LanguageConfig {
        name: "Markdown".into(),
        path_suffixes: vec!["md".into()],
        ..Default::default()
    };
    let grammar = Some(tree_sitter_markdown::language());
    let injection_query = r#"
(fenced_code_block
(info_string
(language) @language)
(code_fence_content) @content)
"#;

    Language::new(config, grammar)
        .with_injection_query(injection_query)
        .unwrap()
}
fn range_for_text(buffer: &Buffer, text: &str) -> Range<usize> {
let start = buffer.as_rope().to_string().find(text).unwrap();
start..start + text.len()