language: Select language based on longest matching path extension (#29716)
Closes #8408 Closes #10997 This is a reboot of [my original PR](https://github.com/zed-industries/zed/pull/11697) from last year. I believe that I've addressed all the comments raised in that original review, but Zed has changed a lot in the past year, so I'm sure there will be some new stuff to consider too. - updates the language matching and lookup to consider not just "does the suffix/glob match" but also "... and is it the longest such match" - adds a new `LanguageCustomFileTypes` struct to pass user globs from settings to the registry - _minor/unrelated:_ updates a test for the JS extension that wasn't actually testing what is intended to - _minor/unrelated:_ removed 2 redundant path extensions from the JS lang extension **Languages that may use this** - Laravel Blade templates use the `blade.php` compound extension - [apparently](https://github.com/zed-industries/zed/issues/10765#issuecomment-2091293304) Angular uses `component.html` - see also https://github.com/zed-industries/extensions/issues/169 - _hypothetically_ someone could publish a "JS test" extension w/ custom highlights and/or snippets; many JS tests use `test.js` or `spec.js` **Verifying these changes** I added a number of assertions for this new behavior, and I also confirmed that the (recently patched) [Laravel Blade extension](https://github.com/bajrangCoder/zed-laravel-blade) opens as expected for `blade.php` files, whereas on `main` it does not. cc @maxbrunsfeld (reviewed my original PR last year), @osiewicz and @MrSubidubi (have recently been in this part of the code) Release Notes: - Added support for "compound" file extensions in language extensions, such `blade.php` and `component.html`. Closes #8408 and #10997.
This commit is contained in:
parent
649072d140
commit
f4b361f04d
3 changed files with 177 additions and 52 deletions
|
@ -83,6 +83,17 @@ fn test_select_language(cx: &mut App) {
|
|||
},
|
||||
Some(tree_sitter_rust::LANGUAGE.into()),
|
||||
)));
|
||||
registry.add(Arc::new(Language::new(
|
||||
LanguageConfig {
|
||||
name: "Rust with longer extension".into(),
|
||||
matcher: LanguageMatcher {
|
||||
path_suffixes: vec!["longer.rs".to_string()],
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
},
|
||||
Some(tree_sitter_rust::LANGUAGE.into()),
|
||||
)));
|
||||
registry.add(Arc::new(Language::new(
|
||||
LanguageConfig {
|
||||
name: LanguageName::new("Make"),
|
||||
|
@ -109,6 +120,14 @@ fn test_select_language(cx: &mut App) {
|
|||
Some("Make".into())
|
||||
);
|
||||
|
||||
// matching longer, compound extension, part of which could also match another lang
|
||||
assert_eq!(
|
||||
registry
|
||||
.language_for_file(&file("src/lib.longer.rs"), None, cx)
|
||||
.map(|l| l.name()),
|
||||
Some("Rust with longer extension".into())
|
||||
);
|
||||
|
||||
// matching filename
|
||||
assert_eq!(
|
||||
registry
|
||||
|
@ -181,7 +200,11 @@ async fn test_language_for_file_with_custom_file_types(cx: &mut TestAppContext)
|
|||
init_settings(cx, |settings| {
|
||||
settings.file_types.extend([
|
||||
("TypeScript".into(), vec!["js".into()]),
|
||||
("C++".into(), vec!["c".into()]),
|
||||
(
|
||||
"JavaScript".into(),
|
||||
vec!["*longer.ts".into(), "ecmascript".into()],
|
||||
),
|
||||
("C++".into(), vec!["c".into(), "*.dev".into()]),
|
||||
(
|
||||
"Dockerfile".into(),
|
||||
vec!["Dockerfile".into(), "Dockerfile.*".into()],
|
||||
|
@ -204,7 +227,7 @@ async fn test_language_for_file_with_custom_file_types(cx: &mut TestAppContext)
|
|||
LanguageConfig {
|
||||
name: "TypeScript".into(),
|
||||
matcher: LanguageMatcher {
|
||||
path_suffixes: vec!["js".to_string()],
|
||||
path_suffixes: vec!["ts".to_string(), "ts.ecmascript".to_string()],
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
|
@ -237,6 +260,21 @@ async fn test_language_for_file_with_custom_file_types(cx: &mut TestAppContext)
|
|||
languages.add(Arc::new(Language::new(config, None)));
|
||||
}
|
||||
|
||||
// matches system-provided lang extension
|
||||
let language = cx
|
||||
.read(|cx| languages.language_for_file(&file("foo.ts"), None, cx))
|
||||
.unwrap();
|
||||
assert_eq!(language.name(), "TypeScript".into());
|
||||
let language = cx
|
||||
.read(|cx| languages.language_for_file(&file("foo.ts.ecmascript"), None, cx))
|
||||
.unwrap();
|
||||
assert_eq!(language.name(), "TypeScript".into());
|
||||
let language = cx
|
||||
.read(|cx| languages.language_for_file(&file("foo.cpp"), None, cx))
|
||||
.unwrap();
|
||||
assert_eq!(language.name(), "C++".into());
|
||||
|
||||
// user configured lang extension, same length as system-provided
|
||||
let language = cx
|
||||
.read(|cx| languages.language_for_file(&file("foo.js"), None, cx))
|
||||
.unwrap();
|
||||
|
@ -245,6 +283,25 @@ async fn test_language_for_file_with_custom_file_types(cx: &mut TestAppContext)
|
|||
.read(|cx| languages.language_for_file(&file("foo.c"), None, cx))
|
||||
.unwrap();
|
||||
assert_eq!(language.name(), "C++".into());
|
||||
|
||||
// user configured lang extension, longer than system-provided
|
||||
let language = cx
|
||||
.read(|cx| languages.language_for_file(&file("foo.longer.ts"), None, cx))
|
||||
.unwrap();
|
||||
assert_eq!(language.name(), "JavaScript".into());
|
||||
|
||||
// user configured lang extension, shorter than system-provided
|
||||
let language = cx
|
||||
.read(|cx| languages.language_for_file(&file("foo.ecmascript"), None, cx))
|
||||
.unwrap();
|
||||
assert_eq!(language.name(), "JavaScript".into());
|
||||
|
||||
// user configured glob matches
|
||||
let language = cx
|
||||
.read(|cx| languages.language_for_file(&file("c-plus-plus.dev"), None, cx))
|
||||
.unwrap();
|
||||
assert_eq!(language.name(), "C++".into());
|
||||
// should match Dockerfile.* => Dockerfile, not *.dev => C++
|
||||
let language = cx
|
||||
.read(|cx| languages.language_for_file(&file("Dockerfile.dev"), None, cx))
|
||||
.unwrap();
|
||||
|
|
|
@ -16,8 +16,6 @@ use futures::{
|
|||
};
|
||||
use globset::GlobSet;
|
||||
use gpui::{App, BackgroundExecutor, SharedString};
|
||||
use itertools::FoldWhile::{Continue, Done};
|
||||
use itertools::Itertools;
|
||||
use lsp::LanguageServerId;
|
||||
use parking_lot::{Mutex, RwLock};
|
||||
use postage::watch;
|
||||
|
@ -173,18 +171,12 @@ impl AvailableLanguage {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)]
|
||||
#[derive(Copy, Clone, Default)]
|
||||
enum LanguageMatchPrecedence {
|
||||
#[default]
|
||||
Undetermined,
|
||||
PathOrContent,
|
||||
UserConfigured,
|
||||
}
|
||||
|
||||
impl LanguageMatchPrecedence {
|
||||
fn best_possible_match(&self) -> bool {
|
||||
*self == LanguageMatchPrecedence::UserConfigured
|
||||
}
|
||||
PathOrContent(usize),
|
||||
UserConfigured(usize),
|
||||
}
|
||||
|
||||
enum AvailableGrammar {
|
||||
|
@ -626,9 +618,14 @@ impl LanguageRegistry {
|
|||
) -> impl Future<Output = Result<Arc<Language>>> + use<> {
|
||||
let name = UniCase::new(name);
|
||||
let rx = self.get_or_load_language(|language_name, _, current_best_match| {
|
||||
(current_best_match < LanguageMatchPrecedence::PathOrContent
|
||||
&& UniCase::new(&language_name.0) == name)
|
||||
.then_some(LanguageMatchPrecedence::PathOrContent)
|
||||
match current_best_match {
|
||||
LanguageMatchPrecedence::Undetermined if UniCase::new(&language_name.0) == name => {
|
||||
Some(LanguageMatchPrecedence::PathOrContent(name.len()))
|
||||
}
|
||||
LanguageMatchPrecedence::Undetermined
|
||||
| LanguageMatchPrecedence::UserConfigured(_)
|
||||
| LanguageMatchPrecedence::PathOrContent(_) => None,
|
||||
}
|
||||
});
|
||||
async move { rx.await? }
|
||||
}
|
||||
|
@ -655,13 +652,23 @@ impl LanguageRegistry {
|
|||
) -> impl Future<Output = Result<Arc<Language>>> {
|
||||
let string = UniCase::new(string);
|
||||
let rx = self.get_or_load_language(|name, config, current_best_match| {
|
||||
(current_best_match < LanguageMatchPrecedence::PathOrContent
|
||||
&& (UniCase::new(&name.0) == string
|
||||
let name_matches = || {
|
||||
UniCase::new(&name.0) == string
|
||||
|| config
|
||||
.path_suffixes
|
||||
.iter()
|
||||
.any(|suffix| UniCase::new(suffix) == string)))
|
||||
.then_some(LanguageMatchPrecedence::PathOrContent)
|
||||
.any(|suffix| UniCase::new(suffix) == string)
|
||||
};
|
||||
|
||||
match current_best_match {
|
||||
LanguageMatchPrecedence::Undetermined => {
|
||||
name_matches().then_some(LanguageMatchPrecedence::PathOrContent(string.len()))
|
||||
}
|
||||
LanguageMatchPrecedence::PathOrContent(len) => (string.len() > len
|
||||
&& name_matches())
|
||||
.then_some(LanguageMatchPrecedence::PathOrContent(string.len())),
|
||||
LanguageMatchPrecedence::UserConfigured(_) => None,
|
||||
}
|
||||
});
|
||||
async move { rx.await? }
|
||||
}
|
||||
|
@ -717,10 +724,9 @@ impl LanguageRegistry {
|
|||
// and no other extension which is not the desired behavior here,
|
||||
// as we want `.zshrc` to result in extension being `Some("zshrc")`
|
||||
let extension = filename.and_then(|filename| filename.split('.').next_back());
|
||||
let path_suffixes = [extension, filename, path.to_str()];
|
||||
let path_suffixes_candidates = path_suffixes
|
||||
let path_suffixes = [extension, filename, path.to_str()]
|
||||
.iter()
|
||||
.filter_map(|suffix| suffix.map(globset::Candidate::new))
|
||||
.filter_map(|suffix| suffix.map(|suffix| (suffix, globset::Candidate::new(suffix))))
|
||||
.collect::<SmallVec<[_; 3]>>();
|
||||
let content = LazyCell::new(|| {
|
||||
content.map(|content| {
|
||||
|
@ -731,20 +737,37 @@ impl LanguageRegistry {
|
|||
});
|
||||
self.find_matching_language(move |language_name, config, current_best_match| {
|
||||
let path_matches_default_suffix = || {
|
||||
config
|
||||
.path_suffixes
|
||||
.iter()
|
||||
.any(|suffix| path_suffixes.contains(&Some(suffix.as_str())))
|
||||
let len =
|
||||
config
|
||||
.path_suffixes
|
||||
.iter()
|
||||
.fold(0, |acc: usize, path_suffix: &String| {
|
||||
let ext = ".".to_string() + path_suffix;
|
||||
|
||||
let matched_suffix_len = path_suffixes
|
||||
.iter()
|
||||
.find(|(suffix, _)| suffix.ends_with(&ext) || suffix == path_suffix)
|
||||
.map(|(suffix, _)| suffix.len());
|
||||
|
||||
match matched_suffix_len {
|
||||
Some(len) => acc.max(len),
|
||||
None => acc,
|
||||
}
|
||||
});
|
||||
(len > 0).then_some(len)
|
||||
};
|
||||
|
||||
let path_matches_custom_suffix = || {
|
||||
user_file_types
|
||||
.and_then(|types| types.get(language_name.as_ref()))
|
||||
.map_or(false, |custom_suffixes| {
|
||||
path_suffixes_candidates
|
||||
.map_or(None, |custom_suffixes| {
|
||||
path_suffixes
|
||||
.iter()
|
||||
.any(|suffix| custom_suffixes.is_match_candidate(suffix))
|
||||
.find(|(_, candidate)| custom_suffixes.is_match_candidate(candidate))
|
||||
.map(|(suffix, _)| suffix.len())
|
||||
})
|
||||
};
|
||||
|
||||
let content_matches = || {
|
||||
config.first_line_pattern.as_ref().map_or(false, |pattern| {
|
||||
content
|
||||
|
@ -756,17 +779,29 @@ impl LanguageRegistry {
|
|||
// Only return a match for the given file if we have a better match than
|
||||
// the current one.
|
||||
match current_best_match {
|
||||
LanguageMatchPrecedence::PathOrContent | LanguageMatchPrecedence::Undetermined
|
||||
if path_matches_custom_suffix() =>
|
||||
{
|
||||
Some(LanguageMatchPrecedence::UserConfigured)
|
||||
LanguageMatchPrecedence::PathOrContent(current_len) => {
|
||||
if let Some(len) = path_matches_custom_suffix() {
|
||||
// >= because user config should win tie with system ext len
|
||||
(len >= current_len).then_some(LanguageMatchPrecedence::UserConfigured(len))
|
||||
} else if let Some(len) = path_matches_default_suffix() {
|
||||
// >= because user config should win tie with system ext len
|
||||
(len >= current_len).then_some(LanguageMatchPrecedence::PathOrContent(len))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
LanguageMatchPrecedence::Undetermined
|
||||
if path_matches_default_suffix() || content_matches() =>
|
||||
{
|
||||
Some(LanguageMatchPrecedence::PathOrContent)
|
||||
LanguageMatchPrecedence::Undetermined => {
|
||||
if let Some(len) = path_matches_custom_suffix() {
|
||||
Some(LanguageMatchPrecedence::UserConfigured(len))
|
||||
} else if let Some(len) = path_matches_default_suffix() {
|
||||
Some(LanguageMatchPrecedence::PathOrContent(len))
|
||||
} else if content_matches() {
|
||||
Some(LanguageMatchPrecedence::PathOrContent(1))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
LanguageMatchPrecedence::UserConfigured(_) => None,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
@ -784,28 +819,61 @@ impl LanguageRegistry {
|
|||
.available_languages
|
||||
.iter()
|
||||
.rev()
|
||||
.fold_while(None, |best_language_match, language| {
|
||||
.fold(None, |best_language_match, language| {
|
||||
let current_match_type = best_language_match
|
||||
.as_ref()
|
||||
.map_or(LanguageMatchPrecedence::default(), |(_, score)| *score);
|
||||
let language_score =
|
||||
callback(&language.name, &language.matcher, current_match_type);
|
||||
debug_assert!(
|
||||
language_score.is_none_or(|new_score| new_score > current_match_type),
|
||||
"Matching callback should only return a better match than the current one"
|
||||
);
|
||||
|
||||
match language_score {
|
||||
Some(new_score) if new_score.best_possible_match() => {
|
||||
Done(Some((language.clone(), new_score)))
|
||||
match (language_score, current_match_type) {
|
||||
// no current best, so our candidate is better
|
||||
(
|
||||
Some(
|
||||
LanguageMatchPrecedence::PathOrContent(_)
|
||||
| LanguageMatchPrecedence::UserConfigured(_),
|
||||
),
|
||||
LanguageMatchPrecedence::Undetermined,
|
||||
) => language_score.map(|new_score| (language.clone(), new_score)),
|
||||
|
||||
// our candidate is better only if the name is longer
|
||||
(
|
||||
Some(LanguageMatchPrecedence::PathOrContent(new_len)),
|
||||
LanguageMatchPrecedence::PathOrContent(current_len),
|
||||
)
|
||||
| (
|
||||
Some(LanguageMatchPrecedence::UserConfigured(new_len)),
|
||||
LanguageMatchPrecedence::UserConfigured(current_len),
|
||||
)
|
||||
| (
|
||||
Some(LanguageMatchPrecedence::PathOrContent(new_len)),
|
||||
LanguageMatchPrecedence::UserConfigured(current_len),
|
||||
) => {
|
||||
if new_len > current_len {
|
||||
language_score.map(|new_score| (language.clone(), new_score))
|
||||
} else {
|
||||
best_language_match
|
||||
}
|
||||
}
|
||||
Some(new_score) if current_match_type < new_score => {
|
||||
Continue(Some((language.clone(), new_score)))
|
||||
|
||||
// our candidate is better if the name is longer or equal to
|
||||
(
|
||||
Some(LanguageMatchPrecedence::UserConfigured(new_len)),
|
||||
LanguageMatchPrecedence::PathOrContent(current_len),
|
||||
) => {
|
||||
if new_len >= current_len {
|
||||
language_score.map(|new_score| (language.clone(), new_score))
|
||||
} else {
|
||||
best_language_match
|
||||
}
|
||||
}
|
||||
|
||||
// no candidate, use current best
|
||||
(None, _) | (Some(LanguageMatchPrecedence::Undetermined), _) => {
|
||||
best_language_match
|
||||
}
|
||||
_ => Continue(best_language_match),
|
||||
}
|
||||
})
|
||||
.into_inner()
|
||||
.map(|(available_language, _)| available_language);
|
||||
drop(state);
|
||||
available_language
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
name = "TypeScript"
|
||||
grammar = "typescript"
|
||||
path_suffixes = ["ts", "cts", "d.cts", "d.mts", "mts"]
|
||||
path_suffixes = ["ts", "cts", "mts"]
|
||||
first_line_pattern = '^#!.*\b(?:deno run|ts-node|bun|tsx)\b'
|
||||
line_comments = ["// "]
|
||||
block_comment = ["/*", "*/"]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue