language: Select language based on longest matching path extension (#29716)

Closes #8408  
Closes #10997

This is a reboot of [my original
PR](https://github.com/zed-industries/zed/pull/11697) from last year. I
believe that I've addressed all the comments raised in that original
review, but Zed has changed a lot in the past year, so I'm sure there
will be some new stuff to consider too.

- updates the language matching and lookup to consider not just "does
the suffix/glob match" but also "... and is it the longest such match"
- adds a new `LanguageCustomFileTypes` struct to pass user globs from
settings to the registry
- _minor/unrelated:_ updates a test for the JS extension that wasn't
actually testing what it was intended to
- _minor/unrelated:_ removes 2 redundant path extensions from the JS
lang extension

**Languages that may use this**

- Laravel Blade templates use the `blade.php` compound extension
- [apparently](https://github.com/zed-industries/zed/issues/10765#issuecomment-2091293304)
  Angular uses `component.html`
  - see also https://github.com/zed-industries/extensions/issues/169
- _hypothetically_ someone could publish a "JS test" extension w/ custom
highlights and/or snippets; many JS tests use `test.js` or `spec.js`

**Verifying these changes**

I added a number of assertions for this new behavior, and I also
confirmed that the (recently patched) [Laravel Blade
extension](https://github.com/bajrangCoder/zed-laravel-blade) opens as
expected for `blade.php` files, whereas on `main` it does not.

cc @maxbrunsfeld (reviewed my original PR last year), @osiewicz and
@MrSubidubi (have recently been in this part of the code)

Release Notes:

- Added support for "compound" file extensions in language extensions,
such as `blade.php` and `component.html`. Closes #8408 and #10997.
This commit is contained in:
claytonrcarter 2025-05-26 14:00:05 -04:00 committed by GitHub
parent 649072d140
commit f4b361f04d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 177 additions and 52 deletions

View file

@ -83,6 +83,17 @@ fn test_select_language(cx: &mut App) {
},
Some(tree_sitter_rust::LANGUAGE.into()),
)));
registry.add(Arc::new(Language::new(
LanguageConfig {
name: "Rust with longer extension".into(),
matcher: LanguageMatcher {
path_suffixes: vec!["longer.rs".to_string()],
..Default::default()
},
..Default::default()
},
Some(tree_sitter_rust::LANGUAGE.into()),
)));
registry.add(Arc::new(Language::new(
LanguageConfig {
name: LanguageName::new("Make"),
@ -109,6 +120,14 @@ fn test_select_language(cx: &mut App) {
Some("Make".into())
);
// matching longer, compound extension, part of which could also match another lang
assert_eq!(
registry
.language_for_file(&file("src/lib.longer.rs"), None, cx)
.map(|l| l.name()),
Some("Rust with longer extension".into())
);
// matching filename
assert_eq!(
registry
@ -181,7 +200,11 @@ async fn test_language_for_file_with_custom_file_types(cx: &mut TestAppContext)
init_settings(cx, |settings| {
settings.file_types.extend([
("TypeScript".into(), vec!["js".into()]),
("C++".into(), vec!["c".into()]),
(
"JavaScript".into(),
vec!["*longer.ts".into(), "ecmascript".into()],
),
("C++".into(), vec!["c".into(), "*.dev".into()]),
(
"Dockerfile".into(),
vec!["Dockerfile".into(), "Dockerfile.*".into()],
@ -204,7 +227,7 @@ async fn test_language_for_file_with_custom_file_types(cx: &mut TestAppContext)
LanguageConfig {
name: "TypeScript".into(),
matcher: LanguageMatcher {
path_suffixes: vec!["js".to_string()],
path_suffixes: vec!["ts".to_string(), "ts.ecmascript".to_string()],
..Default::default()
},
..Default::default()
@ -237,6 +260,21 @@ async fn test_language_for_file_with_custom_file_types(cx: &mut TestAppContext)
languages.add(Arc::new(Language::new(config, None)));
}
// matches system-provided lang extension
let language = cx
.read(|cx| languages.language_for_file(&file("foo.ts"), None, cx))
.unwrap();
assert_eq!(language.name(), "TypeScript".into());
let language = cx
.read(|cx| languages.language_for_file(&file("foo.ts.ecmascript"), None, cx))
.unwrap();
assert_eq!(language.name(), "TypeScript".into());
let language = cx
.read(|cx| languages.language_for_file(&file("foo.cpp"), None, cx))
.unwrap();
assert_eq!(language.name(), "C++".into());
// user configured lang extension, same length as system-provided
let language = cx
.read(|cx| languages.language_for_file(&file("foo.js"), None, cx))
.unwrap();
@ -245,6 +283,25 @@ async fn test_language_for_file_with_custom_file_types(cx: &mut TestAppContext)
.read(|cx| languages.language_for_file(&file("foo.c"), None, cx))
.unwrap();
assert_eq!(language.name(), "C++".into());
// user configured lang extension, longer than system-provided
let language = cx
.read(|cx| languages.language_for_file(&file("foo.longer.ts"), None, cx))
.unwrap();
assert_eq!(language.name(), "JavaScript".into());
// user configured lang extension, shorter than system-provided
let language = cx
.read(|cx| languages.language_for_file(&file("foo.ecmascript"), None, cx))
.unwrap();
assert_eq!(language.name(), "JavaScript".into());
// user configured glob matches
let language = cx
.read(|cx| languages.language_for_file(&file("c-plus-plus.dev"), None, cx))
.unwrap();
assert_eq!(language.name(), "C++".into());
// should match Dockerfile.* => Dockerfile, not *.dev => C++
let language = cx
.read(|cx| languages.language_for_file(&file("Dockerfile.dev"), None, cx))
.unwrap();