language: Further optimize language_for_file (#28694)

Follow-up to #28671

This primarily follows two ideas:
1. We currently take the element with the highest score which appears
last in the iterator (see
[`max_by_key`](https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.max_by_key)),
so we can also just reverse the iterator and take the first highest
match instead.
2. Once we have a match with a given precedence, we are not interested
in any matches with a lower or even the same precedence, given what was
established in 1. Thus, we only have to check whether any language
checked afterwards has a higher-precedence match.

Furthermore, once we have a match with the highest possible precedence,
there is no need to look for any more possible matches. Thus, this PR
also adds short-circuiting for that scenario.

Lastly, I also cleaned up the custom suffix match (an empty glob set
will never match, so there is no need to iterate there) and reordered
the zip call in the content match, as we never need the content if there
is no first-line pattern present for the checked language.

Release Notes:

- N/A
This commit is contained in:
Finn Evers 2025-04-15 01:31:45 +02:00 committed by GitHub
parent 77f32582e2
commit 12b012eab3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -16,6 +16,8 @@ use futures::{
};
use globset::GlobSet;
use gpui::{App, BackgroundExecutor, SharedString};
use itertools::FoldWhile::{Continue, Done};
use itertools::Itertools;
use lsp::LanguageServerId;
use parking_lot::{Mutex, RwLock};
use postage::watch;
@ -165,6 +167,20 @@ impl AvailableLanguage {
}
}
/// How strongly a language matched during a registry lookup.
///
/// The derived `Ord` follows declaration order, so variants MUST stay listed
/// from weakest to strongest: callers compare precedences with `<` / `>` to
/// decide whether a newly found match should replace the current best one.
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
enum LanguageMatchPrecedence {
    /// No match has been found yet; used as the starting value of a search.
    #[default]
    Undetermined,
    /// The language matched by name, by one of its default path suffixes, or
    /// by its first-line content pattern.
    PathOrContent,
    /// The language matched through a user-configured file type mapping.
    UserConfigured,
}

impl LanguageMatchPrecedence {
    /// Returns `true` when no stronger match can exist, which lets a search
    /// stop early instead of inspecting the remaining languages.
    fn best_possible_match(&self) -> bool {
        matches!(self, Self::UserConfigured)
    }
}
enum AvailableGrammar {
Native(tree_sitter::Language),
Loaded(#[allow(unused)] PathBuf, tree_sitter::Language),
@ -602,12 +618,10 @@ impl LanguageRegistry {
name: &str,
) -> impl Future<Output = Result<Arc<Language>>> + use<> {
let name = UniCase::new(name);
let rx = self.get_or_load_language(|language_name, _| {
if UniCase::new(&language_name.0) == name {
1
} else {
0
}
let rx = self.get_or_load_language(|language_name, _, current_best_match| {
(current_best_match < LanguageMatchPrecedence::PathOrContent
&& UniCase::new(&language_name.0) == name)
.then_some(LanguageMatchPrecedence::PathOrContent)
});
async move { rx.await? }
}
@ -617,17 +631,14 @@ impl LanguageRegistry {
string: &str,
) -> impl Future<Output = Result<Arc<Language>>> {
let string = UniCase::new(string);
let rx = self.get_or_load_language(|name, config| {
if UniCase::new(&name.0) == string
|| config
.path_suffixes
.iter()
.any(|suffix| UniCase::new(suffix) == string)
{
1
} else {
0
}
let rx = self.get_or_load_language(|name, config, current_best_match| {
(current_best_match < LanguageMatchPrecedence::PathOrContent
&& (UniCase::new(&name.0) == string
|| config
.path_suffixes
.iter()
.any(|suffix| UniCase::new(suffix) == string)))
.then_some(LanguageMatchPrecedence::PathOrContent)
});
async move { rx.await? }
}
@ -688,7 +699,6 @@ impl LanguageRegistry {
.iter()
.filter_map(|suffix| suffix.map(globset::Candidate::new))
.collect::<SmallVec<[_; 3]>>();
let empty = GlobSet::empty();
let content = LazyCell::new(|| {
content.map(|content| {
let end = content.clip_point(Point::new(0, 256), Bias::Left);
@ -696,7 +706,7 @@ impl LanguageRegistry {
content.chunks_in_range(0..end).collect::<String>()
})
});
self.find_matching_language(move |language_name, config| {
self.find_matching_language(move |language_name, config, current_best_match| {
let path_matches_default_suffix = || {
config
.path_suffixes
@ -704,47 +714,75 @@ impl LanguageRegistry {
.any(|suffix| path_suffixes.contains(&Some(suffix.as_str())))
};
let path_matches_custom_suffix = || {
let custom_suffixes = user_file_types
user_file_types
.and_then(|types| types.get(language_name.as_ref()))
.unwrap_or(&empty);
path_suffixes_candidates
.iter()
.any(|suffix| custom_suffixes.is_match_candidate(suffix))
.map_or(false, |custom_suffixes| {
path_suffixes_candidates
.iter()
.any(|suffix| custom_suffixes.is_match_candidate(suffix))
})
};
let content_matches = || {
content
.as_ref()
.zip(config.first_line_pattern.as_ref())
.map_or(false, |(text, pattern)| pattern.is_match(&text))
config.first_line_pattern.as_ref().map_or(false, |pattern| {
content
.as_ref()
.is_some_and(|content| pattern.is_match(content))
})
};
if path_matches_custom_suffix() {
2
} else if path_matches_default_suffix() || content_matches() {
1
} else {
0
// Only return a match for the given file if we have a better match than
// the current one.
match current_best_match {
LanguageMatchPrecedence::PathOrContent | LanguageMatchPrecedence::Undetermined
if path_matches_custom_suffix() =>
{
Some(LanguageMatchPrecedence::UserConfigured)
}
LanguageMatchPrecedence::Undetermined
if path_matches_default_suffix() || content_matches() =>
{
Some(LanguageMatchPrecedence::PathOrContent)
}
_ => None,
}
})
}
fn find_matching_language(
self: &Arc<Self>,
callback: impl Fn(&LanguageName, &LanguageMatcher) -> usize,
callback: impl Fn(
&LanguageName,
&LanguageMatcher,
LanguageMatchPrecedence,
) -> Option<LanguageMatchPrecedence>,
) -> Option<AvailableLanguage> {
let state = self.state.read();
let available_language = state
.available_languages
.iter()
.filter_map(|language| {
let score = callback(&language.name, &language.matcher);
if score > 0 {
Some((language.clone(), score))
} else {
None
.rev()
.fold_while(None, |best_language_match, language| {
let current_match_type = best_language_match
.as_ref()
.map_or(LanguageMatchPrecedence::default(), |(_, score)| *score);
let language_score =
callback(&language.name, &language.matcher, current_match_type);
debug_assert!(
language_score.is_none_or(|new_score| new_score > current_match_type),
"Matching callback should only return a better match than the current one"
);
match language_score {
Some(new_score) if new_score.best_possible_match() => {
Done(Some((language.clone(), new_score)))
}
Some(new_score) if current_match_type < new_score => {
Continue(Some((language.clone(), new_score)))
}
_ => Continue(best_language_match),
}
})
.max_by_key(|e| e.1)
.clone()
.into_inner()
.map(|(available_language, _)| available_language);
drop(state);
available_language
@ -839,7 +877,11 @@ impl LanguageRegistry {
fn get_or_load_language(
self: &Arc<Self>,
callback: impl Fn(&LanguageName, &LanguageMatcher) -> usize,
callback: impl Fn(
&LanguageName,
&LanguageMatcher,
LanguageMatchPrecedence,
) -> Option<LanguageMatchPrecedence>,
) -> oneshot::Receiver<Result<Arc<Language>>> {
let Some(language) = self.find_matching_language(callback) else {
let (tx, rx) = oneshot::channel();