language: Further optimize language_for_file
(#28694)
Follow-up to #28671. This primarily follows two ideas: 1. We currently take the element with the highest score which appears last in the iterator (see [`max_by_key`](https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.max_by_key), which returns the last element when several are equally maximal), so we can also just reverse the iterator and take the first highest match instead. 2. Once we have a match with a given precedence, we are not interested in any matches with a lower or even the same precedence, given what was established in 1. Thus, we only have to check whether any language checked afterwards has a higher-precedence match. Furthermore, once we have a match with the highest possible precedence, there is no need to look for any more possible matches. Thus, this PR also adds short-circuiting for that scenario. Lastly, I also cleaned up the custom suffix match (an empty glob-set will never match, so there is no need to iterate there) as well as reordered the zip-call in the content matches, as we never need the content if there is no first-line pattern present for the checked languages. Release Notes: - N/A
This commit is contained in:
parent
77f32582e2
commit
12b012eab3
1 changed files with 86 additions and 44 deletions
|
@ -16,6 +16,8 @@ use futures::{
|
||||||
};
|
};
|
||||||
use globset::GlobSet;
|
use globset::GlobSet;
|
||||||
use gpui::{App, BackgroundExecutor, SharedString};
|
use gpui::{App, BackgroundExecutor, SharedString};
|
||||||
|
use itertools::FoldWhile::{Continue, Done};
|
||||||
|
use itertools::Itertools;
|
||||||
use lsp::LanguageServerId;
|
use lsp::LanguageServerId;
|
||||||
use parking_lot::{Mutex, RwLock};
|
use parking_lot::{Mutex, RwLock};
|
||||||
use postage::watch;
|
use postage::watch;
|
||||||
|
@ -165,6 +167,20 @@ impl AvailableLanguage {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
|
enum LanguageMatchPrecedence {
|
||||||
|
#[default]
|
||||||
|
Undetermined,
|
||||||
|
PathOrContent,
|
||||||
|
UserConfigured,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LanguageMatchPrecedence {
|
||||||
|
fn best_possible_match(&self) -> bool {
|
||||||
|
*self == LanguageMatchPrecedence::UserConfigured
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
enum AvailableGrammar {
|
enum AvailableGrammar {
|
||||||
Native(tree_sitter::Language),
|
Native(tree_sitter::Language),
|
||||||
Loaded(#[allow(unused)] PathBuf, tree_sitter::Language),
|
Loaded(#[allow(unused)] PathBuf, tree_sitter::Language),
|
||||||
|
@ -602,12 +618,10 @@ impl LanguageRegistry {
|
||||||
name: &str,
|
name: &str,
|
||||||
) -> impl Future<Output = Result<Arc<Language>>> + use<> {
|
) -> impl Future<Output = Result<Arc<Language>>> + use<> {
|
||||||
let name = UniCase::new(name);
|
let name = UniCase::new(name);
|
||||||
let rx = self.get_or_load_language(|language_name, _| {
|
let rx = self.get_or_load_language(|language_name, _, current_best_match| {
|
||||||
if UniCase::new(&language_name.0) == name {
|
(current_best_match < LanguageMatchPrecedence::PathOrContent
|
||||||
1
|
&& UniCase::new(&language_name.0) == name)
|
||||||
} else {
|
.then_some(LanguageMatchPrecedence::PathOrContent)
|
||||||
0
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
async move { rx.await? }
|
async move { rx.await? }
|
||||||
}
|
}
|
||||||
|
@ -617,17 +631,14 @@ impl LanguageRegistry {
|
||||||
string: &str,
|
string: &str,
|
||||||
) -> impl Future<Output = Result<Arc<Language>>> {
|
) -> impl Future<Output = Result<Arc<Language>>> {
|
||||||
let string = UniCase::new(string);
|
let string = UniCase::new(string);
|
||||||
let rx = self.get_or_load_language(|name, config| {
|
let rx = self.get_or_load_language(|name, config, current_best_match| {
|
||||||
if UniCase::new(&name.0) == string
|
(current_best_match < LanguageMatchPrecedence::PathOrContent
|
||||||
|| config
|
&& (UniCase::new(&name.0) == string
|
||||||
.path_suffixes
|
|| config
|
||||||
.iter()
|
.path_suffixes
|
||||||
.any(|suffix| UniCase::new(suffix) == string)
|
.iter()
|
||||||
{
|
.any(|suffix| UniCase::new(suffix) == string)))
|
||||||
1
|
.then_some(LanguageMatchPrecedence::PathOrContent)
|
||||||
} else {
|
|
||||||
0
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
async move { rx.await? }
|
async move { rx.await? }
|
||||||
}
|
}
|
||||||
|
@ -688,7 +699,6 @@ impl LanguageRegistry {
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|suffix| suffix.map(globset::Candidate::new))
|
.filter_map(|suffix| suffix.map(globset::Candidate::new))
|
||||||
.collect::<SmallVec<[_; 3]>>();
|
.collect::<SmallVec<[_; 3]>>();
|
||||||
let empty = GlobSet::empty();
|
|
||||||
let content = LazyCell::new(|| {
|
let content = LazyCell::new(|| {
|
||||||
content.map(|content| {
|
content.map(|content| {
|
||||||
let end = content.clip_point(Point::new(0, 256), Bias::Left);
|
let end = content.clip_point(Point::new(0, 256), Bias::Left);
|
||||||
|
@ -696,7 +706,7 @@ impl LanguageRegistry {
|
||||||
content.chunks_in_range(0..end).collect::<String>()
|
content.chunks_in_range(0..end).collect::<String>()
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
self.find_matching_language(move |language_name, config| {
|
self.find_matching_language(move |language_name, config, current_best_match| {
|
||||||
let path_matches_default_suffix = || {
|
let path_matches_default_suffix = || {
|
||||||
config
|
config
|
||||||
.path_suffixes
|
.path_suffixes
|
||||||
|
@ -704,47 +714,75 @@ impl LanguageRegistry {
|
||||||
.any(|suffix| path_suffixes.contains(&Some(suffix.as_str())))
|
.any(|suffix| path_suffixes.contains(&Some(suffix.as_str())))
|
||||||
};
|
};
|
||||||
let path_matches_custom_suffix = || {
|
let path_matches_custom_suffix = || {
|
||||||
let custom_suffixes = user_file_types
|
user_file_types
|
||||||
.and_then(|types| types.get(language_name.as_ref()))
|
.and_then(|types| types.get(language_name.as_ref()))
|
||||||
.unwrap_or(&empty);
|
.map_or(false, |custom_suffixes| {
|
||||||
path_suffixes_candidates
|
path_suffixes_candidates
|
||||||
.iter()
|
.iter()
|
||||||
.any(|suffix| custom_suffixes.is_match_candidate(suffix))
|
.any(|suffix| custom_suffixes.is_match_candidate(suffix))
|
||||||
|
})
|
||||||
};
|
};
|
||||||
let content_matches = || {
|
let content_matches = || {
|
||||||
content
|
config.first_line_pattern.as_ref().map_or(false, |pattern| {
|
||||||
.as_ref()
|
content
|
||||||
.zip(config.first_line_pattern.as_ref())
|
.as_ref()
|
||||||
.map_or(false, |(text, pattern)| pattern.is_match(&text))
|
.is_some_and(|content| pattern.is_match(content))
|
||||||
|
})
|
||||||
};
|
};
|
||||||
if path_matches_custom_suffix() {
|
|
||||||
2
|
// Only return a match for the given file if we have a better match than
|
||||||
} else if path_matches_default_suffix() || content_matches() {
|
// the current one.
|
||||||
1
|
match current_best_match {
|
||||||
} else {
|
LanguageMatchPrecedence::PathOrContent | LanguageMatchPrecedence::Undetermined
|
||||||
0
|
if path_matches_custom_suffix() =>
|
||||||
|
{
|
||||||
|
Some(LanguageMatchPrecedence::UserConfigured)
|
||||||
|
}
|
||||||
|
LanguageMatchPrecedence::Undetermined
|
||||||
|
if path_matches_default_suffix() || content_matches() =>
|
||||||
|
{
|
||||||
|
Some(LanguageMatchPrecedence::PathOrContent)
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn find_matching_language(
|
fn find_matching_language(
|
||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
callback: impl Fn(&LanguageName, &LanguageMatcher) -> usize,
|
callback: impl Fn(
|
||||||
|
&LanguageName,
|
||||||
|
&LanguageMatcher,
|
||||||
|
LanguageMatchPrecedence,
|
||||||
|
) -> Option<LanguageMatchPrecedence>,
|
||||||
) -> Option<AvailableLanguage> {
|
) -> Option<AvailableLanguage> {
|
||||||
let state = self.state.read();
|
let state = self.state.read();
|
||||||
let available_language = state
|
let available_language = state
|
||||||
.available_languages
|
.available_languages
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|language| {
|
.rev()
|
||||||
let score = callback(&language.name, &language.matcher);
|
.fold_while(None, |best_language_match, language| {
|
||||||
if score > 0 {
|
let current_match_type = best_language_match
|
||||||
Some((language.clone(), score))
|
.as_ref()
|
||||||
} else {
|
.map_or(LanguageMatchPrecedence::default(), |(_, score)| *score);
|
||||||
None
|
let language_score =
|
||||||
|
callback(&language.name, &language.matcher, current_match_type);
|
||||||
|
debug_assert!(
|
||||||
|
language_score.is_none_or(|new_score| new_score > current_match_type),
|
||||||
|
"Matching callback should only return a better match than the current one"
|
||||||
|
);
|
||||||
|
|
||||||
|
match language_score {
|
||||||
|
Some(new_score) if new_score.best_possible_match() => {
|
||||||
|
Done(Some((language.clone(), new_score)))
|
||||||
|
}
|
||||||
|
Some(new_score) if current_match_type < new_score => {
|
||||||
|
Continue(Some((language.clone(), new_score)))
|
||||||
|
}
|
||||||
|
_ => Continue(best_language_match),
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.max_by_key(|e| e.1)
|
.into_inner()
|
||||||
.clone()
|
|
||||||
.map(|(available_language, _)| available_language);
|
.map(|(available_language, _)| available_language);
|
||||||
drop(state);
|
drop(state);
|
||||||
available_language
|
available_language
|
||||||
|
@ -839,7 +877,11 @@ impl LanguageRegistry {
|
||||||
|
|
||||||
fn get_or_load_language(
|
fn get_or_load_language(
|
||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
callback: impl Fn(&LanguageName, &LanguageMatcher) -> usize,
|
callback: impl Fn(
|
||||||
|
&LanguageName,
|
||||||
|
&LanguageMatcher,
|
||||||
|
LanguageMatchPrecedence,
|
||||||
|
) -> Option<LanguageMatchPrecedence>,
|
||||||
) -> oneshot::Receiver<Result<Arc<Language>>> {
|
) -> oneshot::Receiver<Result<Arc<Language>>> {
|
||||||
let Some(language) = self.find_matching_language(callback) else {
|
let Some(language) = self.find_matching_language(callback) else {
|
||||||
let (tx, rx) = oneshot::channel();
|
let (tx, rx) = oneshot::channel();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue