From 12b012eab375f97da9d05625c9a879b6d901ab8c Mon Sep 17 00:00:00 2001 From: Finn Evers Date: Tue, 15 Apr 2025 01:31:45 +0200 Subject: [PATCH] language: Further optimize `language_for_file` (#28694) Follow-up to #28671 This primarily follows two ideas: 1. We currently take the element with the highest score which appears last in the iterator (see [`max_by_key`](https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.max_by_key)), so we can also just reverse the iterator and take the first highest match instead. 2. Once we have a match with a given precedence, we are not interested in any matches with a lower or even the same priority, given what was established in 1. Thus, we also only have to check whether any language checked afterwards has a higher priority match. Furthermore, once we have a match with the highest possible precedence, there is no need to look for any more possible matches. Thus, this PR also adds short-circuiting for that scenario. Lastly, I also cleaned up the custom suffix match (an empty glob-set will never match so no need to iterate there) as well as reordered the zip-call in the content matches, as we never need the content if there is no first line pattern present for the checked languages. 
Release Notes: - N/A --- crates/language/src/language_registry.rs | 130 +++++++++++++++-------- 1 file changed, 86 insertions(+), 44 deletions(-) diff --git a/crates/language/src/language_registry.rs b/crates/language/src/language_registry.rs index d7a4293ee4..7ba3f3b0ae 100644 --- a/crates/language/src/language_registry.rs +++ b/crates/language/src/language_registry.rs @@ -16,6 +16,8 @@ use futures::{ }; use globset::GlobSet; use gpui::{App, BackgroundExecutor, SharedString}; +use itertools::FoldWhile::{Continue, Done}; +use itertools::Itertools; use lsp::LanguageServerId; use parking_lot::{Mutex, RwLock}; use postage::watch; @@ -165,6 +167,20 @@ impl AvailableLanguage { } } +#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)] +enum LanguageMatchPrecedence { + #[default] + Undetermined, + PathOrContent, + UserConfigured, +} + +impl LanguageMatchPrecedence { + fn best_possible_match(&self) -> bool { + *self == LanguageMatchPrecedence::UserConfigured + } +} + enum AvailableGrammar { Native(tree_sitter::Language), Loaded(#[allow(unused)] PathBuf, tree_sitter::Language), @@ -602,12 +618,10 @@ impl LanguageRegistry { name: &str, ) -> impl Future>> + use<> { let name = UniCase::new(name); - let rx = self.get_or_load_language(|language_name, _| { - if UniCase::new(&language_name.0) == name { - 1 - } else { - 0 - } + let rx = self.get_or_load_language(|language_name, _, current_best_match| { + (current_best_match < LanguageMatchPrecedence::PathOrContent + && UniCase::new(&language_name.0) == name) + .then_some(LanguageMatchPrecedence::PathOrContent) }); async move { rx.await? 
} } @@ -617,17 +631,14 @@ impl LanguageRegistry { string: &str, ) -> impl Future>> { let string = UniCase::new(string); - let rx = self.get_or_load_language(|name, config| { - if UniCase::new(&name.0) == string - || config - .path_suffixes - .iter() - .any(|suffix| UniCase::new(suffix) == string) - { - 1 - } else { - 0 - } + let rx = self.get_or_load_language(|name, config, current_best_match| { + (current_best_match < LanguageMatchPrecedence::PathOrContent + && (UniCase::new(&name.0) == string + || config + .path_suffixes + .iter() + .any(|suffix| UniCase::new(suffix) == string))) + .then_some(LanguageMatchPrecedence::PathOrContent) }); async move { rx.await? } } @@ -688,7 +699,6 @@ impl LanguageRegistry { .iter() .filter_map(|suffix| suffix.map(globset::Candidate::new)) .collect::>(); - let empty = GlobSet::empty(); let content = LazyCell::new(|| { content.map(|content| { let end = content.clip_point(Point::new(0, 256), Bias::Left); @@ -696,7 +706,7 @@ impl LanguageRegistry { content.chunks_in_range(0..end).collect::() }) }); - self.find_matching_language(move |language_name, config| { + self.find_matching_language(move |language_name, config, current_best_match| { let path_matches_default_suffix = || { config .path_suffixes @@ -704,47 +714,75 @@ impl LanguageRegistry { .any(|suffix| path_suffixes.contains(&Some(suffix.as_str()))) }; let path_matches_custom_suffix = || { - let custom_suffixes = user_file_types + user_file_types .and_then(|types| types.get(language_name.as_ref())) - .unwrap_or(&empty); - path_suffixes_candidates - .iter() - .any(|suffix| custom_suffixes.is_match_candidate(suffix)) + .map_or(false, |custom_suffixes| { + path_suffixes_candidates + .iter() + .any(|suffix| custom_suffixes.is_match_candidate(suffix)) + }) }; let content_matches = || { - content - .as_ref() - .zip(config.first_line_pattern.as_ref()) - .map_or(false, |(text, pattern)| pattern.is_match(&text)) + config.first_line_pattern.as_ref().map_or(false, |pattern| { + content + 
.as_ref() + .is_some_and(|content| pattern.is_match(content)) + }) }; - if path_matches_custom_suffix() { - 2 - } else if path_matches_default_suffix() || content_matches() { - 1 - } else { - 0 + + // Only return a match for the given file if we have a better match than + // the current one. + match current_best_match { + LanguageMatchPrecedence::PathOrContent | LanguageMatchPrecedence::Undetermined + if path_matches_custom_suffix() => + { + Some(LanguageMatchPrecedence::UserConfigured) + } + LanguageMatchPrecedence::Undetermined + if path_matches_default_suffix() || content_matches() => + { + Some(LanguageMatchPrecedence::PathOrContent) + } + _ => None, } }) } fn find_matching_language( self: &Arc, - callback: impl Fn(&LanguageName, &LanguageMatcher) -> usize, + callback: impl Fn( + &LanguageName, + &LanguageMatcher, + LanguageMatchPrecedence, + ) -> Option, ) -> Option { let state = self.state.read(); let available_language = state .available_languages .iter() - .filter_map(|language| { - let score = callback(&language.name, &language.matcher); - if score > 0 { - Some((language.clone(), score)) - } else { - None + .rev() + .fold_while(None, |best_language_match, language| { + let current_match_type = best_language_match + .as_ref() + .map_or(LanguageMatchPrecedence::default(), |(_, score)| *score); + let language_score = + callback(&language.name, &language.matcher, current_match_type); + debug_assert!( + language_score.is_none_or(|new_score| new_score > current_match_type), + "Matching callback should only return a better match than the current one" + ); + + match language_score { + Some(new_score) if new_score.best_possible_match() => { + Done(Some((language.clone(), new_score))) + } + Some(new_score) if current_match_type < new_score => { + Continue(Some((language.clone(), new_score))) + } + _ => Continue(best_language_match), } }) - .max_by_key(|e| e.1) - .clone() + .into_inner() .map(|(available_language, _)| available_language); drop(state); 
available_language @@ -839,7 +877,11 @@ impl LanguageRegistry { fn get_or_load_language( self: &Arc, - callback: impl Fn(&LanguageName, &LanguageMatcher) -> usize, + callback: impl Fn( + &LanguageName, + &LanguageMatcher, + LanguageMatchPrecedence, + ) -> Option, ) -> oneshot::Receiver>> { let Some(language) = self.find_matching_language(callback) else { let (tx, rx) = oneshot::channel();