diff --git a/crates/fuzzy/src/matcher.rs b/crates/fuzzy/src/matcher.rs index ae56b84f1e..1b039c16f5 100644 --- a/crates/fuzzy/src/matcher.rs +++ b/crates/fuzzy/src/matcher.rs @@ -14,7 +14,6 @@ pub struct Matcher<'a> { lowercase_query: &'a [char], query_char_bag: CharBag, smart_case: bool, - max_results: usize, min_score: f64, match_positions: Vec, last_positions: Vec, @@ -22,11 +21,6 @@ pub struct Matcher<'a> { best_position_matrix: Vec, } -pub trait Match: Ord { - fn score(&self) -> f64; - fn set_positions(&mut self, positions: Vec); -} - pub trait MatchCandidate { fn has_chars(&self, bag: CharBag) -> bool; fn to_string(&self) -> Cow<'_, str>; @@ -38,7 +32,6 @@ impl<'a> Matcher<'a> { lowercase_query: &'a [char], query_char_bag: CharBag, smart_case: bool, - max_results: usize, ) -> Self { Self { query, @@ -50,10 +43,11 @@ impl<'a> Matcher<'a> { score_matrix: Vec::new(), best_position_matrix: Vec::new(), smart_case, - max_results, } } + /// Filter and score fuzzy match candidates. Results are returned unsorted, in the same order as + /// the input candidates. pub fn match_candidates( &mut self, prefix: &[char], @@ -63,8 +57,7 @@ impl<'a> Matcher<'a> { cancel_flag: &AtomicBool, build_match: F, ) where - R: Match, - F: Fn(&C, f64) -> R, + F: Fn(&C, f64, &Vec) -> R, { let mut candidate_chars = Vec::new(); let mut lowercase_candidate_chars = Vec::new(); @@ -103,20 +96,7 @@ impl<'a> Matcher<'a> { ); if score > 0.0 { - let mut mat = build_match(&candidate, score); - if let Err(i) = results.binary_search_by(|m| mat.cmp(m)) { - if results.len() < self.max_results { - mat.set_positions(self.match_positions.clone()); - results.insert(i, mat); - } else if i < results.len() { - results.pop(); - mat.set_positions(self.match_positions.clone()); - results.insert(i, mat); - } - if results.len() == self.max_results { - self.min_score = results.last().unwrap().score(); - } - } + results.push(build_match(&candidate, score, &self.match_positions)); } } } @@ -325,18 +305,18 @@ mod tests { #[test] fn test_get_last_positions() { let mut query: &[char] = &['d', 'c']; - let mut matcher = Matcher::new(query, query, query.into(), false, 10); + let mut matcher = Matcher::new(query, query, query.into(), false); let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']); assert!(!result); query = &['c', 'd']; - let mut matcher = Matcher::new(query, query, query.into(), false, 10); + let mut matcher = Matcher::new(query, query, query.into(), false); let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']); assert!(result); assert_eq!(matcher.last_positions, vec![2, 4]); query = &['z', '/', 'z', 'f']; - let mut matcher = Matcher::new(query, query, query.into(), false, 10); + let mut matcher = Matcher::new(query, query, query.into(), false); let result = matcher.find_last_positions(&['z', 'e', 'd', '/'], &['z', 'e', 'd', '/', 'f']); assert!(result); assert_eq!(matcher.last_positions, vec![0, 3, 4, 8]); @@ -451,7 +431,7 @@ mod tests { }); } - let mut matcher = Matcher::new(&query, &lowercase_query, query_chars, smart_case, 100); + let mut matcher = Matcher::new(&query, &lowercase_query, query_chars, smart_case); let cancel_flag = AtomicBool::new(false); let mut results = Vec::new(); @@ -462,16 +442,17 @@ mod tests { path_entries.into_iter(), &mut results, &cancel_flag, - |candidate, score| PathMatch { + |candidate, score, positions| PathMatch { score, worktree_id: 0, - positions: Vec::new(), + positions: positions.clone(), path: Arc::from(candidate.path), path_prefix: "".into(), distance_to_relative_ancestor: usize::MAX, is_dir: false, }, ); + results.sort_by(|a, b| b.cmp(a)); results .into_iter() diff --git a/crates/fuzzy/src/paths.rs b/crates/fuzzy/src/paths.rs index 2b4eec98ef..9f93b497b5 100644 --- a/crates/fuzzy/src/paths.rs +++ b/crates/fuzzy/src/paths.rs @@ -7,7 +7,7 @@ use std::{ }; use crate::{ - matcher::{Match, MatchCandidate, Matcher}, + matcher::{MatchCandidate, Matcher}, CharBag, }; @@ -42,16 +42,6 @@ pub trait PathMatchCandidateSet<'a>: Send + Sync { fn candidates(&'a self, start: usize) -> Self::Candidates; } -impl Match for PathMatch { - fn score(&self) -> f64 { - self.score - } - - fn set_positions(&mut self, positions: Vec) { - self.positions = positions; - } -} - impl<'a> MatchCandidate for PathMatchCandidate<'a> { fn has_chars(&self, bag: CharBag) -> bool { self.char_bag.is_superset(bag) @@ -102,13 +92,7 @@ pub fn match_fixed_path_set( let query = query.chars().collect::>(); let query_char_bag = CharBag::from(&lowercase_query[..]); - let mut matcher = Matcher::new( - &query, - &lowercase_query, - query_char_bag, - smart_case, - max_results, - ); + let mut matcher = Matcher::new(&query, &lowercase_query, query_char_bag, smart_case); let mut results = Vec::new(); matcher.match_candidates( @@ -117,16 +101,17 @@ pub fn match_fixed_path_set( candidates.into_iter(), &mut results, &AtomicBool::new(false), - |candidate, score| PathMatch { + |candidate, score, positions| PathMatch { score, worktree_id, - positions: Vec::new(), + positions: positions.clone(), is_dir: candidate.is_dir, path: Arc::from(candidate.path), path_prefix: Arc::default(), distance_to_relative_ancestor: usize::MAX, }, ); + util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a)); results } @@ -164,13 +149,8 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>( scope.spawn(async move { let segment_start = segment_idx * segment_size; let segment_end = segment_start + segment_size; - let mut matcher = Matcher::new( - query, - lowercase_query, - query_char_bag, - smart_case, - max_results, - ); + let mut matcher = + Matcher::new(query, lowercase_query, query_char_bag, smart_case); let mut tree_start = 0; for candidate_set in candidate_sets { @@ -193,10 +173,10 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>( candidates, results, cancel_flag, - |candidate, score| PathMatch { + |candidate, score, positions| PathMatch { score, worktree_id, - positions: Vec::new(), + positions: positions.clone(), path: Arc::from(candidate.path), is_dir: candidate.is_dir, path_prefix: candidate_set.prefix(), @@ -222,14 +202,8 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>( }) .await; - let mut results = Vec::new(); - for segment_result in segment_results { - if results.is_empty() { - results = segment_result; - } else { - util::extend_sorted(&mut results, segment_result, max_results, |a, b| b.cmp(a)); - } - } + let mut results = segment_results.concat(); + util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a)); results } diff --git a/crates/fuzzy/src/strings.rs b/crates/fuzzy/src/strings.rs index e5fa6d7e61..da9b737e66 100644 --- a/crates/fuzzy/src/strings.rs +++ b/crates/fuzzy/src/strings.rs @@ -1,5 +1,5 @@ use crate::{ - matcher::{Match, MatchCandidate, Matcher}, + matcher::{MatchCandidate, Matcher}, CharBag, }; use gpui::BackgroundExecutor; @@ -46,16 +46,6 @@ pub struct StringMatch { pub string: String, } -impl Match for StringMatch { - fn score(&self) -> f64 { - self.score - } - - fn set_positions(&mut self, positions: Vec) { - self.positions = positions; - } -} - impl StringMatch { pub fn ranges(&self) -> impl '_ + Iterator> { let mut positions = self.positions.iter().peekable(); @@ -167,13 +157,8 @@ pub async fn match_strings( scope.spawn(async move { let segment_start = cmp::min(segment_idx * segment_size, candidates.len()); let segment_end = cmp::min(segment_start + segment_size, candidates.len()); - let mut matcher = Matcher::new( - query, - lowercase_query, - query_char_bag, - smart_case, - max_results, - ); + let mut matcher = + Matcher::new(query, lowercase_query, query_char_bag, smart_case); matcher.match_candidates( &[], @@ -181,10 +166,10 @@ pub async fn match_strings( candidates[segment_start..segment_end].iter(), results, cancel_flag, - |candidate, score| StringMatch { + |candidate, score, positions| StringMatch { candidate_id: candidate.id, score, - positions: Vec::new(), + positions: positions.clone(), string: candidate.string.to_string(), }, ); @@ -193,13 +178,7 @@ pub async fn match_strings( }) .await; - let mut results = Vec::new(); - for segment_result in segment_results { - if results.is_empty() { - results = segment_result; - } else { - util::extend_sorted(&mut results, segment_result, max_results, |a, b| b.cmp(a)); - } - } + let mut results = segment_results.concat(); + util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a)); results } diff --git a/crates/util/src/util.rs b/crates/util/src/util.rs index a761958694..7df46eda27 100644 --- a/crates/util/src/util.rs +++ b/crates/util/src/util.rs @@ -8,7 +8,6 @@ pub mod test; use anyhow::{anyhow, Context as _, Result}; use futures::Future; - use itertools::Either; use regex::Regex; use std::sync::{LazyLock, OnceLock}; @@ -111,6 +110,27 @@ where } } +pub fn truncate_to_bottom_n_sorted_by(items: &mut Vec, limit: usize, compare: &F) +where + F: Fn(&T, &T) -> Ordering, +{ + if limit == 0 { + items.truncate(0); + } + if items.len() < limit { + return; + } + // When limit is near to items.len() it may be more efficient to sort the whole list and + // truncate, rather than always doing selection first as is done below. It's hard to analyze + // where the threshold for this should be since the quickselect style algorithm used by + // `select_nth_unstable_by` makes the prefix partially sorted, and so its work is not wasted - + // the expected number of comparisons needed by `sort_by` is less than it is for some arbitrary + // unsorted input. + items.select_nth_unstable_by(limit, compare); + items.truncate(limit); + items.sort_by(compare); +} + #[cfg(unix)] pub fn load_shell_from_passwd() -> Result<()> { let buflen = match unsafe { libc::sysconf(libc::_SC_GETPW_R_SIZE_MAX) } {