Fuzzy match performance improvements redo (#22561)

Release Notes:

- N/A
This commit is contained in:
Michael Sloan 2025-01-01 22:31:06 -07:00 committed by GitHub
parent 28d1d2d939
commit 665717da9a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 89 additions and 96 deletions

View file

@ -14,7 +14,6 @@ pub struct Matcher<'a> {
lowercase_query: &'a [char], lowercase_query: &'a [char],
query_char_bag: CharBag, query_char_bag: CharBag,
smart_case: bool, smart_case: bool,
max_results: usize,
min_score: f64, min_score: f64,
match_positions: Vec<usize>, match_positions: Vec<usize>,
last_positions: Vec<usize>, last_positions: Vec<usize>,
@ -22,11 +21,6 @@ pub struct Matcher<'a> {
best_position_matrix: Vec<usize>, best_position_matrix: Vec<usize>,
} }
pub trait Match: Ord {
fn score(&self) -> f64;
fn set_positions(&mut self, positions: Vec<usize>);
}
pub trait MatchCandidate { pub trait MatchCandidate {
fn has_chars(&self, bag: CharBag) -> bool; fn has_chars(&self, bag: CharBag) -> bool;
fn to_string(&self) -> Cow<'_, str>; fn to_string(&self) -> Cow<'_, str>;
@ -38,7 +32,6 @@ impl<'a> Matcher<'a> {
lowercase_query: &'a [char], lowercase_query: &'a [char],
query_char_bag: CharBag, query_char_bag: CharBag,
smart_case: bool, smart_case: bool,
max_results: usize,
) -> Self { ) -> Self {
Self { Self {
query, query,
@ -50,10 +43,11 @@ impl<'a> Matcher<'a> {
score_matrix: Vec::new(), score_matrix: Vec::new(),
best_position_matrix: Vec::new(), best_position_matrix: Vec::new(),
smart_case, smart_case,
max_results,
} }
} }
/// Filter and score fuzzy match candidates. Results are returned unsorted, in the same order as
/// the input candidates.
pub fn match_candidates<C: MatchCandidate, R, F>( pub fn match_candidates<C: MatchCandidate, R, F>(
&mut self, &mut self,
prefix: &[char], prefix: &[char],
@ -63,8 +57,7 @@ impl<'a> Matcher<'a> {
cancel_flag: &AtomicBool, cancel_flag: &AtomicBool,
build_match: F, build_match: F,
) where ) where
R: Match, F: Fn(&C, f64, &Vec<usize>) -> R,
F: Fn(&C, f64) -> R,
{ {
let mut candidate_chars = Vec::new(); let mut candidate_chars = Vec::new();
let mut lowercase_candidate_chars = Vec::new(); let mut lowercase_candidate_chars = Vec::new();
@ -103,20 +96,7 @@ impl<'a> Matcher<'a> {
); );
if score > 0.0 { if score > 0.0 {
let mut mat = build_match(&candidate, score); results.push(build_match(&candidate, score, &self.match_positions));
if let Err(i) = results.binary_search_by(|m| mat.cmp(m)) {
if results.len() < self.max_results {
mat.set_positions(self.match_positions.clone());
results.insert(i, mat);
} else if i < results.len() {
results.pop();
mat.set_positions(self.match_positions.clone());
results.insert(i, mat);
}
if results.len() == self.max_results {
self.min_score = results.last().unwrap().score();
}
}
} }
} }
} }
@ -325,18 +305,18 @@ mod tests {
#[test] #[test]
fn test_get_last_positions() { fn test_get_last_positions() {
let mut query: &[char] = &['d', 'c']; let mut query: &[char] = &['d', 'c'];
let mut matcher = Matcher::new(query, query, query.into(), false, 10); let mut matcher = Matcher::new(query, query, query.into(), false);
let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']); let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']);
assert!(!result); assert!(!result);
query = &['c', 'd']; query = &['c', 'd'];
let mut matcher = Matcher::new(query, query, query.into(), false, 10); let mut matcher = Matcher::new(query, query, query.into(), false);
let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']); let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']);
assert!(result); assert!(result);
assert_eq!(matcher.last_positions, vec![2, 4]); assert_eq!(matcher.last_positions, vec![2, 4]);
query = &['z', '/', 'z', 'f']; query = &['z', '/', 'z', 'f'];
let mut matcher = Matcher::new(query, query, query.into(), false, 10); let mut matcher = Matcher::new(query, query, query.into(), false);
let result = matcher.find_last_positions(&['z', 'e', 'd', '/'], &['z', 'e', 'd', '/', 'f']); let result = matcher.find_last_positions(&['z', 'e', 'd', '/'], &['z', 'e', 'd', '/', 'f']);
assert!(result); assert!(result);
assert_eq!(matcher.last_positions, vec![0, 3, 4, 8]); assert_eq!(matcher.last_positions, vec![0, 3, 4, 8]);
@ -451,7 +431,7 @@ mod tests {
}); });
} }
let mut matcher = Matcher::new(&query, &lowercase_query, query_chars, smart_case, 100); let mut matcher = Matcher::new(&query, &lowercase_query, query_chars, smart_case);
let cancel_flag = AtomicBool::new(false); let cancel_flag = AtomicBool::new(false);
let mut results = Vec::new(); let mut results = Vec::new();
@ -462,16 +442,17 @@ mod tests {
path_entries.into_iter(), path_entries.into_iter(),
&mut results, &mut results,
&cancel_flag, &cancel_flag,
|candidate, score| PathMatch { |candidate, score, positions| PathMatch {
score, score,
worktree_id: 0, worktree_id: 0,
positions: Vec::new(), positions: positions.clone(),
path: Arc::from(candidate.path), path: Arc::from(candidate.path),
path_prefix: "".into(), path_prefix: "".into(),
distance_to_relative_ancestor: usize::MAX, distance_to_relative_ancestor: usize::MAX,
is_dir: false, is_dir: false,
}, },
); );
results.sort_by(|a, b| b.cmp(a));
results results
.into_iter() .into_iter()

View file

@ -3,11 +3,14 @@ use std::{
borrow::Cow, borrow::Cow,
cmp::{self, Ordering}, cmp::{self, Ordering},
path::Path, path::Path,
sync::{atomic::AtomicBool, Arc}, sync::{
atomic::{self, AtomicBool},
Arc,
},
}; };
use crate::{ use crate::{
matcher::{Match, MatchCandidate, Matcher}, matcher::{MatchCandidate, Matcher},
CharBag, CharBag,
}; };
@ -42,16 +45,6 @@ pub trait PathMatchCandidateSet<'a>: Send + Sync {
fn candidates(&'a self, start: usize) -> Self::Candidates; fn candidates(&'a self, start: usize) -> Self::Candidates;
} }
impl Match for PathMatch {
fn score(&self) -> f64 {
self.score
}
fn set_positions(&mut self, positions: Vec<usize>) {
self.positions = positions;
}
}
impl<'a> MatchCandidate for PathMatchCandidate<'a> { impl<'a> MatchCandidate for PathMatchCandidate<'a> {
fn has_chars(&self, bag: CharBag) -> bool { fn has_chars(&self, bag: CharBag) -> bool {
self.char_bag.is_superset(bag) self.char_bag.is_superset(bag)
@ -102,13 +95,7 @@ pub fn match_fixed_path_set(
let query = query.chars().collect::<Vec<_>>(); let query = query.chars().collect::<Vec<_>>();
let query_char_bag = CharBag::from(&lowercase_query[..]); let query_char_bag = CharBag::from(&lowercase_query[..]);
let mut matcher = Matcher::new( let mut matcher = Matcher::new(&query, &lowercase_query, query_char_bag, smart_case);
&query,
&lowercase_query,
query_char_bag,
smart_case,
max_results,
);
let mut results = Vec::new(); let mut results = Vec::new();
matcher.match_candidates( matcher.match_candidates(
@ -117,16 +104,17 @@ pub fn match_fixed_path_set(
candidates.into_iter(), candidates.into_iter(),
&mut results, &mut results,
&AtomicBool::new(false), &AtomicBool::new(false),
|candidate, score| PathMatch { |candidate, score, positions| PathMatch {
score, score,
worktree_id, worktree_id,
positions: Vec::new(), positions: positions.clone(),
is_dir: candidate.is_dir, is_dir: candidate.is_dir,
path: Arc::from(candidate.path), path: Arc::from(candidate.path),
path_prefix: Arc::default(), path_prefix: Arc::default(),
distance_to_relative_ancestor: usize::MAX, distance_to_relative_ancestor: usize::MAX,
}, },
); );
util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
results results
} }
@ -164,16 +152,15 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
scope.spawn(async move { scope.spawn(async move {
let segment_start = segment_idx * segment_size; let segment_start = segment_idx * segment_size;
let segment_end = segment_start + segment_size; let segment_end = segment_start + segment_size;
let mut matcher = Matcher::new( let mut matcher =
query, Matcher::new(query, lowercase_query, query_char_bag, smart_case);
lowercase_query,
query_char_bag,
smart_case,
max_results,
);
let mut tree_start = 0; let mut tree_start = 0;
for candidate_set in candidate_sets { for candidate_set in candidate_sets {
if cancel_flag.load(atomic::Ordering::Relaxed) {
break;
}
let tree_end = tree_start + candidate_set.len(); let tree_end = tree_start + candidate_set.len();
if tree_start < segment_end && segment_start < tree_end { if tree_start < segment_end && segment_start < tree_end {
@ -193,10 +180,10 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
candidates, candidates,
results, results,
cancel_flag, cancel_flag,
|candidate, score| PathMatch { |candidate, score, positions| PathMatch {
score, score,
worktree_id, worktree_id,
positions: Vec::new(), positions: positions.clone(),
path: Arc::from(candidate.path), path: Arc::from(candidate.path),
is_dir: candidate.is_dir, is_dir: candidate.is_dir,
path_prefix: candidate_set.prefix(), path_prefix: candidate_set.prefix(),
@ -222,14 +209,12 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
}) })
.await; .await;
let mut results = Vec::new(); if cancel_flag.load(atomic::Ordering::Relaxed) {
for segment_result in segment_results { return Vec::new();
if results.is_empty() {
results = segment_result;
} else {
util::extend_sorted(&mut results, segment_result, max_results, |a, b| b.cmp(a));
}
} }
let mut results = segment_results.concat();
util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
results results
} }

View file

@ -1,5 +1,5 @@
use crate::{ use crate::{
matcher::{Match, MatchCandidate, Matcher}, matcher::{MatchCandidate, Matcher},
CharBag, CharBag,
}; };
use gpui::BackgroundExecutor; use gpui::BackgroundExecutor;
@ -8,7 +8,7 @@ use std::{
cmp::{self, Ordering}, cmp::{self, Ordering},
iter, iter,
ops::Range, ops::Range,
sync::atomic::AtomicBool, sync::atomic::{self, AtomicBool},
}; };
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
@ -46,16 +46,6 @@ pub struct StringMatch {
pub string: String, pub string: String,
} }
impl Match for StringMatch {
fn score(&self) -> f64 {
self.score
}
fn set_positions(&mut self, positions: Vec<usize>) {
self.positions = positions;
}
}
impl StringMatch { impl StringMatch {
pub fn ranges(&self) -> impl '_ + Iterator<Item = Range<usize>> { pub fn ranges(&self) -> impl '_ + Iterator<Item = Range<usize>> {
let mut positions = self.positions.iter().peekable(); let mut positions = self.positions.iter().peekable();
@ -167,13 +157,8 @@ pub async fn match_strings(
scope.spawn(async move { scope.spawn(async move {
let segment_start = cmp::min(segment_idx * segment_size, candidates.len()); let segment_start = cmp::min(segment_idx * segment_size, candidates.len());
let segment_end = cmp::min(segment_start + segment_size, candidates.len()); let segment_end = cmp::min(segment_start + segment_size, candidates.len());
let mut matcher = Matcher::new( let mut matcher =
query, Matcher::new(query, lowercase_query, query_char_bag, smart_case);
lowercase_query,
query_char_bag,
smart_case,
max_results,
);
matcher.match_candidates( matcher.match_candidates(
&[], &[],
@ -181,10 +166,10 @@ pub async fn match_strings(
candidates[segment_start..segment_end].iter(), candidates[segment_start..segment_end].iter(),
results, results,
cancel_flag, cancel_flag,
|candidate, score| StringMatch { |candidate, score, positions| StringMatch {
candidate_id: candidate.id, candidate_id: candidate.id,
score, score,
positions: Vec::new(), positions: positions.clone(),
string: candidate.string.to_string(), string: candidate.string.to_string(),
}, },
); );
@ -193,13 +178,11 @@ pub async fn match_strings(
}) })
.await; .await;
let mut results = Vec::new(); if cancel_flag.load(atomic::Ordering::Relaxed) {
for segment_result in segment_results { return Vec::new();
if results.is_empty() {
results = segment_result;
} else {
util::extend_sorted(&mut results, segment_result, max_results, |a, b| b.cmp(a));
}
} }
let mut results = segment_results.concat();
util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
results results
} }

View file

@ -8,7 +8,6 @@ pub mod test;
use anyhow::{anyhow, Context as _, Result}; use anyhow::{anyhow, Context as _, Result};
use futures::Future; use futures::Future;
use itertools::Either; use itertools::Either;
use regex::Regex; use regex::Regex;
use std::sync::{LazyLock, OnceLock}; use std::sync::{LazyLock, OnceLock};
@ -111,6 +110,28 @@ where
} }
} }
pub fn truncate_to_bottom_n_sorted_by<T, F>(items: &mut Vec<T>, limit: usize, compare: &F)
where
F: Fn(&T, &T) -> Ordering,
{
if limit == 0 {
items.truncate(0);
}
if items.len() <= limit {
items.sort_by(compare);
return;
}
// When limit is near to items.len() it may be more efficient to sort the whole list and
// truncate, rather than always doing selection first as is done below. It's hard to analyze
// where the threshold for this should be since the quickselect style algorithm used by
// `select_nth_unstable_by` makes the prefix partially sorted, and so its work is not wasted -
// the expected number of comparisons needed by `sort_by` is less than it is for some arbitrary
// unsorted input.
items.select_nth_unstable_by(limit, compare);
items.truncate(limit);
items.sort_by(compare);
}
#[cfg(unix)] #[cfg(unix)]
pub fn load_shell_from_passwd() -> Result<()> { pub fn load_shell_from_passwd() -> Result<()> {
let buflen = match unsafe { libc::sysconf(libc::_SC_GETPW_R_SIZE_MAX) } { let buflen = match unsafe { libc::sysconf(libc::_SC_GETPW_R_SIZE_MAX) } {
@ -734,6 +755,29 @@ mod tests {
assert_eq!(vec, &[1000, 101, 21, 19, 17, 13, 9, 8]); assert_eq!(vec, &[1000, 101, 21, 19, 17, 13, 9, 8]);
} }
#[test]
fn test_truncate_to_bottom_n_sorted_by() {
let mut vec: Vec<u32> = vec![5, 2, 3, 4, 1];
truncate_to_bottom_n_sorted_by(&mut vec, 10, &u32::cmp);
assert_eq!(vec, &[1, 2, 3, 4, 5]);
vec = vec![5, 2, 3, 4, 1];
truncate_to_bottom_n_sorted_by(&mut vec, 5, &u32::cmp);
assert_eq!(vec, &[1, 2, 3, 4, 5]);
vec = vec![5, 2, 3, 4, 1];
truncate_to_bottom_n_sorted_by(&mut vec, 4, &u32::cmp);
assert_eq!(vec, &[1, 2, 3, 4]);
vec = vec![5, 2, 3, 4, 1];
truncate_to_bottom_n_sorted_by(&mut vec, 1, &u32::cmp);
assert_eq!(vec, &[1]);
vec = vec![5, 2, 3, 4, 1];
truncate_to_bottom_n_sorted_by(&mut vec, 0, &u32::cmp);
assert!(vec.is_empty());
}
#[test] #[test]
fn test_iife() { fn test_iife() {
fn option_returning_function() -> Option<()> { fn option_returning_function() -> Option<()> {