Improve fuzzy match performance and fix corner case that omits results (#22524)
* Removes `max_results` from the matcher interface as this is better dealt with in consumers once all results are known. The current implementation was quite inefficient as it was using binary search to find insertion points and then doing an insert which copies the entire suffix each time. * There was a corner case where if the binary search found a match candidate with the same score, it was dropped. Now fixed. * Uses of `util::extend_sorted` when merging results from worker threads also repeatedly uses binary search and insertion which copies the entire suffix. A followup will remove that and its usage. * Adds `util::truncate_to_bottom_n_sorted_by` which uses quickselect + sort to efficiently get a sorted count limited result. * Improves interface of Matcher::match_candidates by providing the match positions to the build function. This allows for removal of the `Match` trait. It also fixes a bug where the Match's own Ord wasn't being used, which seems relevant to PathMatch for cases where scores are the same. Release Notes: - N/A
This commit is contained in:
parent
f912c545e7
commit
6ef5d8f748
4 changed files with 50 additions and 96 deletions
|
@ -14,7 +14,6 @@ pub struct Matcher<'a> {
|
|||
lowercase_query: &'a [char],
|
||||
query_char_bag: CharBag,
|
||||
smart_case: bool,
|
||||
max_results: usize,
|
||||
min_score: f64,
|
||||
match_positions: Vec<usize>,
|
||||
last_positions: Vec<usize>,
|
||||
|
@ -22,11 +21,6 @@ pub struct Matcher<'a> {
|
|||
best_position_matrix: Vec<usize>,
|
||||
}
|
||||
|
||||
pub trait Match: Ord {
|
||||
fn score(&self) -> f64;
|
||||
fn set_positions(&mut self, positions: Vec<usize>);
|
||||
}
|
||||
|
||||
pub trait MatchCandidate {
|
||||
fn has_chars(&self, bag: CharBag) -> bool;
|
||||
fn to_string(&self) -> Cow<'_, str>;
|
||||
|
@ -38,7 +32,6 @@ impl<'a> Matcher<'a> {
|
|||
lowercase_query: &'a [char],
|
||||
query_char_bag: CharBag,
|
||||
smart_case: bool,
|
||||
max_results: usize,
|
||||
) -> Self {
|
||||
Self {
|
||||
query,
|
||||
|
@ -50,10 +43,11 @@ impl<'a> Matcher<'a> {
|
|||
score_matrix: Vec::new(),
|
||||
best_position_matrix: Vec::new(),
|
||||
smart_case,
|
||||
max_results,
|
||||
}
|
||||
}
|
||||
|
||||
/// Filter and score fuzzy match candidates. Results are returned unsorted, in the same order as
|
||||
/// the input candidates.
|
||||
pub fn match_candidates<C: MatchCandidate, R, F>(
|
||||
&mut self,
|
||||
prefix: &[char],
|
||||
|
@ -63,8 +57,7 @@ impl<'a> Matcher<'a> {
|
|||
cancel_flag: &AtomicBool,
|
||||
build_match: F,
|
||||
) where
|
||||
R: Match,
|
||||
F: Fn(&C, f64) -> R,
|
||||
F: Fn(&C, f64, &Vec<usize>) -> R,
|
||||
{
|
||||
let mut candidate_chars = Vec::new();
|
||||
let mut lowercase_candidate_chars = Vec::new();
|
||||
|
@ -103,20 +96,7 @@ impl<'a> Matcher<'a> {
|
|||
);
|
||||
|
||||
if score > 0.0 {
|
||||
let mut mat = build_match(&candidate, score);
|
||||
if let Err(i) = results.binary_search_by(|m| mat.cmp(m)) {
|
||||
if results.len() < self.max_results {
|
||||
mat.set_positions(self.match_positions.clone());
|
||||
results.insert(i, mat);
|
||||
} else if i < results.len() {
|
||||
results.pop();
|
||||
mat.set_positions(self.match_positions.clone());
|
||||
results.insert(i, mat);
|
||||
}
|
||||
if results.len() == self.max_results {
|
||||
self.min_score = results.last().unwrap().score();
|
||||
}
|
||||
}
|
||||
results.push(build_match(&candidate, score, &self.match_positions));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -325,18 +305,18 @@ mod tests {
|
|||
#[test]
|
||||
fn test_get_last_positions() {
|
||||
let mut query: &[char] = &['d', 'c'];
|
||||
let mut matcher = Matcher::new(query, query, query.into(), false, 10);
|
||||
let mut matcher = Matcher::new(query, query, query.into(), false);
|
||||
let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']);
|
||||
assert!(!result);
|
||||
|
||||
query = &['c', 'd'];
|
||||
let mut matcher = Matcher::new(query, query, query.into(), false, 10);
|
||||
let mut matcher = Matcher::new(query, query, query.into(), false);
|
||||
let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']);
|
||||
assert!(result);
|
||||
assert_eq!(matcher.last_positions, vec![2, 4]);
|
||||
|
||||
query = &['z', '/', 'z', 'f'];
|
||||
let mut matcher = Matcher::new(query, query, query.into(), false, 10);
|
||||
let mut matcher = Matcher::new(query, query, query.into(), false);
|
||||
let result = matcher.find_last_positions(&['z', 'e', 'd', '/'], &['z', 'e', 'd', '/', 'f']);
|
||||
assert!(result);
|
||||
assert_eq!(matcher.last_positions, vec![0, 3, 4, 8]);
|
||||
|
@ -451,7 +431,7 @@ mod tests {
|
|||
});
|
||||
}
|
||||
|
||||
let mut matcher = Matcher::new(&query, &lowercase_query, query_chars, smart_case, 100);
|
||||
let mut matcher = Matcher::new(&query, &lowercase_query, query_chars, smart_case);
|
||||
|
||||
let cancel_flag = AtomicBool::new(false);
|
||||
let mut results = Vec::new();
|
||||
|
@ -462,16 +442,17 @@ mod tests {
|
|||
path_entries.into_iter(),
|
||||
&mut results,
|
||||
&cancel_flag,
|
||||
|candidate, score| PathMatch {
|
||||
|candidate, score, positions| PathMatch {
|
||||
score,
|
||||
worktree_id: 0,
|
||||
positions: Vec::new(),
|
||||
positions: positions.clone(),
|
||||
path: Arc::from(candidate.path),
|
||||
path_prefix: "".into(),
|
||||
distance_to_relative_ancestor: usize::MAX,
|
||||
is_dir: false,
|
||||
},
|
||||
);
|
||||
results.sort_by(|a, b| b.cmp(a));
|
||||
|
||||
results
|
||||
.into_iter()
|
||||
|
|
|
@ -7,7 +7,7 @@ use std::{
|
|||
};
|
||||
|
||||
use crate::{
|
||||
matcher::{Match, MatchCandidate, Matcher},
|
||||
matcher::{MatchCandidate, Matcher},
|
||||
CharBag,
|
||||
};
|
||||
|
||||
|
@ -42,16 +42,6 @@ pub trait PathMatchCandidateSet<'a>: Send + Sync {
|
|||
fn candidates(&'a self, start: usize) -> Self::Candidates;
|
||||
}
|
||||
|
||||
impl Match for PathMatch {
|
||||
fn score(&self) -> f64 {
|
||||
self.score
|
||||
}
|
||||
|
||||
fn set_positions(&mut self, positions: Vec<usize>) {
|
||||
self.positions = positions;
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> MatchCandidate for PathMatchCandidate<'a> {
|
||||
fn has_chars(&self, bag: CharBag) -> bool {
|
||||
self.char_bag.is_superset(bag)
|
||||
|
@ -102,13 +92,7 @@ pub fn match_fixed_path_set(
|
|||
let query = query.chars().collect::<Vec<_>>();
|
||||
let query_char_bag = CharBag::from(&lowercase_query[..]);
|
||||
|
||||
let mut matcher = Matcher::new(
|
||||
&query,
|
||||
&lowercase_query,
|
||||
query_char_bag,
|
||||
smart_case,
|
||||
max_results,
|
||||
);
|
||||
let mut matcher = Matcher::new(&query, &lowercase_query, query_char_bag, smart_case);
|
||||
|
||||
let mut results = Vec::new();
|
||||
matcher.match_candidates(
|
||||
|
@ -117,16 +101,17 @@ pub fn match_fixed_path_set(
|
|||
candidates.into_iter(),
|
||||
&mut results,
|
||||
&AtomicBool::new(false),
|
||||
|candidate, score| PathMatch {
|
||||
|candidate, score, positions| PathMatch {
|
||||
score,
|
||||
worktree_id,
|
||||
positions: Vec::new(),
|
||||
positions: positions.clone(),
|
||||
is_dir: candidate.is_dir,
|
||||
path: Arc::from(candidate.path),
|
||||
path_prefix: Arc::default(),
|
||||
distance_to_relative_ancestor: usize::MAX,
|
||||
},
|
||||
);
|
||||
util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
|
||||
results
|
||||
}
|
||||
|
||||
|
@ -164,13 +149,8 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
|
|||
scope.spawn(async move {
|
||||
let segment_start = segment_idx * segment_size;
|
||||
let segment_end = segment_start + segment_size;
|
||||
let mut matcher = Matcher::new(
|
||||
query,
|
||||
lowercase_query,
|
||||
query_char_bag,
|
||||
smart_case,
|
||||
max_results,
|
||||
);
|
||||
let mut matcher =
|
||||
Matcher::new(query, lowercase_query, query_char_bag, smart_case);
|
||||
|
||||
let mut tree_start = 0;
|
||||
for candidate_set in candidate_sets {
|
||||
|
@ -193,10 +173,10 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
|
|||
candidates,
|
||||
results,
|
||||
cancel_flag,
|
||||
|candidate, score| PathMatch {
|
||||
|candidate, score, positions| PathMatch {
|
||||
score,
|
||||
worktree_id,
|
||||
positions: Vec::new(),
|
||||
positions: positions.clone(),
|
||||
path: Arc::from(candidate.path),
|
||||
is_dir: candidate.is_dir,
|
||||
path_prefix: candidate_set.prefix(),
|
||||
|
@ -222,14 +202,8 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
|
|||
})
|
||||
.await;
|
||||
|
||||
let mut results = Vec::new();
|
||||
for segment_result in segment_results {
|
||||
if results.is_empty() {
|
||||
results = segment_result;
|
||||
} else {
|
||||
util::extend_sorted(&mut results, segment_result, max_results, |a, b| b.cmp(a));
|
||||
}
|
||||
}
|
||||
let mut results = segment_results.concat();
|
||||
util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
|
||||
results
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::{
|
||||
matcher::{Match, MatchCandidate, Matcher},
|
||||
matcher::{MatchCandidate, Matcher},
|
||||
CharBag,
|
||||
};
|
||||
use gpui::BackgroundExecutor;
|
||||
|
@ -46,16 +46,6 @@ pub struct StringMatch {
|
|||
pub string: String,
|
||||
}
|
||||
|
||||
impl Match for StringMatch {
|
||||
fn score(&self) -> f64 {
|
||||
self.score
|
||||
}
|
||||
|
||||
fn set_positions(&mut self, positions: Vec<usize>) {
|
||||
self.positions = positions;
|
||||
}
|
||||
}
|
||||
|
||||
impl StringMatch {
|
||||
pub fn ranges(&self) -> impl '_ + Iterator<Item = Range<usize>> {
|
||||
let mut positions = self.positions.iter().peekable();
|
||||
|
@ -167,13 +157,8 @@ pub async fn match_strings(
|
|||
scope.spawn(async move {
|
||||
let segment_start = cmp::min(segment_idx * segment_size, candidates.len());
|
||||
let segment_end = cmp::min(segment_start + segment_size, candidates.len());
|
||||
let mut matcher = Matcher::new(
|
||||
query,
|
||||
lowercase_query,
|
||||
query_char_bag,
|
||||
smart_case,
|
||||
max_results,
|
||||
);
|
||||
let mut matcher =
|
||||
Matcher::new(query, lowercase_query, query_char_bag, smart_case);
|
||||
|
||||
matcher.match_candidates(
|
||||
&[],
|
||||
|
@ -181,10 +166,10 @@ pub async fn match_strings(
|
|||
candidates[segment_start..segment_end].iter(),
|
||||
results,
|
||||
cancel_flag,
|
||||
|candidate, score| StringMatch {
|
||||
|candidate, score, positions| StringMatch {
|
||||
candidate_id: candidate.id,
|
||||
score,
|
||||
positions: Vec::new(),
|
||||
positions: positions.clone(),
|
||||
string: candidate.string.to_string(),
|
||||
},
|
||||
);
|
||||
|
@ -193,13 +178,7 @@ pub async fn match_strings(
|
|||
})
|
||||
.await;
|
||||
|
||||
let mut results = Vec::new();
|
||||
for segment_result in segment_results {
|
||||
if results.is_empty() {
|
||||
results = segment_result;
|
||||
} else {
|
||||
util::extend_sorted(&mut results, segment_result, max_results, |a, b| b.cmp(a));
|
||||
}
|
||||
}
|
||||
let mut results = segment_results.concat();
|
||||
util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
|
||||
results
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue