Fuzzy match performance improvements redo (#22561)
Release Notes: - N/A
This commit is contained in:
parent
28d1d2d939
commit
665717da9a
4 changed files with 89 additions and 96 deletions
|
@ -14,7 +14,6 @@ pub struct Matcher<'a> {
|
||||||
lowercase_query: &'a [char],
|
lowercase_query: &'a [char],
|
||||||
query_char_bag: CharBag,
|
query_char_bag: CharBag,
|
||||||
smart_case: bool,
|
smart_case: bool,
|
||||||
max_results: usize,
|
|
||||||
min_score: f64,
|
min_score: f64,
|
||||||
match_positions: Vec<usize>,
|
match_positions: Vec<usize>,
|
||||||
last_positions: Vec<usize>,
|
last_positions: Vec<usize>,
|
||||||
|
@ -22,11 +21,6 @@ pub struct Matcher<'a> {
|
||||||
best_position_matrix: Vec<usize>,
|
best_position_matrix: Vec<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait Match: Ord {
|
|
||||||
fn score(&self) -> f64;
|
|
||||||
fn set_positions(&mut self, positions: Vec<usize>);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub trait MatchCandidate {
|
pub trait MatchCandidate {
|
||||||
fn has_chars(&self, bag: CharBag) -> bool;
|
fn has_chars(&self, bag: CharBag) -> bool;
|
||||||
fn to_string(&self) -> Cow<'_, str>;
|
fn to_string(&self) -> Cow<'_, str>;
|
||||||
|
@ -38,7 +32,6 @@ impl<'a> Matcher<'a> {
|
||||||
lowercase_query: &'a [char],
|
lowercase_query: &'a [char],
|
||||||
query_char_bag: CharBag,
|
query_char_bag: CharBag,
|
||||||
smart_case: bool,
|
smart_case: bool,
|
||||||
max_results: usize,
|
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
query,
|
query,
|
||||||
|
@ -50,10 +43,11 @@ impl<'a> Matcher<'a> {
|
||||||
score_matrix: Vec::new(),
|
score_matrix: Vec::new(),
|
||||||
best_position_matrix: Vec::new(),
|
best_position_matrix: Vec::new(),
|
||||||
smart_case,
|
smart_case,
|
||||||
max_results,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Filter and score fuzzy match candidates. Results are returned unsorted, in the same order as
|
||||||
|
/// the input candidates.
|
||||||
pub fn match_candidates<C: MatchCandidate, R, F>(
|
pub fn match_candidates<C: MatchCandidate, R, F>(
|
||||||
&mut self,
|
&mut self,
|
||||||
prefix: &[char],
|
prefix: &[char],
|
||||||
|
@ -63,8 +57,7 @@ impl<'a> Matcher<'a> {
|
||||||
cancel_flag: &AtomicBool,
|
cancel_flag: &AtomicBool,
|
||||||
build_match: F,
|
build_match: F,
|
||||||
) where
|
) where
|
||||||
R: Match,
|
F: Fn(&C, f64, &Vec<usize>) -> R,
|
||||||
F: Fn(&C, f64) -> R,
|
|
||||||
{
|
{
|
||||||
let mut candidate_chars = Vec::new();
|
let mut candidate_chars = Vec::new();
|
||||||
let mut lowercase_candidate_chars = Vec::new();
|
let mut lowercase_candidate_chars = Vec::new();
|
||||||
|
@ -103,20 +96,7 @@ impl<'a> Matcher<'a> {
|
||||||
);
|
);
|
||||||
|
|
||||||
if score > 0.0 {
|
if score > 0.0 {
|
||||||
let mut mat = build_match(&candidate, score);
|
results.push(build_match(&candidate, score, &self.match_positions));
|
||||||
if let Err(i) = results.binary_search_by(|m| mat.cmp(m)) {
|
|
||||||
if results.len() < self.max_results {
|
|
||||||
mat.set_positions(self.match_positions.clone());
|
|
||||||
results.insert(i, mat);
|
|
||||||
} else if i < results.len() {
|
|
||||||
results.pop();
|
|
||||||
mat.set_positions(self.match_positions.clone());
|
|
||||||
results.insert(i, mat);
|
|
||||||
}
|
|
||||||
if results.len() == self.max_results {
|
|
||||||
self.min_score = results.last().unwrap().score();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -325,18 +305,18 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_get_last_positions() {
|
fn test_get_last_positions() {
|
||||||
let mut query: &[char] = &['d', 'c'];
|
let mut query: &[char] = &['d', 'c'];
|
||||||
let mut matcher = Matcher::new(query, query, query.into(), false, 10);
|
let mut matcher = Matcher::new(query, query, query.into(), false);
|
||||||
let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']);
|
let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']);
|
||||||
assert!(!result);
|
assert!(!result);
|
||||||
|
|
||||||
query = &['c', 'd'];
|
query = &['c', 'd'];
|
||||||
let mut matcher = Matcher::new(query, query, query.into(), false, 10);
|
let mut matcher = Matcher::new(query, query, query.into(), false);
|
||||||
let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']);
|
let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']);
|
||||||
assert!(result);
|
assert!(result);
|
||||||
assert_eq!(matcher.last_positions, vec![2, 4]);
|
assert_eq!(matcher.last_positions, vec![2, 4]);
|
||||||
|
|
||||||
query = &['z', '/', 'z', 'f'];
|
query = &['z', '/', 'z', 'f'];
|
||||||
let mut matcher = Matcher::new(query, query, query.into(), false, 10);
|
let mut matcher = Matcher::new(query, query, query.into(), false);
|
||||||
let result = matcher.find_last_positions(&['z', 'e', 'd', '/'], &['z', 'e', 'd', '/', 'f']);
|
let result = matcher.find_last_positions(&['z', 'e', 'd', '/'], &['z', 'e', 'd', '/', 'f']);
|
||||||
assert!(result);
|
assert!(result);
|
||||||
assert_eq!(matcher.last_positions, vec![0, 3, 4, 8]);
|
assert_eq!(matcher.last_positions, vec![0, 3, 4, 8]);
|
||||||
|
@ -451,7 +431,7 @@ mod tests {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut matcher = Matcher::new(&query, &lowercase_query, query_chars, smart_case, 100);
|
let mut matcher = Matcher::new(&query, &lowercase_query, query_chars, smart_case);
|
||||||
|
|
||||||
let cancel_flag = AtomicBool::new(false);
|
let cancel_flag = AtomicBool::new(false);
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
|
@ -462,16 +442,17 @@ mod tests {
|
||||||
path_entries.into_iter(),
|
path_entries.into_iter(),
|
||||||
&mut results,
|
&mut results,
|
||||||
&cancel_flag,
|
&cancel_flag,
|
||||||
|candidate, score| PathMatch {
|
|candidate, score, positions| PathMatch {
|
||||||
score,
|
score,
|
||||||
worktree_id: 0,
|
worktree_id: 0,
|
||||||
positions: Vec::new(),
|
positions: positions.clone(),
|
||||||
path: Arc::from(candidate.path),
|
path: Arc::from(candidate.path),
|
||||||
path_prefix: "".into(),
|
path_prefix: "".into(),
|
||||||
distance_to_relative_ancestor: usize::MAX,
|
distance_to_relative_ancestor: usize::MAX,
|
||||||
is_dir: false,
|
is_dir: false,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
results.sort_by(|a, b| b.cmp(a));
|
||||||
|
|
||||||
results
|
results
|
||||||
.into_iter()
|
.into_iter()
|
||||||
|
|
|
@ -3,11 +3,14 @@ use std::{
|
||||||
borrow::Cow,
|
borrow::Cow,
|
||||||
cmp::{self, Ordering},
|
cmp::{self, Ordering},
|
||||||
path::Path,
|
path::Path,
|
||||||
sync::{atomic::AtomicBool, Arc},
|
sync::{
|
||||||
|
atomic::{self, AtomicBool},
|
||||||
|
Arc,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
matcher::{Match, MatchCandidate, Matcher},
|
matcher::{MatchCandidate, Matcher},
|
||||||
CharBag,
|
CharBag,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -42,16 +45,6 @@ pub trait PathMatchCandidateSet<'a>: Send + Sync {
|
||||||
fn candidates(&'a self, start: usize) -> Self::Candidates;
|
fn candidates(&'a self, start: usize) -> Self::Candidates;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Match for PathMatch {
|
|
||||||
fn score(&self) -> f64 {
|
|
||||||
self.score
|
|
||||||
}
|
|
||||||
|
|
||||||
fn set_positions(&mut self, positions: Vec<usize>) {
|
|
||||||
self.positions = positions;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> MatchCandidate for PathMatchCandidate<'a> {
|
impl<'a> MatchCandidate for PathMatchCandidate<'a> {
|
||||||
fn has_chars(&self, bag: CharBag) -> bool {
|
fn has_chars(&self, bag: CharBag) -> bool {
|
||||||
self.char_bag.is_superset(bag)
|
self.char_bag.is_superset(bag)
|
||||||
|
@ -102,13 +95,7 @@ pub fn match_fixed_path_set(
|
||||||
let query = query.chars().collect::<Vec<_>>();
|
let query = query.chars().collect::<Vec<_>>();
|
||||||
let query_char_bag = CharBag::from(&lowercase_query[..]);
|
let query_char_bag = CharBag::from(&lowercase_query[..]);
|
||||||
|
|
||||||
let mut matcher = Matcher::new(
|
let mut matcher = Matcher::new(&query, &lowercase_query, query_char_bag, smart_case);
|
||||||
&query,
|
|
||||||
&lowercase_query,
|
|
||||||
query_char_bag,
|
|
||||||
smart_case,
|
|
||||||
max_results,
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
matcher.match_candidates(
|
matcher.match_candidates(
|
||||||
|
@ -117,16 +104,17 @@ pub fn match_fixed_path_set(
|
||||||
candidates.into_iter(),
|
candidates.into_iter(),
|
||||||
&mut results,
|
&mut results,
|
||||||
&AtomicBool::new(false),
|
&AtomicBool::new(false),
|
||||||
|candidate, score| PathMatch {
|
|candidate, score, positions| PathMatch {
|
||||||
score,
|
score,
|
||||||
worktree_id,
|
worktree_id,
|
||||||
positions: Vec::new(),
|
positions: positions.clone(),
|
||||||
is_dir: candidate.is_dir,
|
is_dir: candidate.is_dir,
|
||||||
path: Arc::from(candidate.path),
|
path: Arc::from(candidate.path),
|
||||||
path_prefix: Arc::default(),
|
path_prefix: Arc::default(),
|
||||||
distance_to_relative_ancestor: usize::MAX,
|
distance_to_relative_ancestor: usize::MAX,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
|
||||||
results
|
results
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -164,16 +152,15 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
|
||||||
scope.spawn(async move {
|
scope.spawn(async move {
|
||||||
let segment_start = segment_idx * segment_size;
|
let segment_start = segment_idx * segment_size;
|
||||||
let segment_end = segment_start + segment_size;
|
let segment_end = segment_start + segment_size;
|
||||||
let mut matcher = Matcher::new(
|
let mut matcher =
|
||||||
query,
|
Matcher::new(query, lowercase_query, query_char_bag, smart_case);
|
||||||
lowercase_query,
|
|
||||||
query_char_bag,
|
|
||||||
smart_case,
|
|
||||||
max_results,
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut tree_start = 0;
|
let mut tree_start = 0;
|
||||||
for candidate_set in candidate_sets {
|
for candidate_set in candidate_sets {
|
||||||
|
if cancel_flag.load(atomic::Ordering::Relaxed) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
let tree_end = tree_start + candidate_set.len();
|
let tree_end = tree_start + candidate_set.len();
|
||||||
|
|
||||||
if tree_start < segment_end && segment_start < tree_end {
|
if tree_start < segment_end && segment_start < tree_end {
|
||||||
|
@ -193,10 +180,10 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
|
||||||
candidates,
|
candidates,
|
||||||
results,
|
results,
|
||||||
cancel_flag,
|
cancel_flag,
|
||||||
|candidate, score| PathMatch {
|
|candidate, score, positions| PathMatch {
|
||||||
score,
|
score,
|
||||||
worktree_id,
|
worktree_id,
|
||||||
positions: Vec::new(),
|
positions: positions.clone(),
|
||||||
path: Arc::from(candidate.path),
|
path: Arc::from(candidate.path),
|
||||||
is_dir: candidate.is_dir,
|
is_dir: candidate.is_dir,
|
||||||
path_prefix: candidate_set.prefix(),
|
path_prefix: candidate_set.prefix(),
|
||||||
|
@ -222,14 +209,12 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let mut results = Vec::new();
|
if cancel_flag.load(atomic::Ordering::Relaxed) {
|
||||||
for segment_result in segment_results {
|
return Vec::new();
|
||||||
if results.is_empty() {
|
|
||||||
results = segment_result;
|
|
||||||
} else {
|
|
||||||
util::extend_sorted(&mut results, segment_result, max_results, |a, b| b.cmp(a));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut results = segment_results.concat();
|
||||||
|
util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
|
||||||
results
|
results
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
use crate::{
|
use crate::{
|
||||||
matcher::{Match, MatchCandidate, Matcher},
|
matcher::{MatchCandidate, Matcher},
|
||||||
CharBag,
|
CharBag,
|
||||||
};
|
};
|
||||||
use gpui::BackgroundExecutor;
|
use gpui::BackgroundExecutor;
|
||||||
|
@ -8,7 +8,7 @@ use std::{
|
||||||
cmp::{self, Ordering},
|
cmp::{self, Ordering},
|
||||||
iter,
|
iter,
|
||||||
ops::Range,
|
ops::Range,
|
||||||
sync::atomic::AtomicBool,
|
sync::atomic::{self, AtomicBool},
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
|
@ -46,16 +46,6 @@ pub struct StringMatch {
|
||||||
pub string: String,
|
pub string: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Match for StringMatch {
|
|
||||||
fn score(&self) -> f64 {
|
|
||||||
self.score
|
|
||||||
}
|
|
||||||
|
|
||||||
fn set_positions(&mut self, positions: Vec<usize>) {
|
|
||||||
self.positions = positions;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl StringMatch {
|
impl StringMatch {
|
||||||
pub fn ranges(&self) -> impl '_ + Iterator<Item = Range<usize>> {
|
pub fn ranges(&self) -> impl '_ + Iterator<Item = Range<usize>> {
|
||||||
let mut positions = self.positions.iter().peekable();
|
let mut positions = self.positions.iter().peekable();
|
||||||
|
@ -167,13 +157,8 @@ pub async fn match_strings(
|
||||||
scope.spawn(async move {
|
scope.spawn(async move {
|
||||||
let segment_start = cmp::min(segment_idx * segment_size, candidates.len());
|
let segment_start = cmp::min(segment_idx * segment_size, candidates.len());
|
||||||
let segment_end = cmp::min(segment_start + segment_size, candidates.len());
|
let segment_end = cmp::min(segment_start + segment_size, candidates.len());
|
||||||
let mut matcher = Matcher::new(
|
let mut matcher =
|
||||||
query,
|
Matcher::new(query, lowercase_query, query_char_bag, smart_case);
|
||||||
lowercase_query,
|
|
||||||
query_char_bag,
|
|
||||||
smart_case,
|
|
||||||
max_results,
|
|
||||||
);
|
|
||||||
|
|
||||||
matcher.match_candidates(
|
matcher.match_candidates(
|
||||||
&[],
|
&[],
|
||||||
|
@ -181,10 +166,10 @@ pub async fn match_strings(
|
||||||
candidates[segment_start..segment_end].iter(),
|
candidates[segment_start..segment_end].iter(),
|
||||||
results,
|
results,
|
||||||
cancel_flag,
|
cancel_flag,
|
||||||
|candidate, score| StringMatch {
|
|candidate, score, positions| StringMatch {
|
||||||
candidate_id: candidate.id,
|
candidate_id: candidate.id,
|
||||||
score,
|
score,
|
||||||
positions: Vec::new(),
|
positions: positions.clone(),
|
||||||
string: candidate.string.to_string(),
|
string: candidate.string.to_string(),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
@ -193,13 +178,11 @@ pub async fn match_strings(
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let mut results = Vec::new();
|
if cancel_flag.load(atomic::Ordering::Relaxed) {
|
||||||
for segment_result in segment_results {
|
return Vec::new();
|
||||||
if results.is_empty() {
|
|
||||||
results = segment_result;
|
|
||||||
} else {
|
|
||||||
util::extend_sorted(&mut results, segment_result, max_results, |a, b| b.cmp(a));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut results = segment_results.concat();
|
||||||
|
util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
|
||||||
results
|
results
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,7 +8,6 @@ pub mod test;
|
||||||
|
|
||||||
use anyhow::{anyhow, Context as _, Result};
|
use anyhow::{anyhow, Context as _, Result};
|
||||||
use futures::Future;
|
use futures::Future;
|
||||||
|
|
||||||
use itertools::Either;
|
use itertools::Either;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use std::sync::{LazyLock, OnceLock};
|
use std::sync::{LazyLock, OnceLock};
|
||||||
|
@ -111,6 +110,28 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn truncate_to_bottom_n_sorted_by<T, F>(items: &mut Vec<T>, limit: usize, compare: &F)
|
||||||
|
where
|
||||||
|
F: Fn(&T, &T) -> Ordering,
|
||||||
|
{
|
||||||
|
if limit == 0 {
|
||||||
|
items.truncate(0);
|
||||||
|
}
|
||||||
|
if items.len() <= limit {
|
||||||
|
items.sort_by(compare);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// When limit is near to items.len() it may be more efficient to sort the whole list and
|
||||||
|
// truncate, rather than always doing selection first as is done below. It's hard to analyze
|
||||||
|
// where the threshold for this should be since the quickselect style algorithm used by
|
||||||
|
// `select_nth_unstable_by` makes the prefix partially sorted, and so its work is not wasted -
|
||||||
|
// the expected number of comparisons needed by `sort_by` is less than it is for some arbitrary
|
||||||
|
// unsorted input.
|
||||||
|
items.select_nth_unstable_by(limit, compare);
|
||||||
|
items.truncate(limit);
|
||||||
|
items.sort_by(compare);
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
pub fn load_shell_from_passwd() -> Result<()> {
|
pub fn load_shell_from_passwd() -> Result<()> {
|
||||||
let buflen = match unsafe { libc::sysconf(libc::_SC_GETPW_R_SIZE_MAX) } {
|
let buflen = match unsafe { libc::sysconf(libc::_SC_GETPW_R_SIZE_MAX) } {
|
||||||
|
@ -734,6 +755,29 @@ mod tests {
|
||||||
assert_eq!(vec, &[1000, 101, 21, 19, 17, 13, 9, 8]);
|
assert_eq!(vec, &[1000, 101, 21, 19, 17, 13, 9, 8]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_truncate_to_bottom_n_sorted_by() {
|
||||||
|
let mut vec: Vec<u32> = vec![5, 2, 3, 4, 1];
|
||||||
|
truncate_to_bottom_n_sorted_by(&mut vec, 10, &u32::cmp);
|
||||||
|
assert_eq!(vec, &[1, 2, 3, 4, 5]);
|
||||||
|
|
||||||
|
vec = vec![5, 2, 3, 4, 1];
|
||||||
|
truncate_to_bottom_n_sorted_by(&mut vec, 5, &u32::cmp);
|
||||||
|
assert_eq!(vec, &[1, 2, 3, 4, 5]);
|
||||||
|
|
||||||
|
vec = vec![5, 2, 3, 4, 1];
|
||||||
|
truncate_to_bottom_n_sorted_by(&mut vec, 4, &u32::cmp);
|
||||||
|
assert_eq!(vec, &[1, 2, 3, 4]);
|
||||||
|
|
||||||
|
vec = vec![5, 2, 3, 4, 1];
|
||||||
|
truncate_to_bottom_n_sorted_by(&mut vec, 1, &u32::cmp);
|
||||||
|
assert_eq!(vec, &[1]);
|
||||||
|
|
||||||
|
vec = vec![5, 2, 3, 4, 1];
|
||||||
|
truncate_to_bottom_n_sorted_by(&mut vec, 0, &u32::cmp);
|
||||||
|
assert!(vec.is_empty());
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_iife() {
|
fn test_iife() {
|
||||||
fn option_returning_function() -> Option<()> {
|
fn option_returning_function() -> Option<()> {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue