Fix fuzzy matching after removing root dirname from stored paths

Co-Authored-By: Nathan Sobo <nathan@zed.dev>
This commit is contained in:
Max Brunsfeld 2021-04-23 15:22:47 -07:00
parent dced9469f5
commit 054203d21c
4 changed files with 139 additions and 68 deletions

View file

@ -24,6 +24,7 @@ pub struct FileFinder {
search_count: usize, search_count: usize,
latest_search_id: usize, latest_search_id: usize,
matches: Vec<PathMatch>, matches: Vec<PathMatch>,
include_root_name: bool,
selected: usize, selected: usize,
list_state: UniformListState, list_state: UniformListState,
} }
@ -138,7 +139,12 @@ impl FileFinder {
) -> Option<ElementBox> { ) -> Option<ElementBox> {
let tree_id = path_match.tree_id; let tree_id = path_match.tree_id;
self.worktree(tree_id, app).map(|_| { self.worktree(tree_id, app).map(|tree| {
let prefix = if self.include_root_name {
tree.root_name_chars()
} else {
&[]
};
let path = path_match.path.clone(); let path = path_match.path.clone();
let path_string = &path_match.path_string; let path_string = &path_match.path_string;
let file_name = Path::new(&path_string) let file_name = Path::new(&path_string)
@ -148,7 +154,8 @@ impl FileFinder {
.to_string(); .to_string();
let path_positions = path_match.positions.clone(); let path_positions = path_match.positions.clone();
let file_name_start = path_string.chars().count() - file_name.chars().count(); let file_name_start =
prefix.len() + path_string.chars().count() - file_name.chars().count();
let mut file_name_positions = Vec::new(); let mut file_name_positions = Vec::new();
file_name_positions.extend(path_positions.iter().filter_map(|pos| { file_name_positions.extend(path_positions.iter().filter_map(|pos| {
if pos >= &file_name_start { if pos >= &file_name_start {
@ -162,6 +169,9 @@ impl FileFinder {
let highlight_color = ColorU::from_u32(0x304ee2ff); let highlight_color = ColorU::from_u32(0x304ee2ff);
let bold = *Properties::new().weight(Weight::BOLD); let bold = *Properties::new().weight(Weight::BOLD);
let mut full_path = prefix.iter().collect::<String>();
full_path.push_str(&path_string);
let mut container = Container::new( let mut container = Container::new(
Flex::row() Flex::row()
.with_child( .with_child(
@ -191,7 +201,7 @@ impl FileFinder {
) )
.with_child( .with_child(
Label::new( Label::new(
path_string.into(), full_path,
settings.ui_font_family, settings.ui_font_family,
settings.ui_font_size, settings.ui_font_size,
) )
@ -275,6 +285,7 @@ impl FileFinder {
search_count: 0, search_count: 0,
latest_search_id: 0, latest_search_id: 0,
matches: Vec::new(), matches: Vec::new(),
include_root_name: false,
selected: 0, selected: 0,
list_state: UniformListState::new(), list_state: UniformListState::new(),
} }
@ -348,16 +359,17 @@ impl FileFinder {
let search_id = util::post_inc(&mut self.search_count); let search_id = util::post_inc(&mut self.search_count);
let pool = ctx.as_ref().thread_pool().clone(); let pool = ctx.as_ref().thread_pool().clone();
let task = ctx.background_executor().spawn(async move { let task = ctx.background_executor().spawn(async move {
let include_root_name = snapshots.len() > 1;
let matches = match_paths( let matches = match_paths(
snapshots.iter(), snapshots.iter(),
&query, &query,
snapshots.len() > 1, include_root_name,
false, false,
false, false,
100, 100,
pool, pool,
); );
(search_id, matches) (search_id, include_root_name, matches)
}); });
ctx.spawn(task, Self::update_matches).detach(); ctx.spawn(task, Self::update_matches).detach();
@ -365,12 +377,13 @@ impl FileFinder {
fn update_matches( fn update_matches(
&mut self, &mut self,
(search_id, matches): (usize, Vec<PathMatch>), (search_id, include_root_name, matches): (usize, bool, Vec<PathMatch>),
ctx: &mut ViewContext<Self>, ctx: &mut ViewContext<Self>,
) { ) {
if search_id >= self.latest_search_id { if search_id >= self.latest_search_id {
self.latest_search_id = search_id; self.latest_search_id = search_id;
self.matches = matches; self.matches = matches;
self.include_root_name = include_root_name;
self.selected = 0; self.selected = 0;
self.list_state.scroll_to(0); self.list_state.scroll_to(0);
ctx.notify(); ctx.notify();

View file

@ -32,6 +32,8 @@ use std::{
time::Duration, time::Duration,
}; };
use self::char_bag::CharBag;
lazy_static! { lazy_static! {
static ref GITIGNORE: &'static OsStr = OsStr::new(".gitignore"); static ref GITIGNORE: &'static OsStr = OsStr::new(".gitignore");
} }
@ -59,12 +61,17 @@ pub struct FileHandle {
impl Worktree { impl Worktree {
pub fn new(path: impl Into<Arc<Path>>, ctx: &mut ModelContext<Self>) -> Self { pub fn new(path: impl Into<Arc<Path>>, ctx: &mut ModelContext<Self>) -> Self {
let abs_path = path.into();
let root_name_chars = abs_path.file_name().map_or(Vec::new(), |n| {
n.to_string_lossy().chars().chain(Some('/')).collect()
});
let (scan_state_tx, scan_state_rx) = smol::channel::unbounded(); let (scan_state_tx, scan_state_rx) = smol::channel::unbounded();
let id = ctx.model_id(); let id = ctx.model_id();
let snapshot = Snapshot { let snapshot = Snapshot {
id, id,
scan_id: 0, scan_id: 0,
abs_path: path.into(), abs_path,
root_name_chars,
ignores: Default::default(), ignores: Default::default(),
entries: Default::default(), entries: Default::default(),
}; };
@ -209,6 +216,7 @@ pub struct Snapshot {
id: usize, id: usize,
scan_id: usize, scan_id: usize,
abs_path: Arc<Path>, abs_path: Arc<Path>,
root_name_chars: Vec<char>,
ignores: BTreeMap<Arc<Path>, (Arc<Gitignore>, usize)>, ignores: BTreeMap<Arc<Path>, (Arc<Gitignore>, usize)>,
entries: SumTree<Entry>, entries: SumTree<Entry>,
} }
@ -238,13 +246,17 @@ impl Snapshot {
} }
pub fn root_entry(&self) -> Entry { pub fn root_entry(&self) -> Entry {
self.entry_for_path(&self.abs_path).unwrap() self.entry_for_path("").unwrap()
} }
pub fn root_name(&self) -> Option<&OsStr> { pub fn root_name(&self) -> Option<&OsStr> {
self.abs_path.file_name() self.abs_path.file_name()
} }
pub fn root_name_chars(&self) -> &[char] {
&self.root_name_chars
}
fn entry_for_path(&self, path: impl AsRef<Path>) -> Option<Entry> { fn entry_for_path(&self, path: impl AsRef<Path>) -> Option<Entry> {
let mut cursor = self.entries.cursor::<_, ()>(); let mut cursor = self.entries.cursor::<_, ()>();
if cursor.seek(&PathSearch::Exact(path.as_ref()), SeekBias::Left) { if cursor.seek(&PathSearch::Exact(path.as_ref()), SeekBias::Left) {
@ -259,8 +271,6 @@ impl Snapshot {
} }
fn is_path_ignored(&self, path: &Path) -> Result<bool> { fn is_path_ignored(&self, path: &Path) -> Result<bool> {
dbg!(path);
let mut entry = self let mut entry = self
.entry_for_path(path) .entry_for_path(path)
.ok_or_else(|| anyhow!("entry does not exist in worktree"))?; .ok_or_else(|| anyhow!("entry does not exist in worktree"))?;
@ -272,7 +282,6 @@ impl Snapshot {
entry.path().parent().and_then(|p| self.entry_for_path(p)) entry.path().parent().and_then(|p| self.entry_for_path(p))
{ {
let parent_path = parent_entry.path(); let parent_path = parent_entry.path();
dbg!(parent_path);
if let Some((ignore, _)) = self.ignores.get(parent_path) { if let Some((ignore, _)) = self.ignores.get(parent_path) {
let relative_path = path.strip_prefix(parent_path).unwrap(); let relative_path = path.strip_prefix(parent_path).unwrap();
match ignore.matched_path_or_any_parents(relative_path, entry.is_dir()) { match ignore.matched_path_or_any_parents(relative_path, entry.is_dir()) {
@ -567,11 +576,14 @@ struct BackgroundScanner {
notify: Sender<ScanState>, notify: Sender<ScanState>,
other_mount_paths: HashSet<PathBuf>, other_mount_paths: HashSet<PathBuf>,
thread_pool: scoped_pool::Pool, thread_pool: scoped_pool::Pool,
root_char_bag: CharBag,
} }
impl BackgroundScanner { impl BackgroundScanner {
fn new(snapshot: Arc<Mutex<Snapshot>>, notify: Sender<ScanState>, worktree_id: usize) -> Self { fn new(snapshot: Arc<Mutex<Snapshot>>, notify: Sender<ScanState>, worktree_id: usize) -> Self {
let root_char_bag = CharBag::from(snapshot.lock().root_name_chars.as_slice());
let mut scanner = Self { let mut scanner = Self {
root_char_bag,
snapshot, snapshot,
notify, notify,
other_mount_paths: Default::default(), other_mount_paths: Default::default(),
@ -673,7 +685,7 @@ impl BackgroundScanner {
}); });
} else { } else {
self.snapshot.lock().insert_entry(Entry::File { self.snapshot.lock().insert_entry(Entry::File {
path_entry: PathEntry::new(inode, path.clone()), path_entry: PathEntry::new(inode, self.root_char_bag, path.clone()),
path, path,
inode, inode,
is_symlink, is_symlink,
@ -719,7 +731,7 @@ impl BackgroundScanner {
}); });
} else { } else {
new_entries.push(Entry::File { new_entries.push(Entry::File {
path_entry: PathEntry::new(child_inode, child_path.clone()), path_entry: PathEntry::new(child_inode, self.root_char_bag, child_path.clone()),
path: child_path, path: child_path,
inode: child_inode, inode: child_inode,
is_symlink: child_is_symlink, is_symlink: child_is_symlink,
@ -958,7 +970,7 @@ impl BackgroundScanner {
} }
} else { } else {
Entry::File { Entry::File {
path_entry: PathEntry::new(inode, path.clone()), path_entry: PathEntry::new(inode, self.root_char_bag, path.clone()),
path, path,
inode, inode,
is_symlink, is_symlink,
@ -1113,14 +1125,14 @@ mod tests {
10, 10,
ctx.thread_pool().clone(), ctx.thread_pool().clone(),
) )
.iter() .into_iter()
.map(|result| result.path.clone()) .map(|result| result.path)
.collect::<Vec<Arc<Path>>>(); .collect::<Vec<Arc<Path>>>();
assert_eq!( assert_eq!(
results, results,
vec![ vec![
PathBuf::from("root_link/banana/carrot/date").into(), PathBuf::from("banana/carrot/date").into(),
PathBuf::from("root_link/banana/carrot/endive").into(), PathBuf::from("banana/carrot/endive").into(),
] ]
); );
}) })
@ -1288,6 +1300,7 @@ mod tests {
abs_path: root_dir.path().into(), abs_path: root_dir.path().into(),
entries: Default::default(), entries: Default::default(),
ignores: Default::default(), ignores: Default::default(),
root_name_chars: Default::default(),
})), })),
notify_tx, notify_tx,
0, 0,
@ -1321,6 +1334,7 @@ mod tests {
abs_path: root_dir.path().into(), abs_path: root_dir.path().into(),
entries: Default::default(), entries: Default::default(),
ignores: Default::default(), ignores: Default::default(),
root_name_chars: Default::default(),
})), })),
notify_tx, notify_tx,
1, 1,

View file

@ -1,4 +1,4 @@
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug, Default)]
pub struct CharBag(u64); pub struct CharBag(u64);
impl CharBag { impl CharBag {
@ -23,6 +23,14 @@ impl CharBag {
} }
} }
impl Extend<char> for CharBag {
fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
for c in iter {
self.insert(c);
}
}
}
impl From<&str> for CharBag { impl From<&str> for CharBag {
fn from(s: &str) -> Self { fn from(s: &str) -> Self {
let mut bag = Self(0); let mut bag = Self(0);

View file

@ -21,11 +21,12 @@ pub struct PathEntry {
} }
impl PathEntry { impl PathEntry {
pub fn new(ino: u64, path: Arc<Path>) -> Self { pub fn new(ino: u64, root_char_bag: CharBag, path: Arc<Path>) -> Self {
let path_str = path.to_string_lossy(); let path_str = path.to_string_lossy();
let lowercase_path = path_str.to_lowercase().chars().collect::<Vec<_>>().into(); let lowercase_path = path_str.to_lowercase().chars().collect::<Vec<_>>().into();
let path_chars: Arc<[char]> = path_str.chars().collect::<Vec<_>>().into(); let path_chars: Arc<[char]> = path_str.chars().collect::<Vec<_>>().into();
let char_bag = CharBag::from(path_chars.as_ref()); let mut char_bag = root_char_bag;
char_bag.extend(path_chars.iter().copied());
Self { Self {
ino, ino,
@ -136,21 +137,9 @@ where
} }
}); });
let skipped_prefix_len = if include_root_name {
0
} else if let Entry::Dir { .. } = snapshot.root_entry() {
if let Some(name) = snapshot.root_name() {
name.to_string_lossy().chars().count() + 1
} else {
1
}
} else {
0
};
match_single_tree_paths( match_single_tree_paths(
snapshot, snapshot,
skipped_prefix_len, include_root_name,
path_entries, path_entries,
query, query,
lowercase_query, lowercase_query,
@ -186,7 +175,7 @@ where
fn match_single_tree_paths<'a>( fn match_single_tree_paths<'a>(
snapshot: &Snapshot, snapshot: &Snapshot,
skipped_prefix_len: usize, include_root_name: bool,
path_entries: impl Iterator<Item = &'a PathEntry>, path_entries: impl Iterator<Item = &'a PathEntry>,
query: &[char], query: &[char],
lowercase_query: &[char], lowercase_query: &[char],
@ -200,6 +189,12 @@ fn match_single_tree_paths<'a>(
score_matrix: &mut Vec<Option<f64>>, score_matrix: &mut Vec<Option<f64>>,
best_position_matrix: &mut Vec<usize>, best_position_matrix: &mut Vec<usize>,
) { ) {
let prefix = if include_root_name {
snapshot.root_name_chars.as_slice()
} else {
&[]
};
for path_entry in path_entries { for path_entry in path_entries {
if !path_entry.char_bag.is_superset(query_chars) { if !path_entry.char_bag.is_superset(query_chars) {
continue; continue;
@ -207,25 +202,25 @@ fn match_single_tree_paths<'a>(
if !find_last_positions( if !find_last_positions(
last_positions, last_positions,
skipped_prefix_len, prefix,
&path_entry.lowercase_path, &path_entry.lowercase_path,
&lowercase_query[..], &lowercase_query[..],
) { ) {
continue; continue;
} }
let matrix_len = query.len() * (path_entry.path_chars.len() - skipped_prefix_len); let matrix_len = query.len() * (path_entry.path_chars.len() + prefix.len());
score_matrix.clear(); score_matrix.clear();
score_matrix.resize(matrix_len, None); score_matrix.resize(matrix_len, None);
best_position_matrix.clear(); best_position_matrix.clear();
best_position_matrix.resize(matrix_len, skipped_prefix_len); best_position_matrix.resize(matrix_len, 0);
let score = score_match( let score = score_match(
&query[..], &query[..],
&lowercase_query[..], &lowercase_query[..],
&path_entry.path_chars, &path_entry.path_chars,
&path_entry.lowercase_path, &path_entry.lowercase_path,
skipped_prefix_len, prefix,
smart_case, smart_case,
&last_positions, &last_positions,
score_matrix, score_matrix,
@ -237,11 +232,7 @@ fn match_single_tree_paths<'a>(
if score > 0.0 { if score > 0.0 {
results.push(Reverse(PathMatch { results.push(Reverse(PathMatch {
tree_id: snapshot.id, tree_id: snapshot.id,
path_string: path_entry path_string: path_entry.path_chars.iter().collect(),
.path_chars
.iter()
.skip(skipped_prefix_len)
.collect(),
path: path_entry.path.clone(), path: path_entry.path.clone(),
score, score,
positions: match_positions.clone(), positions: match_positions.clone(),
@ -255,21 +246,20 @@ fn match_single_tree_paths<'a>(
fn find_last_positions( fn find_last_positions(
last_positions: &mut Vec<usize>, last_positions: &mut Vec<usize>,
skipped_prefix_len: usize, prefix: &[char],
path: &[char], path: &[char],
query: &[char], query: &[char],
) -> bool { ) -> bool {
let mut path = path.iter(); let mut path = path.iter();
let mut prefix_iter = prefix.iter();
for (i, char) in query.iter().enumerate().rev() { for (i, char) in query.iter().enumerate().rev() {
if let Some(j) = path.rposition(|c| c == char) { if let Some(j) = path.rposition(|c| c == char) {
if j >= skipped_prefix_len { last_positions[i] = j + prefix.len();
} else if let Some(j) = prefix_iter.rposition(|c| c == char) {
last_positions[i] = j; last_positions[i] = j;
} else { } else {
return false; return false;
} }
} else {
return false;
}
} }
true true
} }
@ -279,7 +269,7 @@ fn score_match(
query_cased: &[char], query_cased: &[char],
path: &[char], path: &[char],
path_cased: &[char], path_cased: &[char],
skipped_prefix_len: usize, prefix: &[char],
smart_case: bool, smart_case: bool,
last_positions: &[usize], last_positions: &[usize],
score_matrix: &mut [Option<f64>], score_matrix: &mut [Option<f64>],
@ -292,14 +282,14 @@ fn score_match(
query_cased, query_cased,
path, path,
path_cased, path_cased,
skipped_prefix_len, prefix,
smart_case, smart_case,
last_positions, last_positions,
score_matrix, score_matrix,
best_position_matrix, best_position_matrix,
min_score, min_score,
0, 0,
skipped_prefix_len, 0,
query.len() as f64, query.len() as f64,
) * query.len() as f64; ) * query.len() as f64;
@ -307,10 +297,10 @@ fn score_match(
return 0.0; return 0.0;
} }
let path_len = path.len() - skipped_prefix_len; let path_len = path.len() + prefix.len();
let mut cur_start = 0; let mut cur_start = 0;
for i in 0..query.len() { for i in 0..query.len() {
match_positions[i] = best_position_matrix[i * path_len + cur_start] - skipped_prefix_len; match_positions[i] = best_position_matrix[i * path_len + cur_start];
cur_start = match_positions[i] + 1; cur_start = match_positions[i] + 1;
} }
@ -322,7 +312,7 @@ fn recursive_score_match(
query_cased: &[char], query_cased: &[char],
path: &[char], path: &[char],
path_cased: &[char], path_cased: &[char],
skipped_prefix_len: usize, prefix: &[char],
smart_case: bool, smart_case: bool,
last_positions: &[usize], last_positions: &[usize],
score_matrix: &mut [Option<f64>], score_matrix: &mut [Option<f64>],
@ -336,9 +326,9 @@ fn recursive_score_match(
return 1.0; return 1.0;
} }
let path_len = path.len() - skipped_prefix_len; let path_len = prefix.len() + path.len();
if let Some(memoized) = score_matrix[query_idx * path_len + path_idx - skipped_prefix_len] { if let Some(memoized) = score_matrix[query_idx * path_len + path_idx] {
return memoized; return memoized;
} }
@ -350,7 +340,11 @@ fn recursive_score_match(
let mut last_slash = 0; let mut last_slash = 0;
for j in path_idx..=limit { for j in path_idx..=limit {
let path_char = path_cased[j]; let path_char = if j < prefix.len() {
prefix[j]
} else {
path_cased[j - prefix.len()]
};
let is_path_sep = path_char == '/' || path_char == '\\'; let is_path_sep = path_char == '/' || path_char == '\\';
if query_idx == 0 && is_path_sep { if query_idx == 0 && is_path_sep {
@ -358,10 +352,19 @@ fn recursive_score_match(
} }
if query_char == path_char || (is_path_sep && query_char == '_' || query_char == '\\') { if query_char == path_char || (is_path_sep && query_char == '_' || query_char == '\\') {
let curr = if j < prefix.len() {
prefix[j]
} else {
path[j - prefix.len()]
};
let mut char_score = 1.0; let mut char_score = 1.0;
if j > path_idx { if j > path_idx {
let last = path[j - 1]; let last = if j - 1 < prefix.len() {
let curr = path[j]; prefix[j - 1]
} else {
path[j - 1 - prefix.len()]
};
if last == '/' { if last == '/' {
char_score = 0.9; char_score = 0.9;
@ -384,15 +387,15 @@ fn recursive_score_match(
// Apply a severe penalty if the case doesn't match. // Apply a severe penalty if the case doesn't match.
// This will make the exact matches have higher score than the case-insensitive and the // This will make the exact matches have higher score than the case-insensitive and the
// path insensitive matches. // path insensitive matches.
if (smart_case || path[j] == '/') && query[query_idx] != path[j] { if (smart_case || curr == '/') && query[query_idx] != curr {
char_score *= 0.001; char_score *= 0.001;
} }
let mut multiplier = char_score; let mut multiplier = char_score;
// Scale the score based on how deep within the patch we found the match. // Scale the score based on how deep within the path we found the match.
if query_idx == 0 { if query_idx == 0 {
multiplier /= (path.len() - last_slash) as f64; multiplier /= ((prefix.len() + path.len()) - last_slash) as f64;
} }
let mut next_score = 1.0; let mut next_score = 1.0;
@ -413,7 +416,7 @@ fn recursive_score_match(
query_cased, query_cased,
path, path,
path_cased, path_cased,
skipped_prefix_len, prefix,
smart_case, smart_case,
last_positions, last_positions,
score_matrix, score_matrix,
@ -436,10 +439,10 @@ fn recursive_score_match(
} }
if best_position != 0 { if best_position != 0 {
best_position_matrix[query_idx * path_len + path_idx - skipped_prefix_len] = best_position; best_position_matrix[query_idx * path_len + path_idx] = best_position;
} }
score_matrix[query_idx * path_len + path_idx - skipped_prefix_len] = Some(score); score_matrix[query_idx * path_len + path_idx] = Some(score);
score score
} }
@ -448,6 +451,38 @@ mod tests {
use super::*; use super::*;
use std::path::PathBuf; use std::path::PathBuf;
#[test]
fn test_get_last_positions() {
let mut last_positions = vec![0; 2];
let result = find_last_positions(
&mut last_positions,
&['a', 'b', 'c'],
&['b', 'd', 'e', 'f'],
&['d', 'c'],
);
assert_eq!(result, false);
last_positions.resize(2, 0);
let result = find_last_positions(
&mut last_positions,
&['a', 'b', 'c'],
&['b', 'd', 'e', 'f'],
&['c', 'd'],
);
assert_eq!(result, true);
assert_eq!(last_positions, vec![2, 4]);
last_positions.resize(4, 0);
let result = find_last_positions(
&mut last_positions,
&['z', 'e', 'd', '/'],
&['z', 'e', 'd', '/', 'f'],
&['z', '/', 'z', 'f'],
);
assert_eq!(result, true);
assert_eq!(last_positions, vec![0, 3, 4, 8]);
}
#[test] #[test]
fn test_match_path_entries() { fn test_match_path_entries() {
let paths = vec![ let paths = vec![
@ -526,8 +561,9 @@ mod tests {
abs_path: PathBuf::new().into(), abs_path: PathBuf::new().into(),
ignores: Default::default(), ignores: Default::default(),
entries: Default::default(), entries: Default::default(),
root_name_chars: Vec::new(),
}, },
0, false,
path_entries.iter(), path_entries.iter(),
&query[..], &query[..],
&lowercase_query[..], &lowercase_query[..],