util: Use GlobSet in PathMatcher (#13197)

Previously we were using a single globset::Glob in PathMatcher; higher
up the stack, we were then resorting to using a list of PathMatchers.
globset crate exposes a GlobSet type that's better suited for this use
case. In my benchmarks, using a single PathMatcher with GlobSet instead
of a Vec of PathMatchers with Globs is about 3 times faster with the
default 'file_scan_exclusions' values. This slightly improves our
project load time for projects with large # of files, as showcased in
the following videos of loading a project with 100k source files. This
project is *not* a git repository, so it should measure raw overhead on
our side.

Current nightly: 51404d4ea0


https://github.com/zed-industries/zed/assets/24362066/e0aa9f8c-aae6-4348-8d42-d20bd41fcd76

versus this PR:


https://github.com/zed-industries/zed/assets/24362066/408dcab1-cee2-4c9e-a541-a31d14772dd7



Release Notes:

- Improved performance in large worktrees
This commit is contained in:
Piotr Osiewicz 2024-06-18 16:12:24 +02:00 committed by GitHub
parent 64d815a176
commit 5dc26c261d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 193 additions and 187 deletions

View file

@ -3929,7 +3929,15 @@ async fn test_search(cx: &mut gpui::TestAppContext) {
assert_eq!(
search(
&project,
SearchQuery::text("TWO", false, true, false, Vec::new(), Vec::new()).unwrap(),
SearchQuery::text(
"TWO",
false,
true,
false,
Default::default(),
Default::default()
)
.unwrap(),
cx
)
.await
@ -3954,7 +3962,15 @@ async fn test_search(cx: &mut gpui::TestAppContext) {
assert_eq!(
search(
&project,
SearchQuery::text("TWO", false, true, false, Vec::new(), Vec::new()).unwrap(),
SearchQuery::text(
"TWO",
false,
true,
false,
Default::default(),
Default::default()
)
.unwrap(),
cx
)
.await
@ -3994,8 +4010,8 @@ async fn test_search_with_inclusions(cx: &mut gpui::TestAppContext) {
false,
true,
false,
vec![PathMatcher::new("*.odd").unwrap()],
Vec::new()
PathMatcher::new(&["*.odd".to_owned()]).unwrap(),
Default::default()
)
.unwrap(),
cx
@ -4014,8 +4030,8 @@ async fn test_search_with_inclusions(cx: &mut gpui::TestAppContext) {
false,
true,
false,
vec![PathMatcher::new("*.rs").unwrap()],
Vec::new()
PathMatcher::new(&["*.rs".to_owned()]).unwrap(),
Default::default()
)
.unwrap(),
cx
@ -4037,11 +4053,10 @@ async fn test_search_with_inclusions(cx: &mut gpui::TestAppContext) {
false,
true,
false,
vec![
PathMatcher::new("*.ts").unwrap(),
PathMatcher::new("*.odd").unwrap(),
],
Vec::new()
PathMatcher::new(&["*.ts".to_owned(), "*.odd".to_owned()]).unwrap(),
Default::default(),
).unwrap(),
cx
)
@ -4062,12 +4077,10 @@ async fn test_search_with_inclusions(cx: &mut gpui::TestAppContext) {
false,
true,
false,
vec![
PathMatcher::new("*.rs").unwrap(),
PathMatcher::new("*.ts").unwrap(),
PathMatcher::new("*.odd").unwrap(),
],
Vec::new()
PathMatcher::new(&["*.rs".to_owned(), "*.ts".to_owned(), "*.odd".to_owned()]).unwrap(),
Default::default(),
).unwrap(),
cx
)
@ -4110,8 +4123,8 @@ async fn test_search_with_exclusions(cx: &mut gpui::TestAppContext) {
false,
true,
false,
Vec::new(),
vec![PathMatcher::new("*.odd").unwrap()],
Default::default(),
PathMatcher::new(&["*.odd".to_owned()]).unwrap(),
)
.unwrap(),
cx
@ -4135,8 +4148,8 @@ async fn test_search_with_exclusions(cx: &mut gpui::TestAppContext) {
false,
true,
false,
Vec::new(),
vec![PathMatcher::new("*.rs").unwrap()],
Default::default(),
PathMatcher::new(&["*.rs".to_owned()]).unwrap()
)
.unwrap(),
cx
@ -4158,11 +4171,10 @@ async fn test_search_with_exclusions(cx: &mut gpui::TestAppContext) {
false,
true,
false,
Vec::new(),
vec![
PathMatcher::new("*.ts").unwrap(),
PathMatcher::new("*.odd").unwrap(),
],
Default::default(),
PathMatcher::new(&["*.ts".to_owned(), "*.odd".to_owned()]).unwrap(),
).unwrap(),
cx
)
@ -4183,12 +4195,10 @@ async fn test_search_with_exclusions(cx: &mut gpui::TestAppContext) {
false,
true,
false,
Vec::new(),
vec![
PathMatcher::new("*.rs").unwrap(),
PathMatcher::new("*.ts").unwrap(),
PathMatcher::new("*.odd").unwrap(),
],
Default::default(),
PathMatcher::new(&["*.rs".to_owned(), "*.ts".to_owned(), "*.odd".to_owned()]).unwrap(),
).unwrap(),
cx
)
@ -4225,8 +4235,8 @@ async fn test_search_with_exclusions_and_inclusions(cx: &mut gpui::TestAppContex
false,
true,
false,
vec![PathMatcher::new("*.odd").unwrap()],
vec![PathMatcher::new("*.odd").unwrap()],
PathMatcher::new(&["*.odd".to_owned()]).unwrap(),
PathMatcher::new(&["*.odd".to_owned()]).unwrap(),
)
.unwrap(),
cx
@ -4245,8 +4255,8 @@ async fn test_search_with_exclusions_and_inclusions(cx: &mut gpui::TestAppContex
false,
true,
false,
vec![PathMatcher::new("*.ts").unwrap()],
vec![PathMatcher::new("*.ts").unwrap()],
PathMatcher::new(&["*.ts".to_owned()]).unwrap(),
PathMatcher::new(&["*.ts".to_owned()]).unwrap(),
).unwrap(),
cx
)
@ -4264,14 +4274,8 @@ async fn test_search_with_exclusions_and_inclusions(cx: &mut gpui::TestAppContex
false,
true,
false,
vec![
PathMatcher::new("*.ts").unwrap(),
PathMatcher::new("*.odd").unwrap()
],
vec![
PathMatcher::new("*.ts").unwrap(),
PathMatcher::new("*.odd").unwrap()
],
PathMatcher::new(&["*.ts".to_owned(), "*.odd".to_owned()]).unwrap(),
PathMatcher::new(&["*.ts".to_owned(), "*.odd".to_owned()]).unwrap(),
)
.unwrap(),
cx
@ -4290,14 +4294,8 @@ async fn test_search_with_exclusions_and_inclusions(cx: &mut gpui::TestAppContex
false,
true,
false,
vec![
PathMatcher::new("*.ts").unwrap(),
PathMatcher::new("*.odd").unwrap()
],
vec![
PathMatcher::new("*.rs").unwrap(),
PathMatcher::new("*.odd").unwrap()
],
PathMatcher::new(&["*.ts".to_owned(), "*.odd".to_owned()]).unwrap(),
PathMatcher::new(&["*.rs".to_owned(), "*.odd".to_owned()]).unwrap(),
)
.unwrap(),
cx
@ -4349,8 +4347,8 @@ async fn test_search_multiple_worktrees_with_inclusions(cx: &mut gpui::TestAppCo
false,
true,
false,
vec![PathMatcher::new("worktree-a/*.rs").unwrap()],
Vec::new()
PathMatcher::new(&["worktree-a/*.rs".to_owned()]).unwrap(),
Default::default()
)
.unwrap(),
cx
@ -4368,8 +4366,8 @@ async fn test_search_multiple_worktrees_with_inclusions(cx: &mut gpui::TestAppCo
false,
true,
false,
vec![PathMatcher::new("worktree-b/*.rs").unwrap()],
Vec::new()
PathMatcher::new(&["worktree-b/*.rs".to_owned()]).unwrap(),
Default::default()
)
.unwrap(),
cx
@ -4388,8 +4386,8 @@ async fn test_search_multiple_worktrees_with_inclusions(cx: &mut gpui::TestAppCo
false,
true,
false,
vec![PathMatcher::new("*.ts").unwrap()],
Vec::new()
PathMatcher::new(&["*.ts".to_owned()]).unwrap(),
Default::default()
)
.unwrap(),
cx
@ -4437,7 +4435,15 @@ async fn test_search_in_gitignored_dirs(cx: &mut gpui::TestAppContext) {
assert_eq!(
search(
&project,
SearchQuery::text(query, false, false, false, Vec::new(), Vec::new()).unwrap(),
SearchQuery::text(
query,
false,
false,
false,
Default::default(),
Default::default()
)
.unwrap(),
cx
)
.await
@ -4450,7 +4456,15 @@ async fn test_search_in_gitignored_dirs(cx: &mut gpui::TestAppContext) {
assert_eq!(
search(
&project,
SearchQuery::text(query, false, false, true, Vec::new(), Vec::new()).unwrap(),
SearchQuery::text(
query,
false,
false,
true,
Default::default(),
Default::default()
)
.unwrap(),
cx
)
.await
@ -4475,8 +4489,8 @@ async fn test_search_in_gitignored_dirs(cx: &mut gpui::TestAppContext) {
"Unrestricted search with ignored directories should find every file with the query"
);
let files_to_include = vec![PathMatcher::new("/dir/node_modules/prettier/**").unwrap()];
let files_to_exclude = vec![PathMatcher::new("*.ts").unwrap()];
let files_to_include = PathMatcher::new(&["/dir/node_modules/prettier/**".to_owned()]).unwrap();
let files_to_exclude = PathMatcher::new(&["*.ts".to_owned()]).unwrap();
let project = Project::test(fs.clone(), ["/dir".as_ref()], cx).await;
assert_eq!(
search(

View file

@ -1,7 +1,6 @@
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
use anyhow::{Context, Result};
use anyhow::Result;
use client::proto;
use itertools::Itertools;
use language::{char_kind, BufferSnapshot};
use regex::{Captures, Regex, RegexBuilder};
use smol::future::yield_now;
@ -19,18 +18,18 @@ static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::ne
#[derive(Clone, Debug)]
pub struct SearchInputs {
query: Arc<str>,
files_to_include: Vec<PathMatcher>,
files_to_exclude: Vec<PathMatcher>,
files_to_include: PathMatcher,
files_to_exclude: PathMatcher,
}
impl SearchInputs {
pub fn as_str(&self) -> &str {
self.query.as_ref()
}
pub fn files_to_include(&self) -> &[PathMatcher] {
pub fn files_to_include(&self) -> &PathMatcher {
&self.files_to_include
}
pub fn files_to_exclude(&self) -> &[PathMatcher] {
pub fn files_to_exclude(&self) -> &PathMatcher {
&self.files_to_exclude
}
}
@ -62,8 +61,8 @@ impl SearchQuery {
whole_word: bool,
case_sensitive: bool,
include_ignored: bool,
files_to_include: Vec<PathMatcher>,
files_to_exclude: Vec<PathMatcher>,
files_to_include: PathMatcher,
files_to_exclude: PathMatcher,
) -> Result<Self> {
let query = query.to_string();
let search = AhoCorasickBuilder::new()
@ -89,8 +88,8 @@ impl SearchQuery {
whole_word: bool,
case_sensitive: bool,
include_ignored: bool,
files_to_include: Vec<PathMatcher>,
files_to_exclude: Vec<PathMatcher>,
files_to_include: PathMatcher,
files_to_exclude: PathMatcher,
) -> Result<Self> {
let mut query = query.to_string();
let initial_query = Arc::from(query.as_str());
@ -167,16 +166,8 @@ impl SearchQuery {
whole_word: self.whole_word(),
case_sensitive: self.case_sensitive(),
include_ignored: self.include_ignored(),
files_to_include: self
.files_to_include()
.iter()
.map(|matcher| matcher.to_string())
.join(","),
files_to_exclude: self
.files_to_exclude()
.iter()
.map(|matcher| matcher.to_string())
.join(","),
files_to_include: self.files_to_include().sources().join(","),
files_to_exclude: self.files_to_exclude().sources().join(","),
}
}
@ -377,11 +368,11 @@ impl SearchQuery {
matches!(self, Self::Regex { .. })
}
pub fn files_to_include(&self) -> &[PathMatcher] {
pub fn files_to_include(&self) -> &PathMatcher {
self.as_inner().files_to_include()
}
pub fn files_to_exclude(&self) -> &[PathMatcher] {
pub fn files_to_exclude(&self) -> &PathMatcher {
self.as_inner().files_to_exclude()
}
@ -390,17 +381,10 @@ impl SearchQuery {
Some(file_path) => {
let mut path = file_path.to_path_buf();
loop {
if self
.files_to_exclude()
.iter()
.any(|exclude_glob| exclude_glob.is_match(&path))
{
if self.files_to_exclude().is_match(&path) {
return false;
} else if self.files_to_include().is_empty()
|| self
.files_to_include()
.iter()
.any(|include_glob| include_glob.is_match(&path))
} else if self.files_to_include().sources().is_empty()
|| self.files_to_include().is_match(&path)
{
return true;
} else if !path.pop() {
@ -408,7 +392,7 @@ impl SearchQuery {
}
}
}
None => self.files_to_include().is_empty(),
None => self.files_to_include().sources().is_empty(),
}
}
pub fn as_inner(&self) -> &SearchInputs {
@ -418,16 +402,13 @@ impl SearchQuery {
}
}
fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
glob_set
fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
let globs = glob_set
.split(',')
.map(str::trim)
.filter(|glob_str| !glob_str.is_empty())
.map(|glob_str| {
PathMatcher::new(glob_str)
.with_context(|| format!("deserializing path match glob {glob_str}"))
})
.collect()
.filter_map(|glob_str| (!glob_str.is_empty()).then(|| glob_str.to_owned()))
.collect::<Vec<_>>();
Ok(PathMatcher::new(&globs)?)
}
#[cfg(test)]
@ -445,7 +426,7 @@ mod tests {
"dir/[a-z].txt",
"../dir/filé",
] {
let path_matcher = PathMatcher::new(valid_path).unwrap_or_else(|e| {
let path_matcher = PathMatcher::new(&[valid_path.to_owned()]).unwrap_or_else(|e| {
panic!("Valid path {valid_path} should be accepted, but got: {e}")
});
assert!(
@ -458,7 +439,7 @@ mod tests {
#[test]
fn path_matcher_creation_for_globs() {
for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
match PathMatcher::new(invalid_glob) {
match PathMatcher::new(&[invalid_glob.to_owned()]) {
Ok(_) => panic!("Invalid glob {invalid_glob} should not be accepted"),
Err(_expected) => {}
}
@ -471,9 +452,9 @@ mod tests {
"dir/[a-z].txt",
"{dir,file}",
] {
match PathMatcher::new(valid_glob) {
match PathMatcher::new(&[valid_glob.to_owned()]) {
Ok(_expected) => {}
Err(e) => panic!("Valid glob {valid_glob} should be accepted, but got: {e}"),
Err(e) => panic!("Valid glob should be accepted, but got: {e}"),
}
}
}