util: Use GlobSet in PathMatcher (#13197)
Previously we were using a single globset::Glob in PathMatcher; higher
up the stack, we were then resorting to using a list of PathMatchers.
The globset crate exposes a GlobSet type that's better suited for this use
case. In my benchmarks, using a single PathMatcher with GlobSet instead
of a Vec of PathMatchers with Globs is about 3 times faster with the
default 'file_scan_exclusions' values. This slightly improves our
project load time for projects with a large number of files, as showcased in
the following videos of loading a project with 100k source files. This
project is *not* a git repository, so it should measure raw overhead on
our side.
Current nightly: 51404d4ea0
https://github.com/zed-industries/zed/assets/24362066/e0aa9f8c-aae6-4348-8d42-d20bd41fcd76
versus this PR:
https://github.com/zed-industries/zed/assets/24362066/408dcab1-cee2-4c9e-a541-a31d14772dd7
Release Notes:
- Improved performance in large worktrees
This commit is contained in:
parent
64d815a176
commit
5dc26c261d
13 changed files with 193 additions and 187 deletions
|
@ -3,7 +3,7 @@ use std::{
|
|||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use globset::{Glob, GlobMatcher};
|
||||
use globset::{Glob, GlobSet, GlobSetBuilder};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
lazy_static::lazy_static! {
|
||||
|
@ -257,43 +257,51 @@ impl<P> PathLikeWithPosition<P> {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct PathMatcher {
|
||||
source: String,
|
||||
glob: GlobMatcher,
|
||||
sources: Vec<String>,
|
||||
glob: GlobSet,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for PathMatcher {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.source.fmt(f)
|
||||
}
|
||||
}
|
||||
// impl std::fmt::Display for PathMatcher {
|
||||
// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
// self.sources.fmt(f)
|
||||
// }
|
||||
// }
|
||||
|
||||
impl PartialEq for PathMatcher {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.source.eq(&other.source)
|
||||
self.sources.eq(&other.sources)
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for PathMatcher {}
|
||||
|
||||
impl PathMatcher {
|
||||
pub fn new(source: &str) -> Result<Self, globset::Error> {
|
||||
Ok(PathMatcher {
|
||||
glob: Glob::new(source)?.compile_matcher(),
|
||||
source: String::from(source),
|
||||
})
|
||||
pub fn new(globs: &[String]) -> Result<Self, globset::Error> {
|
||||
let globs = globs
|
||||
.into_iter()
|
||||
.map(|glob| Glob::new(&glob))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
let sources = globs.iter().map(|glob| glob.glob().to_owned()).collect();
|
||||
let mut glob_builder = GlobSetBuilder::new();
|
||||
for single_glob in globs {
|
||||
glob_builder.add(single_glob);
|
||||
}
|
||||
let glob = glob_builder.build()?;
|
||||
Ok(PathMatcher { glob, sources })
|
||||
}
|
||||
|
||||
pub fn source(&self) -> &str {
|
||||
&self.source
|
||||
pub fn sources(&self) -> &[String] {
|
||||
&self.sources
|
||||
}
|
||||
|
||||
pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
|
||||
let other_path = other.as_ref();
|
||||
other_path.starts_with(Path::new(&self.source))
|
||||
|| other_path.ends_with(Path::new(&self.source))
|
||||
|| self.glob.is_match(other_path)
|
||||
self.sources.iter().any(|source| {
|
||||
let as_bytes = other_path.as_os_str().as_encoded_bytes();
|
||||
as_bytes.starts_with(source.as_bytes()) || as_bytes.ends_with(source.as_bytes())
|
||||
}) || self.glob.is_match(other_path)
|
||||
|| self.check_with_end_separator(other_path)
|
||||
}
|
||||
|
||||
|
@ -534,20 +542,20 @@ mod tests {
|
|||
#[test]
|
||||
fn edge_of_glob() {
|
||||
let path = Path::new("/work/node_modules");
|
||||
let path_matcher = PathMatcher::new("**/node_modules/**").unwrap();
|
||||
let path_matcher = PathMatcher::new(&["**/node_modules/**".to_owned()]).unwrap();
|
||||
assert!(
|
||||
path_matcher.is_match(path),
|
||||
"Path matcher {path_matcher} should match {path:?}"
|
||||
"Path matcher should match {path:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn project_search() {
|
||||
let path = Path::new("/Users/someonetoignore/work/zed/zed.dev/node_modules");
|
||||
let path_matcher = PathMatcher::new("**/node_modules/**").unwrap();
|
||||
let path_matcher = PathMatcher::new(&["**/node_modules/**".to_owned()]).unwrap();
|
||||
assert!(
|
||||
path_matcher.is_match(path),
|
||||
"Path matcher {path_matcher} should match {path:?}"
|
||||
"Path matcher should match {path:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue