util: Use GlobSet in PathMatcher (#13197)

Previously we used a single globset::Glob in PathMatcher; higher
up the stack, we then resorted to keeping a list of PathMatchers.
The globset crate exposes a GlobSet type that's better suited for this use
case. In my benchmarks, using a single PathMatcher with GlobSet instead
of a Vec of PathMatchers with Globs is about 3 times faster with the
default 'file_scan_exclusions' values. This slightly improves our
project load time for projects with a large number of files, as showcased in
the following videos of loading a project with 100k source files. This
project is *not* a git repository, so it should measure raw overhead on
our side.

Current nightly: 51404d4ea0


https://github.com/zed-industries/zed/assets/24362066/e0aa9f8c-aae6-4348-8d42-d20bd41fcd76

versus this PR:


https://github.com/zed-industries/zed/assets/24362066/408dcab1-cee2-4c9e-a541-a31d14772dd7



Release Notes:

- Improved performance in large worktrees
This commit is contained in:
Piotr Osiewicz 2024-06-18 16:12:24 +02:00 committed by GitHub
parent 64d815a176
commit 5dc26c261d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 193 additions and 187 deletions

View file

@ -1,5 +1,6 @@
use std::{path::Path, sync::Arc};
use std::path::Path;
use anyhow::Context;
use gpui::AppContext;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
@ -8,25 +9,19 @@ use util::paths::PathMatcher;
#[derive(Clone, PartialEq, Eq)]
pub struct WorktreeSettings {
pub file_scan_exclusions: Arc<[PathMatcher]>,
pub private_files: Arc<[PathMatcher]>,
pub file_scan_exclusions: PathMatcher,
pub private_files: PathMatcher,
}
impl WorktreeSettings {
pub fn is_path_private(&self, path: &Path) -> bool {
path.ancestors().any(|ancestor| {
self.private_files
.iter()
.any(|matcher| matcher.is_match(&ancestor))
})
path.ancestors()
.any(|ancestor| self.private_files.is_match(&ancestor))
}
pub fn is_path_excluded(&self, path: &Path) -> bool {
path.ancestors().any(|ancestor| {
self.file_scan_exclusions
.iter()
.any(|matcher| matcher.is_match(&ancestor))
})
path.ancestors()
.any(|ancestor| self.file_scan_exclusions.is_match(&ancestor))
}
}
@ -67,25 +62,12 @@ impl Settings for WorktreeSettings {
file_scan_exclusions.sort();
private_files.sort();
Ok(Self {
file_scan_exclusions: path_matchers(&file_scan_exclusions, "file_scan_exclusions"),
private_files: path_matchers(&private_files, "private_files"),
file_scan_exclusions: path_matchers(&file_scan_exclusions, "file_scan_exclusions")?,
private_files: path_matchers(&private_files, "private_files")?,
})
}
}
fn path_matchers(values: &[String], context: &'static str) -> Arc<[PathMatcher]> {
values
.iter()
.filter_map(|pattern| {
PathMatcher::new(pattern)
.map(Some)
.unwrap_or_else(|e| {
log::error!(
"Skipping pattern {pattern} in `{}` project settings due to parsing error: {e:#}", context
);
None
})
})
.collect::<Vec<_>>()
.into()
fn path_matchers(values: &[String], context: &'static str) -> anyhow::Result<PathMatcher> {
PathMatcher::new(values).with_context(|| format!("Failed to parse globs from {}", context))
}