util: Use GlobSet in PathMatcher (#13197)

Previously we were using a single globset::Glob in PathMatcher; higher
up the stack, we were then resorting to using a list of PathMatchers.
The globset crate exposes a GlobSet type that's better suited for this use
case. In my benchmarks, using a single PathMatcher with GlobSet instead
of a Vec of PathMatchers with Globs is about 3 times faster with the
default 'file_scan_exclusions' values. This slightly improves our
project load time for projects with a large number of files, as showcased in
the following videos of loading a project with 100k source files. This
project is *not* a git repository, so it should measure raw overhead on
our side.

Current nightly: 51404d4ea0


https://github.com/zed-industries/zed/assets/24362066/e0aa9f8c-aae6-4348-8d42-d20bd41fcd76

versus this PR:


https://github.com/zed-industries/zed/assets/24362066/408dcab1-cee2-4c9e-a541-a31d14772dd7



Release Notes:

- Improved performance in large worktrees
This commit is contained in:
Piotr Osiewicz 2024-06-18 16:12:24 +02:00 committed by GitHub
parent 64d815a176
commit 5dc26c261d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 193 additions and 187 deletions

View file

@ -220,7 +220,7 @@ struct Options {
const INCLUDE_WARNINGS_ARGUMENT: &str = "--include-warnings";
impl Options {
pub fn parse(arguments_line: Option<&str>) -> Self {
fn parse(arguments_line: Option<&str>) -> Self {
arguments_line
.map(|arguments_line| {
let args = arguments_line.split_whitespace().collect::<Vec<_>>();
@ -230,7 +230,7 @@ impl Options {
if arg == INCLUDE_WARNINGS_ARGUMENT {
include_warnings = true;
} else {
path_matcher = PathMatcher::new(arg).log_err();
path_matcher = PathMatcher::new(&[arg.to_owned()]).log_err();
}
}
Self {
@ -255,7 +255,8 @@ fn collect_diagnostics(
cx: &mut AppContext,
) -> Task<Result<(String, Vec<(Range<usize>, PlaceholderType)>)>> {
let error_source = if let Some(path_matcher) = &options.path_matcher {
Some(path_matcher.source().to_string())
debug_assert_eq!(path_matcher.sources().len(), 1);
Some(path_matcher.sources().first().cloned().unwrap_or_default())
} else {
None
};

View file

@ -183,7 +183,7 @@ fn collect_files(
fs: Arc<dyn Fs>,
cx: &mut AppContext,
) -> Task<Result<(String, Vec<(Range<usize>, PathBuf, EntryType)>)>> {
let Ok(matcher) = PathMatcher::new(glob_input) else {
let Ok(matcher) = PathMatcher::new(&[glob_input.to_owned()]) else {
return Task::ready(Err(anyhow!("invalid path")));
};