Optimize matching of multiple file-watch globs using the globset crate

This commit is contained in:
Max Brunsfeld 2023-05-19 09:13:31 -07:00
parent 4bda5c4d69
commit 459cc9c959
9 changed files with 119 additions and 252 deletions

View file

@ -1,6 +1,7 @@
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
use anyhow::Result;
use client::proto;
use globset::{Glob, GlobMatcher};
use itertools::Itertools;
use language::{char_kind, Rope};
use regex::{Regex, RegexBuilder};
@ -19,8 +20,8 @@ pub enum SearchQuery {
query: Arc<str>,
whole_word: bool,
case_sensitive: bool,
files_to_include: Vec<glob::Pattern>,
files_to_exclude: Vec<glob::Pattern>,
files_to_include: Vec<GlobMatcher>,
files_to_exclude: Vec<GlobMatcher>,
},
Regex {
regex: Regex,
@ -28,8 +29,8 @@ pub enum SearchQuery {
multiline: bool,
whole_word: bool,
case_sensitive: bool,
files_to_include: Vec<glob::Pattern>,
files_to_exclude: Vec<glob::Pattern>,
files_to_include: Vec<GlobMatcher>,
files_to_exclude: Vec<GlobMatcher>,
},
}
@ -38,8 +39,8 @@ impl SearchQuery {
query: impl ToString,
whole_word: bool,
case_sensitive: bool,
files_to_include: Vec<glob::Pattern>,
files_to_exclude: Vec<glob::Pattern>,
files_to_include: Vec<GlobMatcher>,
files_to_exclude: Vec<GlobMatcher>,
) -> Self {
let query = query.to_string();
let search = AhoCorasickBuilder::new()
@ -60,8 +61,8 @@ impl SearchQuery {
query: impl ToString,
whole_word: bool,
case_sensitive: bool,
files_to_include: Vec<glob::Pattern>,
files_to_exclude: Vec<glob::Pattern>,
files_to_include: Vec<GlobMatcher>,
files_to_exclude: Vec<GlobMatcher>,
) -> Result<Self> {
let mut query = query.to_string();
let initial_query = Arc::from(query.as_str());
@ -95,40 +96,16 @@ impl SearchQuery {
message.query,
message.whole_word,
message.case_sensitive,
message
.files_to_include
.split(',')
.map(str::trim)
.filter(|glob_str| !glob_str.is_empty())
.map(|glob_str| glob::Pattern::new(glob_str))
.collect::<Result<_, _>>()?,
message
.files_to_exclude
.split(',')
.map(str::trim)
.filter(|glob_str| !glob_str.is_empty())
.map(|glob_str| glob::Pattern::new(glob_str))
.collect::<Result<_, _>>()?,
deserialize_globs(&message.files_to_include)?,
deserialize_globs(&message.files_to_exclude)?,
)
} else {
Ok(Self::text(
message.query,
message.whole_word,
message.case_sensitive,
message
.files_to_include
.split(',')
.map(str::trim)
.filter(|glob_str| !glob_str.is_empty())
.map(|glob_str| glob::Pattern::new(glob_str))
.collect::<Result<_, _>>()?,
message
.files_to_exclude
.split(',')
.map(str::trim)
.filter(|glob_str| !glob_str.is_empty())
.map(|glob_str| glob::Pattern::new(glob_str))
.collect::<Result<_, _>>()?,
deserialize_globs(&message.files_to_include)?,
deserialize_globs(&message.files_to_exclude)?,
))
}
}
@ -143,12 +120,12 @@ impl SearchQuery {
files_to_include: self
.files_to_include()
.iter()
.map(ToString::to_string)
.map(|g| g.glob().to_string())
.join(","),
files_to_exclude: self
.files_to_exclude()
.iter()
.map(ToString::to_string)
.map(|g| g.glob().to_string())
.join(","),
}
}
@ -289,7 +266,7 @@ impl SearchQuery {
matches!(self, Self::Regex { .. })
}
pub fn files_to_include(&self) -> &[glob::Pattern] {
pub fn files_to_include(&self) -> &[GlobMatcher] {
match self {
Self::Text {
files_to_include, ..
@ -300,7 +277,7 @@ impl SearchQuery {
}
}
pub fn files_to_exclude(&self) -> &[glob::Pattern] {
pub fn files_to_exclude(&self) -> &[GlobMatcher] {
match self {
Self::Text {
files_to_exclude, ..
@ -317,14 +294,23 @@ impl SearchQuery {
!self
.files_to_exclude()
.iter()
.any(|exclude_glob| exclude_glob.matches_path(file_path))
.any(|exclude_glob| exclude_glob.is_match(file_path))
&& (self.files_to_include().is_empty()
|| self
.files_to_include()
.iter()
.any(|include_glob| include_glob.matches_path(file_path)))
.any(|include_glob| include_glob.is_match(file_path)))
}
None => self.files_to_include().is_empty(),
}
}
}
fn deserialize_globs(glob_set: &str) -> Result<Vec<GlobMatcher>> {
glob_set
.split(',')
.map(str::trim)
.filter(|glob_str| !glob_str.is_empty())
.map(|glob_str| Ok(Glob::new(glob_str)?.compile_matcher()))
.collect()
}