From 26f7f4f5b24e7b4d0397c890cee7ff79755bb177 Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Thu, 24 Feb 2022 12:33:28 +0100 Subject: [PATCH] WIP: Remove ripgrep and start matching query for paths ourselves --- Cargo.lock | 119 +--------------------------------- crates/project/Cargo.toml | 3 +- crates/project/src/fs.rs | 11 +++- crates/project/src/project.rs | 95 ++++++++++++++++----------- 4 files changed, 72 insertions(+), 156 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 46fb54b550..e1bb0690f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -745,9 +745,7 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a40b47ad93e1a5404e6c18dec46b628214fee441c70f4ab5d6942142cc268a3d" dependencies = [ - "lazy_static", "memchr", - "regex-automata", ] [[package]] @@ -778,12 +776,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" -[[package]] -name = "bytecount" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72feb31ffc86498dacdbd0fcebb56138e7177a8cc5cea4516031d15ae85a742e" - [[package]] name = "bytemuck" version = "1.5.1" @@ -1661,15 +1653,6 @@ dependencies = [ "cfg-if 1.0.0", ] -[[package]] -name = "encoding_rs_io" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cc3c5651fb62ab8aa3103998dade57efdd028544bd300516baa31840c252a83" -dependencies = [ - "encoding_rs", -] - [[package]] name = "entities" version = "1.0.1" @@ -2266,90 +2249,6 @@ dependencies = [ "syn", ] -[[package]] -name = "grep" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51cb840c560b45a2ffd8abf00190382789d3f596663d5ffeb2e05931c20e8657" -dependencies = [ - "grep-cli", - "grep-matcher", - "grep-printer", - "grep-regex", - "grep-searcher", -] - -[[package]] -name = "grep-cli" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dd110c34bb4460d0de5062413b773e385cbf8a85a63fc535590110a09e79e8a" -dependencies = [ - "atty", - "bstr", - "globset", - "lazy_static", - "log", - "regex", - "same-file", - "termcolor", - "winapi-util", -] - -[[package]] -name = "grep-matcher" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d27563c33062cd33003b166ade2bb4fd82db1fd6a86db764dfdad132d46c1cc" -dependencies = [ - "memchr", -] - -[[package]] -name = "grep-printer" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05c271a24daedf5675b61a275a1d0af06e03312ab7856d15433ae6cde044dc72" -dependencies = [ - "base64 0.13.0", - "bstr", - "grep-matcher", - "grep-searcher", - "serde", - "serde_json", - "termcolor", -] - -[[package]] -name = "grep-regex" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "121553c9768c363839b92fc2d7cdbbad44a3b70e8d6e7b1b72b05c977527bd06" -dependencies = [ - "aho-corasick", - "bstr", - "grep-matcher", - "log", - "regex", - "regex-syntax", - "thread_local", -] - -[[package]] -name = "grep-searcher" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fbdbde90ba52adc240d2deef7b6ad1f99f53142d074b771fe9b7bede6c4c23d" -dependencies = [ - "bstr", - "bytecount", - "encoding_rs", - "encoding_rs_io", - "grep-matcher", - "log", - "memmap2 0.3.1", -] - [[package]] name = "group" version = "0.10.0" @@ -3013,15 +2912,6 @@ dependencies = [ "libc", ] -[[package]] -name = "memmap2" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b6c2ebff6180198788f5db08d7ce3bc1d0b617176678831a7510825973e357" -dependencies = [ - "libc", -] - [[package]] name = "memoffset" version = "0.6.3" @@ -3665,6 +3555,7 @@ dependencies = [ name = "project" version = "0.1.0" dependencies = [ + "aho-corasick", "anyhow", "async-trait", "client", @@ -3674,7 +3565,6 @@ dependencies = [ "futures", "fuzzy", "gpui", - "grep", "ignore", "language", "lazy_static", @@ -3684,6 +3574,7 @@ dependencies = [ "parking_lot", "postage", "rand 0.8.3", + "regex", "rpc", "serde", "serde_json", @@ -3978,12 +3869,6 @@ dependencies = [ "regex-syntax", ] -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" - [[package]] name = "regex-syntax" version = "0.6.25" diff --git a/crates/project/Cargo.toml b/crates/project/Cargo.toml index ec1669ec72..dea5a10279 100644 --- a/crates/project/Cargo.toml +++ b/crates/project/Cargo.toml @@ -26,10 +26,10 @@ lsp = { path = "../lsp" } rpc = { path = "../rpc" } sum_tree = { path = "../sum_tree" } util = { path = "../util" } +aho-corasick = "0.7" anyhow = "1.0.38" async-trait = "0.1" futures = "0.3" -grep = "0.2" ignore = "0.4" lazy_static = "1.4.0" libc = "0.2" @@ -37,6 +37,7 @@ log = "0.4" parking_lot = "0.11.1" postage = { version = "0.4.1", features = ["futures-traits"] } rand = "0.8.3" +regex = "1.5" serde = { version = "1", features = ["derive"] } serde_json = { version = "1.0.64", features = ["preserve_order"] } sha2 = "0.10" diff --git a/crates/project/src/fs.rs b/crates/project/src/fs.rs index 7f89c29c83..578be8cf82 100644 --- a/crates/project/src/fs.rs +++ b/crates/project/src/fs.rs @@ -18,6 +18,7 @@ pub trait Fs: Send + Sync { async fn rename(&self, source: &Path, target: &Path, options: RenameOptions) -> Result<()>; async fn remove_dir(&self, path: &Path, options: RemoveOptions) -> Result<()>; async fn remove_file(&self, path: &Path, options: RemoveOptions) -> Result<()>; + async fn open_sync(&self, path: &Path) -> Result>; async fn load(&self, path: &Path) -> Result; async fn save(&self, path: &Path, text: &Rope) -> Result<()>; async fn canonicalize(&self, path: &Path) -> Result; @@ -121,6 +122,10 @@ impl Fs for RealFs { } } + async fn open_sync(&self, path: &Path) -> Result> { + Ok(Box::new(std::fs::File::open(path)?)) + } + async fn load(&self, path: &Path) -> Result { let mut file = smol::fs::File::open(path).await?; let mut text = String::new(); @@ -203,7 +208,6 @@ impl Fs for RealFs { fn is_fake(&self) -> bool { false } - #[cfg(any(test, feature = "test-support"))] fn as_fake(&self) -> &FakeFs { panic!("called `RealFs::as_fake`") @@ -535,6 +539,11 @@ impl Fs for FakeFs { Ok(()) } + async fn open_sync(&self, path: &Path) -> Result> { + let text = self.load(path).await?; + Ok(Box::new(io::Cursor::new(text))) + } + async fn load(&self, path: &Path) -> Result { let path = normalize_path(path); self.executor.simulate_random_delay().await; diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs index 6942254870..b77d8add93 100644 --- a/crates/project/src/project.rs +++ b/crates/project/src/project.rs @@ -3,6 +3,7 @@ mod ignore; mod lsp_command; pub mod worktree; +use aho_corasick::AhoCorasickBuilder; use anyhow::{anyhow, Context, Result}; use client::{proto, Client, PeerId, TypedEnvelope, User, UserStore}; use clock::ReplicaId; @@ -13,7 +14,6 @@ use gpui::{ AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, MutableAppContext, Task, UpgradeModelHandle, WeakModelHandle, }; -use grep::{matcher::Matcher, searcher::Searcher}; use language::{ range_from_lsp, Anchor, AnchorRangeExt, Bias, Buffer, CodeAction, CodeLabel, Completion, Diagnostic, DiagnosticEntry, File as _, Language, LanguageRegistry, Operation, PointUtf16, @@ -152,6 +152,10 @@ pub struct Symbol { pub signature: [u8; 32], } +pub enum SearchQuery { + Plain(String), +} + pub struct BufferRequestHandle(Rc>); #[derive(Default)] @@ -2043,16 +2047,13 @@ impl Project { ) } - pub fn search( + pub fn search( &self, query: SearchQuery, cx: &mut ModelContext, - ) -> Task, Vec>>> - where - T: Matcher, - { + ) -> Task, Vec>>> { if self.is_local() { - let (queue_tx, queue_rx) = smol::channel::bounded(1024); + let (paths_to_search_tx, paths_to_search_rx) = smol::channel::bounded(1024); // Submit all worktree paths to the queue. let snapshots = self @@ -2063,55 +2064,75 @@ impl Project { }) .collect::>(); cx.background() - .spawn({ - let queue_tx = queue_tx.clone(); - async move { - for (snapshot_abs_path, snapshot) in snapshots { - for file in snapshot.files(false, 0) { - if queue_tx - .send((snapshot_abs_path.clone(), file.path.clone())) - .await - .is_err() - { - return; - } + .spawn(async move { + for (snapshot_abs_path, snapshot) in snapshots { + for file in snapshot.files(false, 0) { + if paths_to_search_tx + .send((snapshot_abs_path.clone(), file.path.clone())) + .await + .is_err() + { + return; } } } }) .detach(); - let matcher = Arc::new(matcher); + let SearchQuery::Plain(query) = query; + let search = Arc::new( + AhoCorasickBuilder::new() + .auto_configure(&[&query]) + // .ascii_case_insensitive(!case_sensitive) + .build(&[&query]), + ); + let (matching_paths_tx, matching_paths_rx) = smol::channel::bounded(1024); cx.background() .spawn({ + let fs = self.fs.clone(); let background = cx.background().clone(); let workers = background.num_cpus(); - let searcher = searcher.clone(); - let matcher = matcher.clone(); + let search = search.clone(); async move { + let fs = &fs; + let search = &search; + let matching_paths_tx = &matching_paths_tx; background .scoped(|scope| { for _ in 0..workers { - let mut paths_rx = queue_rx.clone(); + let mut paths_to_search_rx = paths_to_search_rx.clone(); scope.spawn(async move { let mut path = PathBuf::new(); while let Some((snapshot_abs_path, file_path)) = - paths_rx.next().await + paths_to_search_rx.next().await { - path.clear(); - path.push(snapshot_abs_path); - path.push(file_path); - let mut matched = false; - // searcher.search_path( - // matcher.as_ref(), - // &path, - // grep::searcher::sinks::Bytes(|_, _| { - // matched = true; - // Ok(false) - // }), - // ); + if matching_paths_tx.is_closed() { + break; + } - if matched {} + path.clear(); + path.push(&snapshot_abs_path); + path.push(&file_path); + let matches = if let Some(file) = + fs.open_sync(&path).await.log_err() + { + search + .stream_find_iter(file) + .next() + .map_or(false, |mat| mat.is_ok()) + } else { + false + }; + + if matches { + if matching_paths_tx + .send((snapshot_abs_path, file_path)) + .await + .is_err() + { + break; + } + } } }); }