search: Add heuristic for discarding matching of binary files (#23581)
Fixes #23398 Closes #23398 We'll bail on searches of files that we know are binary (thus even if we were to find a match in them, they'd be thrown away by buffer loader). Release Notes: - Improved project search performance in worktrees with binary files
This commit is contained in:
parent
35ddb432b3
commit
fb63f61755
3 changed files with 23 additions and 7 deletions
|
@ -101,7 +101,7 @@ pub trait Fs: Send + Sync {
|
||||||
self.remove_file(path, options).await
|
self.remove_file(path, options).await
|
||||||
}
|
}
|
||||||
async fn open_handle(&self, path: &Path) -> Result<Arc<dyn FileHandle>>;
|
async fn open_handle(&self, path: &Path) -> Result<Arc<dyn FileHandle>>;
|
||||||
async fn open_sync(&self, path: &Path) -> Result<Box<dyn io::Read>>;
|
async fn open_sync(&self, path: &Path) -> Result<Box<dyn io::Read + Send + Sync>>;
|
||||||
async fn load(&self, path: &Path) -> Result<String> {
|
async fn load(&self, path: &Path) -> Result<String> {
|
||||||
Ok(String::from_utf8(self.load_bytes(path).await?)?)
|
Ok(String::from_utf8(self.load_bytes(path).await?)?)
|
||||||
}
|
}
|
||||||
|
@ -499,7 +499,7 @@ impl Fs for RealFs {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn open_sync(&self, path: &Path) -> Result<Box<dyn io::Read>> {
|
async fn open_sync(&self, path: &Path) -> Result<Box<dyn io::Read + Send + Sync>> {
|
||||||
Ok(Box::new(std::fs::File::open(path)?))
|
Ok(Box::new(std::fs::File::open(path)?))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1746,7 +1746,7 @@ impl Fs for FakeFs {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn open_sync(&self, path: &Path) -> Result<Box<dyn io::Read>> {
|
async fn open_sync(&self, path: &Path) -> Result<Box<dyn io::Read + Send + Sync>> {
|
||||||
let bytes = self.load_internal(path).await?;
|
let bytes = self.load_internal(path).await?;
|
||||||
Ok(Box::new(io::Cursor::new(bytes)))
|
Ok(Box::new(io::Cursor::new(bytes)))
|
||||||
}
|
}
|
||||||
|
|
|
@ -210,14 +210,17 @@ impl SearchQuery {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
|
pub(crate) fn detect(
|
||||||
|
&self,
|
||||||
|
mut reader: BufReader<Box<dyn Read + Send + Sync>>,
|
||||||
|
) -> Result<bool> {
|
||||||
if self.as_str().is_empty() {
|
if self.as_str().is_empty() {
|
||||||
return Ok(false);
|
return Ok(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
match self {
|
match self {
|
||||||
Self::Text { search, .. } => {
|
Self::Text { search, .. } => {
|
||||||
let mat = search.stream_find_iter(stream).next();
|
let mat = search.stream_find_iter(reader).next();
|
||||||
match mat {
|
match mat {
|
||||||
Some(Ok(_)) => Ok(true),
|
Some(Ok(_)) => Ok(true),
|
||||||
Some(Err(err)) => Err(err.into()),
|
Some(Err(err)) => Err(err.into()),
|
||||||
|
@ -227,7 +230,6 @@ impl SearchQuery {
|
||||||
Self::Regex {
|
Self::Regex {
|
||||||
regex, multiline, ..
|
regex, multiline, ..
|
||||||
} => {
|
} => {
|
||||||
let mut reader = BufReader::new(stream);
|
|
||||||
if *multiline {
|
if *multiline {
|
||||||
let mut text = String::new();
|
let mut text = String::new();
|
||||||
if let Err(err) = reader.read_to_string(&mut text) {
|
if let Err(err) = reader.read_to_string(&mut text) {
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
use std::{
|
use std::{
|
||||||
|
io::{BufRead, BufReader},
|
||||||
path::{Path, PathBuf},
|
path::{Path, PathBuf},
|
||||||
pin::pin,
|
pin::pin,
|
||||||
sync::{atomic::AtomicUsize, Arc},
|
sync::{atomic::AtomicUsize, Arc},
|
||||||
|
@ -985,7 +986,6 @@ impl WorktreeStore {
|
||||||
}
|
}
|
||||||
|
|
||||||
repo.change_branch(&new_branch)?;
|
repo.change_branch(&new_branch)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -1020,6 +1020,20 @@ impl WorktreeStore {
|
||||||
let Some(file) = fs.open_sync(&abs_path).await.log_err() else {
|
let Some(file) = fs.open_sync(&abs_path).await.log_err() else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let mut file = BufReader::new(file);
|
||||||
|
let file_start = file.fill_buf()?;
|
||||||
|
|
||||||
|
if let Err(Some(starting_position)) =
|
||||||
|
std::str::from_utf8(file_start).map_err(|e| e.error_len())
|
||||||
|
{
|
||||||
|
// Before attempting to match the file content, throw away files that have invalid UTF-8 sequences early on;
|
||||||
|
// That way we can still match files in a streaming fashion without having look at "obviously binary" files.
|
||||||
|
return Err(anyhow!(
|
||||||
|
"Invalid UTF-8 sequence at position {starting_position}"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
if query.detect(file).unwrap_or(false) {
|
if query.detect(file).unwrap_or(false) {
|
||||||
entry.respond.send(entry.path).await?
|
entry.respond.send(entry.path).await?
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue