search: Add heuristic for discarding matching of binary files (#23581)
Fixes #23398. Closes #23398. We now bail out of searching files that we know are binary, because even if we found a match in them, it would be thrown away by the buffer loader. Release Notes: - Improved project search performance in worktrees with binary files.
This commit is contained in:
parent
35ddb432b3
commit
fb63f61755
3 changed files with 23 additions and 7 deletions
|
@ -210,14 +210,17 @@ impl SearchQuery {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
|
||||
pub(crate) fn detect(
|
||||
&self,
|
||||
mut reader: BufReader<Box<dyn Read + Send + Sync>>,
|
||||
) -> Result<bool> {
|
||||
if self.as_str().is_empty() {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
match self {
|
||||
Self::Text { search, .. } => {
|
||||
let mat = search.stream_find_iter(stream).next();
|
||||
let mat = search.stream_find_iter(reader).next();
|
||||
match mat {
|
||||
Some(Ok(_)) => Ok(true),
|
||||
Some(Err(err)) => Err(err.into()),
|
||||
|
@ -227,7 +230,6 @@ impl SearchQuery {
|
|||
Self::Regex {
|
||||
regex, multiline, ..
|
||||
} => {
|
||||
let mut reader = BufReader::new(stream);
|
||||
if *multiline {
|
||||
let mut text = String::new();
|
||||
if let Err(err) = reader.read_to_string(&mut text) {
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
use std::{
|
||||
io::{BufRead, BufReader},
|
||||
path::{Path, PathBuf},
|
||||
pin::pin,
|
||||
sync::{atomic::AtomicUsize, Arc},
|
||||
|
@ -985,7 +986,6 @@ impl WorktreeStore {
|
|||
}
|
||||
|
||||
repo.change_branch(&new_branch)?;
|
||||
|
||||
Ok(())
|
||||
});
|
||||
|
||||
|
@ -1020,6 +1020,20 @@ impl WorktreeStore {
|
|||
let Some(file) = fs.open_sync(&abs_path).await.log_err() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let mut file = BufReader::new(file);
|
||||
let file_start = file.fill_buf()?;
|
||||
|
||||
if let Err(Some(starting_position)) =
|
||||
std::str::from_utf8(file_start).map_err(|e| e.error_len())
|
||||
{
|
||||
// Before attempting to match the file content, throw away files that have invalid UTF-8 sequences early on;
|
||||
// That way we can still match files in a streaming fashion without having to look at "obviously binary" files.
|
||||
return Err(anyhow!(
|
||||
"Invalid UTF-8 sequence at position {starting_position}"
|
||||
));
|
||||
}
|
||||
|
||||
if query.detect(file).unwrap_or(false) {
|
||||
entry.respond.send(entry.path).await?
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue