More git status optimizations (#2779)
Follow-up to https://github.com/zed-industries/zed/pull/2777 Refs https://github.com/zed-industries/community/issues/1770 In this PR, I reworked the way that git statuses are retrieved. In a huge repository like `WebKit`, the really slow part of computing a list of git statuses is the *unstaged* portion of the diff. For the *staged* diff, `git` can avoid comparing the contents of unchanged directories, because the index contains hashes of every tree. But for the *unstaged* portion, Git needs to compare every file in the worktree against the index. In the common case, when there are no changes, it's enough to check the `mtime` of every file (because the index stores the mtimes of files when they are added). But this still requires an `lstat` call to retrieve each file's metadata. I realized that this is redundant work, because the worktree is *already* calling `lstat` on every file, and caching their metadata. So in this PR, I've changed the `Repository` API so that there are separate methods for retrieving a file's *staged* and *unstaged* statuses. The *staged* statuses are retrieved in one giant batch, like before, to reduce our git calls (which also have an inherent cost). But the `unstaged` statuses are retrieved one-by-one, after we load files' mtimes. Often, all that's required is an index lookup, and an mtime comparison. With this optimization, it once again becomes pretty responsive to open `WebKit` or `chromium` in Zed. Release Notes: - Optimized the loading of project file when working in very large git repositories
This commit is contained in:
commit
a01d973477
2 changed files with 169 additions and 56 deletions
|
@ -1,6 +1,6 @@
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use collections::HashMap;
|
use collections::HashMap;
|
||||||
use git2::{BranchType, ErrorCode};
|
use git2::{BranchType, StatusShow};
|
||||||
use parking_lot::Mutex;
|
use parking_lot::Mutex;
|
||||||
use rpc::proto;
|
use rpc::proto;
|
||||||
use serde_derive::{Deserialize, Serialize};
|
use serde_derive::{Deserialize, Serialize};
|
||||||
|
@ -10,6 +10,7 @@ use std::{
|
||||||
os::unix::prelude::OsStrExt,
|
os::unix::prelude::OsStrExt,
|
||||||
path::{Component, Path, PathBuf},
|
path::{Component, Path, PathBuf},
|
||||||
sync::Arc,
|
sync::Arc,
|
||||||
|
time::SystemTime,
|
||||||
};
|
};
|
||||||
use sum_tree::{MapSeekTarget, TreeMap};
|
use sum_tree::{MapSeekTarget, TreeMap};
|
||||||
use util::ResultExt;
|
use util::ResultExt;
|
||||||
|
@ -27,8 +28,25 @@ pub trait GitRepository: Send {
|
||||||
fn reload_index(&self);
|
fn reload_index(&self);
|
||||||
fn load_index_text(&self, relative_file_path: &Path) -> Option<String>;
|
fn load_index_text(&self, relative_file_path: &Path) -> Option<String>;
|
||||||
fn branch_name(&self) -> Option<String>;
|
fn branch_name(&self) -> Option<String>;
|
||||||
fn statuses(&self) -> TreeMap<RepoPath, GitFileStatus>;
|
|
||||||
fn status(&self, path: &RepoPath) -> Result<Option<GitFileStatus>>;
|
/// Get the statuses of all of the files in the index that start with the given
|
||||||
|
/// path and have changes with resepect to the HEAD commit. This is fast because
|
||||||
|
/// the index stores hashes of trees, so that unchanged directories can be skipped.
|
||||||
|
fn staged_statuses(&self, path_prefix: &Path) -> TreeMap<RepoPath, GitFileStatus>;
|
||||||
|
|
||||||
|
/// Get the status of a given file in the working directory with respect to
|
||||||
|
/// the index. In the common case, when there are no changes, this only requires
|
||||||
|
/// an index lookup. The index stores the mtime of each file when it was added,
|
||||||
|
/// so there's no work to do if the mtime matches.
|
||||||
|
fn unstaged_status(&self, path: &RepoPath, mtime: SystemTime) -> Option<GitFileStatus>;
|
||||||
|
|
||||||
|
/// Get the status of a given file in the working directory with respect to
|
||||||
|
/// the HEAD commit. In the common case, when there are no changes, this only
|
||||||
|
/// requires an index lookup and blob comparison between the index and the HEAD
|
||||||
|
/// commit. The index stores the mtime of each file when it was added, so there's
|
||||||
|
/// no need to consider the working directory file if the mtime matches.
|
||||||
|
fn status(&self, path: &RepoPath, mtime: SystemTime) -> Option<GitFileStatus>;
|
||||||
|
|
||||||
fn branches(&self) -> Result<Vec<Branch>>;
|
fn branches(&self) -> Result<Vec<Branch>>;
|
||||||
fn change_branch(&self, _: &str) -> Result<()>;
|
fn change_branch(&self, _: &str) -> Result<()>;
|
||||||
fn create_branch(&self, _: &str) -> Result<()>;
|
fn create_branch(&self, _: &str) -> Result<()>;
|
||||||
|
@ -40,7 +58,6 @@ impl std::fmt::Debug for dyn GitRepository {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
|
||||||
impl GitRepository for LibGitRepository {
|
impl GitRepository for LibGitRepository {
|
||||||
fn reload_index(&self) {
|
fn reload_index(&self) {
|
||||||
if let Ok(mut index) = self.index() {
|
if let Ok(mut index) = self.index() {
|
||||||
|
@ -78,37 +95,67 @@ impl GitRepository for LibGitRepository {
|
||||||
Some(branch.to_string())
|
Some(branch.to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn statuses(&self) -> TreeMap<RepoPath, GitFileStatus> {
|
fn staged_statuses(&self, path_prefix: &Path) -> TreeMap<RepoPath, GitFileStatus> {
|
||||||
let mut map = TreeMap::default();
|
let mut map = TreeMap::default();
|
||||||
if let Some(statuses) = self.statuses(None).log_err() {
|
|
||||||
for status in statuses
|
|
||||||
.iter()
|
|
||||||
.filter(|status| !status.status().contains(git2::Status::IGNORED))
|
|
||||||
{
|
|
||||||
let path = RepoPath(PathBuf::from(OsStr::from_bytes(status.path_bytes())));
|
|
||||||
let Some(status) = read_status(status.status()) else {
|
|
||||||
continue
|
|
||||||
};
|
|
||||||
|
|
||||||
map.insert(path, status)
|
let mut options = git2::StatusOptions::new();
|
||||||
|
options.pathspec(path_prefix);
|
||||||
|
options.show(StatusShow::Index);
|
||||||
|
|
||||||
|
if let Some(statuses) = self.statuses(Some(&mut options)).log_err() {
|
||||||
|
for status in statuses.iter() {
|
||||||
|
let path = RepoPath(PathBuf::from(OsStr::from_bytes(status.path_bytes())));
|
||||||
|
let status = status.status();
|
||||||
|
if !status.contains(git2::Status::IGNORED) {
|
||||||
|
if let Some(status) = read_status(status) {
|
||||||
|
map.insert(path, status)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
map
|
map
|
||||||
}
|
}
|
||||||
|
|
||||||
fn status(&self, path: &RepoPath) -> Result<Option<GitFileStatus>> {
|
fn unstaged_status(&self, path: &RepoPath, mtime: SystemTime) -> Option<GitFileStatus> {
|
||||||
let status = self.status_file(path);
|
// If the file has not changed since it was added to the index, then
|
||||||
match status {
|
// there can't be any changes.
|
||||||
Ok(status) => Ok(read_status(status)),
|
if matches_index(self, path, mtime) {
|
||||||
Err(e) => {
|
return None;
|
||||||
if e.code() == ErrorCode::NotFound {
|
|
||||||
Ok(None)
|
|
||||||
} else {
|
|
||||||
Err(e.into())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut options = git2::StatusOptions::new();
|
||||||
|
options.pathspec(&path.0);
|
||||||
|
options.disable_pathspec_match(true);
|
||||||
|
options.include_untracked(true);
|
||||||
|
options.recurse_untracked_dirs(true);
|
||||||
|
options.include_unmodified(true);
|
||||||
|
options.show(StatusShow::Workdir);
|
||||||
|
|
||||||
|
let statuses = self.statuses(Some(&mut options)).log_err()?;
|
||||||
|
let status = statuses.get(0).and_then(|s| read_status(s.status()));
|
||||||
|
status
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn status(&self, path: &RepoPath, mtime: SystemTime) -> Option<GitFileStatus> {
|
||||||
|
let mut options = git2::StatusOptions::new();
|
||||||
|
options.pathspec(&path.0);
|
||||||
|
options.disable_pathspec_match(true);
|
||||||
|
options.include_untracked(true);
|
||||||
|
options.recurse_untracked_dirs(true);
|
||||||
|
options.include_unmodified(true);
|
||||||
|
|
||||||
|
// If the file has not changed since it was added to the index, then
|
||||||
|
// there's no need to examine the working directory file: just compare
|
||||||
|
// the blob in the index to the one in the HEAD commit.
|
||||||
|
if matches_index(self, path, mtime) {
|
||||||
|
options.show(StatusShow::Index);
|
||||||
|
}
|
||||||
|
|
||||||
|
let statuses = self.statuses(Some(&mut options)).log_err()?;
|
||||||
|
let status = statuses.get(0).and_then(|s| read_status(s.status()));
|
||||||
|
status
|
||||||
|
}
|
||||||
|
|
||||||
fn branches(&self) -> Result<Vec<Branch>> {
|
fn branches(&self) -> Result<Vec<Branch>> {
|
||||||
let local_branches = self.branches(Some(BranchType::Local))?;
|
let local_branches = self.branches(Some(BranchType::Local))?;
|
||||||
let valid_branches = local_branches
|
let valid_branches = local_branches
|
||||||
|
@ -151,6 +198,21 @@ impl GitRepository for LibGitRepository {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn matches_index(repo: &LibGitRepository, path: &RepoPath, mtime: SystemTime) -> bool {
|
||||||
|
if let Some(index) = repo.index().log_err() {
|
||||||
|
if let Some(entry) = index.get_path(&path, 0) {
|
||||||
|
if let Some(mtime) = mtime.duration_since(SystemTime::UNIX_EPOCH).log_err() {
|
||||||
|
if entry.mtime.seconds() == mtime.as_secs() as i32
|
||||||
|
&& entry.mtime.nanoseconds() == mtime.subsec_nanos()
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
fn read_status(status: git2::Status) -> Option<GitFileStatus> {
|
fn read_status(status: git2::Status) -> Option<GitFileStatus> {
|
||||||
if status.contains(git2::Status::CONFLICTED) {
|
if status.contains(git2::Status::CONFLICTED) {
|
||||||
Some(GitFileStatus::Conflict)
|
Some(GitFileStatus::Conflict)
|
||||||
|
@ -200,18 +262,24 @@ impl GitRepository for FakeGitRepository {
|
||||||
state.branch_name.clone()
|
state.branch_name.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn statuses(&self) -> TreeMap<RepoPath, GitFileStatus> {
|
fn staged_statuses(&self, path_prefix: &Path) -> TreeMap<RepoPath, GitFileStatus> {
|
||||||
let mut map = TreeMap::default();
|
let mut map = TreeMap::default();
|
||||||
let state = self.state.lock();
|
let state = self.state.lock();
|
||||||
for (repo_path, status) in state.worktree_statuses.iter() {
|
for (repo_path, status) in state.worktree_statuses.iter() {
|
||||||
map.insert(repo_path.to_owned(), status.to_owned());
|
if repo_path.0.starts_with(path_prefix) {
|
||||||
|
map.insert(repo_path.to_owned(), status.to_owned());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
map
|
map
|
||||||
}
|
}
|
||||||
|
|
||||||
fn status(&self, path: &RepoPath) -> Result<Option<GitFileStatus>> {
|
fn unstaged_status(&self, _path: &RepoPath, _mtime: SystemTime) -> Option<GitFileStatus> {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
fn status(&self, path: &RepoPath, _mtime: SystemTime) -> Option<GitFileStatus> {
|
||||||
let state = self.state.lock();
|
let state = self.state.lock();
|
||||||
Ok(state.worktree_statuses.get(path).cloned())
|
state.worktree_statuses.get(path).cloned()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn branches(&self) -> Result<Vec<Branch>> {
|
fn branches(&self) -> Result<Vec<Branch>> {
|
||||||
|
|
|
@ -2162,10 +2162,18 @@ impl BackgroundScannerState {
|
||||||
let path = entry.path.clone();
|
let path = entry.path.clone();
|
||||||
let ignore_stack = self.snapshot.ignore_stack_for_abs_path(&abs_path, true);
|
let ignore_stack = self.snapshot.ignore_stack_for_abs_path(&abs_path, true);
|
||||||
let mut ancestor_inodes = self.snapshot.ancestor_inodes_for_path(&path);
|
let mut ancestor_inodes = self.snapshot.ancestor_inodes_for_path(&path);
|
||||||
let containing_repository = self
|
let mut containing_repository = None;
|
||||||
.snapshot
|
if !ignore_stack.is_all() {
|
||||||
.local_repo_for_path(&path)
|
if let Some((workdir_path, repo)) = self.snapshot.local_repo_for_path(&path) {
|
||||||
.map(|(path, repo)| (path, repo.repo_ptr.lock().statuses()));
|
if let Ok(repo_path) = path.strip_prefix(&workdir_path.0) {
|
||||||
|
containing_repository = Some((
|
||||||
|
workdir_path,
|
||||||
|
repo.repo_ptr.clone(),
|
||||||
|
repo.repo_ptr.lock().staged_statuses(repo_path),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
if !ancestor_inodes.contains(&entry.inode) {
|
if !ancestor_inodes.contains(&entry.inode) {
|
||||||
ancestor_inodes.insert(entry.inode);
|
ancestor_inodes.insert(entry.inode);
|
||||||
scan_job_tx
|
scan_job_tx
|
||||||
|
@ -2355,8 +2363,7 @@ impl BackgroundScannerState {
|
||||||
.repository_entries
|
.repository_entries
|
||||||
.update(&work_dir, |entry| entry.branch = branch.map(Into::into));
|
.update(&work_dir, |entry| entry.branch = branch.map(Into::into));
|
||||||
|
|
||||||
let statuses = repository.statuses();
|
self.update_git_statuses(&work_dir, &*repository);
|
||||||
self.update_git_statuses(&work_dir, &statuses);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2381,7 +2388,11 @@ impl BackgroundScannerState {
|
||||||
&mut self,
|
&mut self,
|
||||||
dot_git_path: Arc<Path>,
|
dot_git_path: Arc<Path>,
|
||||||
fs: &dyn Fs,
|
fs: &dyn Fs,
|
||||||
) -> Option<(RepositoryWorkDirectory, TreeMap<RepoPath, GitFileStatus>)> {
|
) -> Option<(
|
||||||
|
RepositoryWorkDirectory,
|
||||||
|
Arc<Mutex<dyn GitRepository>>,
|
||||||
|
TreeMap<RepoPath, GitFileStatus>,
|
||||||
|
)> {
|
||||||
log::info!("build git repository {:?}", dot_git_path);
|
log::info!("build git repository {:?}", dot_git_path);
|
||||||
|
|
||||||
let work_dir_path: Arc<Path> = dot_git_path.parent().unwrap().into();
|
let work_dir_path: Arc<Path> = dot_git_path.parent().unwrap().into();
|
||||||
|
@ -2413,27 +2424,28 @@ impl BackgroundScannerState {
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
let statuses = repo_lock.statuses();
|
let staged_statuses = self.update_git_statuses(&work_directory, &*repo_lock);
|
||||||
self.update_git_statuses(&work_directory, &statuses);
|
|
||||||
drop(repo_lock);
|
drop(repo_lock);
|
||||||
|
|
||||||
self.snapshot.git_repositories.insert(
|
self.snapshot.git_repositories.insert(
|
||||||
work_dir_id,
|
work_dir_id,
|
||||||
LocalRepositoryEntry {
|
LocalRepositoryEntry {
|
||||||
git_dir_scan_id: 0,
|
git_dir_scan_id: 0,
|
||||||
repo_ptr: repository,
|
repo_ptr: repository.clone(),
|
||||||
git_dir_path: dot_git_path.clone(),
|
git_dir_path: dot_git_path.clone(),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
Some((work_directory, statuses))
|
Some((work_directory, repository, staged_statuses))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn update_git_statuses(
|
fn update_git_statuses(
|
||||||
&mut self,
|
&mut self,
|
||||||
work_directory: &RepositoryWorkDirectory,
|
work_directory: &RepositoryWorkDirectory,
|
||||||
statuses: &TreeMap<RepoPath, GitFileStatus>,
|
repo: &dyn GitRepository,
|
||||||
) {
|
) -> TreeMap<RepoPath, GitFileStatus> {
|
||||||
|
let staged_statuses = repo.staged_statuses(Path::new(""));
|
||||||
|
|
||||||
let mut changes = vec![];
|
let mut changes = vec![];
|
||||||
let mut edits = vec![];
|
let mut edits = vec![];
|
||||||
|
|
||||||
|
@ -2446,7 +2458,10 @@ impl BackgroundScannerState {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
let repo_path = RepoPath(repo_path.to_path_buf());
|
let repo_path = RepoPath(repo_path.to_path_buf());
|
||||||
let git_file_status = statuses.get(&repo_path).copied();
|
let git_file_status = combine_git_statuses(
|
||||||
|
staged_statuses.get(&repo_path).copied(),
|
||||||
|
repo.unstaged_status(&repo_path, entry.mtime),
|
||||||
|
);
|
||||||
if entry.git_status != git_file_status {
|
if entry.git_status != git_file_status {
|
||||||
entry.git_status = git_file_status;
|
entry.git_status = git_file_status;
|
||||||
changes.push(entry.path.clone());
|
changes.push(entry.path.clone());
|
||||||
|
@ -2456,6 +2471,7 @@ impl BackgroundScannerState {
|
||||||
|
|
||||||
self.snapshot.entries_by_path.edit(edits, &());
|
self.snapshot.entries_by_path.edit(edits, &());
|
||||||
util::extend_sorted(&mut self.changed_paths, changes, usize::MAX, Ord::cmp);
|
util::extend_sorted(&mut self.changed_paths, changes, usize::MAX, Ord::cmp);
|
||||||
|
staged_statuses
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3517,10 +3533,19 @@ impl BackgroundScanner {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
child_entry.is_ignored = ignore_stack.is_abs_path_ignored(&child_abs_path, false);
|
child_entry.is_ignored = ignore_stack.is_abs_path_ignored(&child_abs_path, false);
|
||||||
|
if !child_entry.is_ignored {
|
||||||
if let Some((repository_dir, statuses)) = &job.containing_repository {
|
if let Some((repository_dir, repository, staged_statuses)) =
|
||||||
if let Ok(repo_path) = child_entry.path.strip_prefix(&repository_dir.0) {
|
&job.containing_repository
|
||||||
child_entry.git_status = statuses.get(&RepoPath(repo_path.into())).copied();
|
{
|
||||||
|
if let Ok(repo_path) = child_entry.path.strip_prefix(&repository_dir.0) {
|
||||||
|
let repo_path = RepoPath(repo_path.into());
|
||||||
|
child_entry.git_status = combine_git_statuses(
|
||||||
|
staged_statuses.get(&repo_path).copied(),
|
||||||
|
repository
|
||||||
|
.lock()
|
||||||
|
.unstaged_status(&repo_path, child_entry.mtime),
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3630,13 +3655,10 @@ impl BackgroundScanner {
|
||||||
if let Some((work_dir, repo)) =
|
if let Some((work_dir, repo)) =
|
||||||
state.snapshot.local_repo_for_path(&path)
|
state.snapshot.local_repo_for_path(&path)
|
||||||
{
|
{
|
||||||
if let Ok(path) = path.strip_prefix(work_dir.0) {
|
if let Ok(repo_path) = path.strip_prefix(work_dir.0) {
|
||||||
fs_entry.git_status = repo
|
let repo_path = RepoPath(repo_path.into());
|
||||||
.repo_ptr
|
let repo = repo.repo_ptr.lock();
|
||||||
.lock()
|
fs_entry.git_status = repo.status(&repo_path, fs_entry.mtime);
|
||||||
.status(&RepoPath(path.into()))
|
|
||||||
.log_err()
|
|
||||||
.flatten()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3990,7 +4012,11 @@ struct ScanJob {
|
||||||
scan_queue: Sender<ScanJob>,
|
scan_queue: Sender<ScanJob>,
|
||||||
ancestor_inodes: TreeSet<u64>,
|
ancestor_inodes: TreeSet<u64>,
|
||||||
is_external: bool,
|
is_external: bool,
|
||||||
containing_repository: Option<(RepositoryWorkDirectory, TreeMap<RepoPath, GitFileStatus>)>,
|
containing_repository: Option<(
|
||||||
|
RepositoryWorkDirectory,
|
||||||
|
Arc<Mutex<dyn GitRepository>>,
|
||||||
|
TreeMap<RepoPath, GitFileStatus>,
|
||||||
|
)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct UpdateIgnoreStatusJob {
|
struct UpdateIgnoreStatusJob {
|
||||||
|
@ -4317,3 +4343,22 @@ impl<'a> TryFrom<(&'a CharBag, proto::Entry)> for Entry {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn combine_git_statuses(
|
||||||
|
staged: Option<GitFileStatus>,
|
||||||
|
unstaged: Option<GitFileStatus>,
|
||||||
|
) -> Option<GitFileStatus> {
|
||||||
|
if let Some(staged) = staged {
|
||||||
|
if let Some(unstaged) = unstaged {
|
||||||
|
if unstaged != staged {
|
||||||
|
Some(GitFileStatus::Modified)
|
||||||
|
} else {
|
||||||
|
Some(staged)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Some(staged)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
unstaged
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue