More git status optimizations (#2779)

Follow-up to https://github.com/zed-industries/zed/pull/2777
Refs https://github.com/zed-industries/community/issues/1770

In this PR, I reworked the way that git statuses are retrieved. In a
huge repository like `WebKit`, the really slow part of computing a list
of git statuses is the *unstaged* portion of the diff. For the *staged*
diff, `git` can avoid comparing the contents of unchanged directories,
because the index contains hashes of every tree. But for the *unstaged*
portion, Git needs to compare every file in the worktree against the
index. In the common case, when there are no changes, it's enough to
check the `mtime` of every file (because the index stores the mtimes of
files when they are added). But this still requires an `lstat` call to
retrieve each file's metadata.

I realized that this is redundant work, because the worktree is
*already* calling `lstat` on every file, and caching their metadata. So
in this PR, I've changed the `Repository` API so that there are separate
methods for retrieving a file's *staged* and *unstaged* statuses. The
*staged* statuses are retrieved in one giant batch, like before, to
reduce our git calls (which also have an inherent cost). But the
*unstaged* statuses are retrieved one by one, after we load files'
mtimes. Often, all that's required is an index lookup, and an mtime
comparison.

With this optimization, opening `WebKit` or `chromium` in Zed once again
becomes pretty responsive.

Release Notes:

- Optimized the loading of project files when working in very large git
repositories
This commit is contained in:
Max Brunsfeld 2023-07-24 11:23:32 -07:00 committed by GitHub
commit a01d973477
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 169 additions and 56 deletions

View file

@ -2162,10 +2162,18 @@ impl BackgroundScannerState {
let path = entry.path.clone();
let ignore_stack = self.snapshot.ignore_stack_for_abs_path(&abs_path, true);
let mut ancestor_inodes = self.snapshot.ancestor_inodes_for_path(&path);
let containing_repository = self
.snapshot
.local_repo_for_path(&path)
.map(|(path, repo)| (path, repo.repo_ptr.lock().statuses()));
let mut containing_repository = None;
if !ignore_stack.is_all() {
if let Some((workdir_path, repo)) = self.snapshot.local_repo_for_path(&path) {
if let Ok(repo_path) = path.strip_prefix(&workdir_path.0) {
containing_repository = Some((
workdir_path,
repo.repo_ptr.clone(),
repo.repo_ptr.lock().staged_statuses(repo_path),
));
}
}
}
if !ancestor_inodes.contains(&entry.inode) {
ancestor_inodes.insert(entry.inode);
scan_job_tx
@ -2355,8 +2363,7 @@ impl BackgroundScannerState {
.repository_entries
.update(&work_dir, |entry| entry.branch = branch.map(Into::into));
let statuses = repository.statuses();
self.update_git_statuses(&work_dir, &statuses);
self.update_git_statuses(&work_dir, &*repository);
}
}
}
@ -2381,7 +2388,11 @@ impl BackgroundScannerState {
&mut self,
dot_git_path: Arc<Path>,
fs: &dyn Fs,
) -> Option<(RepositoryWorkDirectory, TreeMap<RepoPath, GitFileStatus>)> {
) -> Option<(
RepositoryWorkDirectory,
Arc<Mutex<dyn GitRepository>>,
TreeMap<RepoPath, GitFileStatus>,
)> {
log::info!("build git repository {:?}", dot_git_path);
let work_dir_path: Arc<Path> = dot_git_path.parent().unwrap().into();
@ -2413,27 +2424,28 @@ impl BackgroundScannerState {
},
);
let statuses = repo_lock.statuses();
self.update_git_statuses(&work_directory, &statuses);
let staged_statuses = self.update_git_statuses(&work_directory, &*repo_lock);
drop(repo_lock);
self.snapshot.git_repositories.insert(
work_dir_id,
LocalRepositoryEntry {
git_dir_scan_id: 0,
repo_ptr: repository,
repo_ptr: repository.clone(),
git_dir_path: dot_git_path.clone(),
},
);
Some((work_directory, statuses))
Some((work_directory, repository, staged_statuses))
}
fn update_git_statuses(
&mut self,
work_directory: &RepositoryWorkDirectory,
statuses: &TreeMap<RepoPath, GitFileStatus>,
) {
repo: &dyn GitRepository,
) -> TreeMap<RepoPath, GitFileStatus> {
let staged_statuses = repo.staged_statuses(Path::new(""));
let mut changes = vec![];
let mut edits = vec![];
@ -2446,7 +2458,10 @@ impl BackgroundScannerState {
continue;
};
let repo_path = RepoPath(repo_path.to_path_buf());
let git_file_status = statuses.get(&repo_path).copied();
let git_file_status = combine_git_statuses(
staged_statuses.get(&repo_path).copied(),
repo.unstaged_status(&repo_path, entry.mtime),
);
if entry.git_status != git_file_status {
entry.git_status = git_file_status;
changes.push(entry.path.clone());
@ -2456,6 +2471,7 @@ impl BackgroundScannerState {
self.snapshot.entries_by_path.edit(edits, &());
util::extend_sorted(&mut self.changed_paths, changes, usize::MAX, Ord::cmp);
staged_statuses
}
}
@ -3517,10 +3533,19 @@ impl BackgroundScanner {
}
} else {
child_entry.is_ignored = ignore_stack.is_abs_path_ignored(&child_abs_path, false);
if let Some((repository_dir, statuses)) = &job.containing_repository {
if let Ok(repo_path) = child_entry.path.strip_prefix(&repository_dir.0) {
child_entry.git_status = statuses.get(&RepoPath(repo_path.into())).copied();
if !child_entry.is_ignored {
if let Some((repository_dir, repository, staged_statuses)) =
&job.containing_repository
{
if let Ok(repo_path) = child_entry.path.strip_prefix(&repository_dir.0) {
let repo_path = RepoPath(repo_path.into());
child_entry.git_status = combine_git_statuses(
staged_statuses.get(&repo_path).copied(),
repository
.lock()
.unstaged_status(&repo_path, child_entry.mtime),
);
}
}
}
}
@ -3630,13 +3655,10 @@ impl BackgroundScanner {
if let Some((work_dir, repo)) =
state.snapshot.local_repo_for_path(&path)
{
if let Ok(path) = path.strip_prefix(work_dir.0) {
fs_entry.git_status = repo
.repo_ptr
.lock()
.status(&RepoPath(path.into()))
.log_err()
.flatten()
if let Ok(repo_path) = path.strip_prefix(work_dir.0) {
let repo_path = RepoPath(repo_path.into());
let repo = repo.repo_ptr.lock();
fs_entry.git_status = repo.status(&repo_path, fs_entry.mtime);
}
}
}
@ -3990,7 +4012,11 @@ struct ScanJob {
scan_queue: Sender<ScanJob>,
ancestor_inodes: TreeSet<u64>,
is_external: bool,
containing_repository: Option<(RepositoryWorkDirectory, TreeMap<RepoPath, GitFileStatus>)>,
containing_repository: Option<(
RepositoryWorkDirectory,
Arc<Mutex<dyn GitRepository>>,
TreeMap<RepoPath, GitFileStatus>,
)>,
}
struct UpdateIgnoreStatusJob {
@ -4317,3 +4343,22 @@ impl<'a> TryFrom<(&'a CharBag, proto::Entry)> for Entry {
}
}
}
/// Merges a file's staged and unstaged git statuses into a single status.
///
/// * If both statuses are present and differ, the result is
///   `GitFileStatus::Modified`.
/// * If both are present and equal, or only the staged status is present,
///   the staged status is returned.
/// * If there is no staged status, the unstaged status (possibly `None`)
///   is returned unchanged.
fn combine_git_statuses(
    staged: Option<GitFileStatus>,
    unstaged: Option<GitFileStatus>,
) -> Option<GitFileStatus> {
    match (staged, unstaged) {
        // Both sides report something, but they disagree.
        (Some(index_status), Some(worktree_status)) if index_status != worktree_status => {
            Some(GitFileStatus::Modified)
        }
        // Either both agree or only the staged side has a status.
        (Some(index_status), _) => Some(index_status),
        // Nothing staged: fall back to whatever the unstaged side says.
        (None, worktree_status) => worktree_status,
    }
}