worktree: Improve performance with large # of repositories (#35052)

In this PR we've reworked how git status updates are processed. The most
notable change is moving the processing into a background thread (and
splitting it across multiple background workers). We believe it is safe
to do so: worktree events are not deterministic (e.g. fs updates are not
guaranteed to arrive in any particular order), so the git store should
not be overly reliant on ordering anyway.

Note that this PR does not solve perf issues wholesale - other parts of
the system are still slow to process stuff (which I plan to nuke soon).

Related to #34302

Release Notes:

- Improved Zed's performance in projects with large # of repositories

---------

Co-authored-by: Anthony Eid <hello@anthonyeid.me>
This commit is contained in:
Piotr Osiewicz 2025-07-25 13:18:19 +02:00 committed by GitHub
parent af0c909924
commit 631f9a1b31
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 142 additions and 38 deletions

View file

@ -1,6 +1,6 @@
use collections::HashMap;
use git::status::GitSummary;
use std::{ops::Deref, path::Path};
use git::{repository::RepoPath, status::GitSummary};
use std::{collections::BTreeMap, ops::Deref, path::Path};
use sum_tree::Cursor;
use text::Bias;
use worktree::{Entry, PathProgress, PathTarget, Traversal};
@ -11,7 +11,7 @@ use super::{RepositoryId, RepositorySnapshot, StatusEntry};
/// Wraps a worktree [`Traversal`] together with the git status state
/// (summary and status cursor) associated with the traversal's position.
pub struct GitTraversal<'a> {
/// The underlying worktree traversal being wrapped.
traversal: Traversal<'a>,
/// Git summary for the current entry, if any; cleared at the start of
/// every `synchronize_statuses` call.
current_entry_summary: Option<GitSummary>,
/// The repository snapshots, keyed by repository id, that the traversal
/// was constructed with.
repo_snapshots: &'a HashMap<RepositoryId, RepositorySnapshot>,
/// Ordered index from each snapshot's `work_directory_abs_path` to the
/// snapshot itself; `repo_root_for_path` range-searches it to find
/// candidate repositories for a path.
repo_root_to_snapshot: BTreeMap<&'a Path, &'a RepositorySnapshot>,
/// The repository id plus a cursor over that repository's `StatusEntry`
/// items; set to `None` when no repository is found for the current path.
// NOTE(review): presumably the cursor tracks the status entry matching the
// traversal's current entry — confirm against `synchronize_statuses`.
repo_location: Option<(RepositoryId, Cursor<'a, StatusEntry, PathProgress<'a>>)>,
}
@ -20,16 +20,46 @@ impl<'a> GitTraversal<'a> {
repo_snapshots: &'a HashMap<RepositoryId, RepositorySnapshot>,
traversal: Traversal<'a>,
) -> GitTraversal<'a> {
let repo_root_to_snapshot = repo_snapshots
.values()
.map(|snapshot| (&*snapshot.work_directory_abs_path, snapshot))
.collect();
let mut this = GitTraversal {
traversal,
repo_snapshots,
current_entry_summary: None,
repo_location: None,
repo_root_to_snapshot,
};
this.synchronize_statuses(true);
this
}
/// Looks up the repository snapshot that can express `path` as a [`RepoPath`].
///
/// Walks `path` and each of its ancestors, and for every ancestor picks the
/// greatest repository root in `repo_root_to_snapshot` that sorts at or before
/// it. The first candidate whose `abs_path_to_repo_path(path)` succeeds is
/// returned together with the resulting repo path.
///
/// Returns `None` when no repository root sorts at or before the ancestor
/// being examined, or when no candidate accepts `path`.
fn repo_root_for_path(&self, path: &Path) -> Option<(&'a RepositorySnapshot, RepoPath)> {
    // We might need to perform a range search multiple times, as there may be a
    // nested repository in between the target and our path. E.g:
    // /our_root_repo/
    //   .git/
    //   other_repo/
    //     .git/
    //   our_query.txt
    for ancestor in path.ancestors() {
        // `next_back` yields the greatest root <= `ancestor`. If there is none,
        // shorter ancestors cannot match either, so `?` bails out with `None`.
        let (_, snapshot) = self
            .repo_root_to_snapshot
            .range(Path::new("")..=ancestor)
            .next_back()?;
        if let Some(repo_path) = snapshot.abs_path_to_repo_path(path) {
            return Some((*snapshot, repo_path));
        }
    }
    None
}
fn synchronize_statuses(&mut self, reset: bool) {
self.current_entry_summary = None;
@ -42,15 +72,7 @@ impl<'a> GitTraversal<'a> {
return;
};
let Some((repo, repo_path)) = self
.repo_snapshots
.values()
.filter_map(|repo_snapshot| {
let repo_path = repo_snapshot.abs_path_to_repo_path(&abs_path)?;
Some((repo_snapshot, repo_path))
})
.max_by_key(|(repo, _)| repo.work_directory_abs_path.clone())
else {
let Some((repo, repo_path)) = self.repo_root_for_path(&abs_path) else {
self.repo_location = None;
return;
};