worktree: Improve performance with large # of repositories (#35052)
In this PR we've reworked how git status updates are processed. The most notable change is moving the processing onto a background thread (and splitting it across multiple background workers). We believe it is safe to do so: worktree events are not deterministic (fs updates are not guaranteed to arrive in any particular order, etc.), so I figured that the git store should not be overly order-reliant anyway. Note that this PR does not solve the perf issues wholesale - other parts of the system are still slow to process updates (which I plan to nuke soon). Related to #34302 Release Notes: - Improved Zed's performance in projects with a large number of repositories --------- Co-authored-by: Anthony Eid <hello@anthonyeid.me>
This commit is contained in:
parent
af0c909924
commit
631f9a1b31
2 changed files with 142 additions and 38 deletions
|
@ -14,9 +14,10 @@ use collections::HashMap;
|
|||
pub use conflict_set::{ConflictRegion, ConflictSet, ConflictSetSnapshot, ConflictSetUpdate};
|
||||
use fs::Fs;
|
||||
use futures::{
|
||||
FutureExt, StreamExt as _,
|
||||
FutureExt, StreamExt,
|
||||
channel::{mpsc, oneshot},
|
||||
future::{self, Shared},
|
||||
stream::FuturesOrdered,
|
||||
};
|
||||
use git::{
|
||||
BuildPermalinkParams, GitHostingProviderRegistry, WORK_DIRECTORY_REPO_PATH,
|
||||
|
@ -63,8 +64,8 @@ use sum_tree::{Edit, SumTree, TreeSet};
|
|||
use text::{Bias, BufferId};
|
||||
use util::{ResultExt, debug_panic, post_inc};
|
||||
use worktree::{
|
||||
File, PathKey, PathProgress, PathSummary, PathTarget, UpdatedGitRepositoriesSet,
|
||||
UpdatedGitRepository, Worktree,
|
||||
File, PathChange, PathKey, PathProgress, PathSummary, PathTarget, ProjectEntryId,
|
||||
UpdatedGitRepositoriesSet, UpdatedGitRepository, Worktree,
|
||||
};
|
||||
|
||||
pub struct GitStore {
|
||||
|
@ -1083,27 +1084,26 @@ impl GitStore {
|
|||
|
||||
match event {
|
||||
WorktreeStoreEvent::WorktreeUpdatedEntries(worktree_id, updated_entries) => {
|
||||
let mut paths_by_git_repo = HashMap::<_, Vec<_>>::default();
|
||||
for (relative_path, _, _) in updated_entries.iter() {
|
||||
let Some((repo, repo_path)) = self.repository_and_path_for_project_path(
|
||||
&(*worktree_id, relative_path.clone()).into(),
|
||||
cx,
|
||||
) else {
|
||||
continue;
|
||||
};
|
||||
paths_by_git_repo.entry(repo).or_default().push(repo_path)
|
||||
}
|
||||
|
||||
for (repo, paths) in paths_by_git_repo {
|
||||
repo.update(cx, |repo, cx| {
|
||||
repo.paths_changed(
|
||||
paths,
|
||||
downstream
|
||||
.as_ref()
|
||||
.map(|downstream| downstream.updates_tx.clone()),
|
||||
cx,
|
||||
);
|
||||
});
|
||||
if let Some(worktree) = self
|
||||
.worktree_store
|
||||
.read(cx)
|
||||
.worktree_for_id(*worktree_id, cx)
|
||||
{
|
||||
let paths_by_git_repo =
|
||||
self.process_updated_entries(&worktree, updated_entries, cx);
|
||||
let downstream = downstream
|
||||
.as_ref()
|
||||
.map(|downstream| downstream.updates_tx.clone());
|
||||
cx.spawn(async move |_, cx| {
|
||||
let paths_by_git_repo = paths_by_git_repo.await;
|
||||
for (repo, paths) in paths_by_git_repo {
|
||||
repo.update(cx, |repo, cx| {
|
||||
repo.paths_changed(paths, downstream.clone(), cx);
|
||||
})
|
||||
.ok();
|
||||
}
|
||||
})
|
||||
.detach();
|
||||
}
|
||||
}
|
||||
WorktreeStoreEvent::WorktreeUpdatedGitRepositories(worktree_id, changed_repos) => {
|
||||
|
@ -2191,6 +2191,80 @@ impl GitStore {
|
|||
.map(|(id, repo)| (*id, repo.read(cx).snapshot.clone()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn process_updated_entries(
|
||||
&self,
|
||||
worktree: &Entity<Worktree>,
|
||||
updated_entries: &[(Arc<Path>, ProjectEntryId, PathChange)],
|
||||
cx: &mut App,
|
||||
) -> Task<HashMap<Entity<Repository>, Vec<RepoPath>>> {
|
||||
let mut repo_paths = self
|
||||
.repositories
|
||||
.values()
|
||||
.map(|repo| (repo.read(cx).work_directory_abs_path.clone(), repo.clone()))
|
||||
.collect::<Vec<_>>();
|
||||
let mut entries: Vec<_> = updated_entries
|
||||
.iter()
|
||||
.map(|(path, _, _)| path.clone())
|
||||
.collect();
|
||||
entries.sort();
|
||||
let worktree = worktree.read(cx);
|
||||
|
||||
let entries = entries
|
||||
.into_iter()
|
||||
.filter_map(|path| worktree.absolutize(&path).ok())
|
||||
.collect::<Arc<[_]>>();
|
||||
|
||||
let executor = cx.background_executor().clone();
|
||||
cx.background_executor().spawn(async move {
|
||||
repo_paths.sort_by(|lhs, rhs| lhs.0.cmp(&rhs.0));
|
||||
let mut paths_by_git_repo = HashMap::<_, Vec<_>>::default();
|
||||
let mut tasks = FuturesOrdered::new();
|
||||
for (repo_path, repo) in repo_paths.into_iter().rev() {
|
||||
let entries = entries.clone();
|
||||
let task = executor.spawn(async move {
|
||||
// Find all repository paths that belong to this repo
|
||||
let mut ix = entries.partition_point(|path| path < &*repo_path);
|
||||
if ix == entries.len() {
|
||||
return None;
|
||||
};
|
||||
|
||||
let mut paths = vec![];
|
||||
// All paths prefixed by a given repo will constitute a continuous range.
|
||||
while let Some(path) = entries.get(ix)
|
||||
&& let Some(repo_path) =
|
||||
RepositorySnapshot::abs_path_to_repo_path_inner(&repo_path, &path)
|
||||
{
|
||||
paths.push((repo_path, ix));
|
||||
ix += 1;
|
||||
}
|
||||
Some((repo, paths))
|
||||
});
|
||||
tasks.push_back(task);
|
||||
}
|
||||
|
||||
// Now, let's filter out the "duplicate" entries that were processed by multiple distinct repos.
|
||||
let mut path_was_used = vec![false; entries.len()];
|
||||
let tasks = tasks.collect::<Vec<_>>().await;
|
||||
// Process tasks from the back: iterating backwards allows us to see more-specific paths first.
|
||||
// We always want to assign a path to it's innermost repository.
|
||||
for t in tasks {
|
||||
let Some((repo, paths)) = t else {
|
||||
continue;
|
||||
};
|
||||
let entry = paths_by_git_repo.entry(repo).or_default();
|
||||
for (repo_path, ix) in paths {
|
||||
if path_was_used[ix] {
|
||||
continue;
|
||||
}
|
||||
path_was_used[ix] = true;
|
||||
entry.push(repo_path);
|
||||
}
|
||||
}
|
||||
|
||||
paths_by_git_repo
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl BufferGitState {
|
||||
|
@ -2660,8 +2734,16 @@ impl RepositorySnapshot {
|
|||
}
|
||||
|
||||
pub fn abs_path_to_repo_path(&self, abs_path: &Path) -> Option<RepoPath> {
|
||||
Self::abs_path_to_repo_path_inner(&self.work_directory_abs_path, abs_path)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn abs_path_to_repo_path_inner(
|
||||
work_directory_abs_path: &Path,
|
||||
abs_path: &Path,
|
||||
) -> Option<RepoPath> {
|
||||
abs_path
|
||||
.strip_prefix(&self.work_directory_abs_path)
|
||||
.strip_prefix(&work_directory_abs_path)
|
||||
.map(RepoPath::from)
|
||||
.ok()
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use collections::HashMap;
|
||||
use git::status::GitSummary;
|
||||
use std::{ops::Deref, path::Path};
|
||||
use git::{repository::RepoPath, status::GitSummary};
|
||||
use std::{collections::BTreeMap, ops::Deref, path::Path};
|
||||
use sum_tree::Cursor;
|
||||
use text::Bias;
|
||||
use worktree::{Entry, PathProgress, PathTarget, Traversal};
|
||||
|
@ -11,7 +11,7 @@ use super::{RepositoryId, RepositorySnapshot, StatusEntry};
|
|||
pub struct GitTraversal<'a> {
|
||||
traversal: Traversal<'a>,
|
||||
current_entry_summary: Option<GitSummary>,
|
||||
repo_snapshots: &'a HashMap<RepositoryId, RepositorySnapshot>,
|
||||
repo_root_to_snapshot: BTreeMap<&'a Path, &'a RepositorySnapshot>,
|
||||
repo_location: Option<(RepositoryId, Cursor<'a, StatusEntry, PathProgress<'a>>)>,
|
||||
}
|
||||
|
||||
|
@ -20,16 +20,46 @@ impl<'a> GitTraversal<'a> {
|
|||
repo_snapshots: &'a HashMap<RepositoryId, RepositorySnapshot>,
|
||||
traversal: Traversal<'a>,
|
||||
) -> GitTraversal<'a> {
|
||||
let repo_root_to_snapshot = repo_snapshots
|
||||
.values()
|
||||
.map(|snapshot| (&*snapshot.work_directory_abs_path, snapshot))
|
||||
.collect();
|
||||
let mut this = GitTraversal {
|
||||
traversal,
|
||||
repo_snapshots,
|
||||
current_entry_summary: None,
|
||||
repo_location: None,
|
||||
repo_root_to_snapshot,
|
||||
};
|
||||
this.synchronize_statuses(true);
|
||||
this
|
||||
}
|
||||
|
||||
/// Finds the repository snapshot that contains `path`, together with `path`
/// expressed relative to that repository's work directory.
///
/// Returns `None` when no repository root in `repo_root_to_snapshot` is a prefix
/// of `path`.
fn repo_root_for_path(&self, path: &Path) -> Option<(&'a RepositorySnapshot, RepoPath)> {
    // We might need to perform a range search multiple times, as there may be a nested repository in between
    // the target and our path. E.g:
    // /our_root_repo/
    //   .git/
    //   other_repo/
    //     .git/
    //   our_query.txt
    for ancestor in path.ancestors() {
        // Take the greatest repository root that sorts at or before this ancestor.
        // If there is none, no shorter ancestor can have one either, so give up.
        let (_, snapshot) = self
            .repo_root_to_snapshot
            .range(Path::new("")..=ancestor)
            .next_back()?;

        // The candidate may be an unrelated root that merely sorts before the
        // ancestor; only accept it when it really is a prefix of `path`.
        if let Some(repo_path) = snapshot.abs_path_to_repo_path(path) {
            return Some((*snapshot, repo_path));
        }
    }

    None
}
|
||||
|
||||
fn synchronize_statuses(&mut self, reset: bool) {
|
||||
self.current_entry_summary = None;
|
||||
|
||||
|
@ -42,15 +72,7 @@ impl<'a> GitTraversal<'a> {
|
|||
return;
|
||||
};
|
||||
|
||||
let Some((repo, repo_path)) = self
|
||||
.repo_snapshots
|
||||
.values()
|
||||
.filter_map(|repo_snapshot| {
|
||||
let repo_path = repo_snapshot.abs_path_to_repo_path(&abs_path)?;
|
||||
Some((repo_snapshot, repo_path))
|
||||
})
|
||||
.max_by_key(|(repo, _)| repo.work_directory_abs_path.clone())
|
||||
else {
|
||||
let Some((repo, repo_path)) = self.repo_root_for_path(&abs_path) else {
|
||||
self.repo_location = None;
|
||||
return;
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue