Add a data driven representation of the current git repository state to the worktree snapshots

WIP: Switch git repositories to use SumTrees

Co-authored-by: Nathan <nathan@zed.dev>
This commit is contained in:
Mikayla Maki 2023-05-01 15:35:22 -07:00
parent bb93447a0d
commit a58d3d8128
No known key found for this signature in database
4 changed files with 154 additions and 50 deletions

View file

@ -15,6 +15,12 @@ pub trait GitRepository: Send {
fn load_index_text(&self, relative_file_path: &Path) -> Option<String>; fn load_index_text(&self, relative_file_path: &Path) -> Option<String>;
} }
impl std::fmt::Debug for dyn GitRepository {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("dyn GitRepository<...>").finish()
}
}
#[async_trait::async_trait] #[async_trait::async_trait]
impl GitRepository for LibGitRepository { impl GitRepository for LibGitRepository {
fn reload_index(&self) { fn reload_index(&self) {

View file

@ -4696,7 +4696,7 @@ impl Project {
fn update_local_worktree_buffers_git_repos( fn update_local_worktree_buffers_git_repos(
&mut self, &mut self,
worktree: ModelHandle<Worktree>, worktree: ModelHandle<Worktree>,
repos: &[GitRepositoryEntry], repos: &[LocalGitRepositoryEntry],
cx: &mut ModelContext<Self>, cx: &mut ModelContext<Self>,
) { ) {
for (_, buffer) in &self.opened_buffers { for (_, buffer) in &self.opened_buffers {

View file

@ -51,7 +51,7 @@ use std::{
}, },
time::{Duration, SystemTime}, time::{Duration, SystemTime},
}; };
use sum_tree::{Bias, Edit, SeekTarget, SumTree, TreeSet}; use sum_tree::{Bias, Edit, SeekTarget, SumTree, TreeMap, TreeSet};
use util::{paths::HOME, ResultExt, TryFutureExt}; use util::{paths::HOME, ResultExt, TryFutureExt};
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash, PartialOrd, Ord)] #[derive(Copy, Clone, PartialEq, Eq, Debug, Hash, PartialOrd, Ord)]
@ -102,6 +102,7 @@ pub struct Snapshot {
root_char_bag: CharBag, root_char_bag: CharBag,
entries_by_path: SumTree<Entry>, entries_by_path: SumTree<Entry>,
entries_by_id: SumTree<PathEntry>, entries_by_id: SumTree<PathEntry>,
repository_entries: TreeMap<RepositoryEntryKey, RepositoryEntry>,
/// A number that increases every time the worktree begins scanning /// A number that increases every time the worktree begins scanning
/// a set of paths from the filesystem. This scanning could be caused /// a set of paths from the filesystem. This scanning could be caused
@ -116,8 +117,35 @@ pub struct Snapshot {
completed_scan_id: usize, completed_scan_id: usize,
} }
#[derive(Clone, Debug)]
pub struct RepositoryEntry {
// Path to the actual .git folder.
// Note: if .git is a file, this points to the folder indicated by the .git file
pub(crate) git_dir_path: Arc<Path>,
pub(crate) git_dir_entry_id: ProjectEntryId,
pub(crate) scan_id: usize,
// TODO: pub(crate) head_ref: Arc<str>,
}
impl RepositoryEntry {
// Note that this path should be relative to the worktree root.
pub(crate) fn in_dot_git(&self, path: &Path) -> bool {
path.starts_with(self.git_dir_path.as_ref())
}
}
/// This path corresponds to the 'content path' (the folder that contains the .git)
#[derive(Clone, Debug, Ord, PartialOrd, Eq, PartialEq)]
pub struct RepositoryEntryKey(Arc<Path>);
impl Default for RepositoryEntryKey {
fn default() -> Self {
RepositoryEntryKey(Arc::from(Path::new("")))
}
}
#[derive(Clone)] #[derive(Clone)]
pub struct GitRepositoryEntry { pub struct LocalGitRepositoryEntry {
pub(crate) repo: Arc<Mutex<dyn GitRepository>>, pub(crate) repo: Arc<Mutex<dyn GitRepository>>,
pub(crate) scan_id: usize, pub(crate) scan_id: usize,
@ -128,7 +156,7 @@ pub struct GitRepositoryEntry {
pub(crate) git_dir_path: Arc<Path>, pub(crate) git_dir_path: Arc<Path>,
} }
impl std::fmt::Debug for GitRepositoryEntry { impl std::fmt::Debug for LocalGitRepositoryEntry {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("GitRepositoryEntry") f.debug_struct("GitRepositoryEntry")
.field("content_path", &self.content_path) .field("content_path", &self.content_path)
@ -137,27 +165,17 @@ impl std::fmt::Debug for GitRepositoryEntry {
} }
} }
#[derive(Debug)] #[derive(Debug, Clone)]
pub struct LocalSnapshot { pub struct LocalSnapshot {
ignores_by_parent_abs_path: HashMap<Arc<Path>, (Arc<Gitignore>, usize)>, ignores_by_parent_abs_path: HashMap<Arc<Path>, (Arc<Gitignore>, usize)>,
git_repositories: Vec<GitRepositoryEntry>, git_repositories_old: Vec<LocalGitRepositoryEntry>,
// The ProjectEntryId corresponds to the entry for the .git dir
git_repositories: TreeMap<ProjectEntryId, Arc<Mutex<dyn GitRepository>>>,
removed_entry_ids: HashMap<u64, ProjectEntryId>, removed_entry_ids: HashMap<u64, ProjectEntryId>,
next_entry_id: Arc<AtomicUsize>, next_entry_id: Arc<AtomicUsize>,
snapshot: Snapshot, snapshot: Snapshot,
} }
impl Clone for LocalSnapshot {
fn clone(&self) -> Self {
Self {
ignores_by_parent_abs_path: self.ignores_by_parent_abs_path.clone(),
git_repositories: self.git_repositories.iter().cloned().collect(),
removed_entry_ids: self.removed_entry_ids.clone(),
next_entry_id: self.next_entry_id.clone(),
snapshot: self.snapshot.clone(),
}
}
}
impl Deref for LocalSnapshot { impl Deref for LocalSnapshot {
type Target = Snapshot; type Target = Snapshot;
@ -191,7 +209,7 @@ struct ShareState {
pub enum Event { pub enum Event {
UpdatedEntries(HashMap<Arc<Path>, PathChange>), UpdatedEntries(HashMap<Arc<Path>, PathChange>),
UpdatedGitRepositories(Vec<GitRepositoryEntry>), UpdatedGitRepositories(Vec<LocalGitRepositoryEntry>),
} }
impl Entity for Worktree { impl Entity for Worktree {
@ -222,8 +240,9 @@ impl Worktree {
let mut snapshot = LocalSnapshot { let mut snapshot = LocalSnapshot {
ignores_by_parent_abs_path: Default::default(), ignores_by_parent_abs_path: Default::default(),
git_repositories: Default::default(), git_repositories_old: Default::default(),
removed_entry_ids: Default::default(), removed_entry_ids: Default::default(),
git_repositories: Default::default(),
next_entry_id, next_entry_id,
snapshot: Snapshot { snapshot: Snapshot {
id: WorktreeId::from_usize(cx.model_id()), id: WorktreeId::from_usize(cx.model_id()),
@ -232,6 +251,7 @@ impl Worktree {
root_char_bag: root_name.chars().map(|c| c.to_ascii_lowercase()).collect(), root_char_bag: root_name.chars().map(|c| c.to_ascii_lowercase()).collect(),
entries_by_path: Default::default(), entries_by_path: Default::default(),
entries_by_id: Default::default(), entries_by_id: Default::default(),
repository_entries: Default::default(),
scan_id: 1, scan_id: 1,
completed_scan_id: 0, completed_scan_id: 0,
}, },
@ -330,6 +350,7 @@ impl Worktree {
.collect(), .collect(),
entries_by_path: Default::default(), entries_by_path: Default::default(),
entries_by_id: Default::default(), entries_by_id: Default::default(),
repository_entries: Default::default(),
scan_id: 1, scan_id: 1,
completed_scan_id: 0, completed_scan_id: 0,
}; };
@ -599,8 +620,8 @@ impl LocalWorktree {
fn set_snapshot(&mut self, new_snapshot: LocalSnapshot, cx: &mut ModelContext<Worktree>) { fn set_snapshot(&mut self, new_snapshot: LocalSnapshot, cx: &mut ModelContext<Worktree>) {
let updated_repos = Self::changed_repos( let updated_repos = Self::changed_repos(
&self.snapshot.git_repositories, &self.snapshot.git_repositories_old,
&new_snapshot.git_repositories, &new_snapshot.git_repositories_old,
); );
self.snapshot = new_snapshot; self.snapshot = new_snapshot;
@ -614,13 +635,13 @@ impl LocalWorktree {
} }
fn changed_repos( fn changed_repos(
old_repos: &[GitRepositoryEntry], old_repos: &[LocalGitRepositoryEntry],
new_repos: &[GitRepositoryEntry], new_repos: &[LocalGitRepositoryEntry],
) -> Vec<GitRepositoryEntry> { ) -> Vec<LocalGitRepositoryEntry> {
fn diff<'a>( fn diff<'a>(
a: &'a [GitRepositoryEntry], a: &'a [LocalGitRepositoryEntry],
b: &'a [GitRepositoryEntry], b: &'a [LocalGitRepositoryEntry],
updated: &mut HashMap<&'a Path, GitRepositoryEntry>, updated: &mut HashMap<&'a Path, LocalGitRepositoryEntry>,
) { ) {
for a_repo in a { for a_repo in a {
let matched = b.iter().find(|b_repo| { let matched = b.iter().find(|b_repo| {
@ -633,7 +654,7 @@ impl LocalWorktree {
} }
} }
let mut updated = HashMap::<&Path, GitRepositoryEntry>::default(); let mut updated = HashMap::<&Path, LocalGitRepositoryEntry>::default();
diff(old_repos, new_repos, &mut updated); diff(old_repos, new_repos, &mut updated);
diff(new_repos, old_repos, &mut updated); diff(new_repos, old_repos, &mut updated);
@ -1002,9 +1023,10 @@ impl LocalWorktree {
let mut share_tx = Some(share_tx); let mut share_tx = Some(share_tx);
let mut prev_snapshot = LocalSnapshot { let mut prev_snapshot = LocalSnapshot {
ignores_by_parent_abs_path: Default::default(), ignores_by_parent_abs_path: Default::default(),
git_repositories: Default::default(), git_repositories_old: Default::default(),
removed_entry_ids: Default::default(), removed_entry_ids: Default::default(),
next_entry_id: Default::default(), next_entry_id: Default::default(),
git_repositories: Default::default(),
snapshot: Snapshot { snapshot: Snapshot {
id: WorktreeId(worktree_id as usize), id: WorktreeId(worktree_id as usize),
abs_path: Path::new("").into(), abs_path: Path::new("").into(),
@ -1012,6 +1034,7 @@ impl LocalWorktree {
root_char_bag: Default::default(), root_char_bag: Default::default(),
entries_by_path: Default::default(), entries_by_path: Default::default(),
entries_by_id: Default::default(), entries_by_id: Default::default(),
repository_entries: Default::default(),
scan_id: 0, scan_id: 0,
completed_scan_id: 0, completed_scan_id: 0,
}, },
@ -1409,8 +1432,8 @@ impl Snapshot {
impl LocalSnapshot { impl LocalSnapshot {
// Gives the most specific git repository for a given path // Gives the most specific git repository for a given path
pub(crate) fn repo_for(&self, path: &Path) -> Option<GitRepositoryEntry> { pub(crate) fn repo_for(&self, path: &Path) -> Option<LocalGitRepositoryEntry> {
self.git_repositories self.git_repositories_old
.iter() .iter()
.rev() //git_repository is ordered lexicographically .rev() //git_repository is ordered lexicographically
.find(|repo| repo.manages(path)) .find(|repo| repo.manages(path))
@ -1420,9 +1443,9 @@ impl LocalSnapshot {
pub(crate) fn repo_with_dot_git_containing( pub(crate) fn repo_with_dot_git_containing(
&mut self, &mut self,
path: &Path, path: &Path,
) -> Option<&mut GitRepositoryEntry> { ) -> Option<&mut LocalGitRepositoryEntry> {
// Git repositories cannot be nested, so we don't need to reverse the order // Git repositories cannot be nested, so we don't need to reverse the order
self.git_repositories self.git_repositories_old
.iter_mut() .iter_mut()
.find(|repo| repo.in_dot_git(path)) .find(|repo| repo.in_dot_git(path))
} }
@ -1597,14 +1620,31 @@ impl LocalSnapshot {
if parent_path.file_name() == Some(&DOT_GIT) { if parent_path.file_name() == Some(&DOT_GIT) {
let abs_path = self.abs_path.join(&parent_path); let abs_path = self.abs_path.join(&parent_path);
let content_path: Arc<Path> = parent_path.parent().unwrap().into(); let content_path: Arc<Path> = parent_path.parent().unwrap().into();
let key = RepositoryEntryKey(content_path.clone());
if self.repository_entries.get(&key).is_none() {
if let Some(repo) = fs.open_repo(abs_path.as_path()) {
self.repository_entries.insert(
key,
RepositoryEntry {
git_dir_path: parent_path.clone(),
git_dir_entry_id: parent_entry.id,
scan_id: 0,
},
);
self.git_repositories.insert(parent_entry.id, repo)
}
}
if let Err(ix) = self if let Err(ix) = self
.git_repositories .git_repositories_old
.binary_search_by_key(&&content_path, |repo| &repo.content_path) .binary_search_by_key(&&content_path, |repo| &repo.content_path)
{ {
if let Some(repo) = fs.open_repo(abs_path.as_path()) { if let Some(repo) = fs.open_repo(abs_path.as_path()) {
self.git_repositories.insert( self.git_repositories_old.insert(
ix, ix,
GitRepositoryEntry { LocalGitRepositoryEntry {
repo, repo,
scan_id: 0, scan_id: 0,
content_path, content_path,
@ -1672,12 +1712,17 @@ impl LocalSnapshot {
*scan_id = self.snapshot.scan_id; *scan_id = self.snapshot.scan_id;
} }
} else if path.file_name() == Some(&DOT_GIT) { } else if path.file_name() == Some(&DOT_GIT) {
let repo_entry_key = RepositoryEntryKey(path.parent().unwrap().into());
self.snapshot
.repository_entries
.update(&repo_entry_key, |repo| repo.scan_id = self.snapshot.scan_id);
let parent_path = path.parent().unwrap(); let parent_path = path.parent().unwrap();
if let Ok(ix) = self if let Ok(ix) = self
.git_repositories .git_repositories_old
.binary_search_by_key(&parent_path, |repo| repo.git_dir_path.as_ref()) .binary_search_by_key(&parent_path, |repo| repo.content_path.as_ref())
{ {
self.git_repositories[ix].scan_id = self.snapshot.scan_id; self.git_repositories_old[ix].scan_id = self.snapshot.scan_id;
} }
} }
} }
@ -1719,12 +1764,12 @@ impl LocalSnapshot {
ignore_stack ignore_stack
} }
pub fn git_repo_entries(&self) -> &[GitRepositoryEntry] { pub fn git_repo_entries(&self) -> &[LocalGitRepositoryEntry] {
&self.git_repositories &self.git_repositories_old
} }
} }
impl GitRepositoryEntry { impl LocalGitRepositoryEntry {
// Note that these paths should be relative to the worktree root. // Note that these paths should be relative to the worktree root.
pub(crate) fn manages(&self, path: &Path) -> bool { pub(crate) fn manages(&self, path: &Path) -> bool {
path.starts_with(self.content_path.as_ref()) path.starts_with(self.content_path.as_ref())
@ -2318,9 +2363,15 @@ impl BackgroundScanner {
self.update_ignore_statuses().await; self.update_ignore_statuses().await;
let mut snapshot = self.snapshot.lock(); let mut snapshot = self.snapshot.lock();
let mut git_repositories = mem::take(&mut snapshot.git_repositories);
let mut git_repositories = mem::take(&mut snapshot.git_repositories_old);
git_repositories.retain(|repo| snapshot.entry_for_path(&repo.git_dir_path).is_some()); git_repositories.retain(|repo| snapshot.entry_for_path(&repo.git_dir_path).is_some());
snapshot.git_repositories_old = git_repositories;
let mut git_repositories = mem::take(&mut snapshot.git_repositories);
git_repositories.retain(|project_entry_id, _| snapshot.contains_entry(*project_entry_id));
snapshot.git_repositories = git_repositories; snapshot.git_repositories = git_repositories;
snapshot.removed_entry_ids.clear(); snapshot.removed_entry_ids.clear();
snapshot.completed_scan_id = snapshot.scan_id; snapshot.completed_scan_id = snapshot.scan_id;
drop(snapshot); drop(snapshot);
@ -2607,11 +2658,22 @@ impl BackgroundScanner {
snapshot.insert_entry(fs_entry, self.fs.as_ref()); snapshot.insert_entry(fs_entry, self.fs.as_ref());
let scan_id = snapshot.scan_id; let scan_id = snapshot.scan_id;
if let Some(repo) = snapshot.repo_with_dot_git_containing(&path) { if let Some(repo) = snapshot.repo_with_dot_git_containing(&path) {
repo.repo.lock().reload_index(); repo.repo.lock().reload_index();
repo.scan_id = scan_id; repo.scan_id = scan_id;
} }
let repo_with_path_in_dotgit = snapshot
.repository_entries
.iter()
.find_map(|(key, repo)| repo.in_dot_git(&path).then(|| key.clone()));
if let Some(key) = repo_with_path_in_dotgit {
snapshot
.repository_entries
.update(&key, |entry| entry.scan_id = scan_id);
}
if let Some(scan_queue_tx) = &scan_queue_tx { if let Some(scan_queue_tx) = &scan_queue_tx {
let mut ancestor_inodes = snapshot.ancestor_inodes_for_path(&path); let mut ancestor_inodes = snapshot.ancestor_inodes_for_path(&path);
if metadata.is_dir && !ancestor_inodes.contains(&metadata.inode) { if metadata.is_dir && !ancestor_inodes.contains(&metadata.inode) {
@ -3427,8 +3489,8 @@ mod tests {
#[test] #[test]
fn test_changed_repos() { fn test_changed_repos() {
fn fake_entry(git_dir_path: impl AsRef<Path>, scan_id: usize) -> GitRepositoryEntry { fn fake_entry(git_dir_path: impl AsRef<Path>, scan_id: usize) -> LocalGitRepositoryEntry {
GitRepositoryEntry { LocalGitRepositoryEntry {
repo: Arc::new(Mutex::new(FakeGitRepository::default())), repo: Arc::new(Mutex::new(FakeGitRepository::default())),
scan_id, scan_id,
content_path: git_dir_path.as_ref().parent().unwrap().into(), content_path: git_dir_path.as_ref().parent().unwrap().into(),
@ -3436,13 +3498,13 @@ mod tests {
} }
} }
let prev_repos: Vec<GitRepositoryEntry> = vec![ let prev_repos: Vec<LocalGitRepositoryEntry> = vec![
fake_entry("/.git", 0), fake_entry("/.git", 0),
fake_entry("/a/.git", 0), fake_entry("/a/.git", 0),
fake_entry("/a/b/.git", 0), fake_entry("/a/b/.git", 0),
]; ];
let new_repos: Vec<GitRepositoryEntry> = vec![ let new_repos: Vec<LocalGitRepositoryEntry> = vec![
fake_entry("/a/.git", 1), fake_entry("/a/.git", 1),
fake_entry("/a/b/.git", 0), fake_entry("/a/b/.git", 0),
fake_entry("/a/c/.git", 0), fake_entry("/a/c/.git", 0),

View file

@ -2,13 +2,13 @@ use std::{cmp::Ordering, fmt::Debug};
use crate::{Bias, Dimension, Item, KeyedItem, SeekTarget, SumTree, Summary}; use crate::{Bias, Dimension, Item, KeyedItem, SeekTarget, SumTree, Summary};
#[derive(Clone)] #[derive(Clone, Debug)]
pub struct TreeMap<K, V>(SumTree<MapEntry<K, V>>) pub struct TreeMap<K, V>(SumTree<MapEntry<K, V>>)
where where
K: Clone + Debug + Default + Ord, K: Clone + Debug + Default + Ord,
V: Clone + Debug; V: Clone + Debug;
#[derive(Clone)] #[derive(Clone, Debug)]
pub struct MapEntry<K, V> { pub struct MapEntry<K, V> {
key: K, key: K,
value: V, value: V,
@ -73,6 +73,42 @@ impl<K: Clone + Debug + Default + Ord, V: Clone + Debug> TreeMap<K, V> {
removed removed
} }
pub fn update<F, T>(&mut self, key: &K, f: F) -> Option<T>
where
F: FnOnce(&mut V) -> T,
{
let mut cursor = self.0.cursor::<MapKeyRef<'_, K>>();
let key = MapKeyRef(Some(key));
let mut new_tree = cursor.slice(&key, Bias::Left, &());
let mut result = None;
if key.cmp(&cursor.end(&()), &()) == Ordering::Equal {
let mut updated = cursor.item().unwrap().clone();
result = Some(f(&mut updated.value));
new_tree.push(updated, &());
cursor.next(&());
}
new_tree.push_tree(cursor.suffix(&()), &());
drop(cursor);
self.0 = new_tree;
result
}
pub fn retain<F: FnMut(&K, &V) -> bool>(&mut self, mut predicate: F) {
let mut cursor = self.0.cursor::<MapKeyRef<'_, K>>();
cursor.seek(&MapKeyRef(None), Bias::Left, &());
let mut new_map = SumTree::<MapEntry<K, V>>::default();
if let Some(item) = cursor.item() {
if predicate(&item.key, &item.value) {
new_map.push(item.clone(), &());
}
cursor.next(&());
}
drop(cursor);
self.0 = new_map;
}
pub fn iter(&self) -> impl Iterator<Item = (&K, &V)> + '_ { pub fn iter(&self) -> impl Iterator<Item = (&K, &V)> + '_ {
self.0.iter().map(|entry| (&entry.key, &entry.value)) self.0.iter().map(|entry| (&entry.key, &entry.value))
} }