Represent git statuses more faithfully (#23082)

First, parse the output of `git status --porcelain=v1` into a
representation that can handle the full "grammar" and doesn't lose
information.

Second, as part of pushing this throughout the codebase, expand the use
of the existing `GitSummary` type to all the places where status
propagation is in play (i.e., anywhere we're dealing with a mix of files
and directories), and get rid of the previous `GitSummary ->
GitFileStatus` conversion.

- [x] Synchronize new representation over collab
  - [x] Update zed.proto
  - [x] Update DB models
- [x] Update `GitSummary` and summarization for the new `FileStatus`
- [x] Fix all tests
  - [x] worktree
  - [x] collab
- [x] Clean up `FILE_*` constants
- [x] New collab tests to exercise syncing of complex statuses
- [x] Run it locally and make sure it looks good

Release Notes:

- N/A

---------

Co-authored-by: Mikayla <mikayla@zed.dev>
Co-authored-by: Conrad <conrad@zed.dev>
This commit is contained in:
Cole Miller 2025-01-15 19:01:38 -05:00 committed by GitHub
parent 224f3d4746
commit a41d72ee81
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 1015 additions and 552 deletions

View file

@ -1,4 +1,4 @@
use crate::status::GitStatusPair;
use crate::status::FileStatus;
use crate::GitHostingProviderRegistry;
use crate::{blame::Blame, status::GitStatus};
use anyhow::{anyhow, Context, Result};
@ -7,7 +7,6 @@ use git2::BranchType;
use gpui::SharedString;
use parking_lot::Mutex;
use rope::Rope;
use serde::{Deserialize, Serialize};
use std::borrow::Borrow;
use std::sync::LazyLock;
use std::{
@ -294,7 +293,7 @@ pub struct FakeGitRepositoryState {
pub event_emitter: smol::channel::Sender<PathBuf>,
pub index_contents: HashMap<PathBuf, String>,
pub blames: HashMap<PathBuf, Blame>,
pub worktree_statuses: HashMap<RepoPath, GitFileStatus>,
pub statuses: HashMap<RepoPath, FileStatus>,
pub current_branch_name: Option<String>,
pub branches: HashSet<String>,
}
@ -312,7 +311,7 @@ impl FakeGitRepositoryState {
event_emitter,
index_contents: Default::default(),
blames: Default::default(),
worktree_statuses: Default::default(),
statuses: Default::default(),
current_branch_name: Default::default(),
branches: Default::default(),
}
@ -349,20 +348,14 @@ impl GitRepository for FakeGitRepository {
let state = self.state.lock();
let mut entries = state
.worktree_statuses
.statuses
.iter()
.filter_map(|(repo_path, status_worktree)| {
.filter_map(|(repo_path, status)| {
if path_prefixes
.iter()
.any(|path_prefix| repo_path.0.starts_with(path_prefix))
{
Some((
repo_path.to_owned(),
GitStatusPair {
index_status: None,
worktree_status: Some(*status_worktree),
},
))
Some((repo_path.to_owned(), *status))
} else {
None
}
@ -461,51 +454,6 @@ fn check_path_to_repo_path_errors(relative_file_path: &Path) -> Result<()> {
}
}
/// Coarse, single-valued status of a path.
///
/// `Modified`, `Added`, `Deleted` and `Untracked` can be parsed from a single
/// status byte by `GitFileStatus::from_byte`; `Conflict` has no byte mapping there.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum GitFileStatus {
/// Parsed from `A`.
Added,
/// Parsed from `M`.
Modified,
// TODO conflicts should be represented by the GitStatusPair
/// Takes precedence over every other status in `GitFileStatus::merge`.
Conflict,
/// Parsed from `D`.
Deleted,
/// Parsed from `?`.
Untracked,
}
impl GitFileStatus {
    /// Combines two optional statuses into one.
    ///
    /// When `prefer_other` is set, `other` is returned untouched. Otherwise the
    /// result is the highest-precedence status present on either side, in the
    /// order `Conflict`, `Modified`, `Added`; any other combination yields `None`.
    pub fn merge(
        this: Option<GitFileStatus>,
        other: Option<GitFileStatus>,
        prefer_other: bool,
    ) -> Option<GitFileStatus> {
        if prefer_other {
            return other;
        }

        // Highest precedence first; statuses not listed here never survive a merge.
        const PRECEDENCE: [GitFileStatus; 3] = [
            GitFileStatus::Conflict,
            GitFileStatus::Modified,
            GitFileStatus::Added,
        ];

        PRECEDENCE
            .iter()
            .copied()
            .find(|candidate| this == Some(*candidate) || other == Some(*candidate))
    }

    /// Maps a single status byte (`M`, `A`, `D` or `?`) to its status.
    ///
    /// Returns `None` for any other byte.
    pub fn from_byte(byte: u8) -> Option<Self> {
        let status = match byte {
            b'M' => GitFileStatus::Modified,
            b'A' => GitFileStatus::Added,
            b'D' => GitFileStatus::Deleted,
            b'?' => GitFileStatus::Untracked,
            _ => return None,
        };
        Some(status)
    }
}
/// A `RepoPath` wrapping the empty path (`""`), built lazily on first access.
///
/// NOTE(review): going by the name, this presumably denotes the repository's
/// work directory itself — confirm against callers.
pub static WORK_DIRECTORY_REPO_PATH: LazyLock<RepoPath> =
LazyLock::new(|| RepoPath(Path::new("").into()));

View file

@ -1,34 +1,316 @@
use crate::repository::{GitFileStatus, RepoPath};
use crate::repository::RepoPath;
use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};
use std::{path::Path, process::Stdio, sync::Arc};
use util::ResultExt;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct GitStatusPair {
// Not both `None`.
pub index_status: Option<GitFileStatus>,
pub worktree_status: Option<GitFileStatus>,
/// Status of a single path, as parsed from the two-byte status field of a
/// `git status --porcelain=v1` entry (see `FileStatus::from_bytes`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FileStatus {
/// Parsed from `??`.
Untracked,
/// Parsed from `!!`.
Ignored,
/// Parsed from `AA`, `DD`, or any pair that contains a `U`.
Unmerged(UnmergedStatus),
/// Every other valid pair: one code for the index and one for the worktree.
Tracked(TrackedStatus),
}
impl GitStatusPair {
pub fn is_staged(&self) -> Option<bool> {
match (self.index_status, self.worktree_status) {
(Some(_), None) => Some(true),
(None, Some(_)) => Some(false),
(Some(GitFileStatus::Untracked), Some(GitFileStatus::Untracked)) => Some(false),
(Some(_), Some(_)) => None,
(None, None) => unreachable!(),
/// The pair of codes carried by an unmerged entry.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct UnmergedStatus {
/// Code taken from the first status byte.
pub first_head: UnmergedStatusCode,
/// Code taken from the second status byte.
pub second_head: UnmergedStatusCode,
}
/// One side of an unmerged entry's status.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum UnmergedStatusCode {
/// Parsed from `A`.
Added,
/// Parsed from `D`.
Deleted,
/// Parsed from `U`.
Updated,
}
/// The index-side and worktree-side codes of a tracked, non-conflicted entry.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct TrackedStatus {
/// Code taken from the first status byte.
pub index_status: StatusCode,
/// Code taken from the second status byte.
pub worktree_status: StatusCode,
}
/// A single status code for one side (index or worktree) of a tracked entry.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum StatusCode {
/// Parsed from `M`.
Modified,
/// Parsed from `T`.
TypeChanged,
/// Parsed from `A`.
Added,
/// Parsed from `D`.
Deleted,
/// Parsed from `R`.
Renamed,
/// Parsed from `C`.
Copied,
/// Parsed from a space (` `).
Unmodified,
}
impl From<UnmergedStatus> for FileStatus {
fn from(value: UnmergedStatus) -> Self {
FileStatus::Unmerged(value)
}
}
impl From<TrackedStatus> for FileStatus {
fn from(value: TrackedStatus) -> Self {
FileStatus::Tracked(value)
}
}
impl FileStatus {
pub const fn worktree(worktree_status: StatusCode) -> Self {
FileStatus::Tracked(TrackedStatus {
index_status: StatusCode::Unmodified,
worktree_status,
})
}
/// Generates a `FileStatus` from the two-byte status field of a
/// `git status --porcelain=v1` entry, as described in
/// <https://git-scm.com/docs/git-status#_output>.
///
/// NOTE: an unchanged side is denoted by a space (`b' '`), not by an empty
/// string.
///
/// # Errors
///
/// Returns an error when a byte is not a valid code for its position.
fn from_bytes(bytes: [u8; 2]) -> anyhow::Result<Self> {
    let status = match bytes {
        [b'?', b'?'] => FileStatus::Untracked,
        [b'!', b'!'] => FileStatus::Ignored,
        // `AA`: unmerged, both added.
        [b'A', b'A'] => UnmergedStatus {
            first_head: UnmergedStatusCode::Added,
            second_head: UnmergedStatusCode::Added,
        }
        .into(),
        // `DD`: unmerged, both deleted. This arm previously reported
        // `Added`/`Added`, making it indistinguishable from `AA`.
        [b'D', b'D'] => UnmergedStatus {
            first_head: UnmergedStatusCode::Deleted,
            second_head: UnmergedStatusCode::Deleted,
        }
        .into(),
        // Any pair containing `U` is unmerged; the other byte must itself be a
        // valid unmerged code or parsing fails.
        [x, b'U'] => UnmergedStatus {
            first_head: UnmergedStatusCode::from_byte(x)?,
            second_head: UnmergedStatusCode::Updated,
        }
        .into(),
        [b'U', y] => UnmergedStatus {
            first_head: UnmergedStatusCode::Updated,
            second_head: UnmergedStatusCode::from_byte(y)?,
        }
        .into(),
        // Everything else is an ordinary index/worktree pair.
        [x, y] => TrackedStatus {
            index_status: StatusCode::from_byte(x)?,
            worktree_status: StatusCode::from_byte(y)?,
        }
        .into(),
    };
    Ok(status)
}
pub fn is_staged(self) -> Option<bool> {
match self {
FileStatus::Untracked | FileStatus::Ignored | FileStatus::Unmerged { .. } => {
Some(false)
}
FileStatus::Tracked(tracked) => match (tracked.index_status, tracked.worktree_status) {
(StatusCode::Unmodified, _) => Some(false),
(_, StatusCode::Unmodified) => Some(true),
_ => None,
},
}
}
// TODO reconsider uses of this
pub fn combined(&self) -> GitFileStatus {
self.index_status.or(self.worktree_status).unwrap()
/// Returns `true` only for the `Unmerged` variant.
pub fn is_conflicted(self) -> bool {
    matches!(self, FileStatus::Unmerged(_))
}
/// Returns `true` only for the `Ignored` variant.
pub fn is_ignored(self) -> bool {
    matches!(self, FileStatus::Ignored)
}
/// Returns `true` for a tracked entry whose index or worktree side is `Modified`.
pub fn is_modified(self) -> bool {
    if let FileStatus::Tracked(tracked) = self {
        tracked.index_status == StatusCode::Modified
            || tracked.worktree_status == StatusCode::Modified
    } else {
        false
    }
}
/// Returns `true` for a tracked entry whose index or worktree side is `Added`.
pub fn is_created(self) -> bool {
    if let FileStatus::Tracked(tracked) = self {
        tracked.index_status == StatusCode::Added
            || tracked.worktree_status == StatusCode::Added
    } else {
        false
    }
}
/// Returns `true` for a tracked entry whose index or worktree side is `Deleted`.
pub fn is_deleted(self) -> bool {
    if let FileStatus::Tracked(tracked) = self {
        tracked.index_status == StatusCode::Deleted
            || tracked.worktree_status == StatusCode::Deleted
    } else {
        false
    }
}
/// Returns `true` only for the `Untracked` variant.
pub fn is_untracked(self) -> bool {
    matches!(self, FileStatus::Untracked)
}
/// Converts this status into the `GitSummary` counts it contributes.
///
/// Ignored entries contribute nothing, untracked and unmerged entries
/// contribute one `untracked` / `conflict` respectively, and tracked entries
/// contribute the sum of their index-side and worktree-side summaries.
pub fn summary(self) -> GitSummary {
    let tracked = match self {
        FileStatus::Tracked(tracked) => tracked,
        FileStatus::Unmerged(_) => return GitSummary::CONFLICT,
        FileStatus::Untracked => return GitSummary::UNTRACKED,
        FileStatus::Ignored => return GitSummary::UNCHANGED,
    };
    tracked.index_status.summary() + tracked.worktree_status.summary()
}
}
impl StatusCode {
    /// Parses one status byte (`M`, `T`, `A`, `D`, `R`, `C` or a space).
    ///
    /// # Errors
    ///
    /// Returns an error for any other byte.
    fn from_byte(byte: u8) -> anyhow::Result<Self> {
        let code = match byte {
            b'M' => StatusCode::Modified,
            b'T' => StatusCode::TypeChanged,
            b'A' => StatusCode::Added,
            b'D' => StatusCode::Deleted,
            b'R' => StatusCode::Renamed,
            b'C' => StatusCode::Copied,
            b' ' => StatusCode::Unmodified,
            _ => return Err(anyhow!("Invalid status code: {byte}")),
        };
        Ok(code)
    }

    /// Converts this code into the `GitSummary` counts it contributes.
    ///
    /// Type changes count as modifications; renames, copies and unmodified
    /// entries contribute nothing.
    fn summary(self) -> GitSummary {
        match self {
            StatusCode::Added => GitSummary::ADDED,
            StatusCode::Deleted => GitSummary::DELETED,
            StatusCode::Modified | StatusCode::TypeChanged => GitSummary::MODIFIED,
            StatusCode::Renamed | StatusCode::Copied | StatusCode::Unmodified => {
                GitSummary::UNCHANGED
            }
        }
    }
}
impl UnmergedStatusCode {
    /// Parses one unmerged status byte (`A`, `D` or `U`).
    ///
    /// # Errors
    ///
    /// Returns an error for any other byte.
    fn from_byte(byte: u8) -> anyhow::Result<Self> {
        let code = match byte {
            b'A' => UnmergedStatusCode::Added,
            b'D' => UnmergedStatusCode::Deleted,
            b'U' => UnmergedStatusCode::Updated,
            _ => return Err(anyhow!("Invalid unmerged status code: {byte}")),
        };
        Ok(code)
    }
}
/// Per-category counts of statuses; summaries combine by adding each field.
#[derive(Clone, Debug, Default, Copy, PartialEq, Eq)]
pub struct GitSummary {
/// Number of `Added` codes counted.
pub added: usize,
/// Number of `Modified` / `TypeChanged` codes counted.
pub modified: usize,
/// Number of unmerged entries counted.
pub conflict: usize,
/// Number of untracked entries counted.
pub untracked: usize,
/// Number of `Deleted` codes counted.
pub deleted: usize,
}
impl GitSummary {
    /// The all-zero summary; every other constant is derived from it.
    pub const UNCHANGED: Self = Self {
        added: 0,
        modified: 0,
        conflict: 0,
        untracked: 0,
        deleted: 0,
    };

    /// Exactly one addition.
    pub const ADDED: Self = Self {
        added: 1,
        ..Self::UNCHANGED
    };

    /// Exactly one modification.
    pub const MODIFIED: Self = Self {
        modified: 1,
        ..Self::UNCHANGED
    };

    /// Exactly one conflict.
    pub const CONFLICT: Self = Self {
        conflict: 1,
        ..Self::UNCHANGED
    };

    /// Exactly one untracked entry.
    pub const UNTRACKED: Self = Self {
        untracked: 1,
        ..Self::UNCHANGED
    };

    /// Exactly one deletion.
    pub const DELETED: Self = Self {
        deleted: 1,
        ..Self::UNCHANGED
    };
}
impl From<FileStatus> for GitSummary {
fn from(status: FileStatus) -> Self {
status.summary()
}
}
/// Lets `GitSummary` serve as a `sum_tree` summary: the zero value is the
/// default (all-zero) summary and combining two summaries adds them field by field.
impl sum_tree::Summary for GitSummary {
    type Context = ();

    fn zero(_cx: &Self::Context) -> Self {
        Self::default()
    }

    fn add_summary(&mut self, rhs: &Self, _cx: &Self::Context) {
        std::ops::AddAssign::add_assign(self, *rhs);
    }
}
/// Field-wise addition, delegating to the `AddAssign` implementation.
impl std::ops::Add<Self> for GitSummary {
    type Output = Self;

    fn add(mut self, other: Self) -> Self {
        std::ops::AddAssign::add_assign(&mut self, other);
        self
    }
}
impl std::ops::AddAssign for GitSummary {
fn add_assign(&mut self, rhs: Self) {
self.added += rhs.added;
self.modified += rhs.modified;
self.conflict += rhs.conflict;
self.untracked += rhs.untracked;
self.deleted += rhs.deleted;
}
}
impl std::ops::Sub for GitSummary {
type Output = GitSummary;
fn sub(self, rhs: Self) -> Self::Output {
GitSummary {
added: self.added - rhs.added,
modified: self.modified - rhs.modified,
conflict: self.conflict - rhs.conflict,
untracked: self.untracked - rhs.untracked,
deleted: self.deleted - rhs.deleted,
}
}
}
#[derive(Clone)]
pub struct GitStatus {
pub entries: Arc<[(RepoPath, GitStatusPair)]>,
pub entries: Arc<[(RepoPath, FileStatus)]>,
}
impl GitStatus {
@ -77,20 +359,10 @@ impl GitStatus {
return None;
};
let path = &entry[3..];
let status = entry[0..2].as_bytes();
let index_status = GitFileStatus::from_byte(status[0]);
let worktree_status = GitFileStatus::from_byte(status[1]);
if (index_status, worktree_status) == (None, None) {
return None;
}
let status = entry[0..2].as_bytes().try_into().unwrap();
let status = FileStatus::from_bytes(status).log_err()?;
let path = RepoPath(Path::new(path).into());
Some((
path,
GitStatusPair {
index_status,
worktree_status,
},
))
Some((path, status))
})
.collect::<Vec<_>>();
entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(&b));