From 7f5c874a3898cde68dec8f600d0deb8f3b3849cf Mon Sep 17 00:00:00 2001 From: Cole Miller Date: Fri, 25 Apr 2025 14:46:02 -0400 Subject: [PATCH] git: Use the CLI for loading commit SHAs and details (#29351) Since #28065 merged we've seen deadlocks inside iconv when opening Zed in a repository containing many submodules. These calls to iconv happen inside libgit2, in our implementations of the methods `head_sha`, `merge_head_shas`, and `show` on `RealGitRepository`. This PR moves those methods to use the git CLI instead, sidestepping the issue. For the sake of efficiency, a new `revparse_batch` method is added that uses `git cat-file` to resolve several ref names in one invocation. I originally intended to make `show` operate in batch mode as well (or instead), but I can't see a good way to do that with the git CLI; `git show` always bails on the first ref that it can't resolve, and `for-each-ref` doesn't support symbolic refs like `HEAD`. Separately, I removed the calls to `show` in `MergeDetails::load`, going back to only loading the SHAs of the various merge heads. Loading full commit details was intended to support the inlays feature that ended up being cut from #28065, and we can add it back in when we need it. Release Notes: - N/A --- crates/agent/src/thread.rs | 2 +- crates/fs/src/fake_git_repo.rs | 25 ++-- crates/git/src/repository.rs | 145 ++++++++++++------- crates/project/src/git_store.rs | 72 +++------ crates/project/src/git_store/conflict_set.rs | 2 +- 5 files changed, 129 insertions(+), 117 deletions(-) diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs index 87b6861737..7d7c6fefa9 100644 --- a/crates/agent/src/thread.rs +++ b/crates/agent/src/thread.rs @@ -1964,7 +1964,7 @@ impl Thread { }; let remote_url = backend.remote_url("origin"); - let head_sha = backend.head_sha(); + let head_sha = backend.head_sha().await; let diff = backend.diff(DiffType::HeadToWorktree).await.ok(); GitState { diff --git a/crates/fs/src/fake_git_repo.rs b/crates/fs/src/fake_git_repo.rs index ad5a2b0264..18c39c83da 100644 --- a/crates/fs/src/fake_git_repo.rs +++ b/crates/fs/src/fake_git_repo.rs @@ -34,8 +34,8 @@ pub struct FakeGitRepositoryState { pub blames: HashMap, pub current_branch_name: Option, pub branches: HashSet, - pub merge_head_shas: Vec, pub simulated_index_write_error_message: Option, + pub refs: HashMap, } impl FakeGitRepositoryState { @@ -48,20 +48,13 @@ impl FakeGitRepositoryState { blames: Default::default(), current_branch_name: Default::default(), branches: Default::default(), - merge_head_shas: Default::default(), simulated_index_write_error_message: Default::default(), + refs: HashMap::from_iter([("HEAD".into(), "abc".into())]), } } } impl FakeGitRepository { - fn with_state(&self, write: bool, f: F) -> Result - where - F: FnOnce(&mut FakeGitRepositoryState) -> T, - { - self.fs.with_git_state(&self.dot_git_path, write, f) - } - fn with_state_async(&self, write: bool, f: F) -> BoxFuture<'static, Result> where F: 'static + Send + FnOnce(&mut FakeGitRepositoryState) -> Result, @@ -141,13 +134,13 @@ impl GitRepository for FakeGitRepository { None } - fn head_sha(&self) -> Option { - None - } - - fn merge_head_shas(&self) -> Vec { - self.with_state(false, |state| state.merge_head_shas.clone()) - .unwrap() + fn revparse_batch(&self, revs: Vec) -> BoxFuture>>> { + self.with_state_async(false, |state| { + Ok(revs + .into_iter() + .map(|rev| state.refs.get(&rev).cloned()) + .collect()) + }) } fn show(&self, commit: String) -> BoxFuture> { diff --git a/crates/git/src/repository.rs b/crates/git/src/repository.rs index eeea34367f..fc293d8a4f 100644 --- a/crates/git/src/repository.rs +++ b/crates/git/src/repository.rs @@ -1,7 +1,7 @@ use crate::commit::parse_git_diff_name_status; use crate::status::{GitStatus, StatusCode}; use crate::{Oid, SHORT_SHA_LENGTH}; -use anyhow::{Context as _, Result, anyhow}; +use anyhow::{Context as _, Result, anyhow, bail}; use collections::HashMap; use futures::future::BoxFuture; use futures::{AsyncWriteExt, FutureExt as _, select_biased}; @@ -13,17 +13,16 @@ use schemars::JsonSchema; use serde::Deserialize; use std::borrow::{Borrow, Cow}; use std::ffi::{OsStr, OsString}; +use std::io::prelude::*; use std::path::Component; use std::process::{ExitStatus, Stdio}; use std::sync::LazyLock; use std::{ cmp::Ordering, - path::{Path, PathBuf}, - sync::Arc, -}; -use std::{ future, io::{BufRead, BufReader, BufWriter, Read}, + path::{Path, PathBuf}, + sync::Arc, }; use sum_tree::MapSeekTarget; use thiserror::Error; @@ -197,10 +196,20 @@ pub trait GitRepository: Send + Sync { /// Returns the URL of the remote with the given name. fn remote_url(&self, name: &str) -> Option; - /// Returns the SHA of the current HEAD. - fn head_sha(&self) -> Option; + /// Resolve a list of refs to SHAs. + fn revparse_batch(&self, revs: Vec) -> BoxFuture>>>; - fn merge_head_shas(&self) -> Vec; + fn head_sha(&self) -> BoxFuture> { + async move { + self.revparse_batch(vec!["HEAD".into()]) + .await + .unwrap_or_default() + .into_iter() + .next() + .flatten() + } + .boxed() + } fn merge_message(&self) -> BoxFuture>; @@ -392,27 +401,37 @@ impl GitRepository for RealGitRepository { } fn show(&self, commit: String) -> BoxFuture> { - let repo = self.repository.clone(); + let working_directory = self.working_directory(); self.executor .spawn(async move { - let repo = repo.lock(); - let Ok(commit) = repo.revparse_single(&commit)?.into_commit() else { - anyhow::bail!("{} is not a commit", commit); - }; - let details = CommitDetails { - sha: commit.id().to_string().into(), - message: String::from_utf8_lossy(commit.message_raw_bytes()) - .to_string() - .into(), - commit_timestamp: commit.time().seconds(), - author_email: String::from_utf8_lossy(commit.author().email_bytes()) - .to_string() - .into(), - author_name: String::from_utf8_lossy(commit.author().name_bytes()) - .to_string() - .into(), - }; - Ok(details) + let working_directory = working_directory?; + let output = new_std_command("git") + .current_dir(&working_directory) + .args([ + "--no-optional-locks", + "show", + "--no-patch", + "--format=%H%x00%B%x00%at%x00%ae%x00%an", + &commit, + ]) + .output()?; + let output = std::str::from_utf8(&output.stdout)?; + let fields = output.split('\0').collect::>(); + if fields.len() != 5 { + bail!("unexpected git-show output for {commit:?}: {output:?}") + } + let sha = fields[0].to_string().into(); + let message = fields[1].to_string().into(); + let commit_timestamp = fields[2].parse()?; + let author_email = fields[3].to_string().into(); + let author_name = fields[4].to_string().into(); + Ok(CommitDetails { + sha, + message, + commit_timestamp, + author_email, + author_name, + }) }) .boxed() } @@ -702,34 +721,62 @@ impl GitRepository for RealGitRepository { remote.url().map(|url| url.to_string()) } - fn head_sha(&self) -> Option { - Some(self.repository.lock().head().ok()?.target()?.to_string()) - } + fn revparse_batch(&self, revs: Vec) -> BoxFuture>>> { + let working_directory = self.working_directory(); + self.executor + .spawn(async move { + let working_directory = working_directory?; + let mut process = new_std_command("git") + .current_dir(&working_directory) + .args([ + "--no-optional-locks", + "cat-file", + "--batch-check=%(objectname)", + "-z", + ]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn()?; - fn merge_head_shas(&self) -> Vec { - let mut shas = Vec::default(); - self.repository - .lock() - .mergehead_foreach(|oid| { - shas.push(oid.to_string()); - true + let stdin = process + .stdin + .take() + .ok_or_else(|| anyhow!("no stdin for git cat-file subprocess"))?; + let mut stdin = BufWriter::new(stdin); + for rev in &revs { + write!(&mut stdin, "{rev}\0")?; + } + drop(stdin); + + let output = process.wait_with_output()?; + let output = std::str::from_utf8(&output.stdout)?; + let shas = output + .lines() + .map(|line| { + if line.ends_with("missing") { + None + } else { + Some(line.to_string()) + } + }) + .collect::>(); + + if shas.len() != revs.len() { + // In an octopus merge, git cat-file still only outputs the first sha from MERGE_HEAD. + bail!("unexpected number of shas") + } + + Ok(shas) }) - .ok(); - if let Some(oid) = self - .repository - .lock() - .find_reference("CHERRY_PICK_HEAD") - .ok() - .and_then(|reference| reference.target()) - { - shas.push(oid.to_string()) - } - shas + .boxed() } fn merge_message(&self) -> BoxFuture> { let path = self.path().join("MERGE_MSG"); - async move { std::fs::read_to_string(&path).ok() }.boxed() + self.executor + .spawn(async move { std::fs::read_to_string(&path).ok() }) + .boxed() } fn status(&self, path_prefixes: &[RepoPath]) -> BoxFuture> { diff --git a/crates/project/src/git_store.rs b/crates/project/src/git_store.rs index f8e96d7730..aaf66e8ea9 100644 --- a/crates/project/src/git_store.rs +++ b/crates/project/src/git_store.rs @@ -16,7 +16,7 @@ use fs::Fs; use futures::{ FutureExt, StreamExt as _, channel::{mpsc, oneshot}, - future::{self, Shared, try_join_all}, + future::{self, Shared}, }; use git::{ BuildPermalinkParams, GitHostingProviderRegistry, WORK_DIRECTORY_REPO_PATH, @@ -233,11 +233,7 @@ pub struct RepositoryId(pub u64); pub struct MergeDetails { pub conflicted_paths: TreeSet, pub message: Option, - pub apply_head: Option, - pub cherry_pick_head: Option, - pub merge_heads: Vec, - pub rebase_head: Option, - pub revert_head: Option, + pub heads: Vec>, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -1005,6 +1001,7 @@ impl GitStore { let sha = backend .head_sha() + .await .ok_or_else(|| anyhow!("failed to read HEAD SHA"))?; let provider_registry = @@ -2695,43 +2692,22 @@ impl MergeDetails { status: &SumTree, prev_snapshot: &RepositorySnapshot, ) -> Result<(MergeDetails, bool)> { - fn sha_eq<'a>( - l: impl IntoIterator, - r: impl IntoIterator, - ) -> bool { - l.into_iter() - .map(|commit| &commit.sha) - .eq(r.into_iter().map(|commit| &commit.sha)) - } - - let merge_heads = try_join_all( - backend - .merge_head_shas() - .into_iter() - .map(|sha| backend.show(sha)), - ) - .await?; - let cherry_pick_head = backend.show("CHERRY_PICK_HEAD".into()).await.ok(); - let rebase_head = backend.show("REBASE_HEAD".into()).await.ok(); - let revert_head = backend.show("REVERT_HEAD".into()).await.ok(); - let apply_head = backend.show("APPLY_HEAD".into()).await.ok(); - let message = backend.merge_message().await.map(SharedString::from); - let merge_heads_changed = !sha_eq( - merge_heads.as_slice(), - prev_snapshot.merge.merge_heads.as_slice(), - ) || !sha_eq( - cherry_pick_head.as_ref(), - prev_snapshot.merge.cherry_pick_head.as_ref(), - ) || !sha_eq( - apply_head.as_ref(), - prev_snapshot.merge.apply_head.as_ref(), - ) || !sha_eq( - rebase_head.as_ref(), - prev_snapshot.merge.rebase_head.as_ref(), - ) || !sha_eq( - revert_head.as_ref(), - prev_snapshot.merge.revert_head.as_ref(), - ); + let message = backend.merge_message().await; + let heads = backend + .revparse_batch(vec![ + "MERGE_HEAD".into(), + "CHERRY_PICK_HEAD".into(), + "REBASE_HEAD".into(), + "REVERT_HEAD".into(), + "APPLY_HEAD".into(), + ]) + .await + .log_err() + .unwrap_or_default() + .into_iter() + .map(|opt| opt.map(SharedString::from)) + .collect::>(); + let merge_heads_changed = heads != prev_snapshot.merge.heads; let conflicted_paths = if merge_heads_changed { TreeSet::from_ordered_entries( status @@ -2744,12 +2720,8 @@ impl MergeDetails { }; let details = MergeDetails { conflicted_paths, - message, - apply_head, - cherry_pick_head, - merge_heads, - rebase_head, - revert_head, + message: message.map(SharedString::from), + heads, }; Ok((details, merge_heads_changed)) } @@ -4578,7 +4550,7 @@ async fn compute_snapshot( } // Useful when branch is None in detached head state - let head_commit = match backend.head_sha() { + let head_commit = match backend.head_sha().await { Some(head_sha) => backend.show(head_sha).await.log_err(), None => None, }; diff --git a/crates/project/src/git_store/conflict_set.rs b/crates/project/src/git_store/conflict_set.rs index 7590db599c..3c05e93ab7 100644 --- a/crates/project/src/git_store/conflict_set.rs +++ b/crates/project/src/git_store/conflict_set.rs @@ -532,7 +532,7 @@ mod tests { }, ); // Cause the repository to emit MergeHeadsChanged. - state.merge_head_shas = vec!["abc".into(), "def".into()] + state.refs.insert("MERGE_HEAD".into(), "123".into()) }) .unwrap();