git: Use the CLI for loading commit SHAs and details (#29351)

Since #28065 merged we've seen deadlocks inside iconv when opening Zed
in a repository containing many submodules. These calls to iconv happen
inside libgit2, in our implementations of the methods `head_sha`,
`merge_head_shas`, and `show` on `RealGitRepository`. This PR moves
those methods to use the git CLI instead, sidestepping the issue. For
the sake of efficiency, a new `revparse_batch` method is added that uses
`git cat-file` to resolve several ref names in one invocation. I
originally intended to make `show` operate in batch mode as well (or
instead), but I can't see a good way to do that with the git CLI; `git
show` always bails on the first ref that it can't resolve, and
`for-each-ref` doesn't support symbolic refs like `HEAD`.

Separately, I removed the calls to `show` in `MergeDetails::load`, going
back to only loading the SHAs of the various merge heads. Loading full
commit details was intended to support the inlays feature that ended up
being cut from #28065, and we can add it back in when we need it.

Release Notes:

- N/A
This commit is contained in:
Cole Miller 2025-04-25 14:46:02 -04:00 committed by GitHub
parent 8cc2ade21c
commit 7f5c874a38
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 129 additions and 117 deletions

View file

@ -1964,7 +1964,7 @@ impl Thread {
};
let remote_url = backend.remote_url("origin");
let head_sha = backend.head_sha();
let head_sha = backend.head_sha().await;
let diff = backend.diff(DiffType::HeadToWorktree).await.ok();
GitState {

View file

@ -34,8 +34,8 @@ pub struct FakeGitRepositoryState {
pub blames: HashMap<RepoPath, Blame>,
pub current_branch_name: Option<String>,
pub branches: HashSet<String>,
pub merge_head_shas: Vec<String>,
pub simulated_index_write_error_message: Option<String>,
pub refs: HashMap<String, String>,
}
impl FakeGitRepositoryState {
@ -48,20 +48,13 @@ impl FakeGitRepositoryState {
blames: Default::default(),
current_branch_name: Default::default(),
branches: Default::default(),
merge_head_shas: Default::default(),
simulated_index_write_error_message: Default::default(),
refs: HashMap::from_iter([("HEAD".into(), "abc".into())]),
}
}
}
impl FakeGitRepository {
fn with_state<F, T>(&self, write: bool, f: F) -> Result<T>
where
F: FnOnce(&mut FakeGitRepositoryState) -> T,
{
self.fs.with_git_state(&self.dot_git_path, write, f)
}
fn with_state_async<F, T>(&self, write: bool, f: F) -> BoxFuture<'static, Result<T>>
where
F: 'static + Send + FnOnce(&mut FakeGitRepositoryState) -> Result<T>,
@ -141,13 +134,13 @@ impl GitRepository for FakeGitRepository {
None
}
fn head_sha(&self) -> Option<String> {
None
}
fn merge_head_shas(&self) -> Vec<String> {
self.with_state(false, |state| state.merge_head_shas.clone())
.unwrap()
fn revparse_batch(&self, revs: Vec<String>) -> BoxFuture<Result<Vec<Option<String>>>> {
self.with_state_async(false, |state| {
Ok(revs
.into_iter()
.map(|rev| state.refs.get(&rev).cloned())
.collect())
})
}
fn show(&self, commit: String) -> BoxFuture<Result<CommitDetails>> {

View file

@ -1,7 +1,7 @@
use crate::commit::parse_git_diff_name_status;
use crate::status::{GitStatus, StatusCode};
use crate::{Oid, SHORT_SHA_LENGTH};
use anyhow::{Context as _, Result, anyhow};
use anyhow::{Context as _, Result, anyhow, bail};
use collections::HashMap;
use futures::future::BoxFuture;
use futures::{AsyncWriteExt, FutureExt as _, select_biased};
@ -13,17 +13,16 @@ use schemars::JsonSchema;
use serde::Deserialize;
use std::borrow::{Borrow, Cow};
use std::ffi::{OsStr, OsString};
use std::io::prelude::*;
use std::path::Component;
use std::process::{ExitStatus, Stdio};
use std::sync::LazyLock;
use std::{
cmp::Ordering,
path::{Path, PathBuf},
sync::Arc,
};
use std::{
future,
io::{BufRead, BufReader, BufWriter, Read},
path::{Path, PathBuf},
sync::Arc,
};
use sum_tree::MapSeekTarget;
use thiserror::Error;
@ -197,10 +196,20 @@ pub trait GitRepository: Send + Sync {
/// Returns the URL of the remote with the given name.
fn remote_url(&self, name: &str) -> Option<String>;
/// Returns the SHA of the current HEAD.
fn head_sha(&self) -> Option<String>;
/// Resolve a list of refs to SHAs.
fn revparse_batch(&self, revs: Vec<String>) -> BoxFuture<Result<Vec<Option<String>>>>;
fn merge_head_shas(&self) -> Vec<String>;
fn head_sha(&self) -> BoxFuture<Option<String>> {
async move {
self.revparse_batch(vec!["HEAD".into()])
.await
.unwrap_or_default()
.into_iter()
.next()
.flatten()
}
.boxed()
}
fn merge_message(&self) -> BoxFuture<Option<String>>;
@ -392,27 +401,37 @@ impl GitRepository for RealGitRepository {
}
fn show(&self, commit: String) -> BoxFuture<Result<CommitDetails>> {
let repo = self.repository.clone();
let working_directory = self.working_directory();
self.executor
.spawn(async move {
let repo = repo.lock();
let Ok(commit) = repo.revparse_single(&commit)?.into_commit() else {
anyhow::bail!("{} is not a commit", commit);
};
let details = CommitDetails {
sha: commit.id().to_string().into(),
message: String::from_utf8_lossy(commit.message_raw_bytes())
.to_string()
.into(),
commit_timestamp: commit.time().seconds(),
author_email: String::from_utf8_lossy(commit.author().email_bytes())
.to_string()
.into(),
author_name: String::from_utf8_lossy(commit.author().name_bytes())
.to_string()
.into(),
};
Ok(details)
let working_directory = working_directory?;
let output = new_std_command("git")
.current_dir(&working_directory)
.args([
"--no-optional-locks",
"show",
"--no-patch",
"--format=%H%x00%B%x00%at%x00%ae%x00%an",
&commit,
])
.output()?;
let output = std::str::from_utf8(&output.stdout)?;
let fields = output.split('\0').collect::<Vec<_>>();
if fields.len() != 5 {
bail!("unexpected git-show output for {commit:?}: {output:?}")
}
let sha = fields[0].to_string().into();
let message = fields[1].to_string().into();
let commit_timestamp = fields[2].parse()?;
let author_email = fields[3].to_string().into();
let author_name = fields[4].to_string().into();
Ok(CommitDetails {
sha,
message,
commit_timestamp,
author_email,
author_name,
})
})
.boxed()
}
@ -702,34 +721,62 @@ impl GitRepository for RealGitRepository {
remote.url().map(|url| url.to_string())
}
fn head_sha(&self) -> Option<String> {
Some(self.repository.lock().head().ok()?.target()?.to_string())
}
fn revparse_batch(&self, revs: Vec<String>) -> BoxFuture<Result<Vec<Option<String>>>> {
let working_directory = self.working_directory();
self.executor
.spawn(async move {
let working_directory = working_directory?;
let mut process = new_std_command("git")
.current_dir(&working_directory)
.args([
"--no-optional-locks",
"cat-file",
"--batch-check=%(objectname)",
"-z",
])
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()?;
fn merge_head_shas(&self) -> Vec<String> {
let mut shas = Vec::default();
self.repository
.lock()
.mergehead_foreach(|oid| {
shas.push(oid.to_string());
true
let stdin = process
.stdin
.take()
.ok_or_else(|| anyhow!("no stdin for git cat-file subprocess"))?;
let mut stdin = BufWriter::new(stdin);
for rev in &revs {
write!(&mut stdin, "{rev}\0")?;
}
drop(stdin);
let output = process.wait_with_output()?;
let output = std::str::from_utf8(&output.stdout)?;
let shas = output
.lines()
.map(|line| {
if line.ends_with("missing") {
None
} else {
Some(line.to_string())
}
})
.collect::<Vec<_>>();
if shas.len() != revs.len() {
// In an octopus merge, git cat-file still only outputs the first sha from MERGE_HEAD.
bail!("unexpected number of shas")
}
Ok(shas)
})
.ok();
if let Some(oid) = self
.repository
.lock()
.find_reference("CHERRY_PICK_HEAD")
.ok()
.and_then(|reference| reference.target())
{
shas.push(oid.to_string())
}
shas
.boxed()
}
fn merge_message(&self) -> BoxFuture<Option<String>> {
let path = self.path().join("MERGE_MSG");
async move { std::fs::read_to_string(&path).ok() }.boxed()
self.executor
.spawn(async move { std::fs::read_to_string(&path).ok() })
.boxed()
}
fn status(&self, path_prefixes: &[RepoPath]) -> BoxFuture<Result<GitStatus>> {

View file

@ -16,7 +16,7 @@ use fs::Fs;
use futures::{
FutureExt, StreamExt as _,
channel::{mpsc, oneshot},
future::{self, Shared, try_join_all},
future::{self, Shared},
};
use git::{
BuildPermalinkParams, GitHostingProviderRegistry, WORK_DIRECTORY_REPO_PATH,
@ -233,11 +233,7 @@ pub struct RepositoryId(pub u64);
pub struct MergeDetails {
pub conflicted_paths: TreeSet<RepoPath>,
pub message: Option<SharedString>,
pub apply_head: Option<CommitDetails>,
pub cherry_pick_head: Option<CommitDetails>,
pub merge_heads: Vec<CommitDetails>,
pub rebase_head: Option<CommitDetails>,
pub revert_head: Option<CommitDetails>,
pub heads: Vec<Option<SharedString>>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
@ -1005,6 +1001,7 @@ impl GitStore {
let sha = backend
.head_sha()
.await
.ok_or_else(|| anyhow!("failed to read HEAD SHA"))?;
let provider_registry =
@ -2695,43 +2692,22 @@ impl MergeDetails {
status: &SumTree<StatusEntry>,
prev_snapshot: &RepositorySnapshot,
) -> Result<(MergeDetails, bool)> {
fn sha_eq<'a>(
l: impl IntoIterator<Item = &'a CommitDetails>,
r: impl IntoIterator<Item = &'a CommitDetails>,
) -> bool {
l.into_iter()
.map(|commit| &commit.sha)
.eq(r.into_iter().map(|commit| &commit.sha))
}
let merge_heads = try_join_all(
backend
.merge_head_shas()
.into_iter()
.map(|sha| backend.show(sha)),
)
.await?;
let cherry_pick_head = backend.show("CHERRY_PICK_HEAD".into()).await.ok();
let rebase_head = backend.show("REBASE_HEAD".into()).await.ok();
let revert_head = backend.show("REVERT_HEAD".into()).await.ok();
let apply_head = backend.show("APPLY_HEAD".into()).await.ok();
let message = backend.merge_message().await.map(SharedString::from);
let merge_heads_changed = !sha_eq(
merge_heads.as_slice(),
prev_snapshot.merge.merge_heads.as_slice(),
) || !sha_eq(
cherry_pick_head.as_ref(),
prev_snapshot.merge.cherry_pick_head.as_ref(),
) || !sha_eq(
apply_head.as_ref(),
prev_snapshot.merge.apply_head.as_ref(),
) || !sha_eq(
rebase_head.as_ref(),
prev_snapshot.merge.rebase_head.as_ref(),
) || !sha_eq(
revert_head.as_ref(),
prev_snapshot.merge.revert_head.as_ref(),
);
let message = backend.merge_message().await;
let heads = backend
.revparse_batch(vec![
"MERGE_HEAD".into(),
"CHERRY_PICK_HEAD".into(),
"REBASE_HEAD".into(),
"REVERT_HEAD".into(),
"APPLY_HEAD".into(),
])
.await
.log_err()
.unwrap_or_default()
.into_iter()
.map(|opt| opt.map(SharedString::from))
.collect::<Vec<_>>();
let merge_heads_changed = heads != prev_snapshot.merge.heads;
let conflicted_paths = if merge_heads_changed {
TreeSet::from_ordered_entries(
status
@ -2744,12 +2720,8 @@ impl MergeDetails {
};
let details = MergeDetails {
conflicted_paths,
message,
apply_head,
cherry_pick_head,
merge_heads,
rebase_head,
revert_head,
message: message.map(SharedString::from),
heads,
};
Ok((details, merge_heads_changed))
}
@ -4578,7 +4550,7 @@ async fn compute_snapshot(
}
// Useful when branch is None in detached head state
let head_commit = match backend.head_sha() {
let head_commit = match backend.head_sha().await {
Some(head_sha) => backend.show(head_sha).await.log_err(),
None => None,
};

View file

@ -532,7 +532,7 @@ mod tests {
},
);
// Cause the repository to emit MergeHeadsChanged.
state.merge_head_shas = vec!["abc".into(), "def".into()]
state.refs.insert("MERGE_HEAD".into(), "123".into())
})
.unwrap();