git: Use the CLI for loading commit SHAs and details (#29351)

Since #28065 was merged, we've seen deadlocks inside iconv when opening Zed
in a repository containing many submodules. These calls to iconv happen
inside libgit2, in our implementations of the methods `head_sha`,
`merge_head_shas`, and `show` on `RealGitRepository`. This PR moves
those methods to use the git CLI instead, sidestepping the issue. For
the sake of efficiency, a new `revparse_batch` method is added that uses
`git cat-file` to resolve several ref names in one invocation. I
originally intended to make `show` operate in batch mode as well (or
instead), but I can't see a good way to do that with the git CLI; `git
show` always bails on the first ref that it can't resolve, and
`for-each-ref` doesn't support symbolic refs like `HEAD`.

Separately, I removed the calls to `show` in `MergeDetails::load`, going
back to only loading the SHAs of the various merge heads. Loading full
commit details was intended to support the inlays feature that ended up
being cut from #28065, and we can add it back in when we need it.

Release Notes:

- N/A
This commit is contained in:
Cole Miller 2025-04-25 14:46:02 -04:00 committed by GitHub
parent 8cc2ade21c
commit 7f5c874a38
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 129 additions and 117 deletions

View file

@ -1964,7 +1964,7 @@ impl Thread {
}; };
let remote_url = backend.remote_url("origin"); let remote_url = backend.remote_url("origin");
let head_sha = backend.head_sha(); let head_sha = backend.head_sha().await;
let diff = backend.diff(DiffType::HeadToWorktree).await.ok(); let diff = backend.diff(DiffType::HeadToWorktree).await.ok();
GitState { GitState {

View file

@ -34,8 +34,8 @@ pub struct FakeGitRepositoryState {
pub blames: HashMap<RepoPath, Blame>, pub blames: HashMap<RepoPath, Blame>,
pub current_branch_name: Option<String>, pub current_branch_name: Option<String>,
pub branches: HashSet<String>, pub branches: HashSet<String>,
pub merge_head_shas: Vec<String>,
pub simulated_index_write_error_message: Option<String>, pub simulated_index_write_error_message: Option<String>,
pub refs: HashMap<String, String>,
} }
impl FakeGitRepositoryState { impl FakeGitRepositoryState {
@ -48,20 +48,13 @@ impl FakeGitRepositoryState {
blames: Default::default(), blames: Default::default(),
current_branch_name: Default::default(), current_branch_name: Default::default(),
branches: Default::default(), branches: Default::default(),
merge_head_shas: Default::default(),
simulated_index_write_error_message: Default::default(), simulated_index_write_error_message: Default::default(),
refs: HashMap::from_iter([("HEAD".into(), "abc".into())]),
} }
} }
} }
impl FakeGitRepository { impl FakeGitRepository {
fn with_state<F, T>(&self, write: bool, f: F) -> Result<T>
where
F: FnOnce(&mut FakeGitRepositoryState) -> T,
{
self.fs.with_git_state(&self.dot_git_path, write, f)
}
fn with_state_async<F, T>(&self, write: bool, f: F) -> BoxFuture<'static, Result<T>> fn with_state_async<F, T>(&self, write: bool, f: F) -> BoxFuture<'static, Result<T>>
where where
F: 'static + Send + FnOnce(&mut FakeGitRepositoryState) -> Result<T>, F: 'static + Send + FnOnce(&mut FakeGitRepositoryState) -> Result<T>,
@ -141,13 +134,13 @@ impl GitRepository for FakeGitRepository {
None None
} }
fn head_sha(&self) -> Option<String> { fn revparse_batch(&self, revs: Vec<String>) -> BoxFuture<Result<Vec<Option<String>>>> {
None self.with_state_async(false, |state| {
} Ok(revs
.into_iter()
fn merge_head_shas(&self) -> Vec<String> { .map(|rev| state.refs.get(&rev).cloned())
self.with_state(false, |state| state.merge_head_shas.clone()) .collect())
.unwrap() })
} }
fn show(&self, commit: String) -> BoxFuture<Result<CommitDetails>> { fn show(&self, commit: String) -> BoxFuture<Result<CommitDetails>> {

View file

@ -1,7 +1,7 @@
use crate::commit::parse_git_diff_name_status; use crate::commit::parse_git_diff_name_status;
use crate::status::{GitStatus, StatusCode}; use crate::status::{GitStatus, StatusCode};
use crate::{Oid, SHORT_SHA_LENGTH}; use crate::{Oid, SHORT_SHA_LENGTH};
use anyhow::{Context as _, Result, anyhow}; use anyhow::{Context as _, Result, anyhow, bail};
use collections::HashMap; use collections::HashMap;
use futures::future::BoxFuture; use futures::future::BoxFuture;
use futures::{AsyncWriteExt, FutureExt as _, select_biased}; use futures::{AsyncWriteExt, FutureExt as _, select_biased};
@ -13,17 +13,16 @@ use schemars::JsonSchema;
use serde::Deserialize; use serde::Deserialize;
use std::borrow::{Borrow, Cow}; use std::borrow::{Borrow, Cow};
use std::ffi::{OsStr, OsString}; use std::ffi::{OsStr, OsString};
use std::io::prelude::*;
use std::path::Component; use std::path::Component;
use std::process::{ExitStatus, Stdio}; use std::process::{ExitStatus, Stdio};
use std::sync::LazyLock; use std::sync::LazyLock;
use std::{ use std::{
cmp::Ordering, cmp::Ordering,
path::{Path, PathBuf},
sync::Arc,
};
use std::{
future, future,
io::{BufRead, BufReader, BufWriter, Read}, io::{BufRead, BufReader, BufWriter, Read},
path::{Path, PathBuf},
sync::Arc,
}; };
use sum_tree::MapSeekTarget; use sum_tree::MapSeekTarget;
use thiserror::Error; use thiserror::Error;
@ -197,10 +196,20 @@ pub trait GitRepository: Send + Sync {
/// Returns the URL of the remote with the given name. /// Returns the URL of the remote with the given name.
fn remote_url(&self, name: &str) -> Option<String>; fn remote_url(&self, name: &str) -> Option<String>;
/// Returns the SHA of the current HEAD. /// Resolve a list of refs to SHAs.
fn head_sha(&self) -> Option<String>; fn revparse_batch(&self, revs: Vec<String>) -> BoxFuture<Result<Vec<Option<String>>>>;
fn merge_head_shas(&self) -> Vec<String>; fn head_sha(&self) -> BoxFuture<Option<String>> {
async move {
self.revparse_batch(vec!["HEAD".into()])
.await
.unwrap_or_default()
.into_iter()
.next()
.flatten()
}
.boxed()
}
fn merge_message(&self) -> BoxFuture<Option<String>>; fn merge_message(&self) -> BoxFuture<Option<String>>;
@ -392,27 +401,37 @@ impl GitRepository for RealGitRepository {
} }
fn show(&self, commit: String) -> BoxFuture<Result<CommitDetails>> { fn show(&self, commit: String) -> BoxFuture<Result<CommitDetails>> {
let repo = self.repository.clone(); let working_directory = self.working_directory();
self.executor self.executor
.spawn(async move { .spawn(async move {
let repo = repo.lock(); let working_directory = working_directory?;
let Ok(commit) = repo.revparse_single(&commit)?.into_commit() else { let output = new_std_command("git")
anyhow::bail!("{} is not a commit", commit); .current_dir(&working_directory)
}; .args([
let details = CommitDetails { "--no-optional-locks",
sha: commit.id().to_string().into(), "show",
message: String::from_utf8_lossy(commit.message_raw_bytes()) "--no-patch",
.to_string() "--format=%H%x00%B%x00%at%x00%ae%x00%an",
.into(), &commit,
commit_timestamp: commit.time().seconds(), ])
author_email: String::from_utf8_lossy(commit.author().email_bytes()) .output()?;
.to_string() let output = std::str::from_utf8(&output.stdout)?;
.into(), let fields = output.split('\0').collect::<Vec<_>>();
author_name: String::from_utf8_lossy(commit.author().name_bytes()) if fields.len() != 5 {
.to_string() bail!("unexpected git-show output for {commit:?}: {output:?}")
.into(), }
}; let sha = fields[0].to_string().into();
Ok(details) let message = fields[1].to_string().into();
let commit_timestamp = fields[2].parse()?;
let author_email = fields[3].to_string().into();
let author_name = fields[4].to_string().into();
Ok(CommitDetails {
sha,
message,
commit_timestamp,
author_email,
author_name,
})
}) })
.boxed() .boxed()
} }
@ -702,34 +721,62 @@ impl GitRepository for RealGitRepository {
remote.url().map(|url| url.to_string()) remote.url().map(|url| url.to_string())
} }
fn head_sha(&self) -> Option<String> { fn revparse_batch(&self, revs: Vec<String>) -> BoxFuture<Result<Vec<Option<String>>>> {
Some(self.repository.lock().head().ok()?.target()?.to_string()) let working_directory = self.working_directory();
} self.executor
.spawn(async move {
let working_directory = working_directory?;
let mut process = new_std_command("git")
.current_dir(&working_directory)
.args([
"--no-optional-locks",
"cat-file",
"--batch-check=%(objectname)",
"-z",
])
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()?;
fn merge_head_shas(&self) -> Vec<String> { let stdin = process
let mut shas = Vec::default(); .stdin
self.repository .take()
.lock() .ok_or_else(|| anyhow!("no stdin for git cat-file subprocess"))?;
.mergehead_foreach(|oid| { let mut stdin = BufWriter::new(stdin);
shas.push(oid.to_string()); for rev in &revs {
true write!(&mut stdin, "{rev}\0")?;
}
drop(stdin);
let output = process.wait_with_output()?;
let output = std::str::from_utf8(&output.stdout)?;
let shas = output
.lines()
.map(|line| {
if line.ends_with("missing") {
None
} else {
Some(line.to_string())
}
})
.collect::<Vec<_>>();
if shas.len() != revs.len() {
// In an octopus merge, git cat-file still only outputs the first sha from MERGE_HEAD.
bail!("unexpected number of shas")
}
Ok(shas)
}) })
.ok(); .boxed()
if let Some(oid) = self
.repository
.lock()
.find_reference("CHERRY_PICK_HEAD")
.ok()
.and_then(|reference| reference.target())
{
shas.push(oid.to_string())
}
shas
} }
fn merge_message(&self) -> BoxFuture<Option<String>> { fn merge_message(&self) -> BoxFuture<Option<String>> {
let path = self.path().join("MERGE_MSG"); let path = self.path().join("MERGE_MSG");
async move { std::fs::read_to_string(&path).ok() }.boxed() self.executor
.spawn(async move { std::fs::read_to_string(&path).ok() })
.boxed()
} }
fn status(&self, path_prefixes: &[RepoPath]) -> BoxFuture<Result<GitStatus>> { fn status(&self, path_prefixes: &[RepoPath]) -> BoxFuture<Result<GitStatus>> {

View file

@ -16,7 +16,7 @@ use fs::Fs;
use futures::{ use futures::{
FutureExt, StreamExt as _, FutureExt, StreamExt as _,
channel::{mpsc, oneshot}, channel::{mpsc, oneshot},
future::{self, Shared, try_join_all}, future::{self, Shared},
}; };
use git::{ use git::{
BuildPermalinkParams, GitHostingProviderRegistry, WORK_DIRECTORY_REPO_PATH, BuildPermalinkParams, GitHostingProviderRegistry, WORK_DIRECTORY_REPO_PATH,
@ -233,11 +233,7 @@ pub struct RepositoryId(pub u64);
pub struct MergeDetails { pub struct MergeDetails {
pub conflicted_paths: TreeSet<RepoPath>, pub conflicted_paths: TreeSet<RepoPath>,
pub message: Option<SharedString>, pub message: Option<SharedString>,
pub apply_head: Option<CommitDetails>, pub heads: Vec<Option<SharedString>>,
pub cherry_pick_head: Option<CommitDetails>,
pub merge_heads: Vec<CommitDetails>,
pub rebase_head: Option<CommitDetails>,
pub revert_head: Option<CommitDetails>,
} }
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
@ -1005,6 +1001,7 @@ impl GitStore {
let sha = backend let sha = backend
.head_sha() .head_sha()
.await
.ok_or_else(|| anyhow!("failed to read HEAD SHA"))?; .ok_or_else(|| anyhow!("failed to read HEAD SHA"))?;
let provider_registry = let provider_registry =
@ -2695,43 +2692,22 @@ impl MergeDetails {
status: &SumTree<StatusEntry>, status: &SumTree<StatusEntry>,
prev_snapshot: &RepositorySnapshot, prev_snapshot: &RepositorySnapshot,
) -> Result<(MergeDetails, bool)> { ) -> Result<(MergeDetails, bool)> {
fn sha_eq<'a>( let message = backend.merge_message().await;
l: impl IntoIterator<Item = &'a CommitDetails>, let heads = backend
r: impl IntoIterator<Item = &'a CommitDetails>, .revparse_batch(vec![
) -> bool { "MERGE_HEAD".into(),
l.into_iter() "CHERRY_PICK_HEAD".into(),
.map(|commit| &commit.sha) "REBASE_HEAD".into(),
.eq(r.into_iter().map(|commit| &commit.sha)) "REVERT_HEAD".into(),
} "APPLY_HEAD".into(),
])
let merge_heads = try_join_all( .await
backend .log_err()
.merge_head_shas() .unwrap_or_default()
.into_iter() .into_iter()
.map(|sha| backend.show(sha)), .map(|opt| opt.map(SharedString::from))
) .collect::<Vec<_>>();
.await?; let merge_heads_changed = heads != prev_snapshot.merge.heads;
let cherry_pick_head = backend.show("CHERRY_PICK_HEAD".into()).await.ok();
let rebase_head = backend.show("REBASE_HEAD".into()).await.ok();
let revert_head = backend.show("REVERT_HEAD".into()).await.ok();
let apply_head = backend.show("APPLY_HEAD".into()).await.ok();
let message = backend.merge_message().await.map(SharedString::from);
let merge_heads_changed = !sha_eq(
merge_heads.as_slice(),
prev_snapshot.merge.merge_heads.as_slice(),
) || !sha_eq(
cherry_pick_head.as_ref(),
prev_snapshot.merge.cherry_pick_head.as_ref(),
) || !sha_eq(
apply_head.as_ref(),
prev_snapshot.merge.apply_head.as_ref(),
) || !sha_eq(
rebase_head.as_ref(),
prev_snapshot.merge.rebase_head.as_ref(),
) || !sha_eq(
revert_head.as_ref(),
prev_snapshot.merge.revert_head.as_ref(),
);
let conflicted_paths = if merge_heads_changed { let conflicted_paths = if merge_heads_changed {
TreeSet::from_ordered_entries( TreeSet::from_ordered_entries(
status status
@ -2744,12 +2720,8 @@ impl MergeDetails {
}; };
let details = MergeDetails { let details = MergeDetails {
conflicted_paths, conflicted_paths,
message, message: message.map(SharedString::from),
apply_head, heads,
cherry_pick_head,
merge_heads,
rebase_head,
revert_head,
}; };
Ok((details, merge_heads_changed)) Ok((details, merge_heads_changed))
} }
@ -4578,7 +4550,7 @@ async fn compute_snapshot(
} }
// Useful when branch is None in detached head state // Useful when branch is None in detached head state
let head_commit = match backend.head_sha() { let head_commit = match backend.head_sha().await {
Some(head_sha) => backend.show(head_sha).await.log_err(), Some(head_sha) => backend.show(head_sha).await.log_err(),
None => None, None => None,
}; };

View file

@ -532,7 +532,7 @@ mod tests {
}, },
); );
// Cause the repository to emit MergeHeadsChanged. // Cause the repository to emit MergeHeadsChanged.
state.merge_head_shas = vec!["abc".into(), "def".into()] state.refs.insert("MERGE_HEAD".into(), "123".into())
}) })
.unwrap(); .unwrap();