diff --git a/crates/git/src/checkpoint.gitignore b/crates/git/src/checkpoint.gitignore
new file mode 100644
index 0000000000..8921e677df
--- /dev/null
+++ b/crates/git/src/checkpoint.gitignore
@@ -0,0 +1,91 @@
+# This lists files that we don't track in checkpoints
+
+# Compiled source and executables
+*.exe
+*.dll
+*.so
+*.dylib
+*.a
+*.lib
+*.o
+*.obj
+*.elf
+*.out
+*.app
+*.deb
+*.rpm
+*.dmg
+*.pkg
+*.msi
+
+# Archives and compressed files
+*.7z
+*.zip
+*.tar
+*.tar.gz
+*.tgz
+*.tar.bz2
+*.tbz2
+*.tar.xz
+*.txz
+*.rar
+*.jar
+*.war
+*.ear
+
+# Media files
+*.jpg
+*.jpeg
+*.png
+*.gif
+*.ico
+*.svg
+*.webp
+*.bmp
+*.tiff
+*.mp3
+*.mp4
+*.avi
+*.mov
+*.wmv
+*.flv
+*.mkv
+*.webm
+*.wav
+*.flac
+*.aac
+
+# Database files
+*.db
+*.sqlite
+*.sqlite3
+*.mdb
+
+# Documents (often binary)
+*.pdf
+*.doc
+*.docx
+*.xls
+*.xlsx
+*.ppt
+*.pptx
+
+# IDE and editor files
+.idea/
+.vscode/
+*.swp
+*.swo
+*~
+.DS_Store
+Thumbs.db
+
+# Language-specific files
+*.rlib
+*.rmeta
+*.pdb
+*.class
+*.egg
+*.egg-info/
+*.pyc
+*.pyo
+__pycache__
diff --git a/crates/git/src/repository.rs b/crates/git/src/repository.rs
index 0391fe8837..72f24b7285 100644
--- a/crates/git/src/repository.rs
+++ b/crates/git/src/repository.rs
@@ -193,6 +193,100 @@ pub enum ResetMode {
     Mixed,
 }
 
+/// Temporarily extends `.git/info/exclude` with additional ignore patterns.
+///
+/// The content the file had when the override was created is remembered, so it
+/// can be put back by [`GitExcludeOverride::restore_original`] or, as a
+/// best-effort fallback, when the value is dropped.
+pub struct GitExcludeOverride {
+    git_exclude_path: PathBuf,
+    /// Content of the exclude file before the override; `None` if it did not exist.
+    original_excludes: Option<String>,
+    /// Patterns appended so far; `None` while the file is untouched.
+    added_excludes: Option<String>,
+}
+
+impl GitExcludeOverride {
+    /// Captures the current content of the exclude file at `git_exclude_path`.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the file exists but cannot be read. Treating such a file as
+    /// missing would make the restore step delete it.
+    pub async fn new(git_exclude_path: PathBuf) -> Result<Self> {
+        let original_excludes = match smol::fs::read_to_string(&git_exclude_path).await {
+            Ok(content) => Some(content),
+            Err(error) if error.kind() == std::io::ErrorKind::NotFound => None,
+            Err(error) => return Err(error.into()),
+        };
+
+        Ok(GitExcludeOverride {
+            git_exclude_path,
+            original_excludes,
+            added_excludes: None,
+        })
+    }
+
+    /// Appends `excludes` to the patterns added so far and rewrites the exclude
+    /// file as the original content followed by every added pattern.
+    pub async fn add_excludes(&mut self, excludes: &str) -> Result<()> {
+        let added_excludes = match self.added_excludes.take() {
+            Some(already_added) => format!("{already_added}\n{excludes}"),
+            None => excludes.to_string(),
+        };
+
+        let mut content = self.original_excludes.clone().unwrap_or_default();
+        content.push_str("\n\n# ====== Auto-added by Zed: =======\n");
+        content.push_str(&added_excludes);
+        content.push('\n');
+        // Record the patterns before writing, so that `Drop` still restores the
+        // file if the write fails part-way.
+        self.added_excludes = Some(added_excludes);
+
+        smol::fs::write(&self.git_exclude_path, content).await?;
+        Ok(())
+    }
+
+    /// Writes the captured content back, or removes the exclude file if it did
+    /// not exist before the override, then marks the override as inactive so
+    /// that `Drop` does nothing.
+    pub async fn restore_original(&mut self) -> Result<()> {
+        match &self.original_excludes {
+            Some(original) => smol::fs::write(&self.git_exclude_path, original).await?,
+            None => match smol::fs::remove_file(&self.git_exclude_path).await {
+                Ok(()) => {}
+                // The file is already gone, so there is nothing left to undo.
+                Err(error) if error.kind() == std::io::ErrorKind::NotFound => {}
+                Err(error) => return Err(error.into()),
+            },
+        }
+
+        self.added_excludes = None;
+
+        Ok(())
+    }
+}
+
+impl Drop for GitExcludeOverride {
+    /// Best-effort fallback for code paths that never reached
+    /// `restore_original`: the restore runs on a detached task, so it is not
+    /// awaited here and its result is discarded.
+    fn drop(&mut self) {
+        if self.added_excludes.is_some() {
+            let git_exclude_path = self.git_exclude_path.clone();
+            let original_excludes = self.original_excludes.clone();
+            smol::spawn(async move {
+                if let Some(original) = original_excludes {
+                    smol::fs::write(&git_exclude_path, original).await
+                } else {
+                    smol::fs::remove_file(&git_exclude_path).await
+                }
+            })
+            .detach();
+        }
+    }
+}
+
 pub trait GitRepository: Send + Sync {
     fn reload_index(&self);
 
@@ -1263,10 +1357,12 @@ impl GitRepository for RealGitRepository {
         self.executor
             .spawn(async move {
                 let working_directory = working_directory?;
-                let mut git = GitBinary::new(git_binary_path, working_directory, executor)
+                let mut git = GitBinary::new(git_binary_path, working_directory.clone(), executor)
                     .envs(checkpoint_author_envs());
                 git.with_temp_index(async |git| {
                     let head_sha = git.run(&["rev-parse", "HEAD"]).await.ok();
+                    let mut excludes = exclude_files(git).await?;
+
                     git.run(&["add", "--all"]).await?;
                     let tree = git.run(&["write-tree"]).await?;
                     let checkpoint_sha = if let Some(head_sha) = head_sha.as_deref() {
@@ -1276,6 +1372,8 @@ impl GitRepository for RealGitRepository {
                         git.run(&["commit-tree", &tree, "-m", "Checkpoint"]).await?
                     };
 
+                    excludes.restore_original().await?;
+
                     Ok(GitRepositoryCheckpoint {
                         commit_sha: checkpoint_sha.parse()?,
                     })
@@ -1294,7 +1392,7 @@ impl GitRepository for RealGitRepository {
             .spawn(async move {
                 let working_directory = working_directory?;
 
-                let mut git = GitBinary::new(git_binary_path, working_directory, executor);
+                let git = GitBinary::new(git_binary_path, working_directory, executor);
                 git.run(&[
                     "restore",
                     "--source",
@@ -1304,12 +1402,16 @@ impl GitRepository for RealGitRepository {
                 ])
                 .await?;
 
-                git.with_temp_index(async move |git| {
-                    git.run(&["read-tree", &checkpoint.commit_sha.to_string()])
-                        .await?;
-                    git.run(&["clean", "-d", "--force"]).await
-                })
-                .await?;
+                // TODO: We don't track binary and large files anymore,
+                //       so the following call would delete them.
+                //       Implement an alternative way to track files added by agent.
+                //
+                // git.with_temp_index(async move |git| {
+                //     git.run(&["read-tree", &checkpoint.commit_sha.to_string()])
+                //         .await?;
+                //     git.run(&["clean", "-d", "--force"]).await
+                // })
+                // .await?;
 
                 Ok(())
             })
@@ -1400,6 +1502,56 @@ fn git_status_args(path_prefixes: &[RepoPath]) -> Vec<OsString> {
     args
 }
 
+/// Temporarily git-ignores the patterns from `checkpoint.gitignore` plus every
+/// untracked file of 2 MB or more.
+///
+/// The returned override keeps the exclusions in place until the caller invokes
+/// `restore_original` on it.
+async fn exclude_files(git: &GitBinary) -> Result<GitExcludeOverride> {
+    const MAX_SIZE: u64 = 2 * 1024 * 1024; // 2 MB
+    let mut excludes = git.with_exclude_overrides().await?;
+    excludes
+        .add_excludes(include_str!("./checkpoint.gitignore"))
+        .await?;
+
+    let untracked_files = git.list_untracked_files().await?;
+    let size_checks = untracked_files.into_iter().map(|path| {
+        let full_path = git.working_directory.join(&path);
+        smol::spawn(async move {
+            match smol::fs::metadata(&full_path).await {
+                Ok(metadata) if metadata.is_file() && metadata.len() >= MAX_SIZE => Some(path),
+                _ => None,
+            }
+        })
+    });
+
+    let exclude_patterns = futures::future::join_all(size_checks)
+        .await
+        .into_iter()
+        .flatten()
+        .map(|path| {
+            // A leading `/` anchors the pattern at the repository root. Characters
+            // that are special in gitignore patterns are backslash-escaped, so the
+            // pattern matches this exact path only.
+            let mut pattern = String::from("/");
+            for character in path.to_string_lossy().chars() {
+                if matches!(character, '\\' | '*' | '?' | '[' | ']' | ' ') {
+                    pattern.push('\\');
+                }
+                pattern.push(character);
+            }
+            pattern
+        })
+        .collect::<Vec<_>>()
+        .join("\n");
+
+    if !exclude_patterns.is_empty() {
+        excludes.add_excludes(&exclude_patterns).await?;
+    }
+
+    Ok(excludes)
+}
+
 struct GitBinary {
     git_binary_path: PathBuf,
     working_directory: PathBuf,
@@ -1423,6 +1575,22 @@ impl GitBinary {
         }
     }
 
+    /// Lists the untracked files reported by `git status`.
+    async fn list_untracked_files(&self) -> Result<Vec<PathBuf>> {
+        let status_output = self
+            .run(&["status", "--porcelain=v1", "--untracked-files=all", "-z"])
+            .await?;
+
+        // With `-z` the entries are NUL-separated; untracked ones carry a `?? `
+        // prefix in front of the path.
+        let paths = status_output
+            .split('\0')
+            .filter_map(|entry| entry.strip_prefix("?? "))
+            .map(PathBuf::from)
+            .collect::<Vec<_>>();
+        Ok(paths)
+    }
+
     fn envs(mut self, envs: HashMap<String, String>) -> Self {
         self.envs = envs;
         self
@@ -1466,6 +1634,21 @@ impl GitBinary {
         Ok(result)
     }
 
+    /// Creates a [`GitExcludeOverride`] for `.git/info/exclude` inside the
+    /// working directory.
+    // NOTE(review): this assumes `.git` is a directory directly under the
+    // working directory. Linked worktrees and submodules use a `.git` file
+    // instead — confirm whether they need to be supported here.
+    pub async fn with_exclude_overrides(&self) -> Result<GitExcludeOverride> {
+        let path = self
+            .working_directory
+            .join(".git")
+            .join("info")
+            .join("exclude");
+
+        GitExcludeOverride::new(path).await
+    }
+
     fn path_for_index_id(&self, id: Uuid) -> PathBuf {
         self.working_directory
             .join(".git")
@@ -1878,12 +2061,13 @@ mod tests {
                 .unwrap(),
             "1"
         );
-        assert_eq!(
-            smol::fs::read_to_string(repo_dir.path().join("new_file_after_checkpoint"))
-                .await
-                .ok(),
-            None
-        );
+        // See TODO above
+        // assert_eq!(
+        //     smol::fs::read_to_string(repo_dir.path().join("new_file_after_checkpoint"))
+        //         .await
+        //         .ok(),
+        //     None
+        // );
     }
 
     #[gpui::test]
@@ -1916,12 +2100,13 @@ mod tests {
                 .unwrap(),
             "foo"
         );
-        assert_eq!(
-            smol::fs::read_to_string(repo_dir.path().join("baz"))
-                .await
-                .ok(),
-            None
-        );
+        // See TODOs above
+        // assert_eq!(
+        //     smol::fs::read_to_string(repo_dir.path().join("baz"))
+        //         .await
+        //         .ok(),
+        //     None
+        // );
     }
 
     #[gpui::test]
@@ -1958,6 +2143,65 @@ mod tests {
         );
     }
 
+    #[gpui::test]
+    async fn test_checkpoint_exclude_binary_files(cx: &mut TestAppContext) {
+        cx.executor().allow_parking();
+
+        let repo_dir = tempfile::tempdir().unwrap();
+        let text_path = repo_dir.path().join("main.rs");
+        let bin_path = repo_dir.path().join("binary.o");
+
+        git2::Repository::init(repo_dir.path()).unwrap();
+
+        smol::fs::write(&text_path, "fn main() {}").await.unwrap();
+
+        smol::fs::write(&bin_path, "some binary file here")
+            .await
+            .unwrap();
+
+        let repo =
+            RealGitRepository::new(&repo_dir.path().join(".git"), None, cx.executor()).unwrap();
+
+        // initial commit
+        repo.stage_paths(
+            vec![RepoPath::from_str("main.rs")],
+            Arc::new(HashMap::default()),
+        )
+        .await
+        .unwrap();
+        repo.commit(
+            "Initial commit".into(),
+            None,
+            CommitOptions::default(),
+            Arc::new(checkpoint_author_envs()),
+        )
+        .await
+        .unwrap();
+
+        let checkpoint = repo.checkpoint().await.unwrap();
+
+        smol::fs::write(&text_path, "fn main() { println!(\"Modified\"); }")
+            .await
+            .unwrap();
+        smol::fs::write(&bin_path, "Modified binary file")
+            .await
+            .unwrap();
+
+        repo.restore_checkpoint(checkpoint).await.unwrap();
+
+        // Text files should be restored to checkpoint state,
+        // but binaries should not (they aren't tracked)
+        assert_eq!(
+            smol::fs::read_to_string(&text_path).await.unwrap(),
+            "fn main() {}"
+        );
+
+        assert_eq!(
+            smol::fs::read_to_string(&bin_path).await.unwrap(),
+            "Modified binary file"
+        );
+    }
+
     #[test]
     fn test_branches_parsing() {
         // suppress "help: octal escapes are not supported, `\0` is always null"