agent: Don't track large and common binary files (#31352)
## Issue The agent may run very slowly on projects that contain many or large binary files not listed in `.gitignore`. ## Solution Temporarily rewrite `.git/info/exclude` to ignore: - Common binary files based on the extension - Files larger than 2 MB ## Benchmark I measure the time between sending an agent message in the UI ("hitting Enter") and actually sending it to an LLM. Ideally, it should be instant. Numbers for a 7.7 GB Rust project with no .gitignore. Filter | Time ----------------------------------|----- No filter (= before this change) | 62 s Exclude common file types only | 1.46 s Exclude files >2MB only | 1.16 s Exclude both | 0.10 s ## Planned changes: - [x] Exclude common binary file types - [x] Exclude large files - [ ] Track files added by agent so we could delete them (we can't rely on git for that anymore) - [ ] Don't block on waiting for a checkpoint to complete until we really need it - [ ] Only `git add` files that are about to change Closes #ISSUE Release Notes: - Improved agent latency on repositories containing many files or large files
This commit is contained in:
parent
134463f043
commit
9da9ef860b
2 changed files with 307 additions and 20 deletions
91
crates/git/src/checkpoint.gitignore
Normal file
91
crates/git/src/checkpoint.gitignore
Normal file
|
@ -0,0 +1,91 @@
|
|||
# This lists files that we don't track in checkpoints
|
||||
|
||||
# Compiled source and executables
|
||||
*.exe
|
||||
*.dll
|
||||
*.so
|
||||
*.dylib
|
||||
*.a
|
||||
*.lib
|
||||
*.o
|
||||
*.obj
|
||||
*.elf
|
||||
*.out
|
||||
*.app
|
||||
*.deb
|
||||
*.rpm
|
||||
*.dmg
|
||||
*.pkg
|
||||
*.msi
|
||||
|
||||
# Archives and compressed files
|
||||
*.7z
|
||||
*.zip
|
||||
*.tar
|
||||
*.tar.gz
|
||||
*.tgz
|
||||
*.tar.bz2
|
||||
*.tbz2
|
||||
*.tar.xz
|
||||
*.txz
|
||||
*.rar
|
||||
*.jar
|
||||
*.war
|
||||
*.ear
|
||||
|
||||
# Media files
|
||||
*.jpg
|
||||
*.jpeg
|
||||
*.png
|
||||
*.gif
|
||||
*.ico
|
||||
*.svg
|
||||
*.webp
|
||||
*.bmp
|
||||
*.tiff
|
||||
*.mp3
|
||||
*.mp4
|
||||
*.avi
|
||||
*.mov
|
||||
*.wmv
|
||||
*.flv
|
||||
*.mkv
|
||||
*.webm
|
||||
*.wav
|
||||
*.flac
|
||||
*.aac
|
||||
|
||||
# Database files
|
||||
*.db
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
*.mdb
|
||||
|
||||
# Documents (often binary)
|
||||
*.pdf
|
||||
*.doc
|
||||
*.docx
|
||||
*.xls
|
||||
*.xlsx
|
||||
*.ppt
|
||||
*.pptx
|
||||
|
||||
# IDE and editor files
|
||||
.idea/
|
||||
.vscode/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Language-specific files
|
||||
*.rlib
|
||||
*.rmeta
|
||||
*.pdb
|
||||
*.class
|
||||
*.egg
|
||||
*.egg-info/
|
||||
*.pyc
|
||||
*.pyo
|
||||
__pycache__
|
|
@ -193,6 +193,72 @@ pub enum ResetMode {
|
|||
Mixed,
|
||||
}
|
||||
|
||||
/// Temporary override of a repository's `.git/info/exclude` file.
///
/// The file's contents are captured on construction, extra patterns are
/// appended with `add_excludes`, and the captured contents are written back by
/// `restore_original` (or, as a fallback, when the value is dropped while
/// patterns are still applied).
pub struct GitExcludeOverride {
    /// Location of the exclude file being overridden.
    git_exclude_path: PathBuf,
    /// Contents read when the override was created; `None` if the file could
    /// not be read (for example because it did not exist).
    original_excludes: Option<String>,
    /// Patterns appended so far, newline-separated; `None` while nothing is
    /// applied.
    added_excludes: Option<String>,
}
|
||||
|
||||
impl GitExcludeOverride {
|
||||
pub async fn new(git_exclude_path: PathBuf) -> Result<Self> {
|
||||
let original_excludes = smol::fs::read_to_string(&git_exclude_path).await.ok();
|
||||
|
||||
Ok(GitExcludeOverride {
|
||||
git_exclude_path,
|
||||
original_excludes,
|
||||
added_excludes: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn add_excludes(&mut self, excludes: &str) -> Result<()> {
|
||||
self.added_excludes = Some(if let Some(ref already_added) = self.added_excludes {
|
||||
format!("{already_added}\n{excludes}")
|
||||
} else {
|
||||
excludes.to_string()
|
||||
});
|
||||
|
||||
let mut content = self.original_excludes.clone().unwrap_or_default();
|
||||
content.push_str("\n\n# ====== Auto-added by Zed: =======\n");
|
||||
content.push_str(self.added_excludes.as_ref().unwrap());
|
||||
content.push('\n');
|
||||
|
||||
smol::fs::write(&self.git_exclude_path, content).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn restore_original(&mut self) -> Result<()> {
|
||||
if let Some(ref original) = self.original_excludes {
|
||||
smol::fs::write(&self.git_exclude_path, original).await?;
|
||||
} else {
|
||||
if self.git_exclude_path.exists() {
|
||||
smol::fs::remove_file(&self.git_exclude_path).await?;
|
||||
}
|
||||
}
|
||||
|
||||
self.added_excludes = None;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for GitExcludeOverride {
|
||||
fn drop(&mut self) {
|
||||
if self.added_excludes.is_some() {
|
||||
let git_exclude_path = self.git_exclude_path.clone();
|
||||
let original_excludes = self.original_excludes.clone();
|
||||
smol::spawn(async move {
|
||||
if let Some(original) = original_excludes {
|
||||
smol::fs::write(&git_exclude_path, original).await
|
||||
} else {
|
||||
smol::fs::remove_file(&git_exclude_path).await
|
||||
}
|
||||
})
|
||||
.detach();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait GitRepository: Send + Sync {
|
||||
fn reload_index(&self);
|
||||
|
||||
|
@ -1263,10 +1329,12 @@ impl GitRepository for RealGitRepository {
|
|||
self.executor
|
||||
.spawn(async move {
|
||||
let working_directory = working_directory?;
|
||||
let mut git = GitBinary::new(git_binary_path, working_directory, executor)
|
||||
let mut git = GitBinary::new(git_binary_path, working_directory.clone(), executor)
|
||||
.envs(checkpoint_author_envs());
|
||||
git.with_temp_index(async |git| {
|
||||
let head_sha = git.run(&["rev-parse", "HEAD"]).await.ok();
|
||||
let mut excludes = exclude_files(git).await?;
|
||||
|
||||
git.run(&["add", "--all"]).await?;
|
||||
let tree = git.run(&["write-tree"]).await?;
|
||||
let checkpoint_sha = if let Some(head_sha) = head_sha.as_deref() {
|
||||
|
@ -1276,6 +1344,8 @@ impl GitRepository for RealGitRepository {
|
|||
git.run(&["commit-tree", &tree, "-m", "Checkpoint"]).await?
|
||||
};
|
||||
|
||||
excludes.restore_original().await?;
|
||||
|
||||
Ok(GitRepositoryCheckpoint {
|
||||
commit_sha: checkpoint_sha.parse()?,
|
||||
})
|
||||
|
@ -1294,7 +1364,7 @@ impl GitRepository for RealGitRepository {
|
|||
.spawn(async move {
|
||||
let working_directory = working_directory?;
|
||||
|
||||
let mut git = GitBinary::new(git_binary_path, working_directory, executor);
|
||||
let git = GitBinary::new(git_binary_path, working_directory, executor);
|
||||
git.run(&[
|
||||
"restore",
|
||||
"--source",
|
||||
|
@ -1304,12 +1374,16 @@ impl GitRepository for RealGitRepository {
|
|||
])
|
||||
.await?;
|
||||
|
||||
git.with_temp_index(async move |git| {
|
||||
git.run(&["read-tree", &checkpoint.commit_sha.to_string()])
|
||||
.await?;
|
||||
git.run(&["clean", "-d", "--force"]).await
|
||||
})
|
||||
.await?;
|
||||
// TODO: We don't track binary and large files anymore,
|
||||
// so the following call would delete them.
|
||||
// Implement an alternative way to track files added by agent.
|
||||
//
|
||||
// git.with_temp_index(async move |git| {
|
||||
// git.run(&["read-tree", &checkpoint.commit_sha.to_string()])
|
||||
// .await?;
|
||||
// git.run(&["clean", "-d", "--force"]).await
|
||||
// })
|
||||
// .await?;
|
||||
|
||||
Ok(())
|
||||
})
|
||||
|
@ -1400,6 +1474,44 @@ fn git_status_args(path_prefixes: &[RepoPath]) -> Vec<OsString> {
|
|||
args
|
||||
}
|
||||
|
||||
/// Temporarily git-ignore commonly ignored files and files over 2MB
|
||||
async fn exclude_files(git: &GitBinary) -> Result<GitExcludeOverride> {
|
||||
const MAX_SIZE: u64 = 2 * 1024 * 1024; // 2 MB
|
||||
let mut excludes = git.with_exclude_overrides().await?;
|
||||
excludes
|
||||
.add_excludes(include_str!("./checkpoint.gitignore"))
|
||||
.await?;
|
||||
|
||||
let working_directory = git.working_directory.clone();
|
||||
let untracked_files = git.list_untracked_files().await?;
|
||||
let excluded_paths = untracked_files.into_iter().map(|path| {
|
||||
let working_directory = working_directory.clone();
|
||||
smol::spawn(async move {
|
||||
let full_path = working_directory.join(path.clone());
|
||||
match smol::fs::metadata(&full_path).await {
|
||||
Ok(metadata) if metadata.is_file() && metadata.len() >= MAX_SIZE => {
|
||||
Some(PathBuf::from("/").join(path.clone()))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
})
|
||||
});
|
||||
|
||||
let excluded_paths = futures::future::join_all(excluded_paths).await;
|
||||
let excluded_paths = excluded_paths.into_iter().flatten().collect::<Vec<_>>();
|
||||
|
||||
if !excluded_paths.is_empty() {
|
||||
let exclude_patterns = excluded_paths
|
||||
.into_iter()
|
||||
.map(|path| path.to_string_lossy().to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
excludes.add_excludes(&exclude_patterns).await?;
|
||||
}
|
||||
|
||||
Ok(excludes)
|
||||
}
|
||||
|
||||
struct GitBinary {
|
||||
git_binary_path: PathBuf,
|
||||
working_directory: PathBuf,
|
||||
|
@ -1423,6 +1535,19 @@ impl GitBinary {
|
|||
}
|
||||
}
|
||||
|
||||
/// Lists every untracked path reported by `git status`, as given by git.
///
/// Status is requested in NUL-separated porcelain v1 form with all untracked
/// files listed; only entries carrying the `?? ` marker are kept.
async fn list_untracked_files(&self) -> Result<Vec<PathBuf>> {
    let porcelain = self
        .run(&["status", "--porcelain=v1", "--untracked-files=all", "-z"])
        .await?;

    let untracked = porcelain
        .split('\0')
        .filter_map(|record| record.strip_prefix("?? "))
        .map(PathBuf::from)
        .collect();
    Ok(untracked)
}
|
||||
|
||||
fn envs(mut self, envs: HashMap<String, String>) -> Self {
|
||||
self.envs = envs;
|
||||
self
|
||||
|
@ -1466,6 +1591,16 @@ impl GitBinary {
|
|||
Ok(result)
|
||||
}
|
||||
|
||||
pub async fn with_exclude_overrides(&self) -> Result<GitExcludeOverride> {
|
||||
let path = self
|
||||
.working_directory
|
||||
.join(".git")
|
||||
.join("info")
|
||||
.join("exclude");
|
||||
|
||||
GitExcludeOverride::new(path).await
|
||||
}
|
||||
|
||||
fn path_for_index_id(&self, id: Uuid) -> PathBuf {
|
||||
self.working_directory
|
||||
.join(".git")
|
||||
|
@ -1878,12 +2013,13 @@ mod tests {
|
|||
.unwrap(),
|
||||
"1"
|
||||
);
|
||||
assert_eq!(
|
||||
smol::fs::read_to_string(repo_dir.path().join("new_file_after_checkpoint"))
|
||||
.await
|
||||
.ok(),
|
||||
None
|
||||
);
|
||||
// See TODO above
|
||||
// assert_eq!(
|
||||
// smol::fs::read_to_string(repo_dir.path().join("new_file_after_checkpoint"))
|
||||
// .await
|
||||
// .ok(),
|
||||
// None
|
||||
// );
|
||||
}
|
||||
|
||||
#[gpui::test]
|
||||
|
@ -1916,12 +2052,13 @@ mod tests {
|
|||
.unwrap(),
|
||||
"foo"
|
||||
);
|
||||
assert_eq!(
|
||||
smol::fs::read_to_string(repo_dir.path().join("baz"))
|
||||
.await
|
||||
.ok(),
|
||||
None
|
||||
);
|
||||
// See TODOs above
|
||||
// assert_eq!(
|
||||
// smol::fs::read_to_string(repo_dir.path().join("baz"))
|
||||
// .await
|
||||
// .ok(),
|
||||
// None
|
||||
// );
|
||||
}
|
||||
|
||||
#[gpui::test]
|
||||
|
@ -1958,6 +2095,65 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[gpui::test]
|
||||
async fn test_checkpoint_exclude_binary_files(cx: &mut TestAppContext) {
|
||||
cx.executor().allow_parking();
|
||||
|
||||
let repo_dir = tempfile::tempdir().unwrap();
|
||||
let text_path = repo_dir.path().join("main.rs");
|
||||
let bin_path = repo_dir.path().join("binary.o");
|
||||
|
||||
git2::Repository::init(repo_dir.path()).unwrap();
|
||||
|
||||
smol::fs::write(&text_path, "fn main() {}").await.unwrap();
|
||||
|
||||
smol::fs::write(&bin_path, "some binary file here")
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let repo =
|
||||
RealGitRepository::new(&repo_dir.path().join(".git"), None, cx.executor()).unwrap();
|
||||
|
||||
// initial commit
|
||||
repo.stage_paths(
|
||||
vec![RepoPath::from_str("main.rs")],
|
||||
Arc::new(HashMap::default()),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
repo.commit(
|
||||
"Initial commit".into(),
|
||||
None,
|
||||
CommitOptions::default(),
|
||||
Arc::new(checkpoint_author_envs()),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let checkpoint = repo.checkpoint().await.unwrap();
|
||||
|
||||
smol::fs::write(&text_path, "fn main() { println!(\"Modified\"); }")
|
||||
.await
|
||||
.unwrap();
|
||||
smol::fs::write(&bin_path, "Modified binary file")
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
repo.restore_checkpoint(checkpoint).await.unwrap();
|
||||
|
||||
// Text files should be restored to checkpoint state,
|
||||
// but binaries should not (they aren't tracked)
|
||||
assert_eq!(
|
||||
smol::fs::read_to_string(&text_path).await.unwrap(),
|
||||
"fn main() {}"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
smol::fs::read_to_string(&bin_path).await.unwrap(),
|
||||
"Modified binary file"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_branches_parsing() {
|
||||
// suppress "help: octal escapes are not supported, `\0` is always null"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue