Chunk git status entries (#25627)

Prevents us from trying to write 5M untracked files to Postgres in one commit

Release Notes:

- N/A *or* Added/Fixed/Improved ...
This commit is contained in:
Conrad Irwin 2025-02-26 12:38:16 -07:00 committed by GitHub
parent b2a685f00a
commit afb0fd609b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 32 additions and 23 deletions

View file

@@ -18,7 +18,6 @@ doctest = false
[dependencies]
anyhow.workspace = true
collections.workspace = true
prost.workspace = true
serde.workspace = true

View file

@@ -7,7 +7,6 @@ mod typed_envelope;
pub use error::*;
pub use typed_envelope::*;
use collections::HashMap;
pub use prost::{DecodeError, Message};
use serde::Serialize;
use std::{
@@ -746,16 +745,10 @@ pub const MAX_WORKTREE_UPDATE_MAX_CHUNK_SIZE: usize = 2;
pub const MAX_WORKTREE_UPDATE_MAX_CHUNK_SIZE: usize = 256;
pub fn split_worktree_update(mut message: UpdateWorktree) -> impl Iterator<Item = UpdateWorktree> {
let mut done_files = false;
let mut repository_map = message
.updated_repositories
.into_iter()
.map(|repo| (repo.work_directory_id, repo))
.collect::<HashMap<_, _>>();
let mut done = false;
iter::from_fn(move || {
if done_files {
if done {
return None;
}
@@ -777,28 +770,45 @@ pub fn split_worktree_update(mut message: UpdateWorktree) -> impl Iterator<Item
.drain(..removed_entries_chunk_size)
.collect();
done_files = message.updated_entries.is_empty() && message.removed_entries.is_empty();
let mut updated_repositories = Vec::new();
let mut limit = MAX_WORKTREE_UPDATE_MAX_CHUNK_SIZE;
while let Some(repo) = message.updated_repositories.first_mut() {
let updated_statuses_limit = cmp::min(repo.updated_statuses.len(), limit);
let removed_statuses_limit = cmp::min(repo.removed_statuses.len(), limit);
if !repository_map.is_empty() {
for entry in &updated_entries {
if let Some(repo) = repository_map.remove(&entry.id) {
updated_repositories.push(repo);
}
updated_repositories.push(RepositoryEntry {
work_directory_id: repo.work_directory_id,
branch: repo.branch.clone(),
branch_summary: repo.branch_summary.clone(),
updated_statuses: repo
.updated_statuses
.drain(..updated_statuses_limit)
.collect(),
removed_statuses: repo
.removed_statuses
.drain(..removed_statuses_limit)
.collect(),
current_merge_conflicts: repo.current_merge_conflicts.clone(),
});
if repo.removed_statuses.is_empty() && repo.updated_statuses.is_empty() {
message.updated_repositories.remove(0);
}
limit = limit.saturating_sub(removed_statuses_limit + updated_statuses_limit);
if limit == 0 {
break;
}
}
let removed_repositories = if done_files {
done = message.updated_entries.is_empty()
&& message.removed_entries.is_empty()
&& message.updated_repositories.is_empty();
let removed_repositories = if done {
mem::take(&mut message.removed_repositories)
} else {
Default::default()
};
if done_files {
updated_repositories.extend(mem::take(&mut repository_map).into_values());
}
Some(UpdateWorktree {
project_id: message.project_id,
worktree_id: message.worktree_id,
@@ -807,7 +817,7 @@ pub fn split_worktree_update(mut message: UpdateWorktree) -> impl Iterator<Item
updated_entries,
removed_entries,
scan_id: message.scan_id,
is_last_update: done_files && message.is_last_update,
is_last_update: done && message.is_last_update,
updated_repositories,
removed_repositories,
})