Optimize construction and insertion of large `SumTree`s (#7731)
This does two things: 1. It optimizes the construction of `SumTree`s so that nodes are inserted level-by-level instead of one-by-one. That makes it more efficient to construct large `SumTree`s. 2. It adds a `from_par_iter` constructor that parallelizes the construction of `SumTree`s. In combination, **loading a 500 MB plain text file went from ~18 seconds down to ~2 seconds**. Disclaimer: I didn't write any of this code, lol! It's all @as-cii and @nathansobo. Release Notes: - Improved performance when opening very large files. --------- Co-authored-by: Antonio Scandurra <me@as-cii.com> Co-authored-by: Julia <julia@zed.dev>
This commit is contained in:
parent
798c9a7d8b
commit
33f713a8ab
4 changed files with 192 additions and 91 deletions
|
@ -84,45 +84,49 @@ impl Rope {
|
|||
self.slice(start..end)
|
||||
}
|
||||
|
||||
pub fn push(&mut self, text: &str) {
|
||||
let mut new_chunks = SmallVec::<[_; 16]>::new();
|
||||
let mut new_chunk = ArrayString::new();
|
||||
for ch in text.chars() {
|
||||
if new_chunk.len() + ch.len_utf8() > 2 * CHUNK_BASE {
|
||||
new_chunks.push(Chunk(new_chunk));
|
||||
new_chunk = ArrayString::new();
|
||||
}
|
||||
|
||||
new_chunk.push(ch);
|
||||
}
|
||||
if !new_chunk.is_empty() {
|
||||
new_chunks.push(Chunk(new_chunk));
|
||||
}
|
||||
|
||||
let mut new_chunks = new_chunks.into_iter();
|
||||
let mut first_new_chunk = new_chunks.next();
|
||||
pub fn push(&mut self, mut text: &str) {
|
||||
self.chunks.update_last(
|
||||
|last_chunk| {
|
||||
if let Some(first_new_chunk_ref) = first_new_chunk.as_mut() {
|
||||
if last_chunk.0.len() + first_new_chunk_ref.0.len() <= 2 * CHUNK_BASE {
|
||||
last_chunk.0.push_str(&first_new_chunk.take().unwrap().0);
|
||||
} else {
|
||||
let mut text = ArrayString::<{ 4 * CHUNK_BASE }>::new();
|
||||
text.push_str(&last_chunk.0);
|
||||
text.push_str(&first_new_chunk_ref.0);
|
||||
let (left, right) = text.split_at(find_split_ix(&text));
|
||||
last_chunk.0.clear();
|
||||
last_chunk.0.push_str(left);
|
||||
first_new_chunk_ref.0.clear();
|
||||
first_new_chunk_ref.0.push_str(right);
|
||||
let split_ix = if last_chunk.0.len() + text.len() <= 2 * CHUNK_BASE {
|
||||
text.len()
|
||||
} else {
|
||||
let mut split_ix =
|
||||
cmp::min(CHUNK_BASE.saturating_sub(last_chunk.0.len()), text.len());
|
||||
while !text.is_char_boundary(split_ix) {
|
||||
split_ix += 1;
|
||||
}
|
||||
}
|
||||
split_ix
|
||||
};
|
||||
|
||||
let (suffix, remainder) = text.split_at(split_ix);
|
||||
last_chunk.0.push_str(suffix);
|
||||
text = remainder;
|
||||
},
|
||||
&(),
|
||||
);
|
||||
|
||||
self.chunks
|
||||
.extend(first_new_chunk.into_iter().chain(new_chunks), &());
|
||||
let mut new_chunks = SmallVec::<[_; 16]>::new();
|
||||
while !text.is_empty() {
|
||||
let mut split_ix = cmp::min(2 * CHUNK_BASE, text.len());
|
||||
while !text.is_char_boundary(split_ix) {
|
||||
split_ix -= 1;
|
||||
}
|
||||
let (chunk, remainder) = text.split_at(split_ix);
|
||||
new_chunks.push(Chunk(ArrayString::from(chunk).unwrap()));
|
||||
text = remainder;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
const PARALLEL_THRESHOLD: usize = 4;
|
||||
#[cfg(not(test))]
|
||||
const PARALLEL_THRESHOLD: usize = 4 * (2 * sum_tree::TREE_BASE);
|
||||
|
||||
if new_chunks.len() >= PARALLEL_THRESHOLD {
|
||||
self.chunks.par_extend(new_chunks.into_vec(), &());
|
||||
} else {
|
||||
self.chunks.extend(new_chunks, &());
|
||||
}
|
||||
|
||||
self.check_invariants();
|
||||
}
|
||||
|
||||
|
@ -1167,25 +1171,6 @@ impl TextDimension for PointUtf16 {
|
|||
}
|
||||
}
|
||||
|
||||
/// Picks a byte index near the middle of `text` that lies on a UTF-8 character boundary.
///
/// The search starts at `text.len() / 2`, probes forward first and then backward.
/// In debug builds it asserts that both resulting halves are at most
/// `2 * CHUNK_BASE` bytes long.
fn find_split_ix(text: &str) -> usize {
|
||||
let mut ix = text.len() / 2;
|
||||
// Probe forward for a boundary while `ix` is below `2 * CHUNK_BASE`; once that
// limit is reached, restart just below the midpoint and leave the loop.
while !text.is_char_boundary(ix) {
|
||||
if ix < 2 * CHUNK_BASE {
|
||||
ix += 1;
|
||||
} else {
|
||||
ix = (text.len() / 2) - 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Walk backward until `ix` is on a character boundary (does nothing if it already is).
while !text.is_char_boundary(ix) {
|
||||
ix -= 1;
|
||||
}
|
||||
|
||||
// Sanity checks (debug builds only): neither side of the split exceeds `2 * CHUNK_BASE` bytes.
debug_assert!(ix <= 2 * CHUNK_BASE);
|
||||
debug_assert!(text.len() - ix <= 2 * CHUNK_BASE);
|
||||
ix
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue