Optimize construction and insertion of large SumTrees (#7731)

This does two things:

1. It optimizes the constructions of `SumTree`s to not insert nodes
one-by-one, but instead inserts them level-by-level. That makes it more
efficient to construct large `SumTree`s.
2. It adds a `from_par_iter` constructor that parallelizes the
construction of `SumTree`s.

In combination, **loading a 500MB plain text file went from
~18 seconds down to ~2 seconds**.

Disclaimer: I didn't write any of this code, lol! It's all @as-cii and
@nathansobo.

Release Notes:

- Improved performance when opening very large files.

---------

Co-authored-by: Antonio Scandurra <me@as-cii.com>
Co-authored-by: Julia <julia@zed.dev>
This commit is contained in:
Thorsten Ball 2024-02-13 16:24:40 +01:00 committed by GitHub
parent 798c9a7d8b
commit 33f713a8ab
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 192 additions and 91 deletions

View file

@ -84,45 +84,49 @@ impl Rope {
self.slice(start..end)
}
pub fn push(&mut self, text: &str) {
let mut new_chunks = SmallVec::<[_; 16]>::new();
let mut new_chunk = ArrayString::new();
for ch in text.chars() {
if new_chunk.len() + ch.len_utf8() > 2 * CHUNK_BASE {
new_chunks.push(Chunk(new_chunk));
new_chunk = ArrayString::new();
}
new_chunk.push(ch);
}
if !new_chunk.is_empty() {
new_chunks.push(Chunk(new_chunk));
}
let mut new_chunks = new_chunks.into_iter();
let mut first_new_chunk = new_chunks.next();
pub fn push(&mut self, mut text: &str) {
self.chunks.update_last(
|last_chunk| {
if let Some(first_new_chunk_ref) = first_new_chunk.as_mut() {
if last_chunk.0.len() + first_new_chunk_ref.0.len() <= 2 * CHUNK_BASE {
last_chunk.0.push_str(&first_new_chunk.take().unwrap().0);
} else {
let mut text = ArrayString::<{ 4 * CHUNK_BASE }>::new();
text.push_str(&last_chunk.0);
text.push_str(&first_new_chunk_ref.0);
let (left, right) = text.split_at(find_split_ix(&text));
last_chunk.0.clear();
last_chunk.0.push_str(left);
first_new_chunk_ref.0.clear();
first_new_chunk_ref.0.push_str(right);
let split_ix = if last_chunk.0.len() + text.len() <= 2 * CHUNK_BASE {
text.len()
} else {
let mut split_ix =
cmp::min(CHUNK_BASE.saturating_sub(last_chunk.0.len()), text.len());
while !text.is_char_boundary(split_ix) {
split_ix += 1;
}
}
split_ix
};
let (suffix, remainder) = text.split_at(split_ix);
last_chunk.0.push_str(suffix);
text = remainder;
},
&(),
);
self.chunks
.extend(first_new_chunk.into_iter().chain(new_chunks), &());
let mut new_chunks = SmallVec::<[_; 16]>::new();
while !text.is_empty() {
let mut split_ix = cmp::min(2 * CHUNK_BASE, text.len());
while !text.is_char_boundary(split_ix) {
split_ix -= 1;
}
let (chunk, remainder) = text.split_at(split_ix);
new_chunks.push(Chunk(ArrayString::from(chunk).unwrap()));
text = remainder;
}
#[cfg(test)]
const PARALLEL_THRESHOLD: usize = 4;
#[cfg(not(test))]
const PARALLEL_THRESHOLD: usize = 4 * (2 * sum_tree::TREE_BASE);
if new_chunks.len() >= PARALLEL_THRESHOLD {
self.chunks.par_extend(new_chunks.into_vec(), &());
} else {
self.chunks.extend(new_chunks, &());
}
self.check_invariants();
}
@ -1167,25 +1171,6 @@ impl TextDimension for PointUtf16 {
}
}
// Returns a byte index at which `text` may be split into two chunks,
// aiming for the midpoint while landing on a UTF-8 `char` boundary.
// The debug asserts below indicate the intended contract: each resulting
// half fits within `2 * CHUNK_BASE` bytes (`CHUNK_BASE` is a file-level
// constant not visible in this excerpt — presumably the rope's chunk
// capacity unit; confirm against the surrounding file).
fn find_split_ix(text: &str) -> usize {
// Start at the midpoint.
let mut ix = text.len() / 2;
// Scan forward to the next char boundary, but only while the left half
// would stay under the 2 * CHUNK_BASE cap; if the forward scan would
// exceed the cap, reset to just below the midpoint and fall through to
// the backward scan instead.
while !text.is_char_boundary(ix) {
if ix < 2 * CHUNK_BASE {
ix += 1;
} else {
ix = (text.len() / 2) - 1;
break;
}
}
// Scan backward to the nearest char boundary (no-op if the forward scan
// already landed on one).
while !text.is_char_boundary(ix) {
ix -= 1;
}
// Invariant checks: both halves must fit in a chunk of 2 * CHUNK_BASE bytes.
debug_assert!(ix <= 2 * CHUNK_BASE);
debug_assert!(text.len() - ix <= 2 * CHUNK_BASE);
ix
}
#[cfg(test)]
mod tests {
use super::*;