Start work on optimizing tab map

We currently iterate over each character when looking for tab bytes, even though chunks keep a bitmask that represents each tab position. This commit is the first step in using the bitmask.

Co-authored-by: Remco Smits <djsmits12@gmail.com>
Co-authored-by: Cole Miller <m@cole-miller.net>
2025-06-05 17:34:17 -04:00 · 2025-06-05 17:34:17 -04:00 · 765ed65e88
commit 765ed65e88
parent 52770cd3ad
7 changed files with 284 additions and 42 deletions
--- a/crates/editor/src/display_map/fold_map.rs
+++ b/crates/editor/src/display_map/fold_map.rs
@ -863,6 +863,14 @@ impl FoldSnapshot {
        .flat_map(|chunk| chunk.text.chars())
    }

+    /// Returns the chunks covering everything from `start` to the end of
+    /// this snapshot, requested with `false` as the second `chunks`
+    /// argument and default highlights.
+    pub fn chunks_at(&self, start: FoldPoint) -> FoldChunks<'_> {
+        let range = start.to_offset(self)..self.len();
+        self.chunks(range, false, Highlights::default())
+    }
+
    #[cfg(test)]
    pub fn clip_offset(&self, offset: FoldOffset, bias: Bias) -> FoldOffset {
        if offset > self.len() {
@ -1263,6 +1271,8 @@ pub struct Chunk<'a> {
    pub is_inlay: bool,
    /// An optional recipe for how the chunk should be presented.
    pub renderer: Option<ChunkRenderer>,
+    /// The location of tab characters in the chunk.
+    pub tabs: u128,
 }

 /// A recipe for how the chunk should be presented.
@ -1410,6 +1420,7 @@ impl<'a> Iterator for FoldChunks<'a> {

            chunk.text = &chunk.text
                [(self.inlay_offset - buffer_chunk_start).0..(chunk_end - buffer_chunk_start).0];
+            chunk.tabs = chunk.tabs >> (self.inlay_offset - buffer_chunk_start).0;

            if chunk_end == transform_end {
                self.transform_cursor.next(&());
@ -1421,6 +1432,7 @@ impl<'a> Iterator for FoldChunks<'a> {
            self.output_offset.0 += chunk.text.len();
            return Some(Chunk {
                text: chunk.text,
+                tabs: chunk.tabs,
                syntax_highlight_id: chunk.syntax_highlight_id,
                highlight_style: chunk.highlight_style,
                diagnostic_severity: chunk.diagnostic_severity,
--- a/crates/editor/src/display_map/tab_map.rs
+++ b/crates/editor/src/display_map/tab_map.rs
@ -305,10 +305,13 @@ impl TabSnapshot {
    }

    pub fn to_fold_point(&self, output: TabPoint, bias: Bias) -> (FoldPoint, u32, u32) {
-        let chars = self.fold_snapshot.chars_at(FoldPoint::new(output.row(), 0));
+        let chunks = self
+            .fold_snapshot
+            .chunks_at(FoldPoint::new(output.row(), 0));
+        let tab_cursor = TabStopCursor::new(chunks);
        let expanded = output.column();
        let (collapsed, expanded_char_column, to_next_stop) =
-            self.collapse_tabs(chars, expanded, bias);
+            self.collapse_tabs(tab_cursor, expanded, bias);
        (
            FoldPoint::new(output.row(), collapsed),
            expanded_char_column,
@ -354,53 +357,89 @@ impl TabSnapshot {
        expanded_bytes + column.saturating_sub(collapsed_bytes)
    }

-    fn collapse_tabs(
-        &self,
-        chars: impl Iterator<Item = char>,
-        column: u32,
-        bias: Bias,
-    ) -> (u32, u32, u32) {
+    /// Maps `column`, measured with every tab expanded to spaces, back to a
+    /// column in the collapsed text that `cursor` walks over.
+    ///
+    /// Returns `(collapsed_column, expanded_char_column, to_next_stop)`.
+    /// When `column` lands inside an expanded tab, `bias` picks the side:
+    /// `Bias::Left` reports the tab's own position plus the number of
+    /// columns left until the tab ends, `Bias::Right` reports the position
+    /// just past the tab with `0` remaining.
+    ///
+    /// NOTE(review): unlike the per-character implementation this replaces,
+    /// `self.max_expansion_column` is not consulted here yet.
+    fn collapse_tabs(&self, mut cursor: TabStopCursor, column: u32, bias: Bias) -> (u32, u32, u32) {
         let tab_size = self.tab_size.get();
+        let mut collapsed_column = column;
+        let mut tab_count = 0;
+        let mut expanded_tab_len = 0;
+        while let Some(tab_stop) = cursor.next(collapsed_column) {
+            // The offsets in `tab_stop` point just past the tab, so the tab itself
+            // starts one column earlier. Expansion is measured from that column.
+            let tab_start = tab_stop.char_offset - 1 - tab_count + expanded_tab_len;
+            let tab_len = tab_size - (tab_start % tab_size);
+            // Increment tab count
+            tab_count += 1;
+            // The count of how many spaces we've added to this line in place of tab bytes
+            expanded_tab_len += tab_len;

-        let mut expanded_bytes = 0;
-        let mut expanded_chars = 0;
-        let mut collapsed_bytes = 0;
-        for c in chars {
-            if expanded_bytes >= column {
-                break;
-            }
-            if collapsed_bytes >= self.max_expansion_column {
-                break;
-            }
+            // The count of bytes at this point in the iteration while considering tab_count and previous expansions
+            let expanded_bytes = tab_stop.byte_offset - tab_count + expanded_tab_len;

-            if c == '\t' {
-                let tab_len = tab_size - (expanded_chars % tab_size);
-                expanded_chars += tab_len;
-                expanded_bytes += tab_len;
-                if expanded_bytes > column {
-                    expanded_chars -= expanded_bytes - column;
-                    return match bias {
-                        Bias::Left => (collapsed_bytes, expanded_chars, expanded_bytes - column),
-                        Bias::Right => (collapsed_bytes + 1, expanded_chars, 0),
-                    };
-                }
+            // Did we expand past the search target?
+            if expanded_bytes > column {
+                // We expanded past the search target: take the expanded char column at
+                // the end of this tab and pull it back by the overshoot.
+                let overshoot = expanded_bytes - column;
+                let expanded_chars =
+                    tab_stop.char_offset - tab_count + expanded_tab_len - overshoot;
+                return match bias {
+                    // The cursor sits just past the tab; the tab itself is one byte back.
+                    Bias::Left => (cursor.byte_offset() - 1, expanded_chars, overshoot),
+                    Bias::Right => (cursor.byte_offset(), expanded_chars, 0),
+                };
             } else {
-                expanded_chars += 1;
-                expanded_bytes += c.len_utf8() as u32;
+                // otherwise we only want to move the cursor collapse column forward
+                collapsed_column = collapsed_column - tab_len + 1;
             }
-
-            if expanded_bytes > column && matches!(bias, Bias::Left) {
-                expanded_chars -= 1;
-                break;
-            }
-
-            collapsed_bytes += c.len_utf8() as u32;
         }
+
+        let collapsed_bytes = cursor.byte_offset();
+        let expanded_bytes = cursor.byte_offset() - tab_count + expanded_tab_len;
+        // TODO: the second element should be an expanded *char* column. The cursor
+        // only exposes a byte offset so far, so the byte-based value stands in for it.
         (
             collapsed_bytes + column.saturating_sub(expanded_bytes),
-            expanded_chars,
+            expanded_bytes,
             0,
         )
     }
 }

@ -603,7 +642,10 @@ mod tests {
    use super::*;
    use crate::{
        MultiBuffer,
-        display_map::{fold_map::FoldMap, inlay_map::InlayMap},
+        display_map::{
+            fold_map::{FoldMap, FoldOffset},
+            inlay_map::InlayMap,
+        },
    };
    use rand::{Rng, prelude::StdRng};

@ -811,4 +853,138 @@ mod tests {
            );
        }
    }
+
+    /// Walks every tab of a single line with an effectively unlimited
+    /// distance and checks the offsets reported for each tab, plus the
+    /// cursor's final byte offset.
+    #[gpui::test]
+    fn test_tab_stop_cursor(cx: &mut gpui::App) {
+        let text = "\tfoo\tbarbarbar\t\tbaz\n";
+        let buffer = MultiBuffer::build_simple(text, cx);
+        let buffer_snapshot = buffer.read(cx).snapshot(cx);
+        let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone());
+        let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
+
+        let whole_snapshot = FoldOffset(0)..fold_snapshot.len();
+        let mut cursor =
+            TabStopCursor::new(fold_snapshot.chunks(whole_snapshot, false, Default::default()));
+
+        // Keep asking for tab stops until the cursor reports there are none left.
+        let tab_stops: Vec<TabStop> = std::iter::from_fn(|| cursor.next(u32::MAX)).collect();
+
+        // Each stop sits just past its tab; the text is ASCII, so both offsets agree.
+        let expected = [1, 5, 15, 16].map(|offset| TabStop {
+            byte_offset: offset,
+            char_offset: offset,
+        });
+        assert_eq!(&expected, tab_stops.as_slice());
+
+        assert_eq!(cursor.byte_offset(), 16);
+    }
+}
+
+struct TabStopCursor<'a> { // Walks fold chunks and reports tabs using each chunk's `tabs` bitmask.
+    chunks: FoldChunks<'a>, // Chunks that `next` has not pulled yet.
+    distance_traveled: u32, // Running total that `next` compares against its `distance` argument.
+    bytes_offset: u32, // Value returned by `byte_offset()` and copied into each yielded `TabStop`.
+    /// The chunk that still has unvisited tab bits, paired with the 1-based
+    /// position (within that chunk) of the tab most recently yielded from it.
+    current_chunk: Option<(Chunk<'a>, u32)>,
+}
+
+impl<'a> TabStopCursor<'a> {
+    /// Creates a cursor that has not consumed anything from `chunks` yet.
+    fn new(chunks: FoldChunks<'a>) -> Self {
+        Self {
+            chunks,
+            distance_traveled: 0,
+            bytes_offset: 0,
+            current_chunk: None,
+        }
+    }
+
+    /// Moves to the next tab and returns the offsets just past it.
+    ///
+    /// `distance` is an absolute limit, in bytes from where the cursor
+    /// started: a tab is only reported when the position just past it does
+    /// not exceed `distance`. When the limit is hit first, the cursor's byte
+    /// offset is set to `distance` and `None` is returned. `None` is also
+    /// returned once the chunks run out, in which case the byte offset stays
+    /// at the last reported tab.
+    ///
+    /// NOTE: `char_offset` currently mirrors `byte_offset`; multi-byte
+    /// characters are not accounted for yet.
+    fn next(&mut self, distance: u32) -> Option<TabStop> {
+        if let Some((chunk, past_tab_position)) = self.current_chunk.take() {
+            return self.advance_to_tab(chunk, past_tab_position, distance);
+        }
+
+        while let Some(mut chunk) = self.chunks.next() {
+            let chunk_len = chunk.text.len() as u32;
+            // Bits at or beyond the end of the text do not describe bytes of this
+            // chunk, so they must not be reported as tabs.
+            if chunk_len < u128::BITS {
+                chunk.tabs &= (1u128 << chunk_len) - 1;
+            }
+
+            if chunk.tabs == 0 {
+                self.distance_traveled += chunk_len;
+                if self.distance_traveled > distance {
+                    self.bytes_offset = distance;
+                    return None;
+                }
+                continue;
+            }
+
+            return self.advance_to_tab(chunk, 0, distance);
+        }
+
+        None
+    }
+
+    /// Steps onto the lowest remaining tab bit of `chunk`.
+    ///
+    /// `past_tab_position` is the 1-based position of the tab previously
+    /// reported from this chunk, or `0` for a chunk that was just pulled.
+    fn advance_to_tab(
+        &mut self,
+        mut chunk: Chunk<'a>,
+        past_tab_position: u32,
+        distance: u32,
+    ) -> Option<TabStop> {
+        let tab_position = chunk.tabs.trailing_zeros() + 1;
+        // Only the bytes between the previous stop and this tab are new ground.
+        let step = tab_position - past_tab_position;
+
+        if self.distance_traveled + step > distance {
+            self.bytes_offset = distance;
+            // Keep the chunk so a later call with a larger limit can still reach this tab.
+            self.current_chunk = Some((chunk, past_tab_position));
+            return None;
+        }
+        self.distance_traveled += step;
+        self.bytes_offset = self.distance_traveled;
+
+        // Clear the lowest set bit, i.e. the tab that was just visited.
+        chunk.tabs &= chunk.tabs - 1;
+        if chunk.tabs > 0 {
+            self.current_chunk = Some((chunk, tab_position));
+        } else {
+            // No tabs remain here: account for the rest of the chunk's text so that
+            // positions in later chunks are measured from the right place.
+            self.distance_traveled += (chunk.text.len() as u32).saturating_sub(tab_position);
+        }
+
+        Some(TabStop {
+            char_offset: self.bytes_offset,
+            byte_offset: self.bytes_offset,
+        })
+    }
+
+    /// Byte offset the cursor last stopped at: just past the last reported
+    /// tab, or the `distance` limit when `next` stopped because of it.
+    fn byte_offset(&self) -> u32 {
+        self.bytes_offset
+    }
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+struct TabStop { // One tab reported by `TabStopCursor::next`; offsets point just past the tab.
+    char_offset: u32, // Filled from the cursor's byte offset, so it equals `byte_offset` for now.
+    byte_offset: u32, // Cursor byte offset after stepping over the tab (a tab at index 0 gives 1).
 }
--- a/crates/editor/src/element.rs
+++ b/crates/editor/src/element.rs
@ -278,6 +278,7 @@ impl EditorElement {
                if text.is_empty() {
                    return;
                }
+                dbg!("Handle input text:", text);
                editor.handle_input(text, window, cx);
            },
        );
--- a/crates/language/src/buffer.rs
+++ b/crates/language/src/buffer.rs
@ -28,6 +28,7 @@ use gpui::{
    App, AppContext as _, Context, Entity, EventEmitter, HighlightStyle, SharedString, StyledText,
    Task, TaskLabel, TextStyle,
 };
+
 use lsp::{LanguageServerId, NumberOrString};
 use parking_lot::Mutex;
 use schemars::JsonSchema;
@ -485,6 +486,8 @@ pub struct Chunk<'a> {
    pub is_unnecessary: bool,
    /// Whether this chunk of text was originally a tab character.
    pub is_tab: bool,
+    /// A bitset of which characters are tabs in this string.
+    pub tabs: u128,
    /// Whether this chunk of text was originally a tab character.
    pub is_inlay: bool,
    /// Whether to underline the corresponding text range in the editor.
@ -4579,7 +4582,7 @@ impl<'a> Iterator for BufferChunks<'a> {
        }
        self.diagnostic_endpoints = diagnostic_endpoints;

-        if let Some(chunk) = self.chunks.peek() {
+        if let Some((chunk, tabs)) = self.chunks.peek_tabs() {
            let chunk_start = self.range.start;
            let mut chunk_end = (self.chunks.offset() + chunk.len())
                .min(next_capture_start)
@ -4594,6 +4597,8 @@ impl<'a> Iterator for BufferChunks<'a> {

            let slice =
                &chunk[chunk_start - self.chunks.offset()..chunk_end - self.chunks.offset()];
+            let tabs = tabs >> (chunk_start - self.chunks.offset());
+
            self.range.start = chunk_end;
            if self.range.start == self.chunks.offset() + chunk.len() {
                self.chunks.next().unwrap();
@ -4605,6 +4610,7 @@ impl<'a> Iterator for BufferChunks<'a> {
                underline: self.underline,
                diagnostic_severity: self.current_diagnostic_severity(),
                is_unnecessary: self.current_code_is_unnecessary(),
+                tabs,
                ..Chunk::default()
            })
        } else {
--- a/crates/language/src/buffer_tests.rs
+++ b/crates/language/src/buffer_tests.rs
@ -3279,6 +3279,28 @@ fn test_contiguous_ranges() {
    );
 }

+/// Checks that `peek_tabs` reports a tab bit for exactly the tab bytes of
+/// each slice it returns, both from the start of the rope and after a seek.
+#[test]
+fn test_buffer_chunks_tabs() {
+    /// Asserts that bit `ix` of `tabs` is set if and only if byte `ix` of
+    /// `slice` is a tab.
+    fn assert_tabs_match(slice: &str, tabs: u128) {
+        for (ix, byte) in slice.bytes().enumerate() {
+            assert_eq!(
+                tabs & (1u128 << ix) != 0,
+                byte == b'\t',
+                "bit {} of {:b} disagrees with {:?}",
+                ix,
+                tabs,
+                slice
+            );
+        }
+    }
+
+    let buffer = text::Buffer::new(0, BufferId::new(1).unwrap(), "\ta\tbc");
+
+    // From the start: every byte is visited and every tab is flagged.
+    let mut iter = buffer.as_rope().chunks();
+    let mut seen = String::new();
+    while let Some((slice, tabs)) = iter.peek_tabs() {
+        assert_tabs_match(slice, tabs);
+        seen.push_str(slice);
+        iter.next();
+    }
+    assert_eq!(seen, "\ta\tbc");
+
+    // After seeking past both tabs, the bitmask must be shifted along with the text.
+    let mut iter = buffer.as_rope().chunks();
+    iter.seek(3);
+    let mut seen = String::new();
+    while let Some((slice, tabs)) = iter.peek_tabs() {
+        assert_tabs_match(slice, tabs);
+        seen.push_str(slice);
+        iter.next();
+    }
+    assert_eq!(seen, "bc");
+}
+
 #[gpui::test(iterations = 500)]
 fn test_trailing_whitespace_ranges(mut rng: StdRng) {
    // Generate a random multi-line string containing
--- a/crates/rope/src/chunk.rs
+++ b/crates/rope/src/chunk.rs
@ -13,7 +13,7 @@ pub struct Chunk {
    chars: u128,
    chars_utf16: u128,
    newlines: u128,
-    tabs: u128,
+    pub tabs: u128,
    pub text: ArrayString<MAX_BASE>,
 }

--- a/crates/rope/src/rope.rs
+++ b/crates/rope/src/rope.rs
@ -772,6 +772,31 @@ impl<'a> Chunks<'a> {
        Some(&chunk.text[slice_range])
    }

+    /// Returns the text slice at the current position together with a
+    /// bitmask of its tabs, without advancing the iterator.
+    ///
+    /// Bit `i` of the mask is set when byte `i` of the returned slice is a
+    /// tab. Bits for bytes outside the slice are always clear. Returns
+    /// `None` when the current offset is not valid or there is no chunk at
+    /// the cursor.
+    pub fn peek_tabs(&self) -> Option<(&'a str, u128)> {
+        if !self.offset_is_valid() {
+            return None;
+        }
+
+        let chunk = self.chunks.item()?;
+        let chunk_start = *self.chunks.start();
+        let slice_range = if self.reversed {
+            let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start;
+            let slice_end = self.offset - chunk_start;
+            slice_start..slice_end
+        } else {
+            let slice_start = self.offset - chunk_start;
+            let slice_end = cmp::min(self.chunks.end(&()), self.range.end) - chunk_start;
+            slice_start..slice_end
+        };
+        // Both values are bounded by the chunk's text length, so the casts to
+        // `u32` below cannot truncate.
+        let shift = slice_range.start as u32;
+        let slice_len = slice_range.len() as u32;
+        let slice_text = &chunk.text[slice_range];
+
+        // Shift the tabs to align with our slice window. A shift of the full bit
+        // width would overflow, and in that case no bits are left anyway.
+        let shifted_tabs = chunk.tabs.checked_shr(shift).unwrap_or(0);
+        // Drop bits that belong to bytes past the end of the slice; otherwise a
+        // slice that stops short of the chunk's end would report tabs it does not
+        // contain.
+        let slice_mask = 1u128
+            .checked_shl(slice_len)
+            .map_or(u128::MAX, |bit| bit - 1);
+
+        Some((slice_text, shifted_tabs & slice_mask))
+    }
+
    pub fn lines(self) -> Lines<'a> {
        let reversed = self.reversed;
        Lines {