Start work on optimizing tab map

We currently iterate over each character when looking for tab bytes, even
though each chunk keeps a bitmask that represents every tab position. This
commit is the first step toward using that bitmask.

Co-authored-by: Remco Smits <djsmits12@gmail.com>
Co-authored-by: Cole Miller <m@cole-miller.net>
This commit is contained in:
Anthony 2025-06-05 17:34:17 -04:00
parent 52770cd3ad
commit 765ed65e88
7 changed files with 284 additions and 42 deletions

View file

@ -863,6 +863,14 @@ impl FoldSnapshot {
.flat_map(|chunk| chunk.text.chars())
}
/// Returns the chunks spanning from `start` to the end of this snapshot.
///
/// Delegates to `chunks`, passing `false` for its second argument and the
/// default `Highlights`.
pub fn chunks_at(&self, start: FoldPoint) -> FoldChunks<'_> {
    let range = start.to_offset(self)..self.len();
    self.chunks(range, false, Highlights::default())
}
#[cfg(test)]
pub fn clip_offset(&self, offset: FoldOffset, bias: Bias) -> FoldOffset {
if offset > self.len() {
@ -1263,6 +1271,8 @@ pub struct Chunk<'a> {
pub is_inlay: bool,
/// An optional recipe for how the chunk should be presented.
pub renderer: Option<ChunkRenderer>,
/// The location of tab characters in the chunk.
pub tabs: u128,
}
/// A recipe for how the chunk should be presented.
@ -1410,6 +1420,7 @@ impl<'a> Iterator for FoldChunks<'a> {
chunk.text = &chunk.text
[(self.inlay_offset - buffer_chunk_start).0..(chunk_end - buffer_chunk_start).0];
chunk.tabs = chunk.tabs >> (self.inlay_offset - buffer_chunk_start).0;
if chunk_end == transform_end {
self.transform_cursor.next(&());
@ -1421,6 +1432,7 @@ impl<'a> Iterator for FoldChunks<'a> {
self.output_offset.0 += chunk.text.len();
return Some(Chunk {
text: chunk.text,
tabs: chunk.tabs,
syntax_highlight_id: chunk.syntax_highlight_id,
highlight_style: chunk.highlight_style,
diagnostic_severity: chunk.diagnostic_severity,

View file

@ -305,10 +305,13 @@ impl TabSnapshot {
}
pub fn to_fold_point(&self, output: TabPoint, bias: Bias) -> (FoldPoint, u32, u32) {
let chars = self.fold_snapshot.chars_at(FoldPoint::new(output.row(), 0));
let chunks = self
.fold_snapshot
.chunks_at(FoldPoint::new(output.row(), 0));
let tab_cursor = TabStopCursor::new(chunks);
let expanded = output.column();
let (collapsed, expanded_char_column, to_next_stop) =
self.collapse_tabs(chars, expanded, bias);
self.collapse_tabs(tab_cursor, expanded, bias);
(
FoldPoint::new(output.row(), collapsed),
expanded_char_column,
@ -354,53 +357,89 @@ impl TabSnapshot {
expanded_bytes + column.saturating_sub(collapsed_bytes)
}
fn collapse_tabs(
&self,
chars: impl Iterator<Item = char>,
column: u32,
bias: Bias,
) -> (u32, u32, u32) {
fn collapse_tabs(&self, mut cursor: TabStopCursor, column: u32, bias: Bias) -> (u32, u32, u32) {
let tab_size = self.tab_size.get();
let mut collapsed_column = column;
let mut tab_count = 0;
let mut expanded_tab_len = 0;
while let Some(tab_stop) = cursor.next(collapsed_column) {
// Calculate how much we want to expand this tab stop (into spaces)
let mut expanded_chars = tab_stop.char_offset - tab_count + expanded_tab_len;
let tab_len = tab_size - (expanded_chars % tab_size);
// Increment tab count
tab_count += 1;
// The count of how many spaces we've added to this line in place of tab bytes
expanded_tab_len += tab_len;
let mut expanded_bytes = 0;
let mut expanded_chars = 0;
let mut collapsed_bytes = 0;
for c in chars {
if expanded_bytes >= column {
break;
}
if collapsed_bytes >= self.max_expansion_column {
break;
}
// The count of bytes at this point in the iteration while considering tab_count and previous expansions
let expanded_bytes = tab_stop.byte_offset - tab_count + expanded_tab_len;
if c == '\t' {
let tab_len = tab_size - (expanded_chars % tab_size);
expanded_chars += tab_len;
expanded_bytes += tab_len;
if expanded_bytes > column {
expanded_chars -= expanded_bytes - column;
return match bias {
Bias::Left => (collapsed_bytes, expanded_chars, expanded_bytes - column),
Bias::Right => (collapsed_bytes + 1, expanded_chars, 0),
};
}
// Did we expand past the search target?
if expanded_bytes > column {
// We expanded past the search target, so need to calculate the offshoot
expanded_chars -= expanded_bytes - column;
return match bias {
Bias::Left => (
cursor.byte_offset(),
expanded_chars,
expanded_bytes - column,
),
Bias::Right => (cursor.byte_offset() + 1, expanded_chars, 0),
};
} else {
expanded_chars += 1;
expanded_bytes += c.len_utf8() as u32;
// otherwise we only want to move the cursor collapse column forward
collapsed_column = collapsed_column - tab_len + 1;
}
if expanded_bytes > column && matches!(bias, Bias::Left) {
expanded_chars -= 1;
break;
}
collapsed_bytes += c.len_utf8() as u32;
}
let collapsed_bytes = cursor.byte_offset();
let expanded_bytes = cursor.byte_offset() - tab_count + expanded_tab_len;
// let expanded_chars = cursor.char_offset() - tab_count + expanded_tab_len;
(
collapsed_bytes + column.saturating_sub(expanded_bytes),
expanded_chars,
expanded_bytes,
0,
)
// let mut expanded_bytes = 0;
// let mut expanded_chars = 0;
// let mut collapsed_bytes = 0;
// for c in chars {
// if expanded_bytes >= column {
// break;
// }
// if collapsed_bytes >= self.max_expansion_column {
// break;
// }
// if c == '\t' {
// let tab_len = tab_size - (expanded_chars % tab_size);
// expanded_chars += tab_len;
// expanded_bytes += tab_len;
// if expanded_bytes > column {
// expanded_chars -= expanded_bytes - column;
// return match bias {
// Bias::Left => (collapsed_bytes, expanded_chars, expanded_bytes - column),
// Bias::Right => (collapsed_bytes + 1, expanded_chars, 0),
// };
// }
// } else {
// expanded_chars += 1;
// expanded_bytes += c.len_utf8() as u32;
// }
// if expanded_bytes > column && matches!(bias, Bias::Left) {
// expanded_chars -= 1;
// break;
// }
// collapsed_bytes += c.len_utf8() as u32;
// }
// (
// collapsed_bytes + column.saturating_sub(expanded_bytes),
// expanded_chars,
// 0,
// )
}
}
@ -603,7 +642,10 @@ mod tests {
use super::*;
use crate::{
MultiBuffer,
display_map::{fold_map::FoldMap, inlay_map::InlayMap},
display_map::{
fold_map::{FoldMap, FoldOffset},
inlay_map::InlayMap,
},
};
use rand::{Rng, prelude::StdRng};
@ -811,4 +853,138 @@ mod tests {
);
}
}
#[gpui::test]
fn test_tab_stop_cursor(cx: &mut gpui::App) {
let text = "\tfoo\tbarbarbar\t\tbaz\n";
let buffer = MultiBuffer::build_simple(text, cx);
let buffer_snapshot = buffer.read(cx).snapshot(cx);
let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone());
let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
let chunks = fold_snapshot.chunks(
FoldOffset(0)..fold_snapshot.len(),
false,
Default::default(),
);
let mut cursor = TabStopCursor::new(chunks);
let mut tab_stops = Vec::new();
while let Some(tab_stop) = cursor.next(u32::MAX) {
tab_stops.push(tab_stop);
}
assert_eq!(
&[
TabStop {
byte_offset: 1,
char_offset: 1
},
TabStop {
byte_offset: 5,
char_offset: 5
},
TabStop {
byte_offset: 15,
char_offset: 15,
},
TabStop {
byte_offset: 16,
char_offset: 16,
},
],
tab_stops.as_slice(),
);
assert_eq!(cursor.byte_offset(), 16);
}
}
/// Walks a stream of fold chunks and yields the position of each tab, using
/// the per-chunk `tabs` bitmask instead of inspecting every character.
struct TabStopCursor<'a> {
/// Source of chunks; pulled from whenever no chunk is currently stashed.
chunks: FoldChunks<'a>,
/// Running total that `next` compares against its `distance` limit.
distance_traveled: u32,
/// Offset reported by `byte_offset()`; advanced as tabs are found.
bytes_offset: u32,
/// The chunk currently being scanned, paired with the chunk-relative
/// position (index of the tab plus one) of the last tab reported from it.
current_chunk: Option<(Chunk<'a>, u32)>,
}
impl<'a> TabStopCursor<'a> {
    /// Creates a cursor positioned before the first byte of `chunks`.
    fn new(chunks: FoldChunks<'a>) -> Self {
        Self {
            chunks,
            distance_traveled: 0,
            bytes_offset: 0,
            current_chunk: None,
        }
    }

    /// Advances to the next tab and returns the offset just past it.
    ///
    /// `distance` bounds how far the cursor may travel: it is compared against
    /// the total number of bytes walked since the cursor was created. Returns
    /// `None` when the next tab lies beyond that bound or when the chunks are
    /// exhausted.
    ///
    /// Offsets in the returned `TabStop` are measured from the start of the
    /// chunk stream, across chunk boundaries.
    fn next(&mut self, distance: u32) -> Option<TabStop> {
        // Tab-free bytes walked during this call. They are folded into
        // `bytes_offset` only once a tab is actually found, so that after the
        // chunks run out `byte_offset()` still points just past the last tab.
        let mut pending_bytes = 0;

        loop {
            // Resume the partially scanned chunk if there is one; otherwise
            // pull a fresh chunk, of which nothing has been consumed yet.
            let (mut chunk, consumed) = match self.current_chunk.take() {
                Some(entry) => entry,
                None => (self.chunks.next()?, 0),
            };

            if chunk.tabs == 0 {
                // No tabs left here: walk over whatever text remains in it.
                let remainder = (chunk.text.len() as u32).saturating_sub(consumed);
                self.distance_traveled += remainder;
                if self.distance_traveled > distance {
                    // NOTE(review): kept from the original; this adds the whole
                    // limit rather than the part not yet walked. Confirm intent.
                    self.bytes_offset += distance;
                    return None;
                }
                pending_bytes += remainder;
                continue;
            }

            // Bit positions in `tabs` are relative to the chunk start, so only
            // the gap since the previously reported tab is new ground.
            let tab_position = chunk.tabs.trailing_zeros() + 1;
            let step = tab_position - consumed;
            if self.distance_traveled + step > distance {
                // NOTE(review): as above, and the chunk is dropped here exactly
                // as the original did, so the cursor is not resumable.
                self.bytes_offset += distance;
                return None;
            }

            self.distance_traveled += step;
            self.bytes_offset += pending_bytes + step;

            // Clear the lowest set bit, i.e. the tab just reported. The chunk
            // is stashed even when no tabs remain so that its trailing text is
            // accounted for on the next call.
            chunk.tabs &= chunk.tabs - 1;
            self.current_chunk = Some((chunk, tab_position));

            // NOTE(review): `char_offset` mirrors `byte_offset`, which is only
            // right while every preceding character is one byte long.
            return Some(TabStop {
                char_offset: self.bytes_offset,
                byte_offset: self.bytes_offset,
            });
        }
    }

    /// Returns the cursor's current byte offset.
    fn byte_offset(&self) -> u32 {
        self.bytes_offset
    }
}
/// Position of a tab as reported by `TabStopCursor::next`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct TabStop {
/// Character offset just past the tab.
/// NOTE(review): the cursor currently fills this with the same value as
/// `byte_offset` — confirm behaviour for multi-byte text.
char_offset: u32,
/// Byte offset just past the tab (a tab at byte index 0 yields 1).
byte_offset: u32,
}

View file

@ -278,6 +278,7 @@ impl EditorElement {
if text.is_empty() {
return;
}
dbg!("Handle input text:", text);
editor.handle_input(text, window, cx);
},
);

View file

@ -28,6 +28,7 @@ use gpui::{
App, AppContext as _, Context, Entity, EventEmitter, HighlightStyle, SharedString, StyledText,
Task, TaskLabel, TextStyle,
};
use lsp::{LanguageServerId, NumberOrString};
use parking_lot::Mutex;
use schemars::JsonSchema;
@ -485,6 +486,8 @@ pub struct Chunk<'a> {
pub is_unnecessary: bool,
/// Whether this chunk of text was originally a tab character.
pub is_tab: bool,
/// A bitset of which characters are tabs in this string.
pub tabs: u128,
/// Whether this chunk of text was originally an inlay.
pub is_inlay: bool,
/// Whether to underline the corresponding text range in the editor.
@ -4579,7 +4582,7 @@ impl<'a> Iterator for BufferChunks<'a> {
}
self.diagnostic_endpoints = diagnostic_endpoints;
if let Some(chunk) = self.chunks.peek() {
if let Some((chunk, tabs)) = self.chunks.peek_tabs() {
let chunk_start = self.range.start;
let mut chunk_end = (self.chunks.offset() + chunk.len())
.min(next_capture_start)
@ -4594,6 +4597,8 @@ impl<'a> Iterator for BufferChunks<'a> {
let slice =
&chunk[chunk_start - self.chunks.offset()..chunk_end - self.chunks.offset()];
let tabs = tabs >> (chunk_start - self.chunks.offset());
self.range.start = chunk_end;
if self.range.start == self.chunks.offset() + chunk.len() {
self.chunks.next().unwrap();
@ -4605,6 +4610,7 @@ impl<'a> Iterator for BufferChunks<'a> {
underline: self.underline,
diagnostic_severity: self.current_diagnostic_severity(),
is_unnecessary: self.current_code_is_unnecessary(),
tabs,
..Chunk::default()
})
} else {

View file

@ -3279,6 +3279,28 @@ fn test_contiguous_ranges() {
);
}
/// Checks that `peek_tabs` reports a tab bitmask aligned with the slice it
/// returns, both from the start of the rope and after seeking into a chunk.
#[test]
fn test_buffer_chunks_tabs() {
    // Tabs sit at byte indices 0 and 2, i.e. bitmask 0b101.
    let buffer = text::Buffer::new(0, BufferId::new(1).unwrap(), "\ta\tbc");

    let mut iter = buffer.as_rope().chunks();
    let mut seen = Vec::new();
    while let Some((text, tabs)) = iter.peek_tabs() {
        seen.push((text, tabs));
        iter.next();
    }
    assert_eq!(seen, [("\ta\tbc", 0b101_u128)]);

    // After seeking past both tabs the mask must be shifted along with the
    // text, leaving no tab bits set.
    let mut iter = buffer.as_rope().chunks();
    iter.seek(3);
    let mut seen = Vec::new();
    while let Some((text, tabs)) = iter.peek_tabs() {
        seen.push((text, tabs));
        iter.next();
    }
    assert_eq!(seen, [("bc", 0_u128)]);
}
#[gpui::test(iterations = 500)]
fn test_trailing_whitespace_ranges(mut rng: StdRng) {
// Generate a random multi-line string containing

View file

@ -13,7 +13,7 @@ pub struct Chunk {
chars: u128,
chars_utf16: u128,
newlines: u128,
tabs: u128,
pub tabs: u128,
pub text: ArrayString<MAX_BASE>,
}

View file

@ -772,6 +772,31 @@ impl<'a> Chunks<'a> {
Some(&chunk.text[slice_range])
}
/// Returns the text the iterator currently points at together with a bitmask
/// of its tabs, without advancing.
///
/// Bit `i` of the mask is set when byte `i` of the returned slice is a tab.
/// Returns `None` when the current offset is not valid or there is no chunk
/// at the cursor.
pub fn peek_tabs(&self) -> Option<(&'a str, u128)> {
    if !self.offset_is_valid() {
        return None;
    }

    let chunk = self.chunks.item()?;
    let chunk_start = *self.chunks.start();
    let slice_range = if self.reversed {
        let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start;
        let slice_end = self.offset - chunk_start;
        slice_start..slice_end
    } else {
        let slice_start = self.offset - chunk_start;
        let slice_end = cmp::min(self.chunks.end(&()), self.range.end) - chunk_start;
        slice_start..slice_end
    };
    let slice_start = slice_range.start;
    let slice_text = &chunk.text[slice_range];

    // Shift the tabs to align with our slice window. A shift of 128 or more
    // would overflow, and means no tab bits remain in the window.
    let shifted_tabs = u32::try_from(slice_start)
        .ok()
        .and_then(|shift| chunk.tabs.checked_shr(shift))
        .unwrap_or(0);

    // Drop bits belonging to bytes past the end of the slice; these exist
    // whenever the slice stops before the end of the chunk.
    let slice_mask = match u32::try_from(slice_text.len()) {
        Ok(len) if len < u128::BITS => (1u128 << len) - 1,
        _ => u128::MAX,
    };

    Some((slice_text, shifted_tabs & slice_mask))
}
pub fn lines(self) -> Lines<'a> {
let reversed = self.reversed;
Lines {