diff --git a/Cargo.lock b/Cargo.lock index 42649b137f..9b7eecc8b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5039,6 +5039,7 @@ dependencies = [ "clock", "collections", "convert_case 0.8.0", + "criterion", "ctor", "dap", "db", diff --git a/crates/editor/Cargo.toml b/crates/editor/Cargo.toml index 339f98ae8b..66775ac502 100644 --- a/crates/editor/Cargo.toml +++ b/crates/editor/Cargo.toml @@ -94,6 +94,7 @@ zed_actions.workspace = true workspace-hack.workspace = true [dev-dependencies] +criterion.workspace = true ctor.workspace = true gpui = { workspace = true, features = ["test-support"] } language = { workspace = true, features = ["test-support"] } @@ -119,3 +120,8 @@ util = { workspace = true, features = ["test-support"] } workspace = { workspace = true, features = ["test-support"] } http_client = { workspace = true, features = ["test-support"] } zlog.workspace = true + + +[[bench]] +name = "editor_render" +harness = false diff --git a/crates/editor/benches/editor_render.rs b/crates/editor/benches/editor_render.rs new file mode 100644 index 0000000000..a213737a91 --- /dev/null +++ b/crates/editor/benches/editor_render.rs @@ -0,0 +1,164 @@ +use criterion::{Bencher, BenchmarkId}; +use editor::{ + Editor, EditorMode, MultiBuffer, + actions::{DeleteToPreviousWordStart, SelectAll, SplitSelectionIntoLines}, +}; +use gpui::{AppContext, Focusable as _, TestAppContext, TestDispatcher}; +use project::Project; +use rand::{Rng as _, SeedableRng as _, rngs::StdRng}; +use settings::SettingsStore; +use ui::IntoElement; +use util::RandomCharIter; + +fn editor_input_with_1000_cursors(bencher: &mut Bencher<'_>, cx: &TestAppContext) { + let mut cx = cx.clone(); + let text = String::from_iter(["line:\n"; 500]); + let buffer = cx.update(|cx| MultiBuffer::build_simple(&text, cx)); + + let cx = cx.add_empty_window(); + let editor = cx.update(|window, cx| { + let editor = cx.new(|cx| { + let mut editor = Editor::new(EditorMode::full(), buffer, None, window, cx); + 
editor.set_style(editor::EditorStyle::default(), window, cx); + editor.select_all(&SelectAll, window, cx); + editor.split_selection_into_lines(&SplitSelectionIntoLines, window, cx); + editor + }); + window.focus(&editor.focus_handle(cx)); + editor + }); + + bencher.iter(|| { + cx.update(|window, cx| { + editor.update(cx, |editor, cx| { + editor.handle_input("hello world", window, cx); + editor.delete_to_previous_word_start( + &DeleteToPreviousWordStart { + ignore_newlines: false, + }, + window, + cx, + ); + editor.delete_to_previous_word_start( + &DeleteToPreviousWordStart { + ignore_newlines: false, + }, + window, + cx, + ); + }); + }) + }); +} + +fn open_editor_with_one_long_line(bencher: &mut Bencher<'_>, args: &(String, TestAppContext)) { + let (text, cx) = args; + let mut cx = cx.clone(); + + bencher.iter(|| { + let buffer = cx.update(|cx| MultiBuffer::build_simple(&text, cx)); + + let cx = cx.add_empty_window(); + let _ = cx.update(|window, cx| { + let editor = cx.new(|cx| { + let mut editor = Editor::new(EditorMode::full(), buffer, None, window, cx); + editor.set_style(editor::EditorStyle::default(), window, cx); + editor + }); + window.focus(&editor.focus_handle(cx)); + editor + }); + }); +} + +fn editor_render(bencher: &mut Bencher<'_>, cx: &TestAppContext) { + let mut cx = cx.clone(); + let buffer = cx.update(|cx| { + let mut rng = StdRng::seed_from_u64(1); + let text_len = rng.gen_range(10000..90000); + if rng.r#gen() { + let text = RandomCharIter::new(&mut rng) + .take(text_len) + .collect::(); + MultiBuffer::build_simple(&text, cx) + } else { + MultiBuffer::build_random(&mut rng, cx) + } + }); + + let cx = cx.add_empty_window(); + let editor = cx.update(|window, cx| { + let editor = cx.new(|cx| { + let mut editor = Editor::new(EditorMode::full(), buffer, None, window, cx); + editor.set_style(editor::EditorStyle::default(), window, cx); + editor + }); + window.focus(&editor.focus_handle(cx)); + editor + }); + + bencher.iter(|| { + cx.update(|window, cx| 
{ + // editor.update(cx, |editor, cx| editor.move_down(&MoveDown, window, cx)); + let mut view = editor.clone().into_any_element(); + let _ = view.request_layout(window, cx); + let _ = view.prepaint(window, cx); + view.paint(window, cx); + }); + }) +} + +pub fn benches() { + let dispatcher = TestDispatcher::new(StdRng::seed_from_u64(1)); + let cx = gpui::TestAppContext::build(dispatcher, None); + cx.update(|cx| { + let store = SettingsStore::test(cx); + cx.set_global(store); + assets::Assets.load_test_fonts(cx); + theme::init(theme::LoadThemes::JustBase, cx); + // release_channel::init(SemanticVersion::default(), cx); + client::init_settings(cx); + language::init(cx); + workspace::init_settings(cx); + Project::init_settings(cx); + editor::init(cx); + }); + + let mut criterion: criterion::Criterion<_> = + (criterion::Criterion::default()).configure_from_args(); + + // setup app context + let mut group = criterion.benchmark_group("Time to render"); + group.bench_with_input( + BenchmarkId::new("editor_render", "TestAppContext"), + &cx, + editor_render, + ); + + group.finish(); + + let text = String::from_iter(["char"; 1000]); + let mut group = criterion.benchmark_group("Build buffer with one long line"); + group.bench_with_input( + BenchmarkId::new("editor_with_one_long_line", "(String, TestAppContext )"), + &(text, cx.clone()), + open_editor_with_one_long_line, + ); + + group.finish(); + + let mut group = criterion.benchmark_group("multi cursor edits"); + group.bench_with_input( + BenchmarkId::new("editor_input_with_1000_cursors", "TestAppContext"), + &cx, + editor_input_with_1000_cursors, + ); + group.finish(); +} + +fn main() { + benches(); + criterion::Criterion::default() + .configure_from_args() + .final_summary(); +} diff --git a/crates/editor/src/display_map/block_map.rs b/crates/editor/src/display_map/block_map.rs index b073fe7be7..c4ddccbbc2 100644 --- a/crates/editor/src/display_map/block_map.rs +++ b/crates/editor/src/display_map/block_map.rs @@ -1737,6 
+1737,7 @@ impl<'a> Iterator for BlockChunks<'a> { return Some(Chunk { text: unsafe { std::str::from_utf8_unchecked(&NEWLINES[..line_count as usize]) }, + chars: (1 << line_count) - 1, ..Default::default() }); } @@ -1766,17 +1767,26 @@ impl<'a> Iterator for BlockChunks<'a> { let (mut prefix, suffix) = self.input_chunk.text.split_at(prefix_bytes); self.input_chunk.text = suffix; + self.input_chunk.tabs >>= prefix_bytes.saturating_sub(1); + self.input_chunk.chars >>= prefix_bytes.saturating_sub(1); + + let mut tabs = self.input_chunk.tabs; + let mut chars = self.input_chunk.chars; if self.masked { // Not great for multibyte text because to keep cursor math correct we // need to have the same number of bytes in the input as output. - let chars = prefix.chars().count(); - let bullet_len = chars; + let chars_count = prefix.chars().count(); + let bullet_len = chars_count; prefix = &BULLETS[..bullet_len]; + chars = (1 << bullet_len) - 1; + tabs = 0; } let chunk = Chunk { text: prefix, + tabs, + chars, ..self.input_chunk.clone() }; diff --git a/crates/editor/src/display_map/custom_highlights.rs b/crates/editor/src/display_map/custom_highlights.rs index f3737ea4b7..06fda1098b 100644 --- a/crates/editor/src/display_map/custom_highlights.rs +++ b/crates/editor/src/display_map/custom_highlights.rs @@ -130,21 +130,37 @@ impl<'a> Iterator for CustomHighlightsChunks<'a> { } } + // todo!("Ask if it's ok that i changed the unwraps here") let chunk = self .buffer_chunk - .get_or_insert_with(|| self.buffer_chunks.next().unwrap()); + .get_or_insert_with(|| self.buffer_chunks.next().unwrap_or_default()); if chunk.text.is_empty() { - *chunk = self.buffer_chunks.next().unwrap(); + *chunk = self.buffer_chunks.next()?; } - let (prefix, suffix) = chunk - .text - .split_at(chunk.text.len().min(next_highlight_endpoint - self.offset)); + let split_idx = chunk.text.len().min(next_highlight_endpoint - self.offset); + let (prefix, suffix) = chunk.text.split_at(split_idx); + + let (chars, tabs) = 
#[cfg(test)]
mod tests {
    use std::{any::TypeId, sync::Arc};

    use super::*;
    use crate::MultiBuffer;
    use gpui::App;
    use rand::prelude::*;
    use util::RandomCharIter;

    /// Randomized check that every chunk produced by `CustomHighlightsChunks`
    /// carries `chars` / `tabs` bitmaps that agree with its text: bit `i` of
    /// `chars` is set exactly when byte `i` starts a character, and bit `i` of
    /// `tabs` is set exactly when byte `i` is `b'\t'`.
    #[gpui::test(iterations = 100)]
    fn test_random_chunk_bitmaps(cx: &mut App, mut rng: StdRng) {
        // Generate random buffer using existing test infrastructure
        let len = rng.gen_range(10..10000);
        let buffer = if rng.r#gen() {
            let text: String = RandomCharIter::new(&mut rng).take(len).collect();
            MultiBuffer::build_simple(&text, cx)
        } else {
            MultiBuffer::build_random(&mut rng, cx)
        };

        let buffer_snapshot = buffer.read(cx).snapshot(cx);
        // The snapshot text does not change below, so materialize it once
        // instead of once per generated highlight.
        let text = buffer_snapshot.text();

        // Create random highlights
        let mut highlights = sum_tree::TreeMap::default();
        let highlight_count = rng.gen_range(1..10);

        for _ in 0..highlight_count {
            let style = HighlightStyle {
                color: Some(gpui::Hsla {
                    h: rng.r#gen(),
                    s: rng.r#gen(),
                    l: rng.r#gen(),
                    a: 1.0,
                }),
                ..Default::default()
            };

            let mut ranges = Vec::new();
            let range_count = rng.gen_range(1..10);
            for _ in 0..range_count {
                if buffer_snapshot.len() == 0 {
                    continue;
                }

                let mut start = rng.gen_range(0..=buffer_snapshot.len().saturating_sub(10));
                while !text.is_char_boundary(start) {
                    start = start.saturating_sub(1);
                }

                let end_end = buffer_snapshot.len().min(start + 100);
                let mut end = rng.gen_range(start..=end_end);
                while !text.is_char_boundary(end) {
                    end = end.saturating_sub(1);
                }

                // Keep the range well-formed. The previous guard
                // (`if start < end { start = end }`) collapsed every non-empty
                // range to an empty one.
                if end < start {
                    end = start;
                }
                let start_anchor = buffer_snapshot.anchor_before(start);
                let end_anchor = buffer_snapshot.anchor_after(end);
                ranges.push(start_anchor..end_anchor);
            }

            // NOTE(review): every iteration inserts under the same key, so
            // presumably only the last generated highlight survives — confirm.
            let type_id = TypeId::of::<()>(); // Simple type ID for testing
            highlights.insert(HighlightKey::Type(type_id), Arc::new((style, ranges)));
        }

        // Get all chunks and verify their bitmaps
        // NOTE(review): `highlights` is built above but `None` is passed here,
        // so the generated ranges never reach the iterator — confirm whether
        // passing the map was intended.
        let chunks =
            CustomHighlightsChunks::new(0..buffer_snapshot.len(), false, None, &buffer_snapshot);

        for chunk in chunks {
            let chunk_text = chunk.text;
            let chars_bitmap = chunk.chars;
            let tabs_bitmap = chunk.tabs;

            // Check empty chunks have empty bitmaps
            if chunk_text.is_empty() {
                assert_eq!(
                    chars_bitmap, 0,
                    "Empty chunk should have empty chars bitmap"
                );
                assert_eq!(tabs_bitmap, 0, "Empty chunk should have empty tabs bitmap");
                continue;
            }

            // Verify that chunk text doesn't exceed 128 bytes
            // (one bit per byte in a u128 bitmap).
            assert!(
                chunk_text.len() <= 128,
                "Chunk text length {} exceeds 128 bytes",
                chunk_text.len()
            );

            // Verify chars bitmap
            let char_indices = chunk_text
                .char_indices()
                .map(|(i, _)| i)
                .collect::<Vec<_>>();

            for byte_idx in 0..chunk_text.len() {
                let should_have_bit = char_indices.contains(&byte_idx);
                let has_bit = chars_bitmap & (1 << byte_idx) != 0;

                if has_bit != should_have_bit {
                    eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes());
                    eprintln!("Char indices: {:?}", char_indices);
                    eprintln!("Chars bitmap: {:#b}", chars_bitmap);
                    assert_eq!(
                        has_bit, should_have_bit,
                        "Chars bitmap mismatch at byte index {} in chunk {:?}. Expected bit: {}, Got bit: {}",
                        byte_idx, chunk_text, should_have_bit, has_bit
                    );
                }
            }

            // Verify tabs bitmap
            for (byte_idx, byte) in chunk_text.bytes().enumerate() {
                let is_tab = byte == b'\t';
                let has_bit = tabs_bitmap & (1 << byte_idx) != 0;

                if has_bit != is_tab {
                    eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes());
                    eprintln!("Tabs bitmap: {:#b}", tabs_bitmap);
                    assert_eq!(
                        has_bit, is_tab,
                        "Tabs bitmap mismatch at byte index {} in chunk {:?}. Byte: {:?}, Expected bit: {}, Got bit: {}",
                        byte_idx, chunk_text, byte as char, is_tab, has_bit
                    );
                }
            }
        }
    }
}
pub renderer: Option, + /// Bitmap of tab character locations in chunk + pub tabs: u128, + /// Bitmap of character locations in chunk + pub chars: u128, } #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] @@ -1391,6 +1405,7 @@ impl<'a> Iterator for FoldChunks<'a> { self.output_offset.0 += placeholder.text.len(); return Some(Chunk { text: placeholder.text, + chars: placeholder.chars, renderer: Some(placeholder.renderer.clone()), ..Default::default() }); @@ -1429,6 +1444,16 @@ impl<'a> Iterator for FoldChunks<'a> { chunk.text = &chunk.text [(self.inlay_offset - buffer_chunk_start).0..(chunk_end - buffer_chunk_start).0]; + let bit_end = (chunk_end - buffer_chunk_start).0; + let mask = if bit_end >= 128 { + u128::MAX + } else { + (1u128 << bit_end) - 1 + }; + + chunk.tabs = (chunk.tabs >> (self.inlay_offset - buffer_chunk_start).0) & mask; + chunk.chars = (chunk.chars >> (self.inlay_offset - buffer_chunk_start).0) & mask; + if chunk_end == transform_end { self.transform_cursor.next(); } else if chunk_end == buffer_chunk_end { @@ -1439,6 +1464,8 @@ impl<'a> Iterator for FoldChunks<'a> { self.output_offset.0 += chunk.text.len(); return Some(Chunk { text: chunk.text, + tabs: chunk.tabs, + chars: chunk.chars, syntax_highlight_id: chunk.syntax_highlight_id, highlight_style: chunk.highlight_style, diagnostic_severity: chunk.diagnostic_severity, @@ -2068,6 +2095,97 @@ mod tests { ); } + #[gpui::test(iterations = 100)] + fn test_random_chunk_bitmaps(cx: &mut gpui::App, mut rng: StdRng) { + init_test(cx); + + // Generate random buffer using existing test infrastructure + let text_len = rng.gen_range(0..10000); + let buffer = if rng.r#gen() { + let text = RandomCharIter::new(&mut rng) + .take(text_len) + .collect::(); + MultiBuffer::build_simple(&text, cx) + } else { + MultiBuffer::build_random(&mut rng, cx) + }; + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone()); + let (mut fold_map, _) = 
FoldMap::new(inlay_snapshot.clone()); + + // Perform random mutations + let mutation_count = rng.gen_range(1..10); + for _ in 0..mutation_count { + fold_map.randomly_mutate(&mut rng); + } + + let (snapshot, _) = fold_map.read(inlay_snapshot, vec![]); + + // Get all chunks and verify their bitmaps + let chunks = snapshot.chunks( + FoldOffset(0)..FoldOffset(snapshot.len().0), + false, + Highlights::default(), + ); + + for chunk in chunks { + let chunk_text = chunk.text; + let chars_bitmap = chunk.chars; + let tabs_bitmap = chunk.tabs; + + // Check empty chunks have empty bitmaps + if chunk_text.is_empty() { + assert_eq!( + chars_bitmap, 0, + "Empty chunk should have empty chars bitmap" + ); + assert_eq!(tabs_bitmap, 0, "Empty chunk should have empty tabs bitmap"); + continue; + } + + // Verify that chunk text doesn't exceed 128 bytes + assert!( + chunk_text.len() <= 128, + "Chunk text length {} exceeds 128 bytes", + chunk_text.len() + ); + + // Verify chars bitmap + let char_indices = chunk_text + .char_indices() + .map(|(i, _)| i) + .collect::>(); + + for byte_idx in 0..chunk_text.len() { + let should_have_bit = char_indices.contains(&byte_idx); + let has_bit = chars_bitmap & (1 << byte_idx) != 0; + + if has_bit != should_have_bit { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Char indices: {:?}", char_indices); + eprintln!("Chars bitmap: {:#b}", chars_bitmap); + assert_eq!( + has_bit, should_have_bit, + "Chars bitmap mismatch at byte index {} in chunk {:?}. Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, should_have_bit, has_bit + ); + } + } + + // Verify tabs bitmap + for (byte_idx, byte) in chunk_text.bytes().enumerate() { + let is_tab = byte == b'\t'; + let has_bit = tabs_bitmap & (1 << byte_idx) != 0; + + assert_eq!( + has_bit, is_tab, + "Tabs bitmap mismatch at byte index {} in chunk {:?}. 
Byte: {:?}, Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, byte as char, is_tab, has_bit + ); + } + } + } + fn init_test(cx: &mut gpui::App) { let store = SettingsStore::test(cx); cx.set_global(store); diff --git a/crates/editor/src/display_map/inlay_map.rs b/crates/editor/src/display_map/inlay_map.rs index 3db9d10fdc..b51f236f2d 100644 --- a/crates/editor/src/display_map/inlay_map.rs +++ b/crates/editor/src/display_map/inlay_map.rs @@ -11,7 +11,7 @@ use std::{ sync::Arc, }; use sum_tree::{Bias, Cursor, Dimensions, SumTree}; -use text::{Patch, Rope}; +use text::{ChunkBitmaps, Patch, Rope}; use ui::{ActiveTheme, IntoElement as _, ParentElement as _, Styled as _, div}; use super::{Highlights, custom_highlights::CustomHighlightsChunks, fold_map::ChunkRendererId}; @@ -245,8 +245,9 @@ pub struct InlayChunks<'a> { transforms: Cursor<'a, Transform, Dimensions>, buffer_chunks: CustomHighlightsChunks<'a>, buffer_chunk: Option>, - inlay_chunks: Option>, - inlay_chunk: Option<&'a str>, + inlay_chunks: Option>, + /// text, char bitmap, tabs bitmap + inlay_chunk: Option>, output_offset: InlayOffset, max_output_offset: InlayOffset, highlight_styles: HighlightStyles, @@ -316,11 +317,26 @@ impl<'a> Iterator for InlayChunks<'a> { let (prefix, suffix) = chunk.text.split_at(split_index); + let (chars, tabs) = if split_index == 128 { + let output = (chunk.chars, chunk.tabs); + chunk.chars = 0; + chunk.tabs = 0; + output + } else { + let mask = (1 << split_index) - 1; + let output = (chunk.chars & mask, chunk.tabs & mask); + chunk.chars = chunk.chars >> split_index; + chunk.tabs = chunk.tabs >> split_index; + output + }; chunk.text = suffix; self.output_offset.0 += prefix.len(); + // FIXME: chunk cloning is wrong because the bitmaps might be off InlayChunk { chunk: Chunk { text: prefix, + chars, + tabs, ..chunk.clone() }, renderer: None, @@ -397,9 +413,14 @@ impl<'a> Iterator for InlayChunks<'a> { let start = offset_in_inlay; let end = cmp::min(self.max_output_offset, 
self.transforms.end().0) - self.transforms.start().0; - inlay.text.chunks_in_range(start.0..end.0) + let chunks = inlay.text.chunks_in_range(start.0..end.0); + text::ChunkWithBitmaps(chunks) }); - let inlay_chunk = self + let ChunkBitmaps { + text: inlay_chunk, + chars, + tabs, + } = self .inlay_chunk .get_or_insert_with(|| inlay_chunks.next().unwrap()); @@ -421,6 +442,20 @@ impl<'a> Iterator for InlayChunks<'a> { let (chunk, remainder) = inlay_chunk.split_at(split_index); *inlay_chunk = remainder; + + let (chars, tabs) = if split_index == 128 { + let output = (*chars, *tabs); + *chars = 0; + *tabs = 0; + output + } else { + let mask = (1 << split_index as u32) - 1; + let output = (*chars & mask, *tabs & mask); + *chars = *chars >> split_index; + *tabs = *tabs >> split_index; + output + }; + if inlay_chunk.is_empty() { self.inlay_chunk = None; } @@ -430,6 +465,8 @@ impl<'a> Iterator for InlayChunks<'a> { InlayChunk { chunk: Chunk { text: chunk, + chars, + tabs, highlight_style, is_inlay: true, ..Chunk::default() @@ -1220,6 +1257,7 @@ mod tests { use std::{any::TypeId, cmp::Reverse, env, sync::Arc}; use sum_tree::TreeMap; use text::Patch; + use util::RandomCharIter; use util::post_inc; #[test] @@ -1962,6 +2000,102 @@ mod tests { } } + #[gpui::test(iterations = 100)] + fn test_random_chunk_bitmaps(cx: &mut gpui::App, mut rng: StdRng) { + init_test(cx); + + // Generate random buffer using existing test infrastructure + let text_len = rng.gen_range(0..10000); + let buffer = if rng.r#gen() { + let text = RandomCharIter::new(&mut rng) + .take(text_len) + .collect::(); + MultiBuffer::build_simple(&text, cx) + } else { + MultiBuffer::build_random(&mut rng, cx) + }; + + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (mut inlay_map, _) = InlayMap::new(buffer_snapshot.clone()); + + // Perform random mutations to add inlays + let mut next_inlay_id = 0; + let mutation_count = rng.gen_range(1..10); + for _ in 0..mutation_count { + inlay_map.randomly_mutate(&mut 
next_inlay_id, &mut rng); + } + + let (snapshot, _) = inlay_map.sync(buffer_snapshot, vec![]); + + // Get all chunks and verify their bitmaps + let chunks = snapshot.chunks( + InlayOffset(0)..InlayOffset(snapshot.len().0), + false, + Highlights::default(), + ); + + for chunk in chunks.into_iter().map(|inlay_chunk| inlay_chunk.chunk) { + let chunk_text = chunk.text; + let chars_bitmap = chunk.chars; + let tabs_bitmap = chunk.tabs; + + // Check empty chunks have empty bitmaps + if chunk_text.is_empty() { + assert_eq!( + chars_bitmap, 0, + "Empty chunk should have empty chars bitmap" + ); + assert_eq!(tabs_bitmap, 0, "Empty chunk should have empty tabs bitmap"); + continue; + } + + // Verify that chunk text doesn't exceed 128 bytes + assert!( + chunk_text.len() <= 128, + "Chunk text length {} exceeds 128 bytes", + chunk_text.len() + ); + + // Verify chars bitmap + let char_indices = chunk_text + .char_indices() + .map(|(i, _)| i) + .collect::>(); + + for byte_idx in 0..chunk_text.len() { + let should_have_bit = char_indices.contains(&byte_idx); + let has_bit = chars_bitmap & (1 << byte_idx) != 0; + + if has_bit != should_have_bit { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Char indices: {:?}", char_indices); + eprintln!("Chars bitmap: {:#b}", chars_bitmap); + assert_eq!( + has_bit, should_have_bit, + "Chars bitmap mismatch at byte index {} in chunk {:?}. Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, should_have_bit, has_bit + ); + } + } + + // Verify tabs bitmap + for (byte_idx, byte) in chunk_text.bytes().enumerate() { + let is_tab = byte == b'\t'; + let has_bit = tabs_bitmap & (1 << byte_idx) != 0; + + if has_bit != is_tab { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Tabs bitmap: {:#b}", tabs_bitmap); + assert_eq!( + has_bit, is_tab, + "Tabs bitmap mismatch at byte index {} in chunk {:?}. 
Byte: {:?}, Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, byte as char, is_tab, has_bit + ); + } + } + } + } + fn init_test(cx: &mut App) { let store = SettingsStore::test(cx); cx.set_global(store); diff --git a/crates/editor/src/display_map/tab_map.rs b/crates/editor/src/display_map/tab_map.rs index 6f5df9bb8e..5c54c4eeeb 100644 --- a/crates/editor/src/display_map/tab_map.rs +++ b/crates/editor/src/display_map/tab_map.rs @@ -2,6 +2,7 @@ use super::{ Highlights, fold_map::{self, Chunk, FoldChunks, FoldEdit, FoldPoint, FoldSnapshot}, }; + use language::Point; use multi_buffer::MultiBufferSnapshot; use std::{cmp, mem, num::NonZeroU32, ops::Range}; @@ -72,6 +73,7 @@ impl TabMap { false, Highlights::default(), ) { + // todo!(performance use tabs bitmask) for (ix, _) in chunk.text.match_indices('\t') { let offset_from_edit = offset_from_edit + (ix as u32); if first_tab_offset.is_none() { @@ -299,21 +301,29 @@ impl TabSnapshot { } pub fn to_tab_point(&self, input: FoldPoint) -> TabPoint { - let chars = self.fold_snapshot.chars_at(FoldPoint::new(input.row(), 0)); - let expanded = self.expand_tabs(chars, input.column()); + let chunks = self.fold_snapshot.chunks_at(FoldPoint::new(input.row(), 0)); + let tab_cursor = TabStopCursor::new(chunks); + let expanded = self.expand_tabs(tab_cursor, input.column()); TabPoint::new(input.row(), expanded) } pub fn to_fold_point(&self, output: TabPoint, bias: Bias) -> (FoldPoint, u32, u32) { - let chars = self.fold_snapshot.chars_at(FoldPoint::new(output.row(), 0)); + let chunks = self + .fold_snapshot + .chunks_at(FoldPoint::new(output.row(), 0)); + + let tab_cursor = TabStopCursor::new(chunks); let expanded = output.column(); let (collapsed, expanded_char_column, to_next_stop) = - self.collapse_tabs(chars, expanded, bias); - ( + self.collapse_tabs(tab_cursor, expanded, bias); + + let result = ( FoldPoint::new(output.row(), collapsed), expanded_char_column, to_next_stop, - ) + ); + + result } pub fn make_tab_point(&self, point: 
    /// Converts a collapsed (fold-space) byte `column` on one line into the
    /// corresponding tab-expanded column.
    ///
    /// `cursor` is advanced from tab stop to tab stop until it has covered
    /// `min(column, self.max_expansion_column)` bytes; each tab stop found
    /// contributes its expanded width. Any part of `column` beyond the bytes
    /// the cursor covered is added to the result unchanged.
    // todo!(performance use tabs bitmask)
    fn expand_tabs(&self, mut cursor: TabStopCursor, column: u32) -> u32 {
        let tab_size = self.tab_size.get();

        // Tabs past `max_expansion_column` are not expanded.
        let end_column = column.min(self.max_expansion_column);
        let mut seek_target = end_column;
        // Number of tabs seen so far, and the total width they expand to.
        let mut tab_count = 0;
        let mut expanded_tab_len = 0;

        while let Some(tab_stop) = cursor.seek(seek_target) {
            // Character position of this tab in expanded space: every earlier
            // tab counts as its expanded width instead of one character.
            let expanded_chars_old = tab_stop.char_offset + expanded_tab_len - tab_count;
            // NOTE(review): the `- 1` presumably relies on `char_offset`
            // already counting the tab itself (so it is never 0) — confirm
            // against `TabStopCursor::seek`.
            let tab_len = tab_size - ((expanded_chars_old - 1) % tab_size);
            tab_count += 1;
            expanded_tab_len += tab_len;

            // Remaining distance to the end column from the cursor's position.
            seek_target = end_column - cursor.byte_offset;
        }

        // If the cursor stopped inside a multi-byte character, count the rest
        // of that character's bytes as well.
        let left_over_char_bytes = if !cursor.is_char_boundary() {
            cursor.bytes_until_next_char().unwrap_or(0) as u32
        } else {
            0
        };

        let collapsed_bytes = cursor.byte_offset() + left_over_char_bytes;
        // Each tab occupies one collapsed byte but `tab_len` expanded columns.
        let expanded_bytes =
            cursor.byte_offset() + expanded_tab_len - tab_count + left_over_char_bytes;

        expanded_bytes + column.saturating_sub(collapsed_bytes)
    }
expanded_chars = 0; - let mut collapsed_bytes = 0; - for c in chars { - if expanded_bytes >= column { - break; - } - if collapsed_bytes >= self.max_expansion_column { - break; - } + while let Some(tab_stop) = cursor.seek(seek_target) { + // Calculate how much we want to expand this tab stop (into spaces) + let expanded_chars_old = tab_stop.char_offset + expanded_tab_len - tab_count; + let tab_len = tab_size - ((expanded_chars_old - 1) % tab_size); + // Increment tab count + tab_count += 1; + // The count of how many spaces we've added to this line in place of tab bytes + expanded_tab_len += tab_len; - if c == '\t' { - let tab_len = tab_size - (expanded_chars % tab_size); - expanded_chars += tab_len; - expanded_bytes += tab_len; - if expanded_bytes > column { - expanded_chars -= expanded_bytes - column; - return match bias { - Bias::Left => (collapsed_bytes, expanded_chars, expanded_bytes - column), - Bias::Right => (collapsed_bytes + 1, expanded_chars, 0), - }; - } + // The count of bytes at this point in the iteration while considering tab_count and previous expansions + let expanded_bytes = tab_stop.byte_offset + expanded_tab_len - tab_count; + + // Did we expand past the search target? 
+ if expanded_bytes > column { + let mut expanded_chars = tab_stop.char_offset + expanded_tab_len - tab_count; + // We expanded past the search target, so need to account for the offshoot + expanded_chars -= expanded_bytes - column; + return match bias { + Bias::Left => ( + cursor.byte_offset() - 1, + expanded_chars, + expanded_bytes - column, + ), + Bias::Right => (cursor.byte_offset(), expanded_chars, 0), + }; } else { - expanded_chars += 1; - expanded_bytes += c.len_utf8() as u32; + // otherwise we only want to move the cursor collapse column forward + collapsed_column = collapsed_column - tab_len + 1; + seek_target = (collapsed_column - cursor.byte_offset) + .min(self.max_expansion_column - cursor.byte_offset); } - - if expanded_bytes > column && matches!(bias, Bias::Left) { - expanded_chars -= 1; - break; - } - - collapsed_bytes += c.len_utf8() as u32; } + + let collapsed_bytes = cursor.byte_offset(); + let expanded_bytes = cursor.byte_offset() + expanded_tab_len - tab_count; + let expanded_chars = cursor.char_offset() + expanded_tab_len - tab_count; ( collapsed_bytes + column.saturating_sub(expanded_bytes), expanded_chars, @@ -523,6 +541,8 @@ impl TabChunks<'_> { self.chunk = Chunk { text: &SPACES[0..(to_next_stop as usize)], is_tab: true, + // todo!(check that this logic is correct) + chars: (1u128 << to_next_stop) - 1, ..Default::default() }; self.inside_leading_tab = to_next_stop > 0; @@ -546,18 +566,37 @@ impl<'a> Iterator for TabChunks<'a> { } } + //todo!(improve performance by using tab cursor) for (ix, c) in self.chunk.text.char_indices() { match c { '\t' => { if ix > 0 { let (prefix, suffix) = self.chunk.text.split_at(ix); + + let (chars, tabs) = if ix == 128 { + let output = (self.chunk.chars, self.chunk.tabs); + self.chunk.chars = 0; + self.chunk.tabs = 0; + output + } else { + let mask = (1 << ix) - 1; + let output = (self.chunk.chars & mask, self.chunk.tabs & mask); + self.chunk.chars = self.chunk.chars >> ix; + self.chunk.tabs = self.chunk.tabs 
>> ix; + output + }; + self.chunk.text = suffix; return Some(Chunk { text: prefix, + chars, + tabs, ..self.chunk.clone() }); } else { self.chunk.text = &self.chunk.text[1..]; + self.chunk.tabs >>= 1; + self.chunk.chars >>= 1; let tab_size = if self.input_column < self.max_expansion_column { self.tab_size.get() } else { @@ -575,6 +614,8 @@ impl<'a> Iterator for TabChunks<'a> { return Some(Chunk { text: &SPACES[..len as usize], is_tab: true, + chars: (1 << len) - 1, + tabs: 0, ..self.chunk.clone() }); } @@ -603,21 +644,271 @@ mod tests { use super::*; use crate::{ MultiBuffer, - display_map::{fold_map::FoldMap, inlay_map::InlayMap}, + display_map::{ + fold_map::{FoldMap, FoldOffset}, + inlay_map::InlayMap, + }, }; use rand::{Rng, prelude::StdRng}; + use util; + + impl TabSnapshot { + fn expected_collapse_tabs( + &self, + chars: impl Iterator, + column: u32, + bias: Bias, + ) -> (u32, u32, u32) { + let tab_size = self.tab_size.get(); + + let mut expanded_bytes = 0; + let mut expanded_chars = 0; + let mut collapsed_bytes = 0; + for c in chars { + if expanded_bytes >= column { + break; + } + if collapsed_bytes >= self.max_expansion_column { + break; + } + + if c == '\t' { + let tab_len = tab_size - (expanded_chars % tab_size); + expanded_chars += tab_len; + expanded_bytes += tab_len; + if expanded_bytes > column { + expanded_chars -= expanded_bytes - column; + return match bias { + Bias::Left => { + (collapsed_bytes, expanded_chars, expanded_bytes - column) + } + Bias::Right => (collapsed_bytes + 1, expanded_chars, 0), + }; + } + } else { + expanded_chars += 1; + expanded_bytes += c.len_utf8() as u32; + } + + if expanded_bytes > column && matches!(bias, Bias::Left) { + expanded_chars -= 1; + break; + } + + collapsed_bytes += c.len_utf8() as u32; + } + + ( + collapsed_bytes + column.saturating_sub(expanded_bytes), + expanded_chars, + 0, + ) + } + + pub fn expected_to_tab_point(&self, input: FoldPoint) -> TabPoint { + let chars = 
self.fold_snapshot.chars_at(FoldPoint::new(input.row(), 0)); + let expanded = self.expected_expand_tabs(chars, input.column()); + TabPoint::new(input.row(), expanded) + } + + fn expected_expand_tabs(&self, chars: impl Iterator, column: u32) -> u32 { + let tab_size = self.tab_size.get(); + + let mut expanded_chars = 0; + let mut expanded_bytes = 0; + let mut collapsed_bytes = 0; + let end_column = column.min(self.max_expansion_column); + for c in chars { + if collapsed_bytes >= end_column { + break; + } + if c == '\t' { + let tab_len = tab_size - expanded_chars % tab_size; + expanded_bytes += tab_len; + expanded_chars += tab_len; + } else { + expanded_bytes += c.len_utf8() as u32; + expanded_chars += 1; + } + collapsed_bytes += c.len_utf8() as u32; + } + + expanded_bytes + column.saturating_sub(collapsed_bytes) + } + + fn expected_to_fold_point(&self, output: TabPoint, bias: Bias) -> (FoldPoint, u32, u32) { + let chars = self.fold_snapshot.chars_at(FoldPoint::new(output.row(), 0)); + let expanded = output.column(); + let (collapsed, expanded_char_column, to_next_stop) = + self.expected_collapse_tabs(chars, expanded, bias); + ( + FoldPoint::new(output.row(), collapsed), + expanded_char_column, + to_next_stop, + ) + } + } #[gpui::test] fn test_expand_tabs(cx: &mut gpui::App) { + let test_values = [ + ("κg🏀 f\nwo🏀❌by🍐❎β🍗c\tβ❎ \ncλ🎉", 17), + (" \twςe", 4), + ("fε", 1), + ("i❎\t", 3), + ]; let buffer = MultiBuffer::build_simple("", cx); let buffer_snapshot = buffer.read(cx).snapshot(cx); let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot); let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); let (_, tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap()); - assert_eq!(tab_snapshot.expand_tabs("\t".chars(), 0), 0); - assert_eq!(tab_snapshot.expand_tabs("\t".chars(), 1), 4); - assert_eq!(tab_snapshot.expand_tabs("\ta".chars(), 2), 5); + for (text, column) in test_values { + let mut tabs = 0u128; + let mut chars = 0u128; + for (idx, c) in 
text.char_indices() { + if c == '\t' { + tabs |= 1 << idx; + } + chars |= 1 << idx; + } + + let chunks = [Chunk { + text, + tabs, + chars, + ..Default::default() + }]; + + let cursor = TabStopCursor::new(chunks); + + assert_eq!( + tab_snapshot.expected_expand_tabs(text.chars(), column), + tab_snapshot.expand_tabs(cursor, column) + ); + } + } + + #[gpui::test] + fn test_collapse_tabs(cx: &mut gpui::App) { + let input = "A\tBC\tDEF\tG\tHI\tJ\tK\tL\tM"; + + let buffer = MultiBuffer::build_simple(input, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone()); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + let (_, tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap()); + + for (ix, _) in input.char_indices() { + let range = TabPoint::new(0, ix as u32)..tab_snapshot.max_point(); + + assert_eq!( + tab_snapshot.expected_to_fold_point(range.start, Bias::Left), + tab_snapshot.to_fold_point(range.start, Bias::Left), + "Failed with tab_point at column {ix}" + ); + assert_eq!( + tab_snapshot.expected_to_fold_point(range.start, Bias::Right), + tab_snapshot.to_fold_point(range.start, Bias::Right), + "Failed with tab_point at column {ix}" + ); + + assert_eq!( + tab_snapshot.expected_to_fold_point(range.end, Bias::Left), + tab_snapshot.to_fold_point(range.end, Bias::Left), + "Failed with tab_point at column {ix}" + ); + assert_eq!( + tab_snapshot.expected_to_fold_point(range.end, Bias::Right), + tab_snapshot.to_fold_point(range.end, Bias::Right), + "Failed with tab_point at column {ix}" + ); + } + } + + // todo!(We should have a randomized test here as well) + #[gpui::test] + fn test_to_fold_point_panic_reproduction(cx: &mut gpui::App) { + // This test reproduces a specific panic where to_fold_point returns incorrect results + let _text = "use macro_rules_attribute::apply;\nuse serde_json::Value;\nuse smol::{\n io::AsyncReadExt,\n process::{Command, Stdio},\n};\nuse smol_macros::main;\nuse 
std::io;\n\nfn test_random() {\n // Generate a random value\n let random_value = std::time::SystemTime::now()\n .duration_since(std::time::UNIX_EPOCH)\n .unwrap()\n .as_secs()\n % 100;\n\n // Create some complex nested data structures\n let mut vector = Vec::new();\n for i in 0..random_value {\n vector.push(i);\n }\n "; + + let text = "γ\tw⭐\n🍐🍗 \t"; + let buffer = MultiBuffer::build_simple(text, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone()); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + let (_, tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap()); + + // This should panic with the expected vs actual mismatch + let tab_point = TabPoint::new(0, 9); + let result = tab_snapshot.to_fold_point(tab_point, Bias::Left); + let expected = tab_snapshot.expected_to_fold_point(tab_point, Bias::Left); + + assert_eq!(result, expected); + } + + #[gpui::test(iterations = 100)] + fn test_collapse_tabs_random(cx: &mut gpui::App, mut rng: StdRng) { + // Generate random input string with up to 200 characters including tabs + // to stay within the MAX_EXPANSION_COLUMN limit of 256 + let len = rng.gen_range(0..=2048); + let tab_size = NonZeroU32::new(rng.gen_range(1..=4)).unwrap(); + let mut input = String::with_capacity(len); + + for _ in 0..len { + if rng.gen_bool(0.1) { + // 10% chance of inserting a tab + input.push('\t'); + } else { + // 90% chance of inserting a random ASCII character (excluding tab, newline, carriage return) + let ch = loop { + let ascii_code = rng.gen_range(32..=126); // printable ASCII range + let ch = ascii_code as u8 as char; + if ch != '\t' { + break ch; + } + }; + input.push(ch); + } + } + + let buffer = MultiBuffer::build_simple(&input, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone()); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + let (_, mut tab_snapshot) = 
TabMap::new(fold_snapshot, 4.try_into().unwrap()); + tab_snapshot.max_expansion_column = rng.gen_range(0..323); + tab_snapshot.tab_size = tab_size; + + for (ix, _) in input.char_indices() { + let range = TabPoint::new(0, ix as u32)..tab_snapshot.max_point(); + + assert_eq!( + tab_snapshot.expected_to_fold_point(range.start, Bias::Left), + tab_snapshot.to_fold_point(range.start, Bias::Left), + "Failed with input: {}, with idx: {ix}", + input + ); + assert_eq!( + tab_snapshot.expected_to_fold_point(range.start, Bias::Right), + tab_snapshot.to_fold_point(range.start, Bias::Right), + "Failed with input: {}, with idx: {ix}", + input + ); + + assert_eq!( + tab_snapshot.expected_to_fold_point(range.end, Bias::Left), + tab_snapshot.to_fold_point(range.end, Bias::Left), + "Failed with input: {}, with idx: {ix}", + input + ); + assert_eq!( + tab_snapshot.expected_to_fold_point(range.end, Bias::Right), + tab_snapshot.to_fold_point(range.end, Bias::Right), + "Failed with input: {}, with idx: {ix}", + input + ); + } } #[gpui::test] @@ -811,4 +1102,475 @@ mod tests { ); } } + + #[gpui::test(iterations = 100)] + fn test_to_tab_point_random(cx: &mut gpui::App, mut rng: StdRng) { + let tab_size = NonZeroU32::new(rng.gen_range(1..=16)).unwrap(); + let len = rng.gen_range(0..=2000); + + // Generate random text using RandomCharIter + let text = util::RandomCharIter::new(&mut rng) + .take(len) + .collect::(); + + // Create buffer and tab map + let buffer = MultiBuffer::build_simple(&text, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (mut inlay_map, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone()); + let (mut fold_map, fold_snapshot) = FoldMap::new(inlay_snapshot); + let (mut tab_map, _) = TabMap::new(fold_snapshot.clone(), tab_size); + + let mut next_inlay_id = 0; + let (inlay_snapshot, inlay_edits) = inlay_map.randomly_mutate(&mut next_inlay_id, &mut rng); + let (fold_snapshot, fold_edits) = fold_map.read(inlay_snapshot, inlay_edits); + let max_fold_point = 
fold_snapshot.max_point(); + let (mut tab_snapshot, _) = tab_map.sync(fold_snapshot.clone(), fold_edits, tab_size); + + // Test random fold points + for _ in 0..50 { + tab_snapshot.max_expansion_column = rng.gen_range(0..=256); + // Generate random fold point + let row = rng.gen_range(0..=max_fold_point.row()); + let max_column = if row < max_fold_point.row() { + fold_snapshot.line_len(row) + } else { + max_fold_point.column() + }; + let column = rng.gen_range(0..=max_column + 10); + let fold_point = FoldPoint::new(row, column); + + let actual = tab_snapshot.to_tab_point(fold_point); + let expected = tab_snapshot.expected_to_tab_point(fold_point); + + assert_eq!( + actual, expected, + "to_tab_point mismatch for fold_point {:?} in text {:?}", + fold_point, text + ); + } + } + + #[gpui::test] + fn test_tab_stop_cursor_utf8(cx: &mut gpui::App) { + let text = "\tfoo\tbarbarbar\t\tbaz\n"; + let buffer = MultiBuffer::build_simple(text, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone()); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + let chunks = fold_snapshot.chunks( + FoldOffset(0)..fold_snapshot.len(), + false, + Default::default(), + ); + let mut cursor = TabStopCursor::new(chunks); + assert!(cursor.seek(0).is_none()); + let mut tab_stops = Vec::new(); + + let mut all_tab_stops = Vec::new(); + let mut byte_offset = 0; + for (offset, ch) in buffer.read(cx).snapshot(cx).text().char_indices() { + byte_offset += ch.len_utf8() as u32; + + if ch == '\t' { + all_tab_stops.push(TabStop { + byte_offset, + char_offset: offset as u32 + 1, + }); + } + } + + while let Some(tab_stop) = cursor.seek(u32::MAX) { + tab_stops.push(tab_stop); + } + pretty_assertions::assert_eq!(tab_stops.as_slice(), all_tab_stops.as_slice(),); + + assert_eq!(cursor.byte_offset(), byte_offset); + } + + #[gpui::test] + fn test_tab_stop_with_end_range_utf8(cx: &mut gpui::App) { + let input = "A\tBC\t"; // 
DEF\tG\tHI\tJ\tK\tL\tM + + let buffer = MultiBuffer::build_simple(input, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone()); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + + let chunks = fold_snapshot.chunks_at(FoldPoint::new(0, 0)); + let mut cursor = TabStopCursor::new(chunks); + + let mut actual_tab_stops = Vec::new(); + + let mut expected_tab_stops = Vec::new(); + let mut byte_offset = 0; + for (offset, ch) in buffer.read(cx).snapshot(cx).text().char_indices() { + byte_offset += ch.len_utf8() as u32; + + if ch == '\t' { + expected_tab_stops.push(TabStop { + byte_offset, + char_offset: offset as u32 + 1, + }); + } + } + + while let Some(tab_stop) = cursor.seek(u32::MAX) { + actual_tab_stops.push(tab_stop); + } + pretty_assertions::assert_eq!(actual_tab_stops.as_slice(), expected_tab_stops.as_slice(),); + + assert_eq!(cursor.byte_offset(), byte_offset); + } + + #[gpui::test(iterations = 100)] + fn test_tab_stop_cursor_random_utf8(cx: &mut gpui::App, mut rng: StdRng) { + // Generate random input string with up to 512 characters including tabs + let len = rng.gen_range(0..=2048); + let mut input = String::with_capacity(len); + + let mut skip_tabs = rng.gen_bool(0.10); + for idx in 0..len { + if idx % 128 == 0 { + skip_tabs = rng.gen_bool(0.10); + } + + if rng.gen_bool(0.15) && !skip_tabs { + input.push('\t'); + } else { + let ch = loop { + let ascii_code = rng.gen_range(32..=126); // printable ASCII range + let ch = ascii_code as u8 as char; + if ch != '\t' { + break ch; + } + }; + input.push(ch); + } + } + + // Build the buffer and create cursor + let buffer = MultiBuffer::build_simple(&input, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone()); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + + // First, collect all expected tab positions + let mut all_tab_stops = Vec::new(); + let mut 
byte_offset = 1; + let mut char_offset = 1; + for ch in buffer_snapshot.text().chars() { + if ch == '\t' { + all_tab_stops.push(TabStop { + byte_offset, + char_offset, + }); + } + byte_offset += ch.len_utf8() as u32; + char_offset += 1; + } + + // Test with various distances + let distances = vec![1, 5, 10, 50, 100, u32::MAX]; + // let distances = vec![150]; + + for distance in distances { + let chunks = fold_snapshot.chunks_at(FoldPoint::new(0, 0)); + let mut cursor = TabStopCursor::new(chunks); + + let mut found_tab_stops = Vec::new(); + let mut position = distance; + while let Some(tab_stop) = cursor.seek(position) { + found_tab_stops.push(tab_stop); + position = distance - tab_stop.byte_offset; + } + + let expected_found_tab_stops: Vec<_> = all_tab_stops + .iter() + .take_while(|tab_stop| tab_stop.byte_offset <= distance) + .cloned() + .collect(); + + pretty_assertions::assert_eq!( + found_tab_stops, + expected_found_tab_stops, + "TabStopCursor output mismatch for distance {}. Input: {:?}", + distance, + input + ); + + let final_position = cursor.byte_offset(); + if !found_tab_stops.is_empty() { + let last_tab_stop = found_tab_stops.last().unwrap(); + assert!( + final_position >= last_tab_stop.byte_offset, + "Cursor final position {} is before last tab stop {}. 
Input: {:?}", + final_position, + last_tab_stop.byte_offset, + input + ); + } + } + } + + #[gpui::test] + fn test_tab_stop_cursor_utf16(cx: &mut gpui::App) { + let text = "\r\t😁foo\tb😀arbar🤯bar\t\tbaz\n"; + let buffer = MultiBuffer::build_simple(text, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone()); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + let chunks = fold_snapshot.chunks( + FoldOffset(0)..fold_snapshot.len(), + false, + Default::default(), + ); + let mut cursor = TabStopCursor::new(chunks); + assert!(cursor.seek(0).is_none()); + + let mut expected_tab_stops = Vec::new(); + let mut byte_offset = 0; + for (i, ch) in fold_snapshot.chars_at(FoldPoint::new(0, 0)).enumerate() { + byte_offset += ch.len_utf8() as u32; + + if ch == '\t' { + expected_tab_stops.push(TabStop { + byte_offset, + char_offset: i as u32 + 1, + }); + } + } + + let mut actual_tab_stops = Vec::new(); + while let Some(tab_stop) = cursor.seek(u32::MAX) { + actual_tab_stops.push(tab_stop); + } + + pretty_assertions::assert_eq!(actual_tab_stops.as_slice(), expected_tab_stops.as_slice(),); + + assert_eq!(cursor.byte_offset(), byte_offset); + } + + #[gpui::test(iterations = 100)] + fn test_tab_stop_cursor_random_utf16(cx: &mut gpui::App, mut rng: StdRng) { + // Generate random input string with up to 512 characters including tabs + let len = rng.gen_range(0..=2048); + let input = util::RandomCharIter::new(&mut rng) + .take(len) + .collect::(); + + // Build the buffer and create cursor + let buffer = MultiBuffer::build_simple(&input, cx); + let buffer_snapshot = buffer.read(cx).snapshot(cx); + let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone()); + let (_, fold_snapshot) = FoldMap::new(inlay_snapshot); + + // First, collect all expected tab positions + let mut all_tab_stops = Vec::new(); + let mut byte_offset = 0; + for (i, ch) in buffer_snapshot.text().chars().enumerate() { + byte_offset += 
ch.len_utf8() as u32; + if ch == '\t' { + all_tab_stops.push(TabStop { + byte_offset, + char_offset: i as u32 + 1, + }); + } + } + + // Test with various distances + // let distances = vec![1, 5, 10, 50, 100, u32::MAX]; + let distances = vec![150]; + + for distance in distances { + let chunks = fold_snapshot.chunks_at(FoldPoint::new(0, 0)); + let mut cursor = TabStopCursor::new(chunks); + + let mut found_tab_stops = Vec::new(); + let mut position = distance; + while let Some(tab_stop) = cursor.seek(position) { + found_tab_stops.push(tab_stop); + position = distance - tab_stop.byte_offset; + } + + let expected_found_tab_stops: Vec<_> = all_tab_stops + .iter() + .take_while(|tab_stop| tab_stop.byte_offset <= distance) + .cloned() + .collect(); + + pretty_assertions::assert_eq!( + found_tab_stops, + expected_found_tab_stops, + "TabStopCursor output mismatch for distance {}. Input: {:?}", + distance, + input + ); + + let final_position = cursor.byte_offset(); + if !found_tab_stops.is_empty() { + let last_tab_stop = found_tab_stops.last().unwrap(); + assert!( + final_position >= last_tab_stop.byte_offset, + "Cursor final position {} is before last tab stop {}. Input: {:?}", + final_position, + last_tab_stop.byte_offset, + input + ); + } + } + } +} + +struct TabStopCursor<'a> { + chunks: Box> + 'a>, + byte_offset: u32, + char_offset: u32, + /// Chunk + /// last tab position iterated through + current_chunk: Option<(Chunk<'a>, u32)>, +} + +impl<'a> TabStopCursor<'a> { + fn new(chunks: impl IntoIterator> + 'a) -> Self { + Self { + chunks: Box::new(chunks.into_iter()), + byte_offset: 0, + char_offset: 0, + current_chunk: None, + } + } + + fn bytes_until_next_char(&self) -> Option { + self.current_chunk.as_ref().and_then(|(chunk, idx)| { + let mut idx = *idx; + let mut diff = 0; + while idx > 0 && chunk.chars & (1 << idx) == 0 { + idx -= 1; + diff += 1; + } + + if chunk.chars & (1 << idx) != 0 { + Some( + (chunk.text[idx as usize..].chars().next()?) 
+ .len_utf8() + .saturating_sub(diff), + ) + } else { + None + } + }) + } + + fn is_char_boundary(&self) -> bool { + // FIXME: if idx is 128 should we return false or be at the next chunk? + // idx might also be 1-indexed instead of 0-indexed, need to double check + self.current_chunk + .as_ref() + .is_some_and(|(chunk, idx)| (chunk.chars & (1 << *idx.min(&127))) != 0) + } + + /// distance: length to move forward while searching for the next tab stop + fn seek(&mut self, distance: u32) -> Option { + if distance == 0 { + return None; + } + + let mut distance_traversed = 0; + + while let Some((mut chunk, chunk_position)) = self + .current_chunk + .take() + .or_else(|| self.chunks.next().zip(Some(0))) + { + if chunk.tabs == 0 { + let chunk_distance = chunk.text.len() as u32 - chunk_position; + if chunk_distance + distance_traversed >= distance { + let overshoot = distance_traversed.abs_diff(distance); + + self.byte_offset += overshoot; + self.char_offset += get_char_offset( + chunk_position..(chunk_position + overshoot).saturating_sub(1).min(127), + chunk.chars, + ); + + self.current_chunk = Some((chunk, chunk_position + overshoot)); + + return None; + } + + self.byte_offset += chunk_distance; + self.char_offset += get_char_offset( + chunk_position..(chunk_position + chunk_distance).saturating_sub(1).min(127), + chunk.chars, + ); + distance_traversed += chunk_distance; + continue; + } + let tab_position = chunk.tabs.trailing_zeros() + 1; + + if distance_traversed + tab_position - chunk_position > distance { + let cursor_position = distance_traversed.abs_diff(distance); + + self.char_offset += get_char_offset( + chunk_position..(chunk_position + cursor_position - 1), + chunk.chars, + ); + self.current_chunk = Some((chunk, cursor_position + chunk_position)); + self.byte_offset += cursor_position; + + return None; + } + + self.byte_offset += tab_position - chunk_position; + self.char_offset += get_char_offset(chunk_position..(tab_position - 1), chunk.chars); + + let 
tabstop = TabStop { + char_offset: self.char_offset, + byte_offset: self.byte_offset, + }; + + chunk.tabs = (chunk.tabs - 1) & chunk.tabs; + + if tab_position as usize != chunk.text.len() { + self.current_chunk = Some((chunk, tab_position)); + } + + return Some(tabstop); + } + + None + } + + fn byte_offset(&self) -> u32 { + self.byte_offset + } + + fn char_offset(&self) -> u32 { + self.char_offset + } +} + +#[inline(always)] +fn get_char_offset(range: Range, bit_map: u128) -> u32 { + // This edge case can happen when we're at chunk position 128 + + if range.start == range.end { + return if (1u128 << range.start) & bit_map == 0 { + 0 + } else { + 1 + }; + } + let end_shift: u128 = 127u128 - range.end.min(127) as u128; + let mut bit_mask = (u128::MAX >> range.start) << range.start; + bit_mask = (bit_mask << end_shift) >> end_shift; + let bit_map = bit_map & bit_mask; + + bit_map.count_ones() +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +struct TabStop { + char_offset: u32, + byte_offset: u32, } diff --git a/crates/editor/src/display_map/wrap_map.rs b/crates/editor/src/display_map/wrap_map.rs index 500ec3a0bb..d130824cd1 100644 --- a/crates/editor/src/display_map/wrap_map.rs +++ b/crates/editor/src/display_map/wrap_map.rs @@ -970,9 +970,25 @@ impl<'a> Iterator for WrapChunks<'a> { } let (prefix, suffix) = self.input_chunk.text.split_at(input_len); + + let (chars, tabs) = if input_len == 128 { + let output = (self.input_chunk.chars, self.input_chunk.tabs); + self.input_chunk.chars = 0; + self.input_chunk.tabs = 0; + output + } else { + let mask = (1 << input_len) - 1; + let output = (self.input_chunk.chars & mask, self.input_chunk.tabs & mask); + self.input_chunk.chars = self.input_chunk.chars >> input_len; + self.input_chunk.tabs = self.input_chunk.tabs >> input_len; + output + }; + self.input_chunk.text = suffix; Some(Chunk { text: prefix, + chars, + tabs, ..self.input_chunk.clone() }) } diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs 
index 29e009fdf8..8150712894 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -18424,12 +18424,7 @@ impl Editor { } /// called by the Element so we know what style we were most recently rendered with. - pub(crate) fn set_style( - &mut self, - style: EditorStyle, - window: &mut Window, - cx: &mut Context, - ) { + pub fn set_style(&mut self, style: EditorStyle, window: &mut Window, cx: &mut Context) { // We intentionally do not inform the display map about the minimap style // so that wrapping is not recalculated and stays consistent for the editor // and its linked minimap. diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 4ddc2b3018..65db37699d 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -27,6 +27,7 @@ use gpui::{ App, AppContext as _, Context, Entity, EventEmitter, HighlightStyle, SharedString, StyledText, Task, TaskLabel, TextStyle, }; + use lsp::{LanguageServerId, NumberOrString}; use parking_lot::Mutex; use schemars::JsonSchema; @@ -494,6 +495,10 @@ pub struct Chunk<'a> { pub is_unnecessary: bool, /// Whether this chunk of text was originally a tab character. pub is_tab: bool, + /// A bitset of which characters are tabs in this string. + pub tabs: u128, + /// Bitmap of character indices in this chunk + pub chars: u128, /// Whether this chunk of text was originally a tab character. pub is_inlay: bool, /// Whether to underline the corresponding text range in the editor. 
@@ -4772,7 +4777,12 @@ impl<'a> Iterator for BufferChunks<'a> { } self.diagnostic_endpoints = diagnostic_endpoints; - if let Some(chunk) = self.chunks.peek() { + if let Some(ChunkBitmaps { + text: chunk, + chars: chars_map, + tabs, + }) = self.chunks.peek_tabs() + { let chunk_start = self.range.start; let mut chunk_end = (self.chunks.offset() + chunk.len()) .min(next_capture_start) @@ -4787,6 +4797,16 @@ impl<'a> Iterator for BufferChunks<'a> { let slice = &chunk[chunk_start - self.chunks.offset()..chunk_end - self.chunks.offset()]; + let bit_end = chunk_end - self.chunks.offset(); + + let mask = if bit_end >= 128 { + u128::MAX + } else { + (1u128 << bit_end) - 1 + }; + let tabs = (tabs >> (chunk_start - self.chunks.offset())) & mask; + let chars_map = (chars_map >> (chunk_start - self.chunks.offset())) & mask; + self.range.start = chunk_end; if self.range.start == self.chunks.offset() + chunk.len() { self.chunks.next().unwrap(); @@ -4798,6 +4818,8 @@ impl<'a> Iterator for BufferChunks<'a> { underline: self.underline, diagnostic_severity: self.current_diagnostic_severity(), is_unnecessary: self.current_code_is_unnecessary(), + tabs, + chars: chars_map, ..Chunk::default() }) } else { diff --git a/crates/language/src/buffer_tests.rs b/crates/language/src/buffer_tests.rs index ce65afa628..d0290c4651 100644 --- a/crates/language/src/buffer_tests.rs +++ b/crates/language/src/buffer_tests.rs @@ -3787,3 +3787,80 @@ fn init_settings(cx: &mut App, f: fn(&mut AllLanguageSettingsContent)) { settings.update_user_settings::(cx, f); }); } + +#[gpui::test(iterations = 100)] +fn test_random_chunk_bitmaps(cx: &mut App, mut rng: StdRng) { + use util::RandomCharIter; + + // Generate random text + let len = rng.gen_range(0..10000); + let text = RandomCharIter::new(&mut rng).take(len).collect::(); + + let buffer = cx.new(|cx| Buffer::local(text, cx)); + let snapshot = buffer.read(cx).snapshot(); + + // Get all chunks and verify their bitmaps + let chunks = 
snapshot.chunks(0..snapshot.len(), false); + + for chunk in chunks { + let chunk_text = chunk.text; + let chars_bitmap = chunk.chars; + let tabs_bitmap = chunk.tabs; + + // Check empty chunks have empty bitmaps + if chunk_text.is_empty() { + assert_eq!( + chars_bitmap, 0, + "Empty chunk should have empty chars bitmap" + ); + assert_eq!(tabs_bitmap, 0, "Empty chunk should have empty tabs bitmap"); + continue; + } + + // Verify that chunk text doesn't exceed 128 bytes + assert!( + chunk_text.len() <= 128, + "Chunk text length {} exceeds 128 bytes", + chunk_text.len() + ); + + // Verify chars bitmap + let char_indices = chunk_text + .char_indices() + .map(|(i, _)| i) + .collect::>(); + + for byte_idx in 0..chunk_text.len() { + let should_have_bit = char_indices.contains(&byte_idx); + let has_bit = chars_bitmap & (1 << byte_idx) != 0; + + if has_bit != should_have_bit { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Char indices: {:?}", char_indices); + eprintln!("Chars bitmap: {:#b}", chars_bitmap); + } + + assert_eq!( + has_bit, should_have_bit, + "Chars bitmap mismatch at byte index {} in chunk {:?}. Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, should_have_bit, has_bit + ); + } + + // Verify tabs bitmap + for (byte_idx, byte) in chunk_text.bytes().enumerate() { + let is_tab = byte == b'\t'; + let has_bit = tabs_bitmap & (1 << byte_idx) != 0; + + if has_bit != is_tab { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Tabs bitmap: {:#b}", tabs_bitmap); + assert_eq!( + has_bit, is_tab, + "Tabs bitmap mismatch at byte index {} in chunk {:?}. 
Byte: {:?}, Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, byte as char, is_tab, has_bit + ); + } + } + } +} diff --git a/crates/language_tools/src/lsp_log.rs b/crates/language_tools/src/lsp_log.rs index d5206c1f26..5d8bb0f997 100644 --- a/crates/language_tools/src/lsp_log.rs +++ b/crates/language_tools/src/lsp_log.rs @@ -433,7 +433,7 @@ impl LogStore { log_lines, id, LogMessage { - message: message.trim_end().to_string(), + message: message.trim().to_string(), typ, }, language_server_state.log_level, diff --git a/crates/languages/src/typescript.rs b/crates/languages/src/typescript.rs index 77cf1a64f1..58eca88712 100644 --- a/crates/languages/src/typescript.rs +++ b/crates/languages/src/typescript.rs @@ -780,8 +780,8 @@ pub struct EsLintLspAdapter { } impl EsLintLspAdapter { - const CURRENT_VERSION: &'static str = "2.4.4"; - const CURRENT_VERSION_TAG_NAME: &'static str = "release/2.4.4"; + const CURRENT_VERSION: &'static str = "3.0.10"; + const CURRENT_VERSION_TAG_NAME: &'static str = "release/3.0.10"; #[cfg(not(windows))] const GITHUB_ASSET_KIND: AssetKind = AssetKind::TarGz; diff --git a/crates/multi_buffer/src/multi_buffer.rs b/crates/multi_buffer/src/multi_buffer.rs index e27cbf868a..80c4615fca 100644 --- a/crates/multi_buffer/src/multi_buffer.rs +++ b/crates/multi_buffer/src/multi_buffer.rs @@ -7723,12 +7723,24 @@ impl<'a> Iterator for MultiBufferChunks<'a> { let diff_transform_end = diff_transform_end.min(self.range.end); if diff_transform_end < chunk_end { - let (before, after) = - chunk.text.split_at(diff_transform_end - self.range.start); + let split_idx = diff_transform_end - self.range.start; + let (before, after) = chunk.text.split_at(split_idx); self.range.start = diff_transform_end; + let mask = (1 << split_idx) - 1; + let chars = chunk.chars & mask; + let tabs = chunk.tabs & mask; + chunk.text = after; + chunk.chars = chunk.chars >> split_idx; + chunk.tabs = chunk.tabs >> split_idx; + + // FIXME: We should be handling bitmap for tabs and 
chars here + // Because we do a split at operation the bitmaps will be off + Some(Chunk { text: before, + chars, + tabs, ..chunk.clone() }) } else { @@ -7772,6 +7784,7 @@ impl<'a> Iterator for MultiBufferChunks<'a> { self.range.start += "\n".len(); Chunk { text: "\n", + chars: 1u128, ..Default::default() } }; @@ -7868,9 +7881,11 @@ impl<'a> Iterator for ExcerptChunks<'a> { if self.footer_height > 0 { let text = unsafe { str::from_utf8_unchecked(&NEWLINES[..self.footer_height]) }; + let chars = (1 << self.footer_height) - 1; self.footer_height = 0; return Some(Chunk { text, + chars, ..Default::default() }); } diff --git a/crates/multi_buffer/src/multi_buffer_tests.rs b/crates/multi_buffer/src/multi_buffer_tests.rs index 61b4b0520f..7dcaf1e66f 100644 --- a/crates/multi_buffer/src/multi_buffer_tests.rs +++ b/crates/multi_buffer/src/multi_buffer_tests.rs @@ -7,6 +7,7 @@ use parking_lot::RwLock; use rand::prelude::*; use settings::SettingsStore; use std::env; +use util::RandomCharIter; use util::test::sample_text; #[ctor::ctor] @@ -3712,3 +3713,235 @@ fn test_new_empty_buffers_title_can_be_set(cx: &mut App) { }); assert_eq!(multibuffer.read(cx).title(cx), "Hey"); } + +#[gpui::test(iterations = 100)] +fn test_random_chunk_bitmaps(cx: &mut App, mut rng: StdRng) { + let multibuffer = if rng.r#gen() { + let len = rng.gen_range(0..10000); + let text = RandomCharIter::new(&mut rng).take(len).collect::(); + let buffer = cx.new(|cx| Buffer::local(text, cx)); + cx.new(|cx| MultiBuffer::singleton(buffer, cx)) + } else { + MultiBuffer::build_random(&mut rng, cx) + }; + + let snapshot = multibuffer.read(cx).snapshot(cx); + + let chunks = snapshot.chunks(0..snapshot.len(), false); + + for chunk in chunks { + let chunk_text = chunk.text; + let chars_bitmap = chunk.chars; + let tabs_bitmap = chunk.tabs; + + if chunk_text.is_empty() { + assert_eq!( + chars_bitmap, 0, + "Empty chunk should have empty chars bitmap" + ); + assert_eq!(tabs_bitmap, 0, "Empty chunk should have empty tabs 
bitmap"); + continue; + } + + assert!( + chunk_text.len() <= 128, + "Chunk text length {} exceeds 128 bytes", + chunk_text.len() + ); + + // Verify chars bitmap + let char_indices = chunk_text + .char_indices() + .map(|(i, _)| i) + .collect::>(); + + for byte_idx in 0..chunk_text.len() { + let should_have_bit = char_indices.contains(&byte_idx); + let has_bit = chars_bitmap & (1 << byte_idx) != 0; + + if has_bit != should_have_bit { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Char indices: {:?}", char_indices); + eprintln!("Chars bitmap: {:#b}", chars_bitmap); + } + + assert_eq!( + has_bit, should_have_bit, + "Chars bitmap mismatch at byte index {} in chunk {:?}. Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, should_have_bit, has_bit + ); + } + + for (byte_idx, byte) in chunk_text.bytes().enumerate() { + let is_tab = byte == b'\t'; + let has_bit = tabs_bitmap & (1 << byte_idx) != 0; + + if has_bit != is_tab { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Tabs bitmap: {:#b}", tabs_bitmap); + assert_eq!( + has_bit, is_tab, + "Tabs bitmap mismatch at byte index {} in chunk {:?}. 
Byte: {:?}, Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, byte as char, is_tab, has_bit + ); + } + } + } +} + +#[gpui::test(iterations = 100)] +fn test_random_chunk_bitmaps_with_diffs(cx: &mut App, mut rng: StdRng) { + use buffer_diff::BufferDiff; + use util::RandomCharIter; + + let multibuffer = if rng.r#gen() { + let len = rng.gen_range(100..10000); + let text = RandomCharIter::new(&mut rng).take(len).collect::(); + let buffer = cx.new(|cx| Buffer::local(text, cx)); + cx.new(|cx| MultiBuffer::singleton(buffer, cx)) + } else { + MultiBuffer::build_random(&mut rng, cx) + }; + + let _diff_count = rng.gen_range(1..5); + let mut diffs = Vec::new(); + + multibuffer.update(cx, |multibuffer, cx| { + for buffer_id in multibuffer.excerpt_buffer_ids() { + if rng.gen_bool(0.7) { + if let Some(buffer_handle) = multibuffer.buffer(buffer_id) { + let buffer_text = buffer_handle.read(cx).text(); + let mut base_text = String::new(); + + for line in buffer_text.lines() { + if rng.gen_bool(0.3) { + continue; + } else if rng.gen_bool(0.3) { + let line_len = rng.gen_range(0..50); + let modified_line = RandomCharIter::new(&mut rng) + .take(line_len) + .collect::(); + base_text.push_str(&modified_line); + base_text.push('\n'); + } else { + base_text.push_str(line); + base_text.push('\n'); + } + } + + if rng.gen_bool(0.5) { + let extra_lines = rng.gen_range(1..5); + for _ in 0..extra_lines { + let line_len = rng.gen_range(0..50); + let extra_line = RandomCharIter::new(&mut rng) + .take(line_len) + .collect::(); + base_text.push_str(&extra_line); + base_text.push('\n'); + } + } + + let diff = + cx.new(|cx| BufferDiff::new_with_base_text(&base_text, &buffer_handle, cx)); + diffs.push(diff.clone()); + multibuffer.add_diff(diff, cx); + } + } + } + }); + + multibuffer.update(cx, |multibuffer, cx| { + if rng.gen_bool(0.5) { + multibuffer.set_all_diff_hunks_expanded(cx); + } else { + let snapshot = multibuffer.snapshot(cx); + let text = snapshot.text(); + + let mut ranges = 
Vec::new(); + for _ in 0..rng.gen_range(1..5) { + if snapshot.len() == 0 { + break; + } + + let diff_size = rng.gen_range(5..1000); + let mut start = rng.gen_range(0..snapshot.len()); + + while !text.is_char_boundary(start) { + start = start.saturating_sub(1); + } + + let mut end = rng.gen_range(start..snapshot.len().min(start + diff_size)); + + while !text.is_char_boundary(end) { + end = end.saturating_add(1); + } + let start_anchor = snapshot.anchor_after(start); + let end_anchor = snapshot.anchor_before(end); + ranges.push(start_anchor..end_anchor); + } + multibuffer.expand_diff_hunks(ranges, cx); + } + }); + + let snapshot = multibuffer.read(cx).snapshot(cx); + + let chunks = snapshot.chunks(0..snapshot.len(), false); + + for chunk in chunks { + let chunk_text = chunk.text; + let chars_bitmap = chunk.chars; + let tabs_bitmap = chunk.tabs; + + if chunk_text.is_empty() { + assert_eq!( + chars_bitmap, 0, + "Empty chunk should have empty chars bitmap" + ); + assert_eq!(tabs_bitmap, 0, "Empty chunk should have empty tabs bitmap"); + continue; + } + + assert!( + chunk_text.len() <= 128, + "Chunk text length {} exceeds 128 bytes", + chunk_text.len() + ); + + let char_indices = chunk_text + .char_indices() + .map(|(i, _)| i) + .collect::>(); + + for byte_idx in 0..chunk_text.len() { + let should_have_bit = char_indices.contains(&byte_idx); + let has_bit = chars_bitmap & (1 << byte_idx) != 0; + + if has_bit != should_have_bit { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Char indices: {:?}", char_indices); + eprintln!("Chars bitmap: {:#b}", chars_bitmap); + } + + assert_eq!( + has_bit, should_have_bit, + "Chars bitmap mismatch at byte index {} in chunk {:?}. 
Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, should_have_bit, has_bit + ); + } + + for (byte_idx, byte) in chunk_text.bytes().enumerate() { + let is_tab = byte == b'\t'; + let has_bit = tabs_bitmap & (1 << byte_idx) != 0; + + if has_bit != is_tab { + eprintln!("Chunk text bytes: {:?}", chunk_text.as_bytes()); + eprintln!("Tabs bitmap: {:#b}", tabs_bitmap); + assert_eq!( + has_bit, is_tab, + "Tabs bitmap mismatch at byte index {} in chunk {:?}. Byte: {:?}, Expected bit: {}, Got bit: {}", + byte_idx, chunk_text, byte as char, is_tab, has_bit + ); + } + } + } +} diff --git a/crates/rope/src/chunk.rs b/crates/rope/src/chunk.rs index 00679d8cf5..fefbcdd0ed 100644 --- a/crates/rope/src/chunk.rs +++ b/crates/rope/src/chunk.rs @@ -13,7 +13,7 @@ pub struct Chunk { chars: u128, chars_utf16: u128, newlines: u128, - tabs: u128, + pub tabs: u128, pub text: ArrayString, } @@ -67,6 +67,11 @@ impl Chunk { pub fn slice(&self, range: Range) -> ChunkSlice<'_> { self.as_slice().slice(range) } + + #[inline(always)] + pub fn chars(&self) -> u128 { + self.chars + } } #[derive(Clone, Copy, Debug)] diff --git a/crates/rope/src/rope.rs b/crates/rope/src/rope.rs index 0d3f5abbde..6b3f8cfb81 100644 --- a/crates/rope/src/rope.rs +++ b/crates/rope/src/rope.rs @@ -594,6 +594,15 @@ impl<'a> Cursor<'a> { } } +pub struct ChunkBitmaps<'a> { + /// A slice of text up to 128 bytes in size + pub text: &'a str, + /// Bitmap of character locations in text. LSB ordered + pub chars: u128, + /// Bitmap of tab locations in text. 
LSB ordered + pub tabs: u128, +} + #[derive(Clone)] pub struct Chunks<'a> { chunks: sum_tree::Cursor<'a, Chunk, usize>, @@ -755,6 +764,36 @@ impl<'a> Chunks<'a> { self.offset < initial_offset && self.offset == 0 } + /// Returns bitmaps that represent character positions and tab positions + pub fn peak_with_bitmaps(&self) -> Option> { + if !self.offset_is_valid() { + return None; + } + + let chunk = self.chunks.item()?; + let chunk_start = *self.chunks.start(); + let slice_range = if self.reversed { + let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start; + let slice_end = self.offset - chunk_start; + slice_start..slice_end + } else { + let slice_start = self.offset - chunk_start; + let slice_end = cmp::min(self.chunks.end(), self.range.end) - chunk_start; + slice_start..slice_end + }; + + let bitmask = (1u128 << slice_range.end as u128).saturating_sub(1); + + let chars = (chunk.chars() & bitmask) >> slice_range.start; + let tabs = (chunk.tabs & bitmask) >> slice_range.start; + + Some(ChunkBitmaps { + text: &chunk.text[slice_range.clone()], + chars, + tabs, + }) + } + pub fn peek(&self) -> Option<&'a str> { if !self.offset_is_valid() { return None; @@ -775,6 +814,36 @@ impl<'a> Chunks<'a> { Some(&chunk.text[slice_range]) } + pub fn peek_tabs(&self) -> Option> { + if !self.offset_is_valid() { + return None; + } + + let chunk = self.chunks.item()?; + let chunk_start = *self.chunks.start(); + let slice_range = if self.reversed { + let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start; + let slice_end = self.offset - chunk_start; + slice_start..slice_end + } else { + let slice_start = self.offset - chunk_start; + let slice_end = cmp::min(self.chunks.end(), self.range.end) - chunk_start; + slice_start..slice_end + }; + let chunk_start_offset = slice_range.start; + let slice_text = &chunk.text[slice_range]; + + // Shift the tabs to align with our slice window + let shifted_tabs = chunk.tabs >> chunk_start_offset; + let shifted_chars = 
chunk.chars() >> chunk_start_offset; + + Some(ChunkBitmaps { + text: slice_text, + chars: shifted_chars, + tabs: shifted_tabs, + }) + } + pub fn lines(self) -> Lines<'a> { let reversed = self.reversed; Lines { @@ -820,6 +889,30 @@ impl<'a> Chunks<'a> { } } +pub struct ChunkWithBitmaps<'a>(pub Chunks<'a>); + +impl<'a> Iterator for ChunkWithBitmaps<'a> { + /// text, chars bitmap, tabs bitmap + type Item = ChunkBitmaps<'a>; + + fn next(&mut self) -> Option { + let chunk_bitmaps = self.0.peak_with_bitmaps()?; + if self.0.reversed { + self.0.offset -= chunk_bitmaps.text.len(); + if self.0.offset <= *self.0.chunks.start() { + self.0.chunks.prev(); + } + } else { + self.0.offset += chunk_bitmaps.text.len(); + if self.0.offset >= self.0.chunks.end() { + self.0.chunks.next(); + } + } + + Some(chunk_bitmaps) + } +} + impl<'a> Iterator for Chunks<'a> { type Item = &'a str;