diff --git a/crates/rope/src/chunk.rs b/crates/rope/src/chunk.rs index dc00674380..1766396e6a 100644 --- a/crates/rope/src/chunk.rs +++ b/crates/rope/src/chunk.rs @@ -10,11 +10,15 @@ pub(crate) const MAX_BASE: usize = MIN_BASE * 2; #[derive(Clone, Debug, Default)] pub struct Chunk { + /// Bit mask for character boundaries in the chunk. LSB is the first character. chars: u128, + /// Bit mask for UTF-16 code units in the chunk. LSB is the first character. chars_utf16: u128, + /// Bit mask for newlines in the chunk. LSB is the first character. newlines: u128, + /// Bit mask for tabs in the chunk. LSB is the first character. tabs: u128, - pub text: ArrayString, + pub(crate) text: ArrayString, } impl Chunk { @@ -42,7 +46,7 @@ impl Chunk { pub fn append(&mut self, slice: ChunkSlice) { if slice.is_empty() { return; - }; + } let base_ix = self.text.len(); self.chars |= slice.chars << base_ix; @@ -75,7 +79,7 @@ pub struct ChunkSlice<'a> { chars_utf16: u128, newlines: u128, tabs: u128, - text: &'a str, + pub(crate) text: &'a str, } impl Into for ChunkSlice<'_> { @@ -97,8 +101,26 @@ impl<'a> ChunkSlice<'a> { } #[inline(always)] - pub fn is_char_boundary(self, offset: usize) -> bool { - self.text.is_char_boundary(offset) + pub fn is_char_boundary(&self, offset: usize) -> bool { + if offset == self.text.len() { + return true; + } + if offset > self.text.len() { + return false; + } + + (self.chars & (1 << offset)) != 0 + } + + #[inline(always)] + pub fn round_char_boundary(&self, mut ix: usize, bias: Bias) -> usize { + while !self.is_char_boundary(ix) { + match bias { + Bias::Left => ix -= 1, + Bias::Right => ix += 1, + } + } + ix } #[inline(always)] @@ -197,6 +219,25 @@ impl<'a> ChunkSlice<'a> { Point::new(row, column) } + pub fn last_newline(&self) -> Option { + if self.newlines == 0 { + None + } else { + Some((u128::BITS - self.newlines.leading_zeros() - 1) as usize) + } + } + + #[inline(always)] + pub fn newline_at(&self, i: usize) -> bool { + assert!( + i < u128::BITS as usize, + "index out of bounds: the len is {} but the index is {}", + u128::BITS, + i + ); + (self.newlines & (1 << i)) != 0 + } + /// Get number of chars in first line #[inline(always)] pub fn first_line_chars(&self) -> u32 { @@ -383,9 +424,9 @@ impl<'a> ChunkSlice<'a> { let mut offset = row_offset_range.start; if point.column > 0 { offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize)); - if !self.text.is_char_boundary(offset) { + if !self.is_char_boundary(offset) { offset -= 1; - while !self.text.is_char_boundary(offset) { + while !self.is_char_boundary(offset) { offset -= 1; } if !clip { @@ -415,7 +456,7 @@ impl<'a> ChunkSlice<'a> { Point::new(point.0.row, line.len() as u32) } else { let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize)); - while !line.text.is_char_boundary(column) { + while !line.is_char_boundary(column) { column -= 1; } Point::new(point.0.row, column as u32) @@ -479,7 +520,7 @@ impl<'a> ChunkSlice<'a> { self.len_utf16() } else { let mut offset = self.offset_utf16_to_offset(target); - while !self.text.is_char_boundary(offset) { + while !self.is_char_boundary(offset) { if bias == Bias::Left { offset -= 1; } else { @@ -629,10 +670,10 @@ mod tests { for _ in 0..10 { let mut start = rng.gen_range(0..=chunk.text.len()); let mut end = rng.gen_range(start..=chunk.text.len()); - while !chunk.text.is_char_boundary(start) { + while !chunk.as_slice().is_char_boundary(start) { start -= 1; } - while !chunk.text.is_char_boundary(end) { + while !chunk.as_slice().is_char_boundary(end) { end -= 1; } let range = start..end; diff --git a/crates/rope/src/rope.rs b/crates/rope/src/rope.rs index aa3ed5db57..a4b53a6a57 100644 --- a/crates/rope/src/rope.rs +++ b/crates/rope/src/rope.rs @@ -180,14 +180,11 @@ impl Rope { let split_ix = if last_chunk.text.len() + chunk.len() <= chunk::MAX_BASE { chunk.len() } else { - let mut split_ix = cmp::min( + let split_ix = cmp::min( chunk::MIN_BASE.saturating_sub(last_chunk.text.len()), chunk.len(), ); - while !chunk.is_char_boundary(split_ix) { - split_ix += 1; - } - split_ix + chunk.round_char_boundary(split_ix, Bias::Right) }; let (suffix, remainder) = chunk.split_at(split_ix); @@ -390,24 +387,13 @@ impl Rope { }) } - pub fn clip_offset(&self, mut offset: usize, bias: Bias) -> usize { + pub fn clip_offset(&self, offset: usize, bias: Bias) -> usize { let mut cursor = self.chunks.cursor::(&()); cursor.seek(&offset, Bias::Left); if let Some(chunk) = cursor.item() { - let mut ix = offset - cursor.start(); - while !chunk.text.is_char_boundary(ix) { - match bias { - Bias::Left => { - ix -= 1; - offset -= 1; - } - Bias::Right => { - ix += 1; - offset += 1; - } - } - } - offset + let ix = offset - cursor.start(); + let ix = chunk.as_slice().round_char_boundary(ix, bias); + cursor.start() + ix } else { self.summary().len } @@ -711,12 +697,12 @@ impl<'a> Chunks<'a> { } if let Some(chunk) = self.chunks.item() { - let mut end_ix = self.offset - *self.chunks.start(); - if chunk.text.as_bytes()[end_ix - 1] == b'\n' { - end_ix -= 1; + let end_ix = self.offset - *self.chunks.start(); + let mut chunk = chunk.slice(0..end_ix); + if chunk.newline_at(end_ix - 1) { + chunk = chunk.slice(0..end_ix - 1); } - - if let Some(newline_ix) = chunk.text[..end_ix].rfind('\n') { + if let Some(newline_ix) = chunk.last_newline() { self.offset = *self.chunks.start() + newline_ix + 1; if self.offset_is_valid() { return true; @@ -728,7 +714,7 @@ impl<'a> Chunks<'a> { .search_backward(|summary| summary.text.lines.row > 0); self.offset = *self.chunks.start(); if let Some(chunk) = self.chunks.item() { - if let Some(newline_ix) = chunk.text.rfind('\n') { + if let Some(newline_ix) = chunk.as_slice().last_newline() { self.offset += newline_ix + 1; if self.offset_is_valid() { if self.offset == self.chunks.end() {