Compare commits

...
Sign in to create a new pull request.

1 commit

Author SHA1 Message Date
Lukas Wirth
63b916c3e4 Make use of Chunk mask for more operations 2025-08-07 09:36:39 +02:00
2 changed files with 64 additions and 37 deletions

View file

@ -10,11 +10,15 @@ pub(crate) const MAX_BASE: usize = MIN_BASE * 2;
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
pub struct Chunk { pub struct Chunk {
/// Bit mask for character boundaries in the chunk. Bit `i` is set when byte offset `i` of the text starts a character; the LSB corresponds to the first byte.
chars: u128, chars: u128,
/// Bit mask for UTF-16 code units in the chunk. LSB is the first character.
chars_utf16: u128, chars_utf16: u128,
/// Bit mask for newlines in the chunk. Bit `i` is set when byte `i` of the text is a newline; the LSB corresponds to the first byte.
newlines: u128, newlines: u128,
/// Bit mask for tabs in the chunk. LSB is the first character.
tabs: u128, tabs: u128,
pub text: ArrayString<MAX_BASE>, pub(crate) text: ArrayString<MAX_BASE>,
} }
impl Chunk { impl Chunk {
@ -42,7 +46,7 @@ impl Chunk {
pub fn append(&mut self, slice: ChunkSlice) { pub fn append(&mut self, slice: ChunkSlice) {
if slice.is_empty() { if slice.is_empty() {
return; return;
}; }
let base_ix = self.text.len(); let base_ix = self.text.len();
self.chars |= slice.chars << base_ix; self.chars |= slice.chars << base_ix;
@ -75,7 +79,7 @@ pub struct ChunkSlice<'a> {
chars_utf16: u128, chars_utf16: u128,
newlines: u128, newlines: u128,
tabs: u128, tabs: u128,
text: &'a str, pub(crate) text: &'a str,
} }
impl Into<Chunk> for ChunkSlice<'_> { impl Into<Chunk> for ChunkSlice<'_> {
@ -97,8 +101,26 @@ impl<'a> ChunkSlice<'a> {
} }
#[inline(always)] #[inline(always)]
pub fn is_char_boundary(self, offset: usize) -> bool { pub fn is_char_boundary(&self, offset: usize) -> bool {
self.text.is_char_boundary(offset) if offset == self.text.len() {
return true;
}
if offset > self.text.len() {
return false;
}
(self.chars & (1 << offset)) != 0
}
#[inline(always)]
pub fn round_char_boundary(&self, mut ix: usize, bias: Bias) -> usize {
while !self.is_char_boundary(ix) {
match bias {
Bias::Left => ix -= 1,
Bias::Right => ix += 1,
}
}
ix
} }
#[inline(always)] #[inline(always)]
@ -197,6 +219,25 @@ impl<'a> ChunkSlice<'a> {
Point::new(row, column) Point::new(row, column)
} }
/// Returns the index of the highest set bit in the `newlines` mask, i.e. the
/// position of the last newline recorded for this slice, or `None` when no
/// newline bit is set.
pub fn last_newline(&self) -> Option<usize> {
    match self.newlines {
        0 => None,
        // The highest set bit sits at (mask width - 1) minus the number of
        // leading zero bits; a non-zero mask has at most 127 of them.
        mask => Some((u128::BITS - 1 - mask.leading_zeros()) as usize),
    }
}
/// Reports whether bit `i` of the `newlines` mask is set.
///
/// # Panics
///
/// Panics when `i` is not smaller than `u128::BITS`, since the mask has no
/// such bit.
#[inline(always)]
pub fn newline_at(&self, i: usize) -> bool {
    let mask_width = u128::BITS;
    assert!(
        i < mask_width as usize,
        "index out of bounds: the len is {} but the index is {}",
        mask_width,
        i
    );
    // Shift the requested bit down to the LSB and test it there.
    (self.newlines >> i) & 1 == 1
}
/// Get number of chars in first line /// Get number of chars in first line
#[inline(always)] #[inline(always)]
pub fn first_line_chars(&self) -> u32 { pub fn first_line_chars(&self) -> u32 {
@ -383,9 +424,9 @@ impl<'a> ChunkSlice<'a> {
let mut offset = row_offset_range.start; let mut offset = row_offset_range.start;
if point.column > 0 { if point.column > 0 {
offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize)); offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
if !self.text.is_char_boundary(offset) { if !self.is_char_boundary(offset) {
offset -= 1; offset -= 1;
while !self.text.is_char_boundary(offset) { while !self.is_char_boundary(offset) {
offset -= 1; offset -= 1;
} }
if !clip { if !clip {
@ -415,7 +456,7 @@ impl<'a> ChunkSlice<'a> {
Point::new(point.0.row, line.len() as u32) Point::new(point.0.row, line.len() as u32)
} else { } else {
let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize)); let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
while !line.text.is_char_boundary(column) { while !line.is_char_boundary(column) {
column -= 1; column -= 1;
} }
Point::new(point.0.row, column as u32) Point::new(point.0.row, column as u32)
@ -479,7 +520,7 @@ impl<'a> ChunkSlice<'a> {
self.len_utf16() self.len_utf16()
} else { } else {
let mut offset = self.offset_utf16_to_offset(target); let mut offset = self.offset_utf16_to_offset(target);
while !self.text.is_char_boundary(offset) { while !self.is_char_boundary(offset) {
if bias == Bias::Left { if bias == Bias::Left {
offset -= 1; offset -= 1;
} else { } else {
@ -629,10 +670,10 @@ mod tests {
for _ in 0..10 { for _ in 0..10 {
let mut start = rng.gen_range(0..=chunk.text.len()); let mut start = rng.gen_range(0..=chunk.text.len());
let mut end = rng.gen_range(start..=chunk.text.len()); let mut end = rng.gen_range(start..=chunk.text.len());
while !chunk.text.is_char_boundary(start) { while !chunk.as_slice().is_char_boundary(start) {
start -= 1; start -= 1;
} }
while !chunk.text.is_char_boundary(end) { while !chunk.as_slice().is_char_boundary(end) {
end -= 1; end -= 1;
} }
let range = start..end; let range = start..end;

View file

@ -180,14 +180,11 @@ impl Rope {
let split_ix = if last_chunk.text.len() + chunk.len() <= chunk::MAX_BASE { let split_ix = if last_chunk.text.len() + chunk.len() <= chunk::MAX_BASE {
chunk.len() chunk.len()
} else { } else {
let mut split_ix = cmp::min( let split_ix = cmp::min(
chunk::MIN_BASE.saturating_sub(last_chunk.text.len()), chunk::MIN_BASE.saturating_sub(last_chunk.text.len()),
chunk.len(), chunk.len(),
); );
while !chunk.is_char_boundary(split_ix) { chunk.round_char_boundary(split_ix, Bias::Right)
split_ix += 1;
}
split_ix
}; };
let (suffix, remainder) = chunk.split_at(split_ix); let (suffix, remainder) = chunk.split_at(split_ix);
@ -390,24 +387,13 @@ impl Rope {
}) })
} }
pub fn clip_offset(&self, mut offset: usize, bias: Bias) -> usize { pub fn clip_offset(&self, offset: usize, bias: Bias) -> usize {
let mut cursor = self.chunks.cursor::<usize>(&()); let mut cursor = self.chunks.cursor::<usize>(&());
cursor.seek(&offset, Bias::Left); cursor.seek(&offset, Bias::Left);
if let Some(chunk) = cursor.item() { if let Some(chunk) = cursor.item() {
let mut ix = offset - cursor.start(); let ix = offset - cursor.start();
while !chunk.text.is_char_boundary(ix) { let ix = chunk.as_slice().round_char_boundary(ix, bias);
match bias { cursor.start() + ix
Bias::Left => {
ix -= 1;
offset -= 1;
}
Bias::Right => {
ix += 1;
offset += 1;
}
}
}
offset
} else { } else {
self.summary().len self.summary().len
} }
@ -711,12 +697,12 @@ impl<'a> Chunks<'a> {
} }
if let Some(chunk) = self.chunks.item() { if let Some(chunk) = self.chunks.item() {
let mut end_ix = self.offset - *self.chunks.start(); let end_ix = self.offset - *self.chunks.start();
if chunk.text.as_bytes()[end_ix - 1] == b'\n' { let mut chunk = chunk.slice(0..end_ix);
end_ix -= 1; if chunk.newline_at(end_ix - 1) {
chunk = chunk.slice(0..end_ix - 1);
} }
if let Some(newline_ix) = chunk.last_newline() {
if let Some(newline_ix) = chunk.text[..end_ix].rfind('\n') {
self.offset = *self.chunks.start() + newline_ix + 1; self.offset = *self.chunks.start() + newline_ix + 1;
if self.offset_is_valid() { if self.offset_is_valid() {
return true; return true;
@ -728,7 +714,7 @@ impl<'a> Chunks<'a> {
.search_backward(|summary| summary.text.lines.row > 0); .search_backward(|summary| summary.text.lines.row > 0);
self.offset = *self.chunks.start(); self.offset = *self.chunks.start();
if let Some(chunk) = self.chunks.item() { if let Some(chunk) = self.chunks.item() {
if let Some(newline_ix) = chunk.text.rfind('\n') { if let Some(newline_ix) = chunk.as_slice().last_newline() {
self.offset += newline_ix + 1; self.offset += newline_ix + 1;
if self.offset_is_valid() { if self.offset_is_valid() {
if self.offset == self.chunks.end() { if self.offset == self.chunks.end() {