Compare commits

...
Sign in to create a new pull request.

1 commit

Author SHA1 Message Date
Lukas Wirth
63b916c3e4 Make use of Chunk mask for more operations 2025-08-07 09:36:39 +02:00
2 changed files with 64 additions and 37 deletions

View file

@ -10,11 +10,15 @@ pub(crate) const MAX_BASE: usize = MIN_BASE * 2;
#[derive(Clone, Debug, Default)]
pub struct Chunk {
/// Bit mask for character boundaries in the chunk. LSB is the first character.
chars: u128,
/// Bit mask for UTF-16 code units in the chunk. LSB is the first character.
chars_utf16: u128,
/// Bit mask for newlines in the chunk. LSB is the first character.
newlines: u128,
/// Bit mask for tabs in the chunk. LSB is the first character.
tabs: u128,
pub text: ArrayString<MAX_BASE>,
pub(crate) text: ArrayString<MAX_BASE>,
}
impl Chunk {
@ -42,7 +46,7 @@ impl Chunk {
pub fn append(&mut self, slice: ChunkSlice) {
if slice.is_empty() {
return;
};
}
let base_ix = self.text.len();
self.chars |= slice.chars << base_ix;
@ -75,7 +79,7 @@ pub struct ChunkSlice<'a> {
chars_utf16: u128,
newlines: u128,
tabs: u128,
text: &'a str,
pub(crate) text: &'a str,
}
impl Into<Chunk> for ChunkSlice<'_> {
@ -97,8 +101,26 @@ impl<'a> ChunkSlice<'a> {
}
#[inline(always)]
pub fn is_char_boundary(self, offset: usize) -> bool {
self.text.is_char_boundary(offset)
pub fn is_char_boundary(&self, offset: usize) -> bool {
if offset == self.text.len() {
return true;
}
if offset > self.text.len() {
return false;
}
(self.chars & (1 << offset)) != 0
}
#[inline(always)]
pub fn round_char_boundary(&self, mut ix: usize, bias: Bias) -> usize {
while !self.is_char_boundary(ix) {
match bias {
Bias::Left => ix -= 1,
Bias::Right => ix += 1,
}
}
ix
}
#[inline(always)]
@ -197,6 +219,25 @@ impl<'a> ChunkSlice<'a> {
Point::new(row, column)
}
pub fn last_newline(&self) -> Option<usize> {
if self.newlines == 0 {
None
} else {
Some((u128::BITS - self.newlines.leading_zeros() - 1) as usize)
}
}
#[inline(always)]
pub fn newline_at(&self, i: usize) -> bool {
assert!(
i < u128::BITS as usize,
"index out of bounds: the len is {} but the index is {}",
u128::BITS,
i
);
(self.newlines & (1 << i)) != 0
}
/// Get number of chars in first line
#[inline(always)]
pub fn first_line_chars(&self) -> u32 {
@ -383,9 +424,9 @@ impl<'a> ChunkSlice<'a> {
let mut offset = row_offset_range.start;
if point.column > 0 {
offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
if !self.text.is_char_boundary(offset) {
if !self.is_char_boundary(offset) {
offset -= 1;
while !self.text.is_char_boundary(offset) {
while !self.is_char_boundary(offset) {
offset -= 1;
}
if !clip {
@ -415,7 +456,7 @@ impl<'a> ChunkSlice<'a> {
Point::new(point.0.row, line.len() as u32)
} else {
let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
while !line.text.is_char_boundary(column) {
while !line.is_char_boundary(column) {
column -= 1;
}
Point::new(point.0.row, column as u32)
@ -479,7 +520,7 @@ impl<'a> ChunkSlice<'a> {
self.len_utf16()
} else {
let mut offset = self.offset_utf16_to_offset(target);
while !self.text.is_char_boundary(offset) {
while !self.is_char_boundary(offset) {
if bias == Bias::Left {
offset -= 1;
} else {
@ -629,10 +670,10 @@ mod tests {
for _ in 0..10 {
let mut start = rng.gen_range(0..=chunk.text.len());
let mut end = rng.gen_range(start..=chunk.text.len());
while !chunk.text.is_char_boundary(start) {
while !chunk.as_slice().is_char_boundary(start) {
start -= 1;
}
while !chunk.text.is_char_boundary(end) {
while !chunk.as_slice().is_char_boundary(end) {
end -= 1;
}
let range = start..end;

View file

@ -180,14 +180,11 @@ impl Rope {
let split_ix = if last_chunk.text.len() + chunk.len() <= chunk::MAX_BASE {
chunk.len()
} else {
let mut split_ix = cmp::min(
let split_ix = cmp::min(
chunk::MIN_BASE.saturating_sub(last_chunk.text.len()),
chunk.len(),
);
while !chunk.is_char_boundary(split_ix) {
split_ix += 1;
}
split_ix
chunk.round_char_boundary(split_ix, Bias::Right)
};
let (suffix, remainder) = chunk.split_at(split_ix);
@ -390,24 +387,13 @@ impl Rope {
})
}
pub fn clip_offset(&self, mut offset: usize, bias: Bias) -> usize {
pub fn clip_offset(&self, offset: usize, bias: Bias) -> usize {
let mut cursor = self.chunks.cursor::<usize>(&());
cursor.seek(&offset, Bias::Left);
if let Some(chunk) = cursor.item() {
let mut ix = offset - cursor.start();
while !chunk.text.is_char_boundary(ix) {
match bias {
Bias::Left => {
ix -= 1;
offset -= 1;
}
Bias::Right => {
ix += 1;
offset += 1;
}
}
}
offset
let ix = offset - cursor.start();
let ix = chunk.as_slice().round_char_boundary(ix, bias);
cursor.start() + ix
} else {
self.summary().len
}
@ -711,12 +697,12 @@ impl<'a> Chunks<'a> {
}
if let Some(chunk) = self.chunks.item() {
let mut end_ix = self.offset - *self.chunks.start();
if chunk.text.as_bytes()[end_ix - 1] == b'\n' {
end_ix -= 1;
let end_ix = self.offset - *self.chunks.start();
let mut chunk = chunk.slice(0..end_ix);
if chunk.newline_at(end_ix - 1) {
chunk = chunk.slice(0..end_ix - 1);
}
if let Some(newline_ix) = chunk.text[..end_ix].rfind('\n') {
if let Some(newline_ix) = chunk.last_newline() {
self.offset = *self.chunks.start() + newline_ix + 1;
if self.offset_is_valid() {
return true;
@ -728,7 +714,7 @@ impl<'a> Chunks<'a> {
.search_backward(|summary| summary.text.lines.row > 0);
self.offset = *self.chunks.start();
if let Some(chunk) = self.chunks.item() {
if let Some(newline_ix) = chunk.text.rfind('\n') {
if let Some(newline_ix) = chunk.as_slice().last_newline() {
self.offset += newline_ix + 1;
if self.offset_is_valid() {
if self.offset == self.chunks.end() {