950 lines
29 KiB
Rust
950 lines
29 KiB
Rust
use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
|
|
use arrayvec::ArrayString;
|
|
use std::{cmp, ops::Range};
|
|
use sum_tree::Bias;
|
|
use unicode_segmentation::GraphemeCursor;
|
|
use util::debug_panic;
|
|
|
|
pub(crate) const MIN_BASE: usize = if cfg!(test) { 6 } else { 64 };
|
|
pub(crate) const MAX_BASE: usize = MIN_BASE * 2;
|
|
|
|
#[derive(Clone, Debug, Default)]
|
|
pub struct Chunk {
|
|
chars: u128,
|
|
chars_utf16: u128,
|
|
newlines: u128,
|
|
pub tabs: u128,
|
|
pub text: ArrayString<MAX_BASE>,
|
|
}
|
|
|
|
impl Chunk {
|
|
#[inline(always)]
|
|
pub fn new(text: &str) -> Self {
|
|
let mut this = Chunk::default();
|
|
this.push_str(text);
|
|
this
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn push_str(&mut self, text: &str) {
|
|
for (char_ix, c) in text.char_indices() {
|
|
let ix = self.text.len() + char_ix;
|
|
self.chars |= 1 << ix;
|
|
self.chars_utf16 |= 1 << ix;
|
|
self.chars_utf16 |= (c.len_utf16() as u128) << ix;
|
|
self.newlines |= ((c == '\n') as u128) << ix;
|
|
self.tabs |= ((c == '\t') as u128) << ix;
|
|
}
|
|
self.text.push_str(text);
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn append(&mut self, slice: ChunkSlice) {
|
|
if slice.is_empty() {
|
|
return;
|
|
};
|
|
|
|
let base_ix = self.text.len();
|
|
self.chars |= slice.chars << base_ix;
|
|
self.chars_utf16 |= slice.chars_utf16 << base_ix;
|
|
self.newlines |= slice.newlines << base_ix;
|
|
self.tabs |= slice.tabs << base_ix;
|
|
self.text.push_str(slice.text);
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn as_slice(&self) -> ChunkSlice<'_> {
|
|
ChunkSlice {
|
|
chars: self.chars,
|
|
chars_utf16: self.chars_utf16,
|
|
newlines: self.newlines,
|
|
tabs: self.tabs,
|
|
text: &self.text,
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
|
|
self.as_slice().slice(range)
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn chars(&self) -> u128 {
|
|
self.chars
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Copy, Debug)]
|
|
pub struct ChunkSlice<'a> {
|
|
chars: u128,
|
|
chars_utf16: u128,
|
|
newlines: u128,
|
|
tabs: u128,
|
|
text: &'a str,
|
|
}
|
|
|
|
impl Into<Chunk> for ChunkSlice<'_> {
|
|
fn into(self) -> Chunk {
|
|
Chunk {
|
|
chars: self.chars,
|
|
chars_utf16: self.chars_utf16,
|
|
newlines: self.newlines,
|
|
tabs: self.tabs,
|
|
text: self.text.try_into().unwrap(),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> ChunkSlice<'a> {
|
|
#[inline(always)]
|
|
pub fn is_empty(&self) -> bool {
|
|
self.text.is_empty()
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn is_char_boundary(self, offset: usize) -> bool {
|
|
self.text.is_char_boundary(offset)
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
|
|
if mid == MAX_BASE {
|
|
let left = self;
|
|
let right = ChunkSlice {
|
|
chars: 0,
|
|
chars_utf16: 0,
|
|
newlines: 0,
|
|
tabs: 0,
|
|
text: "",
|
|
};
|
|
(left, right)
|
|
} else {
|
|
let mask = (1u128 << mid) - 1;
|
|
let (left_text, right_text) = self.text.split_at(mid);
|
|
let left = ChunkSlice {
|
|
chars: self.chars & mask,
|
|
chars_utf16: self.chars_utf16 & mask,
|
|
newlines: self.newlines & mask,
|
|
tabs: self.tabs & mask,
|
|
text: left_text,
|
|
};
|
|
let right = ChunkSlice {
|
|
chars: self.chars >> mid,
|
|
chars_utf16: self.chars_utf16 >> mid,
|
|
newlines: self.newlines >> mid,
|
|
tabs: self.tabs >> mid,
|
|
text: right_text,
|
|
};
|
|
(left, right)
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn slice(self, range: Range<usize>) -> Self {
|
|
let mask = if range.end == MAX_BASE {
|
|
u128::MAX
|
|
} else {
|
|
(1u128 << range.end) - 1
|
|
};
|
|
if range.start == MAX_BASE {
|
|
Self {
|
|
chars: 0,
|
|
chars_utf16: 0,
|
|
newlines: 0,
|
|
tabs: 0,
|
|
text: "",
|
|
}
|
|
} else {
|
|
Self {
|
|
chars: (self.chars & mask) >> range.start,
|
|
chars_utf16: (self.chars_utf16 & mask) >> range.start,
|
|
newlines: (self.newlines & mask) >> range.start,
|
|
tabs: (self.tabs & mask) >> range.start,
|
|
text: &self.text[range],
|
|
}
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn text_summary(&self) -> TextSummary {
|
|
let mut chars = 0;
|
|
let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
|
|
TextSummary {
|
|
len: self.len(),
|
|
chars,
|
|
len_utf16: self.len_utf16(),
|
|
lines: self.lines(),
|
|
first_line_chars: self.first_line_chars(),
|
|
last_line_chars: self.last_line_chars(),
|
|
last_line_len_utf16: self.last_line_len_utf16(),
|
|
longest_row,
|
|
longest_row_chars,
|
|
}
|
|
}
|
|
|
|
/// Get length in bytes
|
|
#[inline(always)]
|
|
pub fn len(&self) -> usize {
|
|
self.text.len()
|
|
}
|
|
|
|
/// Get length in UTF-16 code units
|
|
#[inline(always)]
|
|
pub fn len_utf16(&self) -> OffsetUtf16 {
|
|
OffsetUtf16(self.chars_utf16.count_ones() as usize)
|
|
}
|
|
|
|
/// Get point representing number of lines and length of last line
|
|
#[inline(always)]
|
|
pub fn lines(&self) -> Point {
|
|
let row = self.newlines.count_ones();
|
|
let column = self.newlines.leading_zeros() - (u128::BITS - self.text.len() as u32);
|
|
Point::new(row, column)
|
|
}
|
|
|
|
/// Get number of chars in first line
|
|
#[inline(always)]
|
|
pub fn first_line_chars(&self) -> u32 {
|
|
if self.newlines == 0 {
|
|
self.chars.count_ones()
|
|
} else {
|
|
let mask = (1u128 << self.newlines.trailing_zeros()) - 1;
|
|
(self.chars & mask).count_ones()
|
|
}
|
|
}
|
|
|
|
/// Get number of chars in last line
|
|
#[inline(always)]
|
|
pub fn last_line_chars(&self) -> u32 {
|
|
if self.newlines == 0 {
|
|
self.chars.count_ones()
|
|
} else {
|
|
let mask = !(u128::MAX >> self.newlines.leading_zeros());
|
|
(self.chars & mask).count_ones()
|
|
}
|
|
}
|
|
|
|
/// Get number of UTF-16 code units in last line
|
|
#[inline(always)]
|
|
pub fn last_line_len_utf16(&self) -> u32 {
|
|
if self.newlines == 0 {
|
|
self.chars_utf16.count_ones()
|
|
} else {
|
|
let mask = !(u128::MAX >> self.newlines.leading_zeros());
|
|
(self.chars_utf16 & mask).count_ones()
|
|
}
|
|
}
|
|
|
|
/// Get the longest row in the chunk and its length in characters.
|
|
/// Calculate the total number of characters in the chunk along the way.
|
|
#[inline(always)]
|
|
pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
|
|
let mut chars = self.chars;
|
|
let mut newlines = self.newlines;
|
|
*total_chars = 0;
|
|
let mut row = 0;
|
|
let mut longest_row = 0;
|
|
let mut longest_row_chars = 0;
|
|
while newlines > 0 {
|
|
let newline_ix = newlines.trailing_zeros();
|
|
let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
|
|
*total_chars += usize::from(row_chars);
|
|
if row_chars > longest_row_chars {
|
|
longest_row = row;
|
|
longest_row_chars = row_chars;
|
|
}
|
|
|
|
newlines >>= newline_ix;
|
|
newlines >>= 1;
|
|
chars >>= newline_ix;
|
|
chars >>= 1;
|
|
row += 1;
|
|
*total_chars += 1;
|
|
}
|
|
|
|
let row_chars = chars.count_ones() as u8;
|
|
*total_chars += usize::from(row_chars);
|
|
if row_chars > longest_row_chars {
|
|
(row, row_chars as u32)
|
|
} else {
|
|
(longest_row, longest_row_chars as u32)
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn offset_to_point(&self, offset: usize) -> Point {
|
|
let mask = if offset == MAX_BASE {
|
|
u128::MAX
|
|
} else {
|
|
(1u128 << offset) - 1
|
|
};
|
|
let row = (self.newlines & mask).count_ones();
|
|
let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
|
|
let column = (offset - newline_ix as usize) as u32;
|
|
Point::new(row, column)
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn point_to_offset(&self, point: Point) -> usize {
|
|
if point.row > self.lines().row {
|
|
debug_panic!(
|
|
"point {:?} extends beyond rows for string {:?}",
|
|
point,
|
|
self.text
|
|
);
|
|
return self.len();
|
|
}
|
|
|
|
let row_offset_range = self.offset_range_for_row(point.row);
|
|
if point.column > row_offset_range.len() as u32 {
|
|
debug_panic!(
|
|
"point {:?} extends beyond row for string {:?}",
|
|
point,
|
|
self.text
|
|
);
|
|
row_offset_range.end
|
|
} else {
|
|
row_offset_range.start + point.column as usize
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
|
|
let mask = if offset == MAX_BASE {
|
|
u128::MAX
|
|
} else {
|
|
(1u128 << offset) - 1
|
|
};
|
|
OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
|
|
if target.0 == 0 {
|
|
0
|
|
} else {
|
|
let ix = nth_set_bit(self.chars_utf16, target.0) + 1;
|
|
if ix == MAX_BASE {
|
|
MAX_BASE
|
|
} else {
|
|
let utf8_additional_len = cmp::min(
|
|
(self.chars_utf16 >> ix).trailing_zeros() as usize,
|
|
self.text.len() - ix,
|
|
);
|
|
ix + utf8_additional_len
|
|
}
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
|
|
let mask = if offset == MAX_BASE {
|
|
u128::MAX
|
|
} else {
|
|
(1u128 << offset) - 1
|
|
};
|
|
let row = (self.newlines & mask).count_ones();
|
|
let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
|
|
let column = if newline_ix as usize == MAX_BASE {
|
|
0
|
|
} else {
|
|
((self.chars_utf16 & mask) >> newline_ix).count_ones()
|
|
};
|
|
PointUtf16::new(row, column)
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
|
|
self.offset_to_point_utf16(self.point_to_offset(point))
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
|
|
let lines = self.lines();
|
|
if point.row > lines.row {
|
|
if !clip {
|
|
debug_panic!(
|
|
"point {:?} is beyond this chunk's extent {:?}",
|
|
point,
|
|
self.text
|
|
);
|
|
}
|
|
return self.len();
|
|
}
|
|
|
|
let row_offset_range = self.offset_range_for_row(point.row);
|
|
let line = self.slice(row_offset_range.clone());
|
|
if point.column > line.last_line_len_utf16() {
|
|
if !clip {
|
|
debug_panic!(
|
|
"point {:?} is beyond the end of the line in chunk {:?}",
|
|
point,
|
|
self.text
|
|
);
|
|
}
|
|
return line.len();
|
|
}
|
|
|
|
let mut offset = row_offset_range.start;
|
|
if point.column > 0 {
|
|
offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
|
|
if !self.text.is_char_boundary(offset) {
|
|
offset -= 1;
|
|
while !self.text.is_char_boundary(offset) {
|
|
offset -= 1;
|
|
}
|
|
if !clip {
|
|
debug_panic!(
|
|
"point {:?} is within character in chunk {:?}",
|
|
point,
|
|
self.text,
|
|
);
|
|
}
|
|
}
|
|
}
|
|
offset
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
|
|
let max_point = self.lines();
|
|
if point.0.row > max_point.row {
|
|
return max_point;
|
|
}
|
|
|
|
let row_offset_range = self.offset_range_for_row(point.0.row);
|
|
let line = self.slice(row_offset_range);
|
|
if point.0.column == 0 {
|
|
Point::new(point.0.row, 0)
|
|
} else if point.0.column >= line.len_utf16().0 as u32 {
|
|
Point::new(point.0.row, line.len() as u32)
|
|
} else {
|
|
let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
|
|
while !line.text.is_char_boundary(column) {
|
|
column -= 1;
|
|
}
|
|
Point::new(point.0.row, column as u32)
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
|
|
let max_point = self.lines();
|
|
if point.row > max_point.row {
|
|
return max_point;
|
|
}
|
|
|
|
let line = self.slice(self.offset_range_for_row(point.row));
|
|
if point.column == 0 {
|
|
point
|
|
} else if point.column >= line.len() as u32 {
|
|
Point::new(point.row, line.len() as u32)
|
|
} else {
|
|
let mut column = point.column as usize;
|
|
let bytes = line.text.as_bytes();
|
|
if bytes[column - 1] < 128 && bytes[column] < 128 {
|
|
return Point::new(point.row, column as u32);
|
|
}
|
|
|
|
let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
|
|
loop {
|
|
if line.is_char_boundary(column)
|
|
&& grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
|
|
{
|
|
break;
|
|
}
|
|
|
|
match bias {
|
|
Bias::Left => column -= 1,
|
|
Bias::Right => column += 1,
|
|
}
|
|
grapheme_cursor.set_cursor(column);
|
|
}
|
|
Point::new(point.row, column as u32)
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
|
|
let max_point = self.lines();
|
|
if point.0.row > max_point.row {
|
|
PointUtf16::new(max_point.row, self.last_line_len_utf16())
|
|
} else {
|
|
let line = self.slice(self.offset_range_for_row(point.0.row));
|
|
let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
|
|
PointUtf16::new(point.0.row, column.0 as u32)
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
|
|
if target == OffsetUtf16::default() {
|
|
OffsetUtf16::default()
|
|
} else if target >= self.len_utf16() {
|
|
self.len_utf16()
|
|
} else {
|
|
let mut offset = self.offset_utf16_to_offset(target);
|
|
while !self.text.is_char_boundary(offset) {
|
|
if bias == Bias::Left {
|
|
offset -= 1;
|
|
} else {
|
|
offset += 1;
|
|
}
|
|
}
|
|
self.offset_to_offset_utf16(offset)
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn offset_range_for_row(&self, row: u32) -> Range<usize> {
|
|
let row_start = if row > 0 {
|
|
nth_set_bit(self.newlines, row as usize) + 1
|
|
} else {
|
|
0
|
|
};
|
|
let row_len = if row_start == MAX_BASE {
|
|
0
|
|
} else {
|
|
cmp::min(
|
|
(self.newlines >> row_start).trailing_zeros(),
|
|
(self.text.len() - row_start) as u32,
|
|
)
|
|
};
|
|
row_start..row_start + row_len as usize
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn tabs(&self) -> Tabs {
|
|
Tabs {
|
|
tabs: self.tabs,
|
|
chars: self.chars,
|
|
}
|
|
}
|
|
}
|
|
|
|
pub struct Tabs {
|
|
tabs: u128,
|
|
chars: u128,
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
|
pub struct TabPosition {
|
|
pub byte_offset: usize,
|
|
pub char_offset: usize,
|
|
}
|
|
|
|
impl Iterator for Tabs {
|
|
type Item = TabPosition;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
if self.tabs == 0 {
|
|
return None;
|
|
}
|
|
|
|
let tab_offset = self.tabs.trailing_zeros() as usize;
|
|
let chars_mask = (1 << tab_offset) - 1;
|
|
let char_offset = (self.chars & chars_mask).count_ones() as usize;
|
|
|
|
// Since tabs are 1 byte the tab offset is the same as the byte offset
|
|
let position = TabPosition {
|
|
byte_offset: tab_offset,
|
|
char_offset,
|
|
};
|
|
// Remove the tab we've just seen
|
|
self.tabs ^= 1 << tab_offset;
|
|
|
|
Some(position)
|
|
}
|
|
}
|
|
|
|
/// Finds the n-th bit that is set to 1.
|
|
#[inline(always)]
|
|
fn nth_set_bit(v: u128, n: usize) -> usize {
|
|
let low = v as u64;
|
|
let high = (v >> 64) as u64;
|
|
|
|
let low_count = low.count_ones() as usize;
|
|
if n > low_count {
|
|
64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
|
|
} else {
|
|
nth_set_bit_u64(low, n as u64) as usize
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
|
|
let v = v.reverse_bits();
|
|
let mut s: u64 = 64;
|
|
|
|
// Parallel bit count intermediates
|
|
let a = v - ((v >> 1) & (u64::MAX / 3));
|
|
let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
|
|
let c = (b + (b >> 4)) & (u64::MAX / 0x11);
|
|
let d = (c + (c >> 8)) & (u64::MAX / 0x101);
|
|
|
|
// Branchless select
|
|
let t = (d >> 32) + (d >> 48);
|
|
s -= (t.wrapping_sub(n) & 256) >> 3;
|
|
n -= t & (t.wrapping_sub(n) >> 8);
|
|
|
|
let t = (d >> (s - 16)) & 0xff;
|
|
s -= (t.wrapping_sub(n) & 256) >> 4;
|
|
n -= t & (t.wrapping_sub(n) >> 8);
|
|
|
|
let t = (c >> (s - 8)) & 0xf;
|
|
s -= (t.wrapping_sub(n) & 256) >> 5;
|
|
n -= t & (t.wrapping_sub(n) >> 8);
|
|
|
|
let t = (b >> (s - 4)) & 0x7;
|
|
s -= (t.wrapping_sub(n) & 256) >> 6;
|
|
n -= t & (t.wrapping_sub(n) >> 8);
|
|
|
|
let t = (a >> (s - 2)) & 0x3;
|
|
s -= (t.wrapping_sub(n) & 256) >> 7;
|
|
n -= t & (t.wrapping_sub(n) >> 8);
|
|
|
|
let t = (v >> (s - 1)) & 0x1;
|
|
s -= (t.wrapping_sub(n) & 256) >> 8;
|
|
|
|
65 - s - 1
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use rand::prelude::*;
|
|
use util::RandomCharIter;
|
|
|
|
#[gpui::test(iterations = 100)]
|
|
fn test_random_chunks(mut rng: StdRng) {
|
|
let chunk_len = rng.gen_range(0..=MAX_BASE);
|
|
let text = RandomCharIter::new(&mut rng)
|
|
.take(chunk_len)
|
|
.collect::<String>();
|
|
let mut ix = chunk_len;
|
|
while !text.is_char_boundary(ix) {
|
|
ix -= 1;
|
|
}
|
|
let text = &text[..ix];
|
|
|
|
log::info!("Chunk: {:?}", text);
|
|
let chunk = Chunk::new(text);
|
|
verify_chunk(chunk.as_slice(), text);
|
|
|
|
for _ in 0..10 {
|
|
let mut start = rng.gen_range(0..=chunk.text.len());
|
|
let mut end = rng.gen_range(start..=chunk.text.len());
|
|
while !chunk.text.is_char_boundary(start) {
|
|
start -= 1;
|
|
}
|
|
while !chunk.text.is_char_boundary(end) {
|
|
end -= 1;
|
|
}
|
|
let range = start..end;
|
|
log::info!("Range: {:?}", range);
|
|
let text_slice = &text[range.clone()];
|
|
let chunk_slice = chunk.slice(range);
|
|
verify_chunk(chunk_slice, text_slice);
|
|
}
|
|
}
|
|
|
|
#[gpui::test(iterations = 1000)]
|
|
fn test_nth_set_bit_random(mut rng: StdRng) {
|
|
let set_count = rng.gen_range(0..=128);
|
|
let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
|
|
set_bits.sort();
|
|
let mut n = 0;
|
|
for ix in set_bits.iter().copied() {
|
|
n |= 1 << ix;
|
|
}
|
|
|
|
for (mut ix, position) in set_bits.into_iter().enumerate() {
|
|
ix += 1;
|
|
assert_eq!(
|
|
nth_set_bit(n, ix),
|
|
position,
|
|
"nth_set_bit({:0128b}, {})",
|
|
n,
|
|
ix
|
|
);
|
|
}
|
|
}
|
|
|
|
fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
|
|
let mut offset = 0;
|
|
let mut offset_utf16 = OffsetUtf16(0);
|
|
let mut point = Point::zero();
|
|
let mut point_utf16 = PointUtf16::zero();
|
|
|
|
log::info!("Verifying chunk {:?}", text);
|
|
assert_eq!(chunk.offset_to_point(0), Point::zero());
|
|
|
|
let mut expected_tab_positions = Vec::new();
|
|
|
|
for (char_offset, c) in text.chars().enumerate() {
|
|
let expected_point = chunk.offset_to_point(offset);
|
|
assert_eq!(point, expected_point, "mismatch at offset {}", offset);
|
|
assert_eq!(
|
|
chunk.point_to_offset(point),
|
|
offset,
|
|
"mismatch at point {:?}",
|
|
point
|
|
);
|
|
assert_eq!(
|
|
chunk.offset_to_offset_utf16(offset),
|
|
offset_utf16,
|
|
"mismatch at offset {}",
|
|
offset
|
|
);
|
|
assert_eq!(
|
|
chunk.offset_utf16_to_offset(offset_utf16),
|
|
offset,
|
|
"mismatch at offset_utf16 {:?}",
|
|
offset_utf16
|
|
);
|
|
assert_eq!(
|
|
chunk.point_to_point_utf16(point),
|
|
point_utf16,
|
|
"mismatch at point {:?}",
|
|
point
|
|
);
|
|
assert_eq!(
|
|
chunk.point_utf16_to_offset(point_utf16, false),
|
|
offset,
|
|
"mismatch at point_utf16 {:?}",
|
|
point_utf16
|
|
);
|
|
assert_eq!(
|
|
chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
|
|
point,
|
|
"mismatch for unclipped_point_utf16_to_point at {:?}",
|
|
point_utf16
|
|
);
|
|
|
|
assert_eq!(
|
|
chunk.clip_point(point, Bias::Left),
|
|
point,
|
|
"incorrect left clip at {:?}",
|
|
point
|
|
);
|
|
assert_eq!(
|
|
chunk.clip_point(point, Bias::Right),
|
|
point,
|
|
"incorrect right clip at {:?}",
|
|
point
|
|
);
|
|
|
|
for i in 1..c.len_utf8() {
|
|
let test_point = Point::new(point.row, point.column + i as u32);
|
|
assert_eq!(
|
|
chunk.clip_point(test_point, Bias::Left),
|
|
point,
|
|
"incorrect left clip within multi-byte char at {:?}",
|
|
test_point
|
|
);
|
|
assert_eq!(
|
|
chunk.clip_point(test_point, Bias::Right),
|
|
Point::new(point.row, point.column + c.len_utf8() as u32),
|
|
"incorrect right clip within multi-byte char at {:?}",
|
|
test_point
|
|
);
|
|
}
|
|
|
|
for i in 1..c.len_utf16() {
|
|
let test_point = Unclipped(PointUtf16::new(
|
|
point_utf16.row,
|
|
point_utf16.column + i as u32,
|
|
));
|
|
assert_eq!(
|
|
chunk.unclipped_point_utf16_to_point(test_point),
|
|
point,
|
|
"incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
|
|
test_point
|
|
);
|
|
assert_eq!(
|
|
chunk.clip_point_utf16(test_point, Bias::Left),
|
|
point_utf16,
|
|
"incorrect left clip_point_utf16 within multi-byte char at {:?}",
|
|
test_point
|
|
);
|
|
assert_eq!(
|
|
chunk.clip_point_utf16(test_point, Bias::Right),
|
|
PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
|
|
"incorrect right clip_point_utf16 within multi-byte char at {:?}",
|
|
test_point
|
|
);
|
|
|
|
let test_offset = OffsetUtf16(offset_utf16.0 + i);
|
|
assert_eq!(
|
|
chunk.clip_offset_utf16(test_offset, Bias::Left),
|
|
offset_utf16,
|
|
"incorrect left clip_offset_utf16 within multi-byte char at {:?}",
|
|
test_offset
|
|
);
|
|
assert_eq!(
|
|
chunk.clip_offset_utf16(test_offset, Bias::Right),
|
|
OffsetUtf16(offset_utf16.0 + c.len_utf16()),
|
|
"incorrect right clip_offset_utf16 within multi-byte char at {:?}",
|
|
test_offset
|
|
);
|
|
}
|
|
|
|
if c == '\n' {
|
|
point.row += 1;
|
|
point.column = 0;
|
|
point_utf16.row += 1;
|
|
point_utf16.column = 0;
|
|
} else {
|
|
point.column += c.len_utf8() as u32;
|
|
point_utf16.column += c.len_utf16() as u32;
|
|
}
|
|
|
|
if c == '\t' {
|
|
expected_tab_positions.push(TabPosition {
|
|
byte_offset: offset,
|
|
char_offset,
|
|
});
|
|
}
|
|
|
|
offset += c.len_utf8();
|
|
offset_utf16.0 += c.len_utf16();
|
|
}
|
|
|
|
let final_point = chunk.offset_to_point(offset);
|
|
assert_eq!(point, final_point, "mismatch at final offset {}", offset);
|
|
assert_eq!(
|
|
chunk.point_to_offset(point),
|
|
offset,
|
|
"mismatch at point {:?}",
|
|
point
|
|
);
|
|
assert_eq!(
|
|
chunk.offset_to_offset_utf16(offset),
|
|
offset_utf16,
|
|
"mismatch at offset {}",
|
|
offset
|
|
);
|
|
assert_eq!(
|
|
chunk.offset_utf16_to_offset(offset_utf16),
|
|
offset,
|
|
"mismatch at offset_utf16 {:?}",
|
|
offset_utf16
|
|
);
|
|
assert_eq!(
|
|
chunk.point_to_point_utf16(point),
|
|
point_utf16,
|
|
"mismatch at final point {:?}",
|
|
point
|
|
);
|
|
assert_eq!(
|
|
chunk.point_utf16_to_offset(point_utf16, false),
|
|
offset,
|
|
"mismatch at final point_utf16 {:?}",
|
|
point_utf16
|
|
);
|
|
assert_eq!(
|
|
chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
|
|
point,
|
|
"mismatch for unclipped_point_utf16_to_point at final point {:?}",
|
|
point_utf16
|
|
);
|
|
assert_eq!(
|
|
chunk.clip_point(point, Bias::Left),
|
|
point,
|
|
"incorrect left clip at final point {:?}",
|
|
point
|
|
);
|
|
assert_eq!(
|
|
chunk.clip_point(point, Bias::Right),
|
|
point,
|
|
"incorrect right clip at final point {:?}",
|
|
point
|
|
);
|
|
assert_eq!(
|
|
chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
|
|
point_utf16,
|
|
"incorrect left clip_point_utf16 at final point {:?}",
|
|
point_utf16
|
|
);
|
|
assert_eq!(
|
|
chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
|
|
point_utf16,
|
|
"incorrect right clip_point_utf16 at final point {:?}",
|
|
point_utf16
|
|
);
|
|
assert_eq!(
|
|
chunk.clip_offset_utf16(offset_utf16, Bias::Left),
|
|
offset_utf16,
|
|
"incorrect left clip_offset_utf16 at final offset {:?}",
|
|
offset_utf16
|
|
);
|
|
assert_eq!(
|
|
chunk.clip_offset_utf16(offset_utf16, Bias::Right),
|
|
offset_utf16,
|
|
"incorrect right clip_offset_utf16 at final offset {:?}",
|
|
offset_utf16
|
|
);
|
|
|
|
// Verify length methods
|
|
assert_eq!(chunk.len(), text.len());
|
|
assert_eq!(
|
|
chunk.len_utf16().0,
|
|
text.chars().map(|c| c.len_utf16()).sum::<usize>()
|
|
);
|
|
|
|
// Verify line counting
|
|
let lines = chunk.lines();
|
|
let mut newline_count = 0;
|
|
let mut last_line_len = 0;
|
|
for c in text.chars() {
|
|
if c == '\n' {
|
|
newline_count += 1;
|
|
last_line_len = 0;
|
|
} else {
|
|
last_line_len += c.len_utf8() as u32;
|
|
}
|
|
}
|
|
assert_eq!(lines, Point::new(newline_count, last_line_len));
|
|
|
|
// Verify first/last line chars
|
|
if !text.is_empty() {
|
|
let first_line = text.split('\n').next().unwrap();
|
|
assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);
|
|
|
|
let last_line = text.split('\n').next_back().unwrap();
|
|
assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
|
|
assert_eq!(
|
|
chunk.last_line_len_utf16(),
|
|
last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
|
|
);
|
|
}
|
|
|
|
// Verify longest row
|
|
let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
|
|
let mut max_chars = 0;
|
|
let mut current_row = 0;
|
|
let mut current_chars = 0;
|
|
let mut max_row = 0;
|
|
|
|
for c in text.chars() {
|
|
if c == '\n' {
|
|
if current_chars > max_chars {
|
|
max_chars = current_chars;
|
|
max_row = current_row;
|
|
}
|
|
current_row += 1;
|
|
current_chars = 0;
|
|
} else {
|
|
current_chars += 1;
|
|
}
|
|
}
|
|
|
|
if current_chars > max_chars {
|
|
max_chars = current_chars;
|
|
max_row = current_row;
|
|
}
|
|
|
|
assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
|
|
assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
|
|
}
|
|
}
|