ZIm/zed/src/editor/buffer/text.rs
Antonio Scandurra 5e0d5b7f9a Fix Debug impl for Text to only display the slice a Text refers to
...as opposed to the whole string.
2021-04-08 15:05:33 +02:00

445 lines
14 KiB
Rust

use super::Point;
use crate::sum_tree::{self, SeekBias, SumTree};
use arrayvec::ArrayVec;
use std::{
cmp,
fmt::{self, Debug},
ops::{Bound, Index, Range, RangeBounds},
sync::Arc,
};
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum Run {
Newline,
Chars { len: usize, char_size: u8 },
}
#[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)]
struct ByteOffset(usize);
impl sum_tree::Item for Run {
type Summary = TextSummary;
fn summary(&self) -> Self::Summary {
match *self {
Run::Newline => TextSummary {
chars: 1,
bytes: 1,
lines: Point::new(1, 0),
first_line_len: 0,
rightmost_point: Point::new(0, 0),
},
Run::Chars { len, char_size } => TextSummary {
chars: len,
bytes: len * char_size as usize,
lines: Point::new(0, len as u32),
first_line_len: len as u32,
rightmost_point: Point::new(0, len as u32),
},
}
}
}
impl Run {
fn char_size(&self) -> u8 {
match self {
Run::Newline => 1,
Run::Chars { char_size, .. } => *char_size,
}
}
}
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct TextSummary {
pub chars: usize,
pub bytes: usize,
pub lines: Point,
pub first_line_len: u32,
pub rightmost_point: Point,
}
impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
fn add_assign(&mut self, other: &'a Self) {
let joined_line_len = self.lines.column + other.first_line_len;
if joined_line_len > self.rightmost_point.column {
self.rightmost_point = Point::new(self.lines.row, joined_line_len);
}
if other.rightmost_point.column > self.rightmost_point.column {
self.rightmost_point = self.lines + &other.rightmost_point;
}
if self.lines.row == 0 {
self.first_line_len += other.first_line_len;
}
self.chars += other.chars;
self.bytes += other.bytes;
self.lines += &other.lines;
}
}
impl std::ops::AddAssign<Self> for TextSummary {
fn add_assign(&mut self, other: Self) {
*self += &other;
}
}
impl<'a> sum_tree::Dimension<'a, TextSummary> for TextSummary {
fn add_summary(&mut self, summary: &TextSummary) {
*self += summary;
}
}
impl<'a> sum_tree::Dimension<'a, TextSummary> for Point {
fn add_summary(&mut self, summary: &TextSummary) {
*self += &summary.lines;
}
}
impl<'a> sum_tree::Dimension<'a, TextSummary> for ByteOffset {
fn add_summary(&mut self, summary: &TextSummary) {
self.0 += summary.bytes
}
}
impl<'a> sum_tree::Dimension<'a, TextSummary> for usize {
fn add_summary(&mut self, summary: &TextSummary) {
*self += summary.chars;
}
}
#[derive(Clone)]
pub struct Text {
text: Arc<str>,
runs: SumTree<Run>,
range: Range<usize>,
}
impl From<String> for Text {
fn from(text: String) -> Self {
let mut runs = Vec::new();
let mut chars_len = 0;
let mut run_char_size = 0;
let mut run_chars = 0;
let mut chars = text.chars();
loop {
let ch = chars.next();
let ch_size = ch.map_or(0, |ch| ch.len_utf8());
if run_chars != 0 && (ch.is_none() || ch == Some('\n') || run_char_size != ch_size) {
runs.push(Run::Chars {
len: run_chars,
char_size: run_char_size as u8,
});
run_chars = 0;
}
run_char_size = ch_size;
match ch {
Some('\n') => runs.push(Run::Newline),
Some(_) => run_chars += 1,
None => break,
}
chars_len += 1;
}
let mut tree = SumTree::new();
tree.extend(runs);
Text {
text: text.into(),
runs: tree,
range: 0..chars_len,
}
}
}
impl<'a> From<&'a str> for Text {
fn from(text: &'a str) -> Self {
Self::from(String::from(text))
}
}
impl Debug for Text {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_tuple("Text").field(&self.as_str()).finish()
}
}
impl PartialEq for Text {
fn eq(&self, other: &Self) -> bool {
self.text == other.text
}
}
impl Eq for Text {}
impl<T: RangeBounds<usize>> Index<T> for Text {
type Output = str;
fn index(&self, range: T) -> &Self::Output {
let start = match range.start_bound() {
Bound::Included(start) => cmp::min(self.range.start + start, self.range.end),
Bound::Excluded(_) => unimplemented!(),
Bound::Unbounded => self.range.start,
};
let end = match range.end_bound() {
Bound::Included(end) => cmp::min(self.range.start + end + 1, self.range.end),
Bound::Excluded(end) => cmp::min(self.range.start + end, self.range.end),
Bound::Unbounded => self.range.end,
};
let byte_start = self.abs_byte_offset_for_offset(start);
let byte_end = self.abs_byte_offset_for_offset(end);
&self.text[byte_start..byte_end]
}
}
impl Text {
pub fn range(&self) -> Range<usize> {
self.range.clone()
}
pub fn as_str(&self) -> &str {
&self[..]
}
pub fn slice<T: RangeBounds<usize>>(&self, range: T) -> Text {
let start = match range.start_bound() {
Bound::Included(start) => cmp::min(self.range.start + start, self.range.end),
Bound::Excluded(_) => unimplemented!(),
Bound::Unbounded => self.range.start,
};
let end = match range.end_bound() {
Bound::Included(end) => cmp::min(self.range.start + end + 1, self.range.end),
Bound::Excluded(end) => cmp::min(self.range.start + end, self.range.end),
Bound::Unbounded => self.range.end,
};
Text {
text: self.text.clone(),
runs: self.runs.clone(),
range: start..end,
}
}
pub fn line_len(&self, row: u32) -> u32 {
let mut cursor = self.runs.cursor::<usize, Point>();
cursor.seek(&self.range.start, SeekBias::Right);
let absolute_row = cursor.start().row + row;
let mut cursor = self.runs.cursor::<Point, usize>();
cursor.seek(&Point::new(absolute_row, 0), SeekBias::Right);
let prefix_len = self.range.start.saturating_sub(*cursor.start());
let line_len = cursor.summary::<usize>(&Point::new(absolute_row + 1, 0), SeekBias::Left);
let suffix_len = cursor.start().saturating_sub(self.range.end);
line_len
.saturating_sub(prefix_len)
.saturating_sub(suffix_len) as u32
}
pub fn len(&self) -> usize {
self.range.end - self.range.start
}
pub fn lines(&self) -> Point {
self.abs_point_for_offset(self.range.end) - &self.abs_point_for_offset(self.range.start)
}
pub fn rightmost_point(&self) -> Point {
let lines = self.lines();
let mut candidates = ArrayVec::<[Point; 3]>::new();
candidates.push(lines);
if lines.row > 0 {
candidates.push(Point::new(0, self.line_len(0)));
if lines.row > 1 {
let mut cursor = self.runs.cursor::<usize, Point>();
cursor.seek(&self.range.start, SeekBias::Right);
let absolute_start_row = cursor.start().row;
let mut cursor = self.runs.cursor::<Point, usize>();
cursor.seek(&Point::new(absolute_start_row + 1, 0), SeekBias::Right);
let summary = cursor.summary::<TextSummary>(
&Point::new(absolute_start_row + lines.row, 0),
SeekBias::Left,
);
candidates.push(Point::new(1, 0) + &summary.rightmost_point);
}
}
candidates.into_iter().max_by_key(|p| p.column).unwrap()
}
pub fn point_for_offset(&self, offset: usize) -> Point {
self.abs_point_for_offset(self.range.start + offset)
- &self.abs_point_for_offset(self.range.start)
}
pub fn offset_for_point(&self, point: Point) -> usize {
let mut cursor = self.runs.cursor::<Point, TextSummary>();
let abs_point = self.abs_point_for_offset(self.range.start) + &point;
cursor.seek(&abs_point, SeekBias::Right);
let overshoot = abs_point - &cursor.start().lines;
let abs_offset = cursor.start().chars + overshoot.column as usize;
abs_offset - self.range.start
}
pub fn summary(&self) -> TextSummary {
TextSummary {
chars: self.range.end - self.range.start,
bytes: self.abs_byte_offset_for_offset(self.range.end)
- self.abs_byte_offset_for_offset(self.range.start),
lines: self.abs_point_for_offset(self.range.end)
- &self.abs_point_for_offset(self.range.start),
first_line_len: self.line_len(0),
rightmost_point: self.rightmost_point(),
}
}
fn abs_point_for_offset(&self, offset: usize) -> Point {
let mut cursor = self.runs.cursor::<usize, TextSummary>();
cursor.seek(&offset, SeekBias::Right);
let overshoot = (offset - cursor.start().chars) as u32;
cursor.start().lines + &Point::new(0, overshoot)
}
fn abs_byte_offset_for_offset(&self, offset: usize) -> usize {
let mut cursor = self.runs.cursor::<usize, TextSummary>();
cursor.seek(&offset, SeekBias::Right);
let overshoot = offset - cursor.start().chars;
cursor.start().bytes + overshoot * cursor.item().map_or(0, |run| run.char_size()) as usize
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::HashSet;
use std::iter::FromIterator;
#[test]
fn test_basic() {
let text = Text::from(String::from("ab\ncd€\nfghij\nkl¢m"));
assert_eq!(text.len(), 17);
assert_eq!(text.as_str(), "ab\ncd€\nfghij\nkl¢m");
assert_eq!(text.lines(), Point::new(3, 4));
assert_eq!(text.line_len(0), 2);
assert_eq!(text.line_len(1), 3);
assert_eq!(text.line_len(2), 5);
assert_eq!(text.line_len(3), 4);
assert_eq!(text.rightmost_point(), Point::new(2, 5));
let b_to_g = text.slice(1..9);
assert_eq!(b_to_g.as_str(), "b\ncd€\nfg");
assert_eq!(b_to_g.len(), 8);
assert_eq!(b_to_g.lines(), Point::new(2, 2));
assert_eq!(b_to_g.line_len(0), 1);
assert_eq!(b_to_g.line_len(1), 3);
assert_eq!(b_to_g.line_len(2), 2);
assert_eq!(b_to_g.line_len(3), 0);
assert_eq!(b_to_g.rightmost_point(), Point::new(1, 3));
let d_to_i = text.slice(4..11);
assert_eq!(d_to_i.as_str(), "d€\nfghi");
assert_eq!(&d_to_i[1..5], "\nfg");
assert_eq!(d_to_i.len(), 7);
assert_eq!(d_to_i.lines(), Point::new(1, 4));
assert_eq!(d_to_i.line_len(0), 2);
assert_eq!(d_to_i.line_len(1), 4);
assert_eq!(d_to_i.line_len(2), 0);
assert_eq!(d_to_i.rightmost_point(), Point::new(1, 4));
let d_to_j = text.slice(4..=11);
assert_eq!(d_to_j.as_str(), "d€\nfghij");
assert_eq!(&d_to_j[1..], "\nfghij");
assert_eq!(d_to_j.len(), 8);
}
#[test]
fn test_random() {
use rand::prelude::*;
for seed in 0..100 {
println!("buffer::text seed: {}", seed);
let rng = &mut StdRng::seed_from_u64(seed);
let len = rng.gen_range(0..50);
let mut string = String::new();
for _ in 0..len {
if rng.gen_ratio(1, 5) {
string.push('\n');
} else {
string.push(rng.gen());
}
}
let text = Text::from(string.clone());
for _ in 0..10 {
let start = rng.gen_range(0..text.len() + 1);
let end = rng.gen_range(start..text.len() + 2);
let string_slice = string
.chars()
.skip(start)
.take(end - start)
.collect::<String>();
let expected_line_endpoints = string_slice
.split('\n')
.enumerate()
.map(|(row, line)| Point::new(row as u32, line.chars().count() as u32))
.collect::<Vec<_>>();
let text_slice = text.slice(start..end);
assert_eq!(text_slice.lines(), lines(&string_slice));
let mut rightmost_points: HashSet<Point> = HashSet::new();
for endpoint in &expected_line_endpoints {
if let Some(rightmost_point) = rightmost_points.iter().next().cloned() {
if endpoint.column > rightmost_point.column {
rightmost_points.clear();
}
if endpoint.column >= rightmost_point.column {
rightmost_points.insert(*endpoint);
}
} else {
rightmost_points.insert(*endpoint);
}
assert_eq!(text_slice.line_len(endpoint.row as u32), endpoint.column);
}
assert!(rightmost_points.contains(&text_slice.rightmost_point()));
for _ in 0..10 {
let offset = rng.gen_range(0..string_slice.chars().count() + 1);
let point = lines(&string_slice.chars().take(offset).collect::<String>());
assert_eq!(text_slice.point_for_offset(offset), point);
assert_eq!(text_slice.offset_for_point(point), offset);
if offset < string_slice.chars().count() {
assert_eq!(
&text_slice[offset..offset + 1],
String::from_iter(string_slice.chars().nth(offset)).as_str()
);
}
}
}
}
}
pub fn lines(s: &str) -> Point {
let mut row = 0;
let mut column = 0;
for ch in s.chars() {
if ch == '\n' {
row += 1;
column = 0;
} else {
column += 1;
}
}
Point::new(row, column)
}
}