Start indexing UTF-16 offsets

This is needed because Cocoa will report ranges as UTF-16 indices.
This commit is contained in:
Antonio Scandurra 2022-07-21 09:40:48 +02:00
parent eda60effed
commit 32662b6b92
6 changed files with 261 additions and 55 deletions

View file

@ -1,6 +1,5 @@
use crate::PointUtf16;
use super::Point;
use crate::{OffsetUtf16, PointUtf16};
use arrayvec::ArrayString;
use bromberg_sl2::{DigestString, HashMatrix};
use smallvec::SmallVec;
@ -165,8 +164,34 @@ impl Rope {
Chunks::new(self, range, true)
}
pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
if offset >= self.summary().len {
return OffsetUtf16(self.summary().len_utf16);
}
let mut cursor = self.chunks.cursor::<(usize, OffsetUtf16)>();
cursor.seek(&offset, Bias::Left, &());
let overshoot = offset - cursor.start().0;
cursor.start().1
+ cursor.item().map_or(Default::default(), |chunk| {
chunk.offset_to_offset_utf16(overshoot)
})
}
/// Translates a UTF-16 `offset` into the corresponding UTF-8 byte offset.
///
/// An `offset` at or beyond the rope's UTF-16 length yields the rope's total
/// byte length.
pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
    let summary = self.summary();
    if offset.0 >= summary.len_utf16 {
        return summary.len;
    }

    // The cursor tracks UTF-16 code units and bytes side by side; after the
    // seek, `cursor.start()` holds the totals preceding the current chunk.
    let mut cursor = self.chunks.cursor::<(OffsetUtf16, usize)>();
    cursor.seek(&offset, Bias::Left, &());
    let utf16_into_chunk = offset - cursor.start().0;
    let bytes_before_chunk = cursor.start().1;

    // Resolve the remaining distance inside the chunk itself, if there is one.
    let bytes_within_chunk = cursor
        .item()
        .map(|chunk| chunk.offset_utf16_to_offset(utf16_into_chunk))
        .unwrap_or_default();
    bytes_before_chunk + bytes_within_chunk
}
pub fn offset_to_point(&self, offset: usize) -> Point {
if offset >= self.summary().bytes {
if offset >= self.summary().len {
return self.summary().lines;
}
let mut cursor = self.chunks.cursor::<(usize, Point)>();
@ -179,7 +204,7 @@ impl Rope {
}
pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
if offset >= self.summary().bytes {
if offset >= self.summary().len {
return self.summary().lines_utf16;
}
let mut cursor = self.chunks.cursor::<(usize, PointUtf16)>();
@ -206,7 +231,7 @@ impl Rope {
pub fn point_to_offset(&self, point: Point) -> usize {
if point >= self.summary().lines {
return self.summary().bytes;
return self.summary().len;
}
let mut cursor = self.chunks.cursor::<(Point, usize)>();
cursor.seek(&point, Bias::Left, &());
@ -219,7 +244,7 @@ impl Rope {
pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
if point >= self.summary().lines_utf16 {
return self.summary().bytes;
return self.summary().len;
}
let mut cursor = self.chunks.cursor::<(PointUtf16, usize)>();
cursor.seek(&point, Bias::Left, &());
@ -262,7 +287,7 @@ impl Rope {
}
offset
} else {
self.summary().bytes
self.summary().len
}
}
@ -543,6 +568,34 @@ impl<'a> io::Read for Bytes<'a> {
struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>);
impl Chunk {
/// Returns the number of UTF-16 code units occupied by the characters of
/// this chunk that start before byte offset `target`.
///
/// A character that begins before `target` is counted in full, even when
/// `target` falls in the middle of its UTF-8 encoding.
fn offset_to_offset_utf16(&self, target: usize) -> OffsetUtf16 {
    let code_units: usize = self
        .0
        .char_indices()
        .take_while(|&(start, _)| start < target)
        .map(|(_, ch)| ch.len_utf16())
        .sum();
    OffsetUtf16(code_units)
}
/// Returns the UTF-8 byte offset reached after consuming characters of this
/// chunk until at least `target` UTF-16 code units have been covered.
///
/// Whole characters are consumed, so a `target` that lands inside a
/// surrogate pair resolves to the end of that character.
fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
    let mut remaining = self.0.chars();
    let mut utf16_seen = OffsetUtf16(0);
    let mut bytes_seen = 0;
    loop {
        if utf16_seen >= target {
            break;
        }
        match remaining.next() {
            Some(ch) => {
                bytes_seen += ch.len_utf8();
                utf16_seen.0 += ch.len_utf16();
            }
            // Ran out of characters before reaching `target`.
            None => break,
        }
    }
    bytes_seen
}
fn offset_to_point(&self, target: usize) -> Point {
let mut offset = 0;
let mut point = Point::new(0, 0);
@ -748,7 +801,8 @@ impl sum_tree::Summary for ChunkSummary {
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct TextSummary {
pub bytes: usize,
pub len: usize,
pub len_utf16: usize,
pub lines: Point,
pub lines_utf16: PointUtf16,
pub first_line_chars: u32,
@ -759,6 +813,7 @@ pub struct TextSummary {
impl<'a> From<&'a str> for TextSummary {
fn from(text: &'a str) -> Self {
let mut len_utf16 = 0;
let mut lines = Point::new(0, 0);
let mut lines_utf16 = PointUtf16::new(0, 0);
let mut first_line_chars = 0;
@ -766,6 +821,8 @@ impl<'a> From<&'a str> for TextSummary {
let mut longest_row = 0;
let mut longest_row_chars = 0;
for c in text.chars() {
len_utf16 += c.len_utf16();
if c == '\n' {
lines += Point::new(1, 0);
lines_utf16 += PointUtf16::new(1, 0);
@ -787,7 +844,8 @@ impl<'a> From<&'a str> for TextSummary {
}
TextSummary {
bytes: text.len(),
len: text.len(),
len_utf16,
lines,
lines_utf16,
first_line_chars,
@ -837,7 +895,8 @@ impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
self.last_line_chars = other.last_line_chars;
}
self.bytes += other.bytes;
self.len += other.len;
self.len_utf16 += other.len_utf16;
self.lines += other.lines;
self.lines_utf16 += other.lines_utf16;
}
@ -886,13 +945,29 @@ impl TextDimension for TextSummary {
impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
*self += summary.text.bytes;
*self += summary.text.len;
}
}
impl TextDimension for usize {
fn from_text_summary(summary: &TextSummary) -> Self {
summary.bytes
summary.len
}
fn add_assign(&mut self, other: &Self) {
*self += other;
}
}
impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
self.0 += summary.text.len_utf16;
}
}
impl TextDimension for OffsetUtf16 {
fn from_text_summary(summary: &TextSummary) -> Self {
Self(summary.len_utf16)
}
fn add_assign(&mut self, other: &Self) {
@ -1054,6 +1129,7 @@ mod tests {
);
}
let mut offset_utf16 = OffsetUtf16(0);
let mut point = Point::new(0, 0);
let mut point_utf16 = PointUtf16::new(0, 0);
for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
@ -1076,6 +1152,18 @@ mod tests {
"point_utf16_to_offset({:?})",
point_utf16
);
assert_eq!(
actual.offset_to_offset_utf16(ix),
offset_utf16,
"offset_to_offset_utf16({:?})",
ix
);
assert_eq!(
actual.offset_utf16_to_offset(offset_utf16),
ix,
"offset_utf16_to_offset({:?})",
offset_utf16
);
if ch == '\n' {
point += Point::new(1, 0);
point_utf16 += PointUtf16::new(1, 0);
@ -1083,6 +1171,7 @@ mod tests {
point.column += ch.len_utf8() as u32;
point_utf16.column += ch.len_utf16() as u32;
}
offset_utf16.0 += ch.len_utf16();
}
let mut point_utf16 = PointUtf16::zero();

View file

@ -247,7 +247,8 @@ fn test_text_summary_for_range() {
assert_eq!(
buffer.text_summary_for_range::<TextSummary, _>(1..3),
TextSummary {
bytes: 2,
len: 2,
len_utf16: 2,
lines: Point::new(1, 0),
lines_utf16: PointUtf16::new(1, 0),
first_line_chars: 1,
@ -259,7 +260,8 @@ fn test_text_summary_for_range() {
assert_eq!(
buffer.text_summary_for_range::<TextSummary, _>(1..12),
TextSummary {
bytes: 11,
len: 11,
len_utf16: 11,
lines: Point::new(3, 0),
lines_utf16: PointUtf16::new(3, 0),
first_line_chars: 1,
@ -271,7 +273,8 @@ fn test_text_summary_for_range() {
assert_eq!(
buffer.text_summary_for_range::<TextSummary, _>(0..20),
TextSummary {
bytes: 20,
len: 20,
len_utf16: 20,
lines: Point::new(4, 1),
lines_utf16: PointUtf16::new(4, 1),
first_line_chars: 2,
@ -283,7 +286,8 @@ fn test_text_summary_for_range() {
assert_eq!(
buffer.text_summary_for_range::<TextSummary, _>(0..22),
TextSummary {
bytes: 22,
len: 22,
len_utf16: 22,
lines: Point::new(4, 3),
lines_utf16: PointUtf16::new(4, 3),
first_line_chars: 2,
@ -295,7 +299,8 @@ fn test_text_summary_for_range() {
assert_eq!(
buffer.text_summary_for_range::<TextSummary, _>(7..22),
TextSummary {
bytes: 15,
len: 15,
len_utf16: 15,
lines: Point::new(2, 3),
lines_utf16: PointUtf16::new(2, 3),
first_line_chars: 4,

View file

@ -2,6 +2,7 @@ mod anchor;
pub mod locator;
#[cfg(any(test, feature = "test-support"))]
pub mod network;
mod offset_utf16;
pub mod operation_queue;
mod patch;
mod point;
@ -20,6 +21,7 @@ use clock::ReplicaId;
use collections::{HashMap, HashSet};
use lazy_static::lazy_static;
use locator::Locator;
pub use offset_utf16::*;
use operation_queue::OperationQueue;
pub use patch::Patch;
pub use point::*;
@ -1621,6 +1623,14 @@ impl BufferSnapshot {
self.visible_text.point_utf16_to_point(point)
}
/// Converts a UTF-16 `offset` into a `usize` offset by delegating to the
/// snapshot's `visible_text`.
pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
    let visible = &self.visible_text;
    visible.offset_utf16_to_offset(offset)
}
/// Converts a `usize` `offset` into a UTF-16 offset by delegating to the
/// snapshot's `visible_text`.
pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
    let visible = &self.visible_text;
    visible.offset_to_offset_utf16(offset)
}
/// Converts a `usize` `offset` into a `Point` by delegating to the
/// snapshot's `visible_text`.
pub fn offset_to_point(&self, offset: usize) -> Point {
    let visible = &self.visible_text;
    visible.offset_to_point(offset)
}
@ -2423,6 +2433,12 @@ impl ToOffset for usize {
}
}
impl ToOffset for OffsetUtf16 {
fn to_offset<'a>(&self, snapshot: &BufferSnapshot) -> usize {
snapshot.offset_utf16_to_offset(*self)
}
}
impl ToOffset for Anchor {
fn to_offset<'a>(&self, snapshot: &BufferSnapshot) -> usize {
snapshot.summary_for_anchor(self)
@ -2491,6 +2507,28 @@ impl ToPointUtf16 for Point {
}
}
/// Conversion of a buffer position into a UTF-16 offset (`OffsetUtf16`).
pub trait ToOffsetUtf16 {
/// Returns this position as an `OffsetUtf16`, resolved against `snapshot`.
fn to_offset_utf16<'a>(&self, snapshot: &BufferSnapshot) -> OffsetUtf16;
}
/// An anchor resolves to a UTF-16 offset through the snapshot's anchor summary.
impl ToOffsetUtf16 for Anchor {
/// Resolves this anchor against `snapshot` as an `OffsetUtf16`.
fn to_offset_utf16<'a>(&self, snapshot: &BufferSnapshot) -> OffsetUtf16 {
// NOTE(review): the `OffsetUtf16` return type is what selects the result of
// `summary_for_anchor` here — presumably it is generic over its output;
// its signature is not visible in this hunk, so confirm before changing.
snapshot.summary_for_anchor(self)
}
}
impl ToOffsetUtf16 for usize {
fn to_offset_utf16<'a>(&self, snapshot: &BufferSnapshot) -> OffsetUtf16 {
snapshot.offset_to_offset_utf16(*self)
}
}
/// Identity conversion: a UTF-16 offset is already in the target representation.
impl ToOffsetUtf16 for OffsetUtf16 {
/// Returns a copy of `self`; the snapshot is not consulted.
fn to_offset_utf16<'a>(&self, _snapshot: &BufferSnapshot) -> OffsetUtf16 {
*self
}
}
/// Clipping of a position type against a buffer snapshot.
///
/// NOTE(review): no implementation is visible here — presumably `clip` adjusts
/// the value to a valid position in the snapshot, with `bias` choosing the
/// direction; confirm against the implementors.
pub trait Clip {
/// Returns a clipped value of the same type, computed from `bias` and `snapshot`.
fn clip(&self, bias: Bias, snapshot: &BufferSnapshot) -> Self;
}