Start indexing UTF-16 offsets
This is needed because cocoa will report ranges as UTF-16 indices.
This commit is contained in:
parent
eda60effed
commit
32662b6b92
6 changed files with 261 additions and 55 deletions
|
@ -1,6 +1,5 @@
|
|||
use crate::PointUtf16;
|
||||
|
||||
use super::Point;
|
||||
use crate::{OffsetUtf16, PointUtf16};
|
||||
use arrayvec::ArrayString;
|
||||
use bromberg_sl2::{DigestString, HashMatrix};
|
||||
use smallvec::SmallVec;
|
||||
|
@ -165,8 +164,34 @@ impl Rope {
|
|||
Chunks::new(self, range, true)
|
||||
}
|
||||
|
||||
pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
|
||||
if offset >= self.summary().len {
|
||||
return OffsetUtf16(self.summary().len_utf16);
|
||||
}
|
||||
let mut cursor = self.chunks.cursor::<(usize, OffsetUtf16)>();
|
||||
cursor.seek(&offset, Bias::Left, &());
|
||||
let overshoot = offset - cursor.start().0;
|
||||
cursor.start().1
|
||||
+ cursor.item().map_or(Default::default(), |chunk| {
|
||||
chunk.offset_to_offset_utf16(overshoot)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
|
||||
if offset.0 >= self.summary().len_utf16 {
|
||||
return self.summary().len;
|
||||
}
|
||||
let mut cursor = self.chunks.cursor::<(OffsetUtf16, usize)>();
|
||||
cursor.seek(&offset, Bias::Left, &());
|
||||
let overshoot = offset - cursor.start().0;
|
||||
cursor.start().1
|
||||
+ cursor.item().map_or(Default::default(), |chunk| {
|
||||
chunk.offset_utf16_to_offset(overshoot)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn offset_to_point(&self, offset: usize) -> Point {
|
||||
if offset >= self.summary().bytes {
|
||||
if offset >= self.summary().len {
|
||||
return self.summary().lines;
|
||||
}
|
||||
let mut cursor = self.chunks.cursor::<(usize, Point)>();
|
||||
|
@ -179,7 +204,7 @@ impl Rope {
|
|||
}
|
||||
|
||||
pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
|
||||
if offset >= self.summary().bytes {
|
||||
if offset >= self.summary().len {
|
||||
return self.summary().lines_utf16;
|
||||
}
|
||||
let mut cursor = self.chunks.cursor::<(usize, PointUtf16)>();
|
||||
|
@ -206,7 +231,7 @@ impl Rope {
|
|||
|
||||
pub fn point_to_offset(&self, point: Point) -> usize {
|
||||
if point >= self.summary().lines {
|
||||
return self.summary().bytes;
|
||||
return self.summary().len;
|
||||
}
|
||||
let mut cursor = self.chunks.cursor::<(Point, usize)>();
|
||||
cursor.seek(&point, Bias::Left, &());
|
||||
|
@ -219,7 +244,7 @@ impl Rope {
|
|||
|
||||
pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
|
||||
if point >= self.summary().lines_utf16 {
|
||||
return self.summary().bytes;
|
||||
return self.summary().len;
|
||||
}
|
||||
let mut cursor = self.chunks.cursor::<(PointUtf16, usize)>();
|
||||
cursor.seek(&point, Bias::Left, &());
|
||||
|
@ -262,7 +287,7 @@ impl Rope {
|
|||
}
|
||||
offset
|
||||
} else {
|
||||
self.summary().bytes
|
||||
self.summary().len
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -543,6 +568,34 @@ impl<'a> io::Read for Bytes<'a> {
|
|||
struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>);
|
||||
|
||||
impl Chunk {
|
||||
fn offset_to_offset_utf16(&self, target: usize) -> OffsetUtf16 {
|
||||
let mut offset = 0;
|
||||
let mut offset_utf16 = OffsetUtf16(0);
|
||||
for ch in self.0.chars() {
|
||||
if offset >= target {
|
||||
break;
|
||||
}
|
||||
|
||||
offset += ch.len_utf8();
|
||||
offset_utf16.0 += ch.len_utf16();
|
||||
}
|
||||
offset_utf16
|
||||
}
|
||||
|
||||
fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
|
||||
let mut offset_utf16 = OffsetUtf16(0);
|
||||
let mut offset = 0;
|
||||
for ch in self.0.chars() {
|
||||
if offset_utf16 >= target {
|
||||
break;
|
||||
}
|
||||
|
||||
offset += ch.len_utf8();
|
||||
offset_utf16.0 += ch.len_utf16();
|
||||
}
|
||||
offset
|
||||
}
|
||||
|
||||
fn offset_to_point(&self, target: usize) -> Point {
|
||||
let mut offset = 0;
|
||||
let mut point = Point::new(0, 0);
|
||||
|
@ -748,7 +801,8 @@ impl sum_tree::Summary for ChunkSummary {
|
|||
|
||||
#[derive(Clone, Debug, Default, Eq, PartialEq)]
|
||||
pub struct TextSummary {
|
||||
pub bytes: usize,
|
||||
pub len: usize,
|
||||
pub len_utf16: usize,
|
||||
pub lines: Point,
|
||||
pub lines_utf16: PointUtf16,
|
||||
pub first_line_chars: u32,
|
||||
|
@ -759,6 +813,7 @@ pub struct TextSummary {
|
|||
|
||||
impl<'a> From<&'a str> for TextSummary {
|
||||
fn from(text: &'a str) -> Self {
|
||||
let mut len_utf16 = 0;
|
||||
let mut lines = Point::new(0, 0);
|
||||
let mut lines_utf16 = PointUtf16::new(0, 0);
|
||||
let mut first_line_chars = 0;
|
||||
|
@ -766,6 +821,8 @@ impl<'a> From<&'a str> for TextSummary {
|
|||
let mut longest_row = 0;
|
||||
let mut longest_row_chars = 0;
|
||||
for c in text.chars() {
|
||||
len_utf16 += c.len_utf16();
|
||||
|
||||
if c == '\n' {
|
||||
lines += Point::new(1, 0);
|
||||
lines_utf16 += PointUtf16::new(1, 0);
|
||||
|
@ -787,7 +844,8 @@ impl<'a> From<&'a str> for TextSummary {
|
|||
}
|
||||
|
||||
TextSummary {
|
||||
bytes: text.len(),
|
||||
len: text.len(),
|
||||
len_utf16,
|
||||
lines,
|
||||
lines_utf16,
|
||||
first_line_chars,
|
||||
|
@ -837,7 +895,8 @@ impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
|
|||
self.last_line_chars = other.last_line_chars;
|
||||
}
|
||||
|
||||
self.bytes += other.bytes;
|
||||
self.len += other.len;
|
||||
self.len_utf16 += other.len_utf16;
|
||||
self.lines += other.lines;
|
||||
self.lines_utf16 += other.lines_utf16;
|
||||
}
|
||||
|
@ -886,13 +945,29 @@ impl TextDimension for TextSummary {
|
|||
|
||||
impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
|
||||
fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
|
||||
*self += summary.text.bytes;
|
||||
*self += summary.text.len;
|
||||
}
|
||||
}
|
||||
|
||||
impl TextDimension for usize {
|
||||
fn from_text_summary(summary: &TextSummary) -> Self {
|
||||
summary.bytes
|
||||
summary.len
|
||||
}
|
||||
|
||||
fn add_assign(&mut self, other: &Self) {
|
||||
*self += other;
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
|
||||
fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
|
||||
self.0 += summary.text.len_utf16;
|
||||
}
|
||||
}
|
||||
|
||||
impl TextDimension for OffsetUtf16 {
|
||||
fn from_text_summary(summary: &TextSummary) -> Self {
|
||||
Self(summary.len_utf16)
|
||||
}
|
||||
|
||||
fn add_assign(&mut self, other: &Self) {
|
||||
|
@ -1054,6 +1129,7 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
let mut offset_utf16 = OffsetUtf16(0);
|
||||
let mut point = Point::new(0, 0);
|
||||
let mut point_utf16 = PointUtf16::new(0, 0);
|
||||
for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
|
||||
|
@ -1076,6 +1152,18 @@ mod tests {
|
|||
"point_utf16_to_offset({:?})",
|
||||
point_utf16
|
||||
);
|
||||
assert_eq!(
|
||||
actual.offset_to_offset_utf16(ix),
|
||||
offset_utf16,
|
||||
"offset_to_offset_utf16({:?})",
|
||||
ix
|
||||
);
|
||||
assert_eq!(
|
||||
actual.offset_utf16_to_offset(offset_utf16),
|
||||
ix,
|
||||
"offset_utf16_to_offset({:?})",
|
||||
offset_utf16
|
||||
);
|
||||
if ch == '\n' {
|
||||
point += Point::new(1, 0);
|
||||
point_utf16 += PointUtf16::new(1, 0);
|
||||
|
@ -1083,6 +1171,7 @@ mod tests {
|
|||
point.column += ch.len_utf8() as u32;
|
||||
point_utf16.column += ch.len_utf16() as u32;
|
||||
}
|
||||
offset_utf16.0 += ch.len_utf16();
|
||||
}
|
||||
|
||||
let mut point_utf16 = PointUtf16::zero();
|
||||
|
|
|
@ -247,7 +247,8 @@ fn test_text_summary_for_range() {
|
|||
assert_eq!(
|
||||
buffer.text_summary_for_range::<TextSummary, _>(1..3),
|
||||
TextSummary {
|
||||
bytes: 2,
|
||||
len: 2,
|
||||
len_utf16: 2,
|
||||
lines: Point::new(1, 0),
|
||||
lines_utf16: PointUtf16::new(1, 0),
|
||||
first_line_chars: 1,
|
||||
|
@ -259,7 +260,8 @@ fn test_text_summary_for_range() {
|
|||
assert_eq!(
|
||||
buffer.text_summary_for_range::<TextSummary, _>(1..12),
|
||||
TextSummary {
|
||||
bytes: 11,
|
||||
len: 11,
|
||||
len_utf16: 11,
|
||||
lines: Point::new(3, 0),
|
||||
lines_utf16: PointUtf16::new(3, 0),
|
||||
first_line_chars: 1,
|
||||
|
@ -271,7 +273,8 @@ fn test_text_summary_for_range() {
|
|||
assert_eq!(
|
||||
buffer.text_summary_for_range::<TextSummary, _>(0..20),
|
||||
TextSummary {
|
||||
bytes: 20,
|
||||
len: 20,
|
||||
len_utf16: 20,
|
||||
lines: Point::new(4, 1),
|
||||
lines_utf16: PointUtf16::new(4, 1),
|
||||
first_line_chars: 2,
|
||||
|
@ -283,7 +286,8 @@ fn test_text_summary_for_range() {
|
|||
assert_eq!(
|
||||
buffer.text_summary_for_range::<TextSummary, _>(0..22),
|
||||
TextSummary {
|
||||
bytes: 22,
|
||||
len: 22,
|
||||
len_utf16: 22,
|
||||
lines: Point::new(4, 3),
|
||||
lines_utf16: PointUtf16::new(4, 3),
|
||||
first_line_chars: 2,
|
||||
|
@ -295,7 +299,8 @@ fn test_text_summary_for_range() {
|
|||
assert_eq!(
|
||||
buffer.text_summary_for_range::<TextSummary, _>(7..22),
|
||||
TextSummary {
|
||||
bytes: 15,
|
||||
len: 15,
|
||||
len_utf16: 15,
|
||||
lines: Point::new(2, 3),
|
||||
lines_utf16: PointUtf16::new(2, 3),
|
||||
first_line_chars: 4,
|
||||
|
|
|
@ -2,6 +2,7 @@ mod anchor;
|
|||
pub mod locator;
|
||||
#[cfg(any(test, feature = "test-support"))]
|
||||
pub mod network;
|
||||
mod offset_utf16;
|
||||
pub mod operation_queue;
|
||||
mod patch;
|
||||
mod point;
|
||||
|
@ -20,6 +21,7 @@ use clock::ReplicaId;
|
|||
use collections::{HashMap, HashSet};
|
||||
use lazy_static::lazy_static;
|
||||
use locator::Locator;
|
||||
pub use offset_utf16::*;
|
||||
use operation_queue::OperationQueue;
|
||||
pub use patch::Patch;
|
||||
pub use point::*;
|
||||
|
@ -1621,6 +1623,14 @@ impl BufferSnapshot {
|
|||
self.visible_text.point_utf16_to_point(point)
|
||||
}
|
||||
|
||||
pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
|
||||
self.visible_text.offset_utf16_to_offset(offset)
|
||||
}
|
||||
|
||||
pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
|
||||
self.visible_text.offset_to_offset_utf16(offset)
|
||||
}
|
||||
|
||||
pub fn offset_to_point(&self, offset: usize) -> Point {
|
||||
self.visible_text.offset_to_point(offset)
|
||||
}
|
||||
|
@ -2423,6 +2433,12 @@ impl ToOffset for usize {
|
|||
}
|
||||
}
|
||||
|
||||
impl ToOffset for OffsetUtf16 {
|
||||
fn to_offset<'a>(&self, snapshot: &BufferSnapshot) -> usize {
|
||||
snapshot.offset_utf16_to_offset(*self)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToOffset for Anchor {
|
||||
fn to_offset<'a>(&self, snapshot: &BufferSnapshot) -> usize {
|
||||
snapshot.summary_for_anchor(self)
|
||||
|
@ -2491,6 +2507,28 @@ impl ToPointUtf16 for Point {
|
|||
}
|
||||
}
|
||||
|
||||
pub trait ToOffsetUtf16 {
|
||||
fn to_offset_utf16<'a>(&self, snapshot: &BufferSnapshot) -> OffsetUtf16;
|
||||
}
|
||||
|
||||
impl ToOffsetUtf16 for Anchor {
|
||||
fn to_offset_utf16<'a>(&self, snapshot: &BufferSnapshot) -> OffsetUtf16 {
|
||||
snapshot.summary_for_anchor(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToOffsetUtf16 for usize {
|
||||
fn to_offset_utf16<'a>(&self, snapshot: &BufferSnapshot) -> OffsetUtf16 {
|
||||
snapshot.offset_to_offset_utf16(*self)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToOffsetUtf16 for OffsetUtf16 {
|
||||
fn to_offset_utf16<'a>(&self, _snapshot: &BufferSnapshot) -> OffsetUtf16 {
|
||||
*self
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Clip {
|
||||
fn clip(&self, bias: Bias, snapshot: &BufferSnapshot) -> Self;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue