Get tab cursor working with correct character offset with utf16 chars
This commit is contained in:
parent
3f3125d206
commit
94b034ffc1
7 changed files with 255 additions and 120 deletions
|
@ -1271,8 +1271,10 @@ pub struct Chunk<'a> {
|
||||||
pub is_inlay: bool,
|
pub is_inlay: bool,
|
||||||
/// An optional recipe for how the chunk should be presented.
|
/// An optional recipe for how the chunk should be presented.
|
||||||
pub renderer: Option<ChunkRenderer>,
|
pub renderer: Option<ChunkRenderer>,
|
||||||
/// The location of tab characters in the chunk.
|
/// Bitmap of tab character locations in chunk
|
||||||
pub tabs: u128,
|
pub tabs: u128,
|
||||||
|
/// Bitmap of character locations in chunk
|
||||||
|
pub chars: u128,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A recipe for how the chunk should be presented.
|
/// A recipe for how the chunk should be presented.
|
||||||
|
@ -1421,6 +1423,7 @@ impl<'a> Iterator for FoldChunks<'a> {
|
||||||
chunk.text = &chunk.text
|
chunk.text = &chunk.text
|
||||||
[(self.inlay_offset - buffer_chunk_start).0..(chunk_end - buffer_chunk_start).0];
|
[(self.inlay_offset - buffer_chunk_start).0..(chunk_end - buffer_chunk_start).0];
|
||||||
chunk.tabs = chunk.tabs >> (self.inlay_offset - buffer_chunk_start).0;
|
chunk.tabs = chunk.tabs >> (self.inlay_offset - buffer_chunk_start).0;
|
||||||
|
chunk.chars = chunk.chars >> (self.inlay_offset - buffer_chunk_start).0;
|
||||||
|
|
||||||
if chunk_end == transform_end {
|
if chunk_end == transform_end {
|
||||||
self.transform_cursor.next(&());
|
self.transform_cursor.next(&());
|
||||||
|
@ -1433,6 +1436,7 @@ impl<'a> Iterator for FoldChunks<'a> {
|
||||||
return Some(Chunk {
|
return Some(Chunk {
|
||||||
text: chunk.text,
|
text: chunk.text,
|
||||||
tabs: chunk.tabs,
|
tabs: chunk.tabs,
|
||||||
|
chars: chunk.chars,
|
||||||
syntax_highlight_id: chunk.syntax_highlight_id,
|
syntax_highlight_id: chunk.syntax_highlight_id,
|
||||||
highlight_style: chunk.highlight_style,
|
highlight_style: chunk.highlight_style,
|
||||||
diagnostic_severity: chunk.diagnostic_severity,
|
diagnostic_severity: chunk.diagnostic_severity,
|
||||||
|
|
|
@ -387,7 +387,7 @@ impl TabSnapshot {
|
||||||
|
|
||||||
while let Some(tab_stop) = cursor.seek(seek_target) {
|
while let Some(tab_stop) = cursor.seek(seek_target) {
|
||||||
// Calculate how much we want to expand this tab stop (into spaces)
|
// Calculate how much we want to expand this tab stop (into spaces)
|
||||||
let expanded_chars_old = tab_stop.char_offset - tab_count + expanded_tab_len;
|
let expanded_chars_old = tab_stop.char_offset + expanded_tab_len - tab_count;
|
||||||
let tab_len = tab_size - ((expanded_chars_old - 1) % tab_size);
|
let tab_len = tab_size - ((expanded_chars_old - 1) % tab_size);
|
||||||
// Increment tab count
|
// Increment tab count
|
||||||
tab_count += 1;
|
tab_count += 1;
|
||||||
|
@ -395,11 +395,11 @@ impl TabSnapshot {
|
||||||
expanded_tab_len += tab_len;
|
expanded_tab_len += tab_len;
|
||||||
|
|
||||||
// The count of bytes at this point in the iteration while considering tab_count and previous expansions
|
// The count of bytes at this point in the iteration while considering tab_count and previous expansions
|
||||||
let expanded_bytes = tab_stop.byte_offset - tab_count + expanded_tab_len;
|
let expanded_bytes = tab_stop.byte_offset + expanded_tab_len - tab_count;
|
||||||
|
|
||||||
// Did we expand past the search target?
|
// Did we expand past the search target?
|
||||||
if expanded_bytes > column {
|
if expanded_bytes > column {
|
||||||
let mut expanded_chars = tab_stop.char_offset - tab_count + expanded_tab_len;
|
let mut expanded_chars = tab_stop.char_offset + expanded_tab_len - tab_count;
|
||||||
// We expanded past the search target, so need to account for the offshoot
|
// We expanded past the search target, so need to account for the offshoot
|
||||||
expanded_chars -= expanded_bytes - column;
|
expanded_chars -= expanded_bytes - column;
|
||||||
return match bias {
|
return match bias {
|
||||||
|
@ -413,17 +413,17 @@ impl TabSnapshot {
|
||||||
} else {
|
} else {
|
||||||
// otherwise we only want to move the cursor collapse column forward
|
// otherwise we only want to move the cursor collapse column forward
|
||||||
collapsed_column = collapsed_column - tab_len + 1;
|
collapsed_column = collapsed_column - tab_len + 1;
|
||||||
seek_target = (collapsed_column - cursor.bytes_offset)
|
seek_target = (collapsed_column - cursor.byte_offset)
|
||||||
.min(self.max_expansion_column - cursor.bytes_offset);
|
.min(self.max_expansion_column - cursor.byte_offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let collapsed_bytes = cursor.byte_offset();
|
let collapsed_bytes = cursor.byte_offset();
|
||||||
let expanded_bytes = cursor.byte_offset() - tab_count + expanded_tab_len;
|
let expanded_bytes = cursor.byte_offset() + expanded_tab_len - tab_count;
|
||||||
// let expanded_chars = cursor.char_offset() - tab_count + expanded_tab_len;
|
let expanded_chars = cursor.char_offset() + expanded_tab_len - tab_count;
|
||||||
(
|
(
|
||||||
collapsed_bytes + column.saturating_sub(expanded_bytes),
|
collapsed_bytes + column.saturating_sub(expanded_bytes),
|
||||||
expanded_bytes,
|
expanded_chars,
|
||||||
0,
|
0,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -776,6 +776,7 @@ mod tests {
|
||||||
// Generate random input string with up to 200 characters including tabs
|
// Generate random input string with up to 200 characters including tabs
|
||||||
// to stay within the MAX_EXPANSION_COLUMN limit of 256
|
// to stay within the MAX_EXPANSION_COLUMN limit of 256
|
||||||
let len = rng.gen_range(0..=2048);
|
let len = rng.gen_range(0..=2048);
|
||||||
|
let tab_size = NonZeroU32::new(rng.gen_range(1..=4)).unwrap();
|
||||||
let mut input = String::with_capacity(len);
|
let mut input = String::with_capacity(len);
|
||||||
|
|
||||||
for _ in 0..len {
|
for _ in 0..len {
|
||||||
|
@ -801,6 +802,7 @@ mod tests {
|
||||||
let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
|
let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
|
||||||
let (_, mut tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap());
|
let (_, mut tab_snapshot) = TabMap::new(fold_snapshot, 4.try_into().unwrap());
|
||||||
tab_snapshot.max_expansion_column = rng.gen_range(0..323);
|
tab_snapshot.max_expansion_column = rng.gen_range(0..323);
|
||||||
|
tab_snapshot.tab_size = tab_size;
|
||||||
|
|
||||||
for (ix, _) in input.char_indices() {
|
for (ix, _) in input.char_indices() {
|
||||||
let range = TabPoint::new(0, ix as u32)..tab_snapshot.max_point();
|
let range = TabPoint::new(0, ix as u32)..tab_snapshot.max_point();
|
||||||
|
@ -1026,8 +1028,9 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[gpui::test]
|
#[gpui::test]
|
||||||
fn test_tab_stop_cursor(cx: &mut gpui::App) {
|
fn test_tab_stop_cursor_utf8(cx: &mut gpui::App) {
|
||||||
let text = "\tfoo\tbarbarbar\t\tbaz\n";
|
let text = "\tfoo\tbarbarbar\t\tbaz\n";
|
||||||
|
let text = "rikR~${H25ao'\\@r/<`&bjrzg(uQG})kl#!^r>Z\\27X$mmh\"tz;fq@F>=<Oi+R4;0Xt09,_!WxDZD&Rs/\"%5o7\\Kr`fIJR(.a]2SQHTZJJ)(^cx,%FfwrGkd,u&00&!;\t";
|
||||||
let buffer = MultiBuffer::build_simple(text, cx);
|
let buffer = MultiBuffer::build_simple(text, cx);
|
||||||
let buffer_snapshot = buffer.read(cx).snapshot(cx);
|
let buffer_snapshot = buffer.read(cx).snapshot(cx);
|
||||||
let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone());
|
let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone());
|
||||||
|
@ -1041,37 +1044,33 @@ mod tests {
|
||||||
assert!(cursor.seek(0).is_none());
|
assert!(cursor.seek(0).is_none());
|
||||||
let mut tab_stops = Vec::new();
|
let mut tab_stops = Vec::new();
|
||||||
|
|
||||||
|
let mut all_tab_stops = Vec::new();
|
||||||
|
let mut byte_offset = 0;
|
||||||
|
let mut char_offset = 0;
|
||||||
|
for ch in buffer.read(cx).snapshot(cx).text().chars() {
|
||||||
|
// byte_offset += ch.len_utf8();
|
||||||
|
byte_offset += ch.len_utf8() as u32;
|
||||||
|
char_offset += 1;
|
||||||
|
|
||||||
|
if ch == '\t' {
|
||||||
|
all_tab_stops.push(TabStop {
|
||||||
|
byte_offset,
|
||||||
|
char_offset,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
while let Some(tab_stop) = cursor.seek(u32::MAX) {
|
while let Some(tab_stop) = cursor.seek(u32::MAX) {
|
||||||
tab_stops.push(tab_stop);
|
tab_stops.push(tab_stop);
|
||||||
}
|
}
|
||||||
pretty_assertions::assert_eq!(
|
pretty_assertions::assert_eq!(tab_stops.as_slice(), all_tab_stops.as_slice(),);
|
||||||
tab_stops.as_slice(),
|
|
||||||
&[
|
|
||||||
TabStop {
|
|
||||||
byte_offset: 1,
|
|
||||||
char_offset: 1
|
|
||||||
},
|
|
||||||
TabStop {
|
|
||||||
byte_offset: 5,
|
|
||||||
char_offset: 5
|
|
||||||
},
|
|
||||||
TabStop {
|
|
||||||
byte_offset: 15,
|
|
||||||
char_offset: 15,
|
|
||||||
},
|
|
||||||
TabStop {
|
|
||||||
byte_offset: 16,
|
|
||||||
char_offset: 16,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(cursor.byte_offset(), 20);
|
assert_eq!(cursor.byte_offset(), byte_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[gpui::test]
|
#[gpui::test]
|
||||||
fn test_tab_stop_with_end_range(cx: &mut gpui::App) {
|
fn test_tab_stop_with_end_range_utf8(cx: &mut gpui::App) {
|
||||||
let input = "A\tBC\tDEF\tG\tHI\tJ\tK\tL\tM";
|
let input = "A\tBC\t"; // DEF\tG\tHI\tJ\tK\tL\tM
|
||||||
|
|
||||||
let buffer = MultiBuffer::build_simple(input, cx);
|
let buffer = MultiBuffer::build_simple(input, cx);
|
||||||
let buffer_snapshot = buffer.read(cx).snapshot(cx);
|
let buffer_snapshot = buffer.read(cx).snapshot(cx);
|
||||||
|
@ -1080,53 +1079,35 @@ mod tests {
|
||||||
|
|
||||||
let chunks = fold_snapshot.chunks_at(FoldPoint::new(0, 0));
|
let chunks = fold_snapshot.chunks_at(FoldPoint::new(0, 0));
|
||||||
let mut cursor = TabStopCursor::new(chunks);
|
let mut cursor = TabStopCursor::new(chunks);
|
||||||
let mut tab_stops = Vec::new();
|
|
||||||
while let Some(tab_stop) = cursor.seek(33) {
|
|
||||||
tab_stops.push(tab_stop);
|
|
||||||
}
|
|
||||||
pretty_assertions::assert_eq!(
|
|
||||||
&[
|
|
||||||
TabStop {
|
|
||||||
byte_offset: 2,
|
|
||||||
char_offset: 2
|
|
||||||
},
|
|
||||||
TabStop {
|
|
||||||
byte_offset: 5,
|
|
||||||
char_offset: 5
|
|
||||||
},
|
|
||||||
TabStop {
|
|
||||||
byte_offset: 9,
|
|
||||||
char_offset: 9,
|
|
||||||
},
|
|
||||||
TabStop {
|
|
||||||
byte_offset: 11,
|
|
||||||
char_offset: 11,
|
|
||||||
},
|
|
||||||
TabStop {
|
|
||||||
byte_offset: 14,
|
|
||||||
char_offset: 14
|
|
||||||
},
|
|
||||||
TabStop {
|
|
||||||
byte_offset: 16,
|
|
||||||
char_offset: 16,
|
|
||||||
},
|
|
||||||
TabStop {
|
|
||||||
byte_offset: 18,
|
|
||||||
char_offset: 18
|
|
||||||
},
|
|
||||||
TabStop {
|
|
||||||
byte_offset: 20,
|
|
||||||
char_offset: 20,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
tab_stops.as_slice(),
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(cursor.byte_offset(), 21);
|
let mut actual_tab_stops = Vec::new();
|
||||||
|
|
||||||
|
let mut expected_tab_stops = Vec::new();
|
||||||
|
let mut byte_offset = 0;
|
||||||
|
let mut char_offset = 0;
|
||||||
|
for ch in buffer.read(cx).snapshot(cx).text().chars() {
|
||||||
|
// byte_offset += ch.len_utf8();
|
||||||
|
byte_offset += ch.len_utf8() as u32;
|
||||||
|
char_offset += 1;
|
||||||
|
|
||||||
|
if ch == '\t' {
|
||||||
|
expected_tab_stops.push(TabStop {
|
||||||
|
byte_offset,
|
||||||
|
char_offset,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while let Some(tab_stop) = cursor.seek(u32::MAX) {
|
||||||
|
actual_tab_stops.push(tab_stop);
|
||||||
|
}
|
||||||
|
pretty_assertions::assert_eq!(actual_tab_stops.as_slice(), expected_tab_stops.as_slice(),);
|
||||||
|
|
||||||
|
assert_eq!(cursor.byte_offset(), byte_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[gpui::test(iterations = 100)]
|
#[gpui::test(iterations = 100)]
|
||||||
fn test_tab_stop_cursor_random(cx: &mut gpui::App, mut rng: StdRng) {
|
fn test_tab_stop_cursor_random_utf8(cx: &mut gpui::App, mut rng: StdRng) {
|
||||||
// Generate random input string with up to 512 characters including tabs
|
// Generate random input string with up to 512 characters including tabs
|
||||||
let len = rng.gen_range(0..=2048);
|
let len = rng.gen_range(0..=2048);
|
||||||
let mut input = String::with_capacity(len);
|
let mut input = String::with_capacity(len);
|
||||||
|
@ -1138,14 +1119,12 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
if rng.gen_bool(0.15) && !skip_tabs {
|
if rng.gen_bool(0.15) && !skip_tabs {
|
||||||
// 15% chance of inserting a tab
|
|
||||||
input.push('\t');
|
input.push('\t');
|
||||||
} else {
|
} else {
|
||||||
// 85% chance of inserting a random ASCII character (excluding tab, newline, carriage return)
|
|
||||||
let ch = loop {
|
let ch = loop {
|
||||||
let ascii_code = rng.gen_range(32..=126); // printable ASCII range
|
let ascii_code = rng.gen_range(32..=126); // printable ASCII range
|
||||||
let ch = ascii_code as u8 as char;
|
let ch = ascii_code as u8 as char;
|
||||||
if ch != '\t' && ch != '\n' && ch != '\r' {
|
if ch != '\t' {
|
||||||
break ch;
|
break ch;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -1163,7 +1142,121 @@ mod tests {
|
||||||
let mut all_tab_stops = Vec::new();
|
let mut all_tab_stops = Vec::new();
|
||||||
let mut byte_offset = 1;
|
let mut byte_offset = 1;
|
||||||
let mut char_offset = 1;
|
let mut char_offset = 1;
|
||||||
for ch in input.chars() {
|
for ch in buffer_snapshot.text().chars() {
|
||||||
|
if ch == '\t' {
|
||||||
|
all_tab_stops.push(TabStop {
|
||||||
|
byte_offset,
|
||||||
|
char_offset,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
byte_offset += ch.len_utf8() as u32;
|
||||||
|
char_offset += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test with various distances
|
||||||
|
let distances = vec![1, 5, 10, 50, 100, u32::MAX];
|
||||||
|
// let distances = vec![150];
|
||||||
|
|
||||||
|
for distance in distances {
|
||||||
|
let chunks = fold_snapshot.chunks_at(FoldPoint::new(0, 0));
|
||||||
|
let mut cursor = TabStopCursor::new(chunks);
|
||||||
|
|
||||||
|
let mut found_tab_stops = Vec::new();
|
||||||
|
let mut position = distance;
|
||||||
|
while let Some(tab_stop) = cursor.seek(position) {
|
||||||
|
found_tab_stops.push(tab_stop);
|
||||||
|
position = distance - tab_stop.byte_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
let expected_found_tab_stops: Vec<_> = all_tab_stops
|
||||||
|
.iter()
|
||||||
|
.take_while(|tab_stop| tab_stop.byte_offset <= distance)
|
||||||
|
.cloned()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
pretty_assertions::assert_eq!(
|
||||||
|
found_tab_stops,
|
||||||
|
expected_found_tab_stops,
|
||||||
|
"TabStopCursor output mismatch for distance {}. Input: {:?}",
|
||||||
|
distance,
|
||||||
|
input
|
||||||
|
);
|
||||||
|
|
||||||
|
let final_position = cursor.byte_offset();
|
||||||
|
if !found_tab_stops.is_empty() {
|
||||||
|
let last_tab_stop = found_tab_stops.last().unwrap();
|
||||||
|
assert!(
|
||||||
|
final_position >= last_tab_stop.byte_offset,
|
||||||
|
"Cursor final position {} is before last tab stop {}. Input: {:?}",
|
||||||
|
final_position,
|
||||||
|
last_tab_stop.byte_offset,
|
||||||
|
input
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Checks that `TabStopCursor` reports every tab in a string that mixes ASCII with
/// multi-byte characters (emoji), so byte offsets and character offsets diverge.
/// The expected stops are recomputed here by walking the buffer text char by char;
/// the cursor must yield the same list and finish at the text's total byte length.
#[gpui::test]
|
||||||
|
fn test_tab_stop_cursor_utf16(cx: &mut gpui::App) {
|
||||||
|
let text = "\r\t😁foo\tb😀arbar🤯bar\t\tbaz\n";
|
||||||
|
let buffer = MultiBuffer::build_simple(text, cx);
|
||||||
|
let buffer_snapshot = buffer.read(cx).snapshot(cx);
|
||||||
|
let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone());
|
||||||
|
let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
|
||||||
|
let chunks = fold_snapshot.chunks(
|
||||||
|
FoldOffset(0)..fold_snapshot.len(),
|
||||||
|
false,
|
||||||
|
Default::default(),
|
||||||
|
);
|
||||||
|
let mut cursor = TabStopCursor::new(chunks);
|
||||||
|
// Seeking a distance of zero must not produce a tab stop.
assert!(cursor.seek(0).is_none());
|
||||||
|
let mut tab_stops = Vec::new();
|
||||||
|
|
||||||
|
let mut all_tab_stops = Vec::new();
|
||||||
|
let mut byte_offset = 0;
|
||||||
|
let mut char_offset = 0;
|
||||||
|
for ch in buffer.read(cx).snapshot(cx).text().chars() {
|
||||||
|
// Both counters advance before the tab check, so an expected stop records the
// position just past the tab (1-based), in UTF-8 bytes and in characters.
|
||||||
|
byte_offset += ch.len_utf8() as u32;
|
||||||
|
char_offset += 1;
|
||||||
|
|
||||||
|
if ch == '\t' {
|
||||||
|
all_tab_stops.push(TabStop {
|
||||||
|
byte_offset,
|
||||||
|
char_offset,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Keep seeking with the largest possible distance until the cursor stops yielding tab stops.
while let Some(tab_stop) = cursor.seek(u32::MAX) {
|
||||||
|
tab_stops.push(tab_stop);
|
||||||
|
}
|
||||||
|
pretty_assertions::assert_eq!(tab_stops.as_slice(), all_tab_stops.as_slice(),);
|
||||||
|
|
||||||
|
// After the loop `byte_offset` holds the total UTF-8 length of the text.
assert_eq!(cursor.byte_offset(), byte_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[gpui::test(iterations = 100)]
|
||||||
|
fn test_tab_stop_cursor_random_utf16(cx: &mut gpui::App, mut rng: StdRng) {
|
||||||
|
// Generate random input string with up to 512 characters including tabs
|
||||||
|
let len = rng.gen_range(0..=2048);
|
||||||
|
let input = util::RandomCharIter::new(&mut rng)
|
||||||
|
.take(len)
|
||||||
|
.collect::<String>();
|
||||||
|
|
||||||
|
// Build the buffer and create cursor
|
||||||
|
let buffer = MultiBuffer::build_simple(&input, cx);
|
||||||
|
let buffer_snapshot = buffer.read(cx).snapshot(cx);
|
||||||
|
let (_, inlay_snapshot) = InlayMap::new(buffer_snapshot.clone());
|
||||||
|
let (_, fold_snapshot) = FoldMap::new(inlay_snapshot);
|
||||||
|
|
||||||
|
// First, collect all expected tab positions
|
||||||
|
let mut all_tab_stops = Vec::new();
|
||||||
|
let mut byte_offset = 0;
|
||||||
|
let mut char_offset = 0;
|
||||||
|
for ch in buffer_snapshot.text().chars() {
|
||||||
|
byte_offset += ch.len_utf8() as u32;
|
||||||
|
char_offset += 1;
|
||||||
if ch == '\t' {
|
if ch == '\t' {
|
||||||
all_tab_stops.push(TabStop {
|
all_tab_stops.push(TabStop {
|
||||||
byte_offset,
|
byte_offset,
|
||||||
|
@ -1171,12 +1264,11 @@ mod tests {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
// byte_offset += ch.len_utf8();
|
// byte_offset += ch.len_utf8();
|
||||||
byte_offset += 1;
|
|
||||||
char_offset += 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test with various distances
|
// Test with various distances
|
||||||
let distances = vec![1, 5, 10, 50, 100, u32::MAX];
|
// let distances = vec![1, 5, 10, 50, 100, u32::MAX];
|
||||||
|
let distances = vec![150];
|
||||||
|
|
||||||
for distance in distances {
|
for distance in distances {
|
||||||
let chunks = fold_snapshot.chunks_at(FoldPoint::new(0, 0));
|
let chunks = fold_snapshot.chunks_at(FoldPoint::new(0, 0));
|
||||||
|
@ -1220,19 +1312,19 @@ mod tests {
|
||||||
|
|
||||||
struct TabStopCursor<'a> {
|
struct TabStopCursor<'a> {
|
||||||
chunks: FoldChunks<'a>,
|
chunks: FoldChunks<'a>,
|
||||||
bytes_offset: u32,
|
byte_offset: u32,
|
||||||
|
char_offset: u32,
|
||||||
/// Chunk
|
/// Chunk
|
||||||
/// last tab position iterated through
|
/// last tab position iterated through
|
||||||
current_chunk: Option<(Chunk<'a>, u32)>,
|
current_chunk: Option<(Chunk<'a>, u32)>,
|
||||||
end_of_chunk: Option<u32>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> TabStopCursor<'a> {
|
impl<'a> TabStopCursor<'a> {
|
||||||
fn new(chunks: FoldChunks<'a>) -> Self {
|
fn new(chunks: FoldChunks<'a>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
chunks,
|
chunks,
|
||||||
bytes_offset: 0,
|
byte_offset: 0,
|
||||||
end_of_chunk: None,
|
char_offset: 0,
|
||||||
current_chunk: None,
|
current_chunk: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1243,17 +1335,8 @@ impl<'a> TabStopCursor<'a> {
|
||||||
debug_assert!(distance == 0, "Can't seek backwards: {distance}");
|
debug_assert!(distance == 0, "Can't seek backwards: {distance}");
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
let past_chunk = self.end_of_chunk.take().unwrap_or_default();
|
|
||||||
|
|
||||||
let mut distance_traversed = 0;
|
let mut distance_traversed = 0;
|
||||||
if past_chunk > distance {
|
|
||||||
self.bytes_offset += distance;
|
|
||||||
self.end_of_chunk = Some(past_chunk - distance);
|
|
||||||
return None;
|
|
||||||
} else {
|
|
||||||
self.bytes_offset += past_chunk;
|
|
||||||
distance_traversed += past_chunk;
|
|
||||||
}
|
|
||||||
|
|
||||||
while let Some((mut chunk, chunk_position)) = self
|
while let Some((mut chunk, chunk_position)) = self
|
||||||
.current_chunk
|
.current_chunk
|
||||||
|
@ -1261,37 +1344,52 @@ impl<'a> TabStopCursor<'a> {
|
||||||
.or_else(|| self.chunks.next().zip(Some(0)))
|
.or_else(|| self.chunks.next().zip(Some(0)))
|
||||||
{
|
{
|
||||||
if chunk.tabs == 0 {
|
if chunk.tabs == 0 {
|
||||||
let chunk_distance = chunk.text.len() as u32;
|
let chunk_distance = chunk.text.len() as u32 - chunk_position;
|
||||||
if chunk_distance + distance_traversed - chunk_position >= distance {
|
if chunk_distance + distance_traversed >= distance {
|
||||||
let overshoot = chunk_distance + distance_traversed - distance;
|
let overshoot = distance_traversed.abs_diff(distance);
|
||||||
self.bytes_offset += distance_traversed.abs_diff(distance);
|
self.byte_offset += overshoot;
|
||||||
self.end_of_chunk = Some(overshoot); // todo! this should be a chunk position
|
|
||||||
|
self.char_offset += get_char_offset(
|
||||||
|
chunk_position..(chunk_position + overshoot).saturating_sub(1).min(127),
|
||||||
|
chunk.chars,
|
||||||
|
);
|
||||||
|
self.current_chunk = Some((chunk, chunk_position + overshoot));
|
||||||
|
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
self.bytes_offset += chunk.text.len() as u32;
|
|
||||||
distance_traversed += chunk.text.len() as u32;
|
self.byte_offset += chunk_distance;
|
||||||
|
// todo! calculate char offset
|
||||||
|
self.char_offset += get_char_offset(
|
||||||
|
chunk_position..(chunk_position + chunk_distance).saturating_sub(1).min(127),
|
||||||
|
chunk.chars,
|
||||||
|
);
|
||||||
|
distance_traversed += chunk_distance;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let tab_position = chunk.tabs.trailing_zeros() + 1;
|
let tab_position = chunk.tabs.trailing_zeros() + 1;
|
||||||
|
|
||||||
if distance_traversed + tab_position - chunk_position > distance {
|
if distance_traversed + tab_position - chunk_position > distance {
|
||||||
let cursor_position = distance_traversed.abs_diff(distance);
|
let cursor_position = distance_traversed.abs_diff(distance);
|
||||||
|
self.char_offset += get_char_offset(0..(cursor_position - 1), chunk.chars);
|
||||||
self.current_chunk = Some((chunk, cursor_position));
|
self.current_chunk = Some((chunk, cursor_position));
|
||||||
self.bytes_offset += cursor_position;
|
self.byte_offset += cursor_position;
|
||||||
|
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
self.bytes_offset += tab_position - chunk_position;
|
|
||||||
|
self.byte_offset += tab_position - chunk_position;
|
||||||
|
self.char_offset += get_char_offset(chunk_position..(tab_position - 1), chunk.chars);
|
||||||
|
|
||||||
let tabstop = TabStop {
|
let tabstop = TabStop {
|
||||||
char_offset: self.bytes_offset,
|
char_offset: self.char_offset,
|
||||||
byte_offset: self.bytes_offset,
|
byte_offset: self.byte_offset,
|
||||||
};
|
};
|
||||||
|
|
||||||
chunk.tabs = (chunk.tabs - 1) & chunk.tabs;
|
chunk.tabs = (chunk.tabs - 1) & chunk.tabs;
|
||||||
if chunk.tabs > 0 {
|
|
||||||
|
if tab_position as usize != chunk.text.len() {
|
||||||
self.current_chunk = Some((chunk, tab_position));
|
self.current_chunk = Some((chunk, tab_position));
|
||||||
} else {
|
|
||||||
self.end_of_chunk = Some(chunk.text.len() as u32 - tab_position);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return Some(tabstop);
|
return Some(tabstop);
|
||||||
|
@ -1301,8 +1399,31 @@ impl<'a> TabStopCursor<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn byte_offset(&self) -> u32 {
|
fn byte_offset(&self) -> u32 {
|
||||||
self.bytes_offset
|
self.byte_offset
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the cursor's running `char_offset` counter.
/// NOTE(review): presumably the number of characters consumed so far, as opposed to
/// the byte count returned by `byte_offset` — confirm against `seek`.
fn char_offset(&self) -> u32 {
|
||||||
|
self.char_offset
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Counts the set bits of `bit_map` whose index lies in `range.start..=range.end`.
///
/// Despite taking a `Range`, both bounds are treated as inclusive bit indices, which is
/// how the callers in `TabStopCursor::seek` build their ranges. `range.end` is clamped to
/// the highest valid bit index (127).
///
/// Returns 0 when the range selects no bits: either `range.start > range.end`, or
/// `range.start` is past the last bit of the bitmap (a cursor sitting at chunk position
/// 128). The latter case previously shifted a `u128` by 128 or more, which panics in debug
/// builds and silently wraps the shift amount in release builds.
#[inline(always)]
fn get_char_offset(range: Range<u32>, bit_map: u128) -> u32 {
    const LAST_BIT: u32 = u128::BITS - 1;

    let first = range.start;
    let last = range.end.min(LAST_BIT);

    // `last <= LAST_BIT`, so this also rejects every `first >= 128`.
    if first > last {
        return 0;
    }

    // Both shift amounts are in 0..=127 here, so neither shift can overflow.
    let at_or_above_first = u128::MAX << first;
    let at_or_below_last = u128::MAX >> (LAST_BIT - last);

    (bit_map & at_or_above_first & at_or_below_last).count_ones()
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
|
|
|
@ -278,7 +278,6 @@ impl EditorElement {
|
||||||
if text.is_empty() {
|
if text.is_empty() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
dbg!("Handle input text:", text);
|
|
||||||
editor.handle_input(text, window, cx);
|
editor.handle_input(text, window, cx);
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
|
@ -488,6 +488,8 @@ pub struct Chunk<'a> {
|
||||||
pub is_tab: bool,
|
pub is_tab: bool,
|
||||||
/// A bitset of which characters are tabs in this string.
|
/// A bitset of which characters are tabs in this string.
|
||||||
pub tabs: u128,
|
pub tabs: u128,
|
||||||
|
/// Bitmap of character indices in this chunk
|
||||||
|
pub chars: u128,
|
||||||
/// Whether this chunk of text was originally a tab character.
|
/// Whether this chunk of text was originally a tab character.
|
||||||
pub is_inlay: bool,
|
pub is_inlay: bool,
|
||||||
/// Whether to underline the corresponding text range in the editor.
|
/// Whether to underline the corresponding text range in the editor.
|
||||||
|
@ -4582,7 +4584,7 @@ impl<'a> Iterator for BufferChunks<'a> {
|
||||||
}
|
}
|
||||||
self.diagnostic_endpoints = diagnostic_endpoints;
|
self.diagnostic_endpoints = diagnostic_endpoints;
|
||||||
|
|
||||||
if let Some((chunk, tabs)) = self.chunks.peek_tabs() {
|
if let Some((chunk, tabs, chars_map)) = self.chunks.peek_tabs() {
|
||||||
let chunk_start = self.range.start;
|
let chunk_start = self.range.start;
|
||||||
let mut chunk_end = (self.chunks.offset() + chunk.len())
|
let mut chunk_end = (self.chunks.offset() + chunk.len())
|
||||||
.min(next_capture_start)
|
.min(next_capture_start)
|
||||||
|
@ -4598,6 +4600,7 @@ impl<'a> Iterator for BufferChunks<'a> {
|
||||||
let slice =
|
let slice =
|
||||||
&chunk[chunk_start - self.chunks.offset()..chunk_end - self.chunks.offset()];
|
&chunk[chunk_start - self.chunks.offset()..chunk_end - self.chunks.offset()];
|
||||||
let tabs = tabs >> (chunk_start - self.chunks.offset());
|
let tabs = tabs >> (chunk_start - self.chunks.offset());
|
||||||
|
let chars_map = chars_map >> (chunk_start - self.chunks.offset());
|
||||||
|
|
||||||
self.range.start = chunk_end;
|
self.range.start = chunk_end;
|
||||||
if self.range.start == self.chunks.offset() + chunk.len() {
|
if self.range.start == self.chunks.offset() + chunk.len() {
|
||||||
|
@ -4611,6 +4614,7 @@ impl<'a> Iterator for BufferChunks<'a> {
|
||||||
diagnostic_severity: self.current_diagnostic_severity(),
|
diagnostic_severity: self.current_diagnostic_severity(),
|
||||||
is_unnecessary: self.current_code_is_unnecessary(),
|
is_unnecessary: self.current_code_is_unnecessary(),
|
||||||
tabs,
|
tabs,
|
||||||
|
chars: chars_map,
|
||||||
..Chunk::default()
|
..Chunk::default()
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -3281,11 +3281,12 @@ fn test_contiguous_ranges() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_buffer_chunks_tabs() {
|
fn test_buffer_chunks_tabs() {
|
||||||
let buffer = text::Buffer::new(0, BufferId::new(1).unwrap(), "\ta\tbc");
|
let buffer = text::Buffer::new(0, BufferId::new(1).unwrap(), "\ta\tbc😁");
|
||||||
let mut iter = buffer.as_rope().chunks();
|
let mut iter = buffer.as_rope().chunks();
|
||||||
|
|
||||||
while let Some((str, tabs)) = iter.peek_tabs() {
|
while let Some((str, _, chars)) = iter.peek_tabs() {
|
||||||
dbg!(str, format!("{:b}", tabs));
|
dbg!(str.len(), str.bytes().count());
|
||||||
|
dbg!(str, format!("{:b}", chars));
|
||||||
iter.next();
|
iter.next();
|
||||||
}
|
}
|
||||||
dbg!("---");
|
dbg!("---");
|
||||||
|
@ -3294,7 +3295,7 @@ fn test_buffer_chunks_tabs() {
|
||||||
let mut iter = buffer.as_rope().chunks();
|
let mut iter = buffer.as_rope().chunks();
|
||||||
iter.seek(3);
|
iter.seek(3);
|
||||||
|
|
||||||
while let Some((str, tabs)) = iter.peek_tabs() {
|
while let Some((str, tabs, _)) = iter.peek_tabs() {
|
||||||
dbg!(str, format!("{:b}", tabs));
|
dbg!(str, format!("{:b}", tabs));
|
||||||
iter.next();
|
iter.next();
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,6 +67,11 @@ impl Chunk {
|
||||||
pub fn slice(&self, range: Range<usize>) -> ChunkSlice {
|
pub fn slice(&self, range: Range<usize>) -> ChunkSlice {
|
||||||
self.as_slice().slice(range)
|
self.as_slice().slice(range)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns this chunk's `chars` bitmap as stored, without shifting or masking.
/// NOTE(review): presumably one bit per byte position at which a character starts —
/// confirm against the field's definition.
#[inline(always)]
|
||||||
|
pub fn chars(&self) -> u128 {
|
||||||
|
self.chars
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug)]
|
#[derive(Clone, Copy, Debug)]
|
||||||
|
|
|
@ -772,7 +772,7 @@ impl<'a> Chunks<'a> {
|
||||||
Some(&chunk.text[slice_range])
|
Some(&chunk.text[slice_range])
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn peek_tabs(&self) -> Option<(&'a str, u128)> {
|
pub fn peek_tabs(&self) -> Option<(&'a str, u128, u128)> {
|
||||||
if !self.offset_is_valid() {
|
if !self.offset_is_valid() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
@ -793,8 +793,9 @@ impl<'a> Chunks<'a> {
|
||||||
|
|
||||||
// Shift the tabs to align with our slice window
|
// Shift the tabs to align with our slice window
|
||||||
let shifted_tabs = chunk.tabs >> chunk_start_offset;
|
let shifted_tabs = chunk.tabs >> chunk_start_offset;
|
||||||
|
let shifted_chars_utf16 = chunk.chars();
|
||||||
|
|
||||||
Some((slice_text, shifted_tabs))
|
Some((slice_text, shifted_tabs, shifted_chars_utf16))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn lines(self) -> Lines<'a> {
|
pub fn lines(self) -> Lines<'a> {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue