rope: Index tab locations for each chunk (#20289)
This is a follow-up to #19913 and adds another "index" to the `Chunk`, this time indexing the location of tabs. Release Notes: - N/A --------- Co-authored-by: Antonio <antonio@zed.dev>
This commit is contained in:
parent
d3a49f6d8f
commit
b6adab84a0
1 changed files with 77 additions and 6 deletions
|
@ -13,6 +13,7 @@ pub struct Chunk {
|
||||||
chars: u128,
|
chars: u128,
|
||||||
chars_utf16: u128,
|
chars_utf16: u128,
|
||||||
newlines: u128,
|
newlines: u128,
|
||||||
|
tabs: u128,
|
||||||
pub text: ArrayString<MAX_BASE>,
|
pub text: ArrayString<MAX_BASE>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32,6 +33,7 @@ impl Chunk {
|
||||||
self.chars_utf16 |= 1 << ix;
|
self.chars_utf16 |= 1 << ix;
|
||||||
self.chars_utf16 |= (c.len_utf16() as u128) << ix;
|
self.chars_utf16 |= (c.len_utf16() as u128) << ix;
|
||||||
self.newlines |= ((c == '\n') as u128) << ix;
|
self.newlines |= ((c == '\n') as u128) << ix;
|
||||||
|
self.tabs |= ((c == '\t') as u128) << ix;
|
||||||
}
|
}
|
||||||
self.text.push_str(text);
|
self.text.push_str(text);
|
||||||
}
|
}
|
||||||
|
@ -46,6 +48,7 @@ impl Chunk {
|
||||||
self.chars |= slice.chars << base_ix;
|
self.chars |= slice.chars << base_ix;
|
||||||
self.chars_utf16 |= slice.chars_utf16 << base_ix;
|
self.chars_utf16 |= slice.chars_utf16 << base_ix;
|
||||||
self.newlines |= slice.newlines << base_ix;
|
self.newlines |= slice.newlines << base_ix;
|
||||||
|
self.tabs |= slice.tabs << base_ix;
|
||||||
self.text.push_str(&slice.text);
|
self.text.push_str(&slice.text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -55,6 +58,7 @@ impl Chunk {
|
||||||
chars: self.chars,
|
chars: self.chars,
|
||||||
chars_utf16: self.chars_utf16,
|
chars_utf16: self.chars_utf16,
|
||||||
newlines: self.newlines,
|
newlines: self.newlines,
|
||||||
|
tabs: self.tabs,
|
||||||
text: &self.text,
|
text: &self.text,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -70,6 +74,7 @@ pub struct ChunkSlice<'a> {
|
||||||
chars: u128,
|
chars: u128,
|
||||||
chars_utf16: u128,
|
chars_utf16: u128,
|
||||||
newlines: u128,
|
newlines: u128,
|
||||||
|
tabs: u128,
|
||||||
text: &'a str,
|
text: &'a str,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -79,6 +84,7 @@ impl<'a> Into<Chunk> for ChunkSlice<'a> {
|
||||||
chars: self.chars,
|
chars: self.chars,
|
||||||
chars_utf16: self.chars_utf16,
|
chars_utf16: self.chars_utf16,
|
||||||
newlines: self.newlines,
|
newlines: self.newlines,
|
||||||
|
tabs: self.tabs,
|
||||||
text: self.text.try_into().unwrap(),
|
text: self.text.try_into().unwrap(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -103,26 +109,25 @@ impl<'a> ChunkSlice<'a> {
|
||||||
chars: 0,
|
chars: 0,
|
||||||
chars_utf16: 0,
|
chars_utf16: 0,
|
||||||
newlines: 0,
|
newlines: 0,
|
||||||
|
tabs: 0,
|
||||||
text: "",
|
text: "",
|
||||||
};
|
};
|
||||||
(left, right)
|
(left, right)
|
||||||
} else {
|
} else {
|
||||||
let mask = if mid == MAX_BASE {
|
let mask = (1u128 << mid) - 1;
|
||||||
u128::MAX
|
|
||||||
} else {
|
|
||||||
(1u128 << mid) - 1
|
|
||||||
};
|
|
||||||
let (left_text, right_text) = self.text.split_at(mid);
|
let (left_text, right_text) = self.text.split_at(mid);
|
||||||
let left = ChunkSlice {
|
let left = ChunkSlice {
|
||||||
chars: self.chars & mask,
|
chars: self.chars & mask,
|
||||||
chars_utf16: self.chars_utf16 & mask,
|
chars_utf16: self.chars_utf16 & mask,
|
||||||
newlines: self.newlines & mask,
|
newlines: self.newlines & mask,
|
||||||
|
tabs: self.tabs & mask,
|
||||||
text: left_text,
|
text: left_text,
|
||||||
};
|
};
|
||||||
let right = ChunkSlice {
|
let right = ChunkSlice {
|
||||||
chars: self.chars >> mid,
|
chars: self.chars >> mid,
|
||||||
chars_utf16: self.chars_utf16 >> mid,
|
chars_utf16: self.chars_utf16 >> mid,
|
||||||
newlines: self.newlines >> mid,
|
newlines: self.newlines >> mid,
|
||||||
|
tabs: self.tabs >> mid,
|
||||||
text: right_text,
|
text: right_text,
|
||||||
};
|
};
|
||||||
(left, right)
|
(left, right)
|
||||||
|
@ -141,6 +146,7 @@ impl<'a> ChunkSlice<'a> {
|
||||||
chars: 0,
|
chars: 0,
|
||||||
chars_utf16: 0,
|
chars_utf16: 0,
|
||||||
newlines: 0,
|
newlines: 0,
|
||||||
|
tabs: 0,
|
||||||
text: "",
|
text: "",
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -148,6 +154,7 @@ impl<'a> ChunkSlice<'a> {
|
||||||
chars: (self.chars & mask) >> range.start,
|
chars: (self.chars & mask) >> range.start,
|
||||||
chars_utf16: (self.chars_utf16 & mask) >> range.start,
|
chars_utf16: (self.chars_utf16 & mask) >> range.start,
|
||||||
newlines: (self.newlines & mask) >> range.start,
|
newlines: (self.newlines & mask) >> range.start,
|
||||||
|
tabs: (self.tabs & mask) >> range.start,
|
||||||
text: &self.text[range],
|
text: &self.text[range],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -493,6 +500,60 @@ impl<'a> ChunkSlice<'a> {
|
||||||
};
|
};
|
||||||
row_start..row_start + row_len as usize
|
row_start..row_start + row_len as usize
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn tabs(&self) -> Tabs {
|
||||||
|
Tabs {
|
||||||
|
byte_offset: 0,
|
||||||
|
char_offset: 0,
|
||||||
|
tabs: self.tabs,
|
||||||
|
chars: self.chars,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Tabs {
|
||||||
|
byte_offset: usize,
|
||||||
|
char_offset: usize,
|
||||||
|
tabs: u128,
|
||||||
|
chars: u128,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
|
pub struct TabPosition {
|
||||||
|
pub byte_offset: usize,
|
||||||
|
pub char_offset: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iterator for Tabs {
|
||||||
|
type Item = TabPosition;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
if self.tabs == 0 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let tab_offset = self.tabs.trailing_zeros() as usize;
|
||||||
|
let chars_mask = (1 << tab_offset) - 1;
|
||||||
|
let char_offset = (self.chars & chars_mask).count_ones() as usize;
|
||||||
|
self.byte_offset += tab_offset;
|
||||||
|
self.char_offset += char_offset;
|
||||||
|
let position = TabPosition {
|
||||||
|
byte_offset: self.byte_offset,
|
||||||
|
char_offset: self.char_offset,
|
||||||
|
};
|
||||||
|
|
||||||
|
self.byte_offset += 1;
|
||||||
|
self.char_offset += 1;
|
||||||
|
if self.byte_offset == MAX_BASE {
|
||||||
|
self.tabs = 0;
|
||||||
|
} else {
|
||||||
|
self.tabs >>= tab_offset + 1;
|
||||||
|
self.chars >>= tab_offset + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(position)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Finds the n-th bit that is set to 1.
|
/// Finds the n-th bit that is set to 1.
|
||||||
|
@ -617,7 +678,9 @@ mod tests {
|
||||||
log::info!("Verifying chunk {:?}", text);
|
log::info!("Verifying chunk {:?}", text);
|
||||||
assert_eq!(chunk.offset_to_point(0), Point::zero());
|
assert_eq!(chunk.offset_to_point(0), Point::zero());
|
||||||
|
|
||||||
for c in text.chars() {
|
let mut expected_tab_positions = Vec::new();
|
||||||
|
|
||||||
|
for (char_offset, c) in text.chars().enumerate() {
|
||||||
let expected_point = chunk.offset_to_point(offset);
|
let expected_point = chunk.offset_to_point(offset);
|
||||||
assert_eq!(point, expected_point, "mismatch at offset {}", offset);
|
assert_eq!(point, expected_point, "mismatch at offset {}", offset);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
@ -735,6 +798,13 @@ mod tests {
|
||||||
point_utf16.column += c.len_utf16() as u32;
|
point_utf16.column += c.len_utf16() as u32;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if c == '\t' {
|
||||||
|
expected_tab_positions.push(TabPosition {
|
||||||
|
byte_offset: offset,
|
||||||
|
char_offset,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
offset += c.len_utf8();
|
offset += c.len_utf8();
|
||||||
offset_utf16.0 += c.len_utf16();
|
offset_utf16.0 += c.len_utf16();
|
||||||
}
|
}
|
||||||
|
@ -874,5 +944,6 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
|
assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
|
||||||
|
assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue