From e860cacb9fa38a1c11afc318a3887d579d22c24c Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 14 May 2021 17:45:16 -0700 Subject: [PATCH] Get randomized tests passing in the presence of multibyte chars --- zed/src/editor/buffer/mod.rs | 31 ++++++++++++------ zed/src/editor/buffer/rope.rs | 44 +++++++++++++++----------- zed/src/editor/display_map/fold_map.rs | 7 ++-- zed/src/util.rs | 33 +++++++++++++++++-- 4 files changed, 83 insertions(+), 32 deletions(-) diff --git a/zed/src/editor/buffer/mod.rs b/zed/src/editor/buffer/mod.rs index 78c7c2f2af..87ebf272cb 100644 --- a/zed/src/editor/buffer/mod.rs +++ b/zed/src/editor/buffer/mod.rs @@ -2509,12 +2509,12 @@ mod tests { for _i in 0..10 { let (old_ranges, new_text, _) = buffer.randomly_mutate(rng, None); for old_range in old_ranges.iter().rev() { - reference_string = [ - &reference_string[0..old_range.start], - new_text.as_str(), - &reference_string[old_range.end..], - ] - .concat(); + reference_string = reference_string + .chars() + .take(old_range.start) + .chain(new_text.chars()) + .chain(reference_string.chars().skip(old_range.end)) + .collect(); } assert_eq!(buffer.text(), reference_string); @@ -2549,7 +2549,12 @@ mod tests { let range_sum = buffer.text_summary_for_range(start..end); assert_eq!(range_sum.rightmost_point.column, *longest_column); assert!(longest_rows.contains(&range_sum.rightmost_point.row)); - let range_text = &buffer.text()[start..end]; + let range_text = buffer + .text() + .chars() + .skip(start) + .take(end - start) + .collect::(); assert_eq!(range_sum.chars, range_text.chars().count()); assert_eq!(range_sum.bytes, range_text.len()); } @@ -3458,9 +3463,17 @@ mod tests { fn line_lengths_in_range(buffer: &Buffer, range: Range) -> BTreeMap> { let mut lengths = BTreeMap::new(); - for (row, line) in buffer.text()[range].lines().enumerate() { + for (row, line) in buffer + .text() + .chars() + .skip(range.start) + .take(range.len()) + .collect::() + .lines() + .enumerate() + { lengths - .entry(line.len() as u32) + .entry(line.chars().count() as u32) .or_insert(HashSet::default()) .insert(row as u32); } diff --git a/zed/src/editor/buffer/rope.rs b/zed/src/editor/buffer/rope.rs index 74a680286b..231aa59878 100644 --- a/zed/src/editor/buffer/rope.rs +++ b/zed/src/editor/buffer/rope.rs @@ -1,12 +1,13 @@ use super::Point; use crate::sum_tree::{self, SeekBias, SumTree}; +use crate::util::byte_range_for_char_range; use anyhow::{anyhow, Result}; use arrayvec::ArrayString; use smallvec::SmallVec; -use std::{cmp, ops::Range, str}; +use std::{cmp, iter::Skip, ops::Range, str}; #[cfg(test)] -const CHUNK_BASE: usize = 2; +const CHUNK_BASE: usize = 6; #[cfg(not(test))] const CHUNK_BASE: usize = 16; @@ -55,10 +56,7 @@ impl Rope { if last_chunk.0.len() + first_new_chunk_ref.0.len() <= 2 * CHUNK_BASE { last_chunk.0.push_str(&first_new_chunk.take().unwrap().0); } else { - let mut text = ArrayString::<[_; 4 * CHUNK_BASE]>::new(); - text.push_str(&last_chunk.0); - text.push_str(&first_new_chunk_ref.0); - + let text = [last_chunk.0, first_new_chunk_ref.0].concat(); let mut midpoint = text.len() / 2; while !text.is_char_boundary(midpoint) { midpoint += 1; @@ -83,10 +81,11 @@ impl Rope { #[cfg(test)] { // Ensure all chunks except maybe the last one are not underflowing. + // Allow some wiggle room for multibyte characters at chunk boundaries. let mut chunks = self.chunks.cursor::<(), ()>().peekable(); while let Some(chunk) = chunks.next() { if chunks.peek().is_some() { - assert!(chunk.0.len() >= CHUNK_BASE); + assert!(chunk.0.len() + 3 >= CHUNK_BASE); } } } @@ -190,7 +189,8 @@ impl<'a> Cursor<'a> { if let Some(start_chunk) = self.chunks.item() { let start_ix = self.offset - self.chunks.start(); let end_ix = cmp::min(end_offset, self.chunks.end()) - self.chunks.start(); - slice.push(&start_chunk.0[start_ix..end_ix]); + let byte_range = byte_range_for_char_range(start_chunk.0, start_ix..end_ix); + slice.push(&start_chunk.0[byte_range]); } if end_offset > self.chunks.end() { @@ -199,7 +199,9 @@ impl<'a> Cursor<'a> { chunks: self.chunks.slice(&end_offset, SeekBias::Right, &()), }); if let Some(end_chunk) = self.chunks.item() { - slice.push(&end_chunk.0[..end_offset - self.chunks.start()]); + let end_ix = end_offset - self.chunks.start(); + let byte_range = byte_range_for_char_range(end_chunk.0, 0..end_ix); + slice.push(&end_chunk.0[byte_range]); } } @@ -217,7 +219,7 @@ impl<'a> Cursor<'a> { } #[derive(Clone, Debug, Default)] -struct Chunk(ArrayString<[u8; 2 * CHUNK_BASE]>); +struct Chunk(ArrayString<[u8; 4 * CHUNK_BASE]>); impl Chunk { fn to_point(&self, target: usize) -> Point { @@ -358,7 +360,7 @@ impl<'a> sum_tree::Dimension<'a, TextSummary> for Point { pub struct Chars<'a> { cursor: sum_tree::Cursor<'a, Chunk, usize, usize>, - chars: str::Chars<'a>, + chars: Skip>, } impl<'a> Chars<'a> { @@ -368,9 +370,9 @@ impl<'a> Chars<'a> { let chars = if let Some(chunk) = cursor.item() { let ix = start - cursor.start(); cursor.next(); - chunk.0[ix..].chars() + chunk.0.chars().skip(ix) } else { - "".chars() + "".chars().skip(0) }; Self { cursor, chars } @@ -384,7 +386,7 @@ impl<'a> Iterator for Chars<'a> { if let Some(ch) = self.chars.next() { Some(ch) } else if let Some(chunk) = self.cursor.item() { - self.chars = chunk.0.chars(); + self.chars = chunk.0.chars().skip(0); self.cursor.next(); Some(self.chars.next().unwrap()) } else { @@ -422,9 +424,9 @@ mod tests { let mut expected = String::new(); let mut actual = Rope::new(); for _ in 0..operations { - let end_ix = rng.gen_range(0..=expected.len()); + let end_ix = rng.gen_range(0..=expected.chars().count()); let start_ix = rng.gen_range(0..=end_ix); - let len = rng.gen_range(0..=20); + let len = rng.gen_range(0..=64); let new_text: String = RandomCharIter::new(&mut rng).take(len).collect(); let mut new_actual = Rope::new(); @@ -436,16 +438,20 @@ mod tests { actual = new_actual; let mut new_expected = String::new(); - new_expected.push_str(&expected[..start_ix]); + new_expected.extend(expected.chars().take(start_ix)); new_expected.push_str(&new_text); - new_expected.push_str(&expected[end_ix..]); + new_expected.extend(expected.chars().skip(end_ix)); expected = new_expected; assert_eq!(actual.text(), expected); + log::info!("text: {:?}", expected); for _ in 0..5 { let ix = rng.gen_range(0..=expected.len()); - assert_eq!(actual.chars_at(ix).collect::(), expected[ix..]); + assert_eq!( + actual.chars_at(ix).collect::(), + expected.chars().skip(ix).collect::() + ); } let mut point = Point::new(0, 0); diff --git a/zed/src/editor/display_map/fold_map.rs b/zed/src/editor/display_map/fold_map.rs index b404bcb3dc..89eb7b062e 100644 --- a/zed/src/editor/display_map/fold_map.rs +++ b/zed/src/editor/display_map/fold_map.rs @@ -830,7 +830,7 @@ mod tests { #[gpui::test] fn test_random_folds(app: &mut gpui::MutableAppContext) { use crate::editor::ToPoint; - use crate::util::RandomCharIter; + use crate::util::{byte_range_for_char_range, RandomCharIter}; use rand::prelude::*; use std::env; @@ -905,7 +905,10 @@ mod tests { expected_buffer_rows.extend((fold_end.row + 1..=next_row).rev()); next_row = fold_start.row; - expected_text.replace_range(fold_range.start..fold_range.end, "…"); + expected_text.replace_range( + byte_range_for_char_range(&expected_text, fold_range.start..fold_range.end), + "…", + ); } expected_buffer_rows.extend((0..=next_row).rev()); expected_buffer_rows.reverse(); diff --git a/zed/src/util.rs b/zed/src/util.rs index 1ce914ddd4..c5e968243f 100644 --- a/zed/src/util.rs +++ b/zed/src/util.rs @@ -1,5 +1,20 @@ use rand::prelude::*; -use std::cmp::Ordering; +use std::{cmp::Ordering, ops::Range}; + +pub fn byte_range_for_char_range(text: impl AsRef, char_range: Range) -> Range { + let text = text.as_ref(); + let mut result = text.len()..text.len(); + for (i, (offset, _)) in text.char_indices().enumerate() { + if i == char_range.start { + result.start = offset; + } + if i == char_range.end { + result.end = offset; + break; + } + } + result +} pub fn post_inc(value: &mut usize) -> usize { let prev = *value; @@ -44,7 +59,21 @@ impl Iterator for RandomCharIter { fn next(&mut self) -> Option { if self.0.gen_bool(1.0 / 5.0) { Some('\n') - } else { + } + // two-byte greek letters + else if self.0.gen_bool(1.0 / 8.0) { + Some(std::char::from_u32(self.0.gen_range(('α' as u32)..('ω' as u32 + 1))).unwrap()) + } + // three-byte characters + else if self.0.gen_bool(1.0 / 10.0) { + ['✋', '✅', '❌', '❎', '⭐'].choose(&mut self.0).cloned() + } + // four-byte characters + else if self.0.gen_bool(1.0 / 12.0) { + ['🍐', '🏀', '🍗', '🎉'].choose(&mut self.0).cloned() + } + // ascii letters + else { Some(self.0.gen_range(b'a'..b'z' + 1).into()) } }