fix bug in marked_range utils

This commit is contained in:
Keith Simmons 2022-05-25 14:13:18 -07:00
parent 98f9575653
commit e104cb94e7
3 changed files with 217 additions and 227 deletions

View file

@ -24,31 +24,67 @@ pub fn marked_text(marked_text: &str) -> (String, Vec<usize>) {
(unmarked_text, markers.remove(&'|').unwrap_or_default())
}
/// Describes which marker characters delimit a range inside marked test text.
///
/// Values of this type are used as the keys of the lookup table returned by
/// `marked_text_ranges_by`, hence the `Eq` + `Hash` derives. `Debug` lets the
/// marker appear in `assert_eq!` failures, and `Clone`/`Copy` are free because
/// the payload is only one or two `char`s.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum TextRangeMarker {
    /// A single marker character; each occurrence denotes an empty range.
    Empty(char),
    /// A pair of marker characters: the first opens a range, the second closes it.
    Range(char, char),
}
impl TextRangeMarker {
fn markers(&self) -> Vec<char> {
match self {
Self::Empty(m) => vec![*m],
Self::Range(l, r) => vec![*l, *r],
}
}
}
impl From<char> for TextRangeMarker {
fn from(marker: char) -> Self {
Self::Empty(marker)
}
}
impl From<(char, char)> for TextRangeMarker {
fn from((left_marker, right_marker): (char, char)) -> Self {
Self::Range(left_marker, right_marker)
}
}
pub fn marked_text_ranges_by(
marked_text: &str,
delimiters: Vec<(char, char)>,
) -> (String, HashMap<(char, char), Vec<Range<usize>>>) {
let all_markers = delimiters
.iter()
.flat_map(|(start, end)| [*start, *end])
.collect();
let (unmarked_text, mut markers) = marked_text_by(marked_text, all_markers);
let range_lookup = delimiters
.into_iter()
.map(|(start_marker, end_marker)| {
let starts = markers.remove(&start_marker).unwrap_or_default();
let ends = markers.remove(&end_marker).unwrap_or_default();
assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
markers: Vec<TextRangeMarker>,
) -> (String, HashMap<TextRangeMarker, Vec<Range<usize>>>) {
let all_markers = markers.iter().flat_map(|m| m.markers()).collect();
let ranges = starts
.into_iter()
.zip(ends)
.map(|(start, end)| {
assert!(end >= start, "marked ranges must be disjoint");
start..end
})
.collect::<Vec<Range<usize>>>();
((start_marker, end_marker), ranges)
let (unmarked_text, mut marker_offsets) = marked_text_by(marked_text, all_markers);
let range_lookup = markers
.into_iter()
.map(|marker| match marker {
TextRangeMarker::Empty(empty_marker_char) => {
let ranges = marker_offsets
.remove(&empty_marker_char)
.unwrap_or_default()
.into_iter()
.map(|empty_index| empty_index..empty_index)
.collect::<Vec<Range<usize>>>();
(marker, ranges)
}
TextRangeMarker::Range(start_marker, end_marker) => {
let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
let ranges = starts
.into_iter()
.zip(ends)
.map(|(start, end)| {
assert!(end >= start, "marked ranges must be disjoint");
start..end
})
.collect::<Vec<Range<usize>>>();
(marker, ranges)
}
})
.collect();
@ -58,14 +94,16 @@ pub fn marked_text_ranges_by(
// Returns the ranges delimited by (), [], and <>. Ranges that use the same
// markers must not overlap. The text may also contain | to mark empty ranges.
pub fn marked_text_ranges(full_marked_text: &str) -> (String, Vec<Range<usize>>) {
let (range_marked_text, empty_offsets) = marked_text(full_marked_text);
let (unmarked, range_lookup) =
marked_text_ranges_by(&range_marked_text, vec![('[', ']'), ('(', ')'), ('<', '>')]);
let mut combined_ranges: Vec<_> = range_lookup
.into_values()
.flatten()
.chain(empty_offsets.into_iter().map(|offset| offset..offset))
.collect();
let (unmarked, range_lookup) = marked_text_ranges_by(
&full_marked_text,
vec![
'|'.into(),
('[', ']').into(),
('(', ')').into(),
('<', '>').into(),
],
);
let mut combined_ranges: Vec<_> = range_lookup.into_values().flatten().collect();
combined_ranges.sort_by_key(|range| range.start);
(unmarked, combined_ranges)