Fix fuzzy string match invariant check (#22032)

The version in #21983 only handled out-of-range indices; it did not handle
indices that are in range but not on a UTF-8 character boundary (thanks to
@s3bba for pointing this out).

Release Notes:

- N/A
This commit is contained in:
Michael Sloan 2024-12-15 01:15:22 -07:00 committed by GitHub
parent 7e6233d70f
commit f64fcedabb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -61,24 +61,24 @@ impl StringMatch {
let mut positions = self.positions.iter().peekable(); let mut positions = self.positions.iter().peekable();
iter::from_fn(move || { iter::from_fn(move || {
if let Some(start) = positions.next().copied() { if let Some(start) = positions.next().copied() {
if start >= self.string.len() { let Some(char_len) = self.char_len_at_index(start) else {
log::error!( log::error!(
"Invariant violation: Index {start} out of range in string {:?}", "Invariant violation: Index {start} out of range or not on a utf-8 boundary in string {:?}",
self.string self.string
); );
return None; return None;
} };
let mut end = start + self.char_len_at_index(start); let mut end = start + char_len;
while let Some(next_start) = positions.peek() { while let Some(next_start) = positions.peek() {
if end == **next_start { if end == **next_start {
if end >= self.string.len() { let Some(char_len) = self.char_len_at_index(end) else {
log::error!( log::error!(
"Invariant violation: Index {end} out of range in string {:?}", "Invariant violation: Index {end} out of range or not on a utf-8 boundary in string {:?}",
self.string self.string
); );
return None; return None;
} };
end += self.char_len_at_index(end); end += char_len;
positions.next(); positions.next();
} else { } else {
break; break;
@ -91,8 +91,12 @@ impl StringMatch {
}) })
} }
fn char_len_at_index(&self, ix: usize) -> usize { /// Gets the byte length of the utf-8 character at a byte offset. If the index is out of range
self.string[ix..].chars().next().unwrap().len_utf8() /// or not on a utf-8 boundary then None is returned.
fn char_len_at_index(&self, ix: usize) -> Option<usize> {
self.string
.get(ix..)
.and_then(|slice| slice.chars().next().map(|char| char.len_utf8()))
} }
} }