Compare commits
9 commits
main
...
fix-unicod
Author | SHA1 | Date | |
---|---|---|---|
![]() |
211f20f41f | ||
![]() |
6107e7c604 | ||
![]() |
53ce77a0f7 | ||
![]() |
b69a09892b | ||
![]() |
7e152e0439 | ||
![]() |
45fd87e63a | ||
![]() |
6e19923c27 | ||
![]() |
92fb7656c4 | ||
![]() |
2de99369f4 |
1 changed file with 265 additions and 9 deletions
|
@ -296,12 +296,10 @@ impl<'a> Iterator for InlayChunks<'a> {
|
||||||
*chunk = self.buffer_chunks.next().unwrap();
|
*chunk = self.buffer_chunks.next().unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
let (prefix, suffix) = chunk.text.split_at(
|
let (prefix, suffix) = chunk.text.split_at(utf8_char_boundary(
|
||||||
chunk
|
chunk.text,
|
||||||
.text
|
self.transforms.end(&()).0.0 - self.output_offset.0,
|
||||||
.len()
|
));
|
||||||
.min(self.transforms.end(&()).0.0 - self.output_offset.0),
|
|
||||||
);
|
|
||||||
|
|
||||||
chunk.text = suffix;
|
chunk.text = suffix;
|
||||||
self.output_offset.0 += prefix.len();
|
self.output_offset.0 += prefix.len();
|
||||||
|
@ -391,8 +389,10 @@ impl<'a> Iterator for InlayChunks<'a> {
|
||||||
let inlay_chunk = self
|
let inlay_chunk = self
|
||||||
.inlay_chunk
|
.inlay_chunk
|
||||||
.get_or_insert_with(|| inlay_chunks.next().unwrap());
|
.get_or_insert_with(|| inlay_chunks.next().unwrap());
|
||||||
let (chunk, remainder) =
|
let (chunk, remainder) = inlay_chunk.split_at(utf8_char_boundary(
|
||||||
inlay_chunk.split_at(inlay_chunk.len().min(next_inlay_highlight_endpoint));
|
inlay_chunk,
|
||||||
|
next_inlay_highlight_endpoint,
|
||||||
|
));
|
||||||
*inlay_chunk = remainder;
|
*inlay_chunk = remainder;
|
||||||
if inlay_chunk.is_empty() {
|
if inlay_chunk.is_empty() {
|
||||||
self.inlay_chunk = None;
|
self.inlay_chunk = None;
|
||||||
|
@ -412,7 +412,7 @@ impl<'a> Iterator for InlayChunks<'a> {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if self.output_offset == self.transforms.end(&()).0 {
|
if self.output_offset >= self.transforms.end(&()).0 {
|
||||||
self.inlay_chunks = None;
|
self.inlay_chunks = None;
|
||||||
self.transforms.next(&());
|
self.transforms.next(&());
|
||||||
}
|
}
|
||||||
|
@ -1143,6 +1143,56 @@ fn push_isomorphic(sum_tree: &mut SumTree<Transform>, summary: TextSummary) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the largest UTF-8 `char` boundary in `text` that is less than or
/// equal to `byte_index`.
///
/// The result is always a valid argument for `text.split_at(..)`:
///
/// * If `byte_index` is at or past the end of `text`, `text.len()` is
///   returned, so the caller's split consumes the whole slice. Clamping to
///   `text.len() - 1` instead would always split off the final `char`, and for
///   a one-`char` slice would return 0, leaving the caller with an empty
///   prefix and no forward progress.
/// * If `byte_index` falls inside a multibyte sequence, the index of that
///   sequence's first byte is returned. We search *backwards* because a
///   boundary is guaranteed to exist in that direction (index 0 is always
///   one), whereas the requested index may be the last byte we are allowed to
///   look at.
///
/// This never panics; an empty `text` yields 0.
#[inline]
fn utf8_char_boundary(text: &str, byte_index: usize) -> usize {
    // `text.len()` is itself a valid boundary, so out-of-range requests
    // (including any request on an empty string) resolve to it directly.
    if byte_index >= text.len() {
        return text.len();
    }

    // Walk back over continuation bytes. `is_char_boundary(0)` is always true
    // for a `&str`, so the loop terminates before `boundary` can underflow,
    // and valid UTF-8 means it steps back at most three times.
    let mut boundary = byte_index;
    while !text.is_char_boundary(boundary) {
        boundary -= 1;
    }
    boundary
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
@ -1882,4 +1932,210 @@ mod tests {
|
||||||
cx.set_global(store);
|
cx.set_global(store);
|
||||||
theme::init(theme::LoadThemes::JustBase, cx);
|
theme::init(theme::LoadThemes::JustBase, cx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Helper to create test highlights for an inlay
|
||||||
|
fn create_inlay_highlights(
|
||||||
|
inlay_id: InlayId,
|
||||||
|
highlight_range: Range<usize>,
|
||||||
|
position: Anchor,
|
||||||
|
) -> TreeMap<TypeId, TreeMap<InlayId, (HighlightStyle, InlayHighlight)>> {
|
||||||
|
let mut inlay_highlights = TreeMap::default();
|
||||||
|
let mut type_highlights = TreeMap::default();
|
||||||
|
type_highlights.insert(
|
||||||
|
inlay_id,
|
||||||
|
(
|
||||||
|
HighlightStyle::default(),
|
||||||
|
InlayHighlight {
|
||||||
|
inlay: inlay_id,
|
||||||
|
range: highlight_range,
|
||||||
|
inlay_position: position,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
);
|
||||||
|
inlay_highlights.insert(TypeId::of::<()>(), type_highlights);
|
||||||
|
inlay_highlights
|
||||||
|
}
|
||||||
|
|
||||||
|
#[gpui::test]
|
||||||
|
fn test_inlay_utf8_boundary_panic_fix(cx: &mut App) {
|
||||||
|
init_test(cx);
|
||||||
|
|
||||||
|
// This test verifies that we handle UTF-8 character boundaries correctly
|
||||||
|
// when splitting inlay text for highlighting. Previously, this would panic
|
||||||
|
// when trying to split at byte 13, which is in the middle of the '…' character.
|
||||||
|
//
|
||||||
|
// See https://github.com/zed-industries/zed/issues/33641
|
||||||
|
let buffer = MultiBuffer::build_simple("fn main() {}\n", cx);
|
||||||
|
let (mut inlay_map, _) = InlayMap::new(buffer.read(cx).snapshot(cx));
|
||||||
|
|
||||||
|
// Create an inlay with text that contains a multi-byte character
|
||||||
|
// The string "SortingDirec…" contains an ellipsis character '…' which is 3 bytes (E2 80 A6)
|
||||||
|
let inlay_text = "SortingDirec…";
|
||||||
|
let position = buffer.read(cx).snapshot(cx).anchor_before(Point::new(0, 5));
|
||||||
|
|
||||||
|
let inlay = Inlay {
|
||||||
|
id: InlayId::Hint(0),
|
||||||
|
position,
|
||||||
|
text: text::Rope::from(inlay_text),
|
||||||
|
color: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let (inlay_snapshot, _) = inlay_map.splice(&[], vec![inlay]);
|
||||||
|
|
||||||
|
// Create highlights that request a split at byte 13, which is in the middle
|
||||||
|
// of the '…' character (bytes 12..14). We should round down to byte 12.
|
||||||
|
let inlay_highlights = create_inlay_highlights(InlayId::Hint(0), 0..13, position);
|
||||||
|
|
||||||
|
let highlights = crate::display_map::Highlights {
|
||||||
|
text_highlights: None,
|
||||||
|
inlay_highlights: Some(&inlay_highlights),
|
||||||
|
styles: crate::display_map::HighlightStyles::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Collect chunks - this previously would panic
|
||||||
|
let chunks: Vec<_> = inlay_snapshot
|
||||||
|
.chunks(
|
||||||
|
InlayOffset(0)..InlayOffset(inlay_snapshot.len().0),
|
||||||
|
false,
|
||||||
|
highlights,
|
||||||
|
)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Verify the chunks are correct
|
||||||
|
let full_text: String = chunks.iter().map(|c| c.chunk.text).collect();
|
||||||
|
assert_eq!(full_text, "fn maSortingDirec…in() {}\n");
|
||||||
|
|
||||||
|
// Verify the highlighted portion includes the complete ellipsis character
|
||||||
|
let highlighted_chunks: Vec<_> = chunks
|
||||||
|
.iter()
|
||||||
|
.filter(|c| c.chunk.highlight_style.is_some() && c.chunk.is_inlay)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
assert_eq!(highlighted_chunks.len(), 1);
|
||||||
|
assert_eq!(highlighted_chunks[0].chunk.text, "SortingDirec…");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[gpui::test]
|
||||||
|
fn test_inlay_utf8_boundaries(cx: &mut App) {
|
||||||
|
init_test(cx);
|
||||||
|
|
||||||
|
struct TestCase {
|
||||||
|
inlay_text: &'static str,
|
||||||
|
highlight_range: Range<usize>,
|
||||||
|
expected_highlighted: &'static str,
|
||||||
|
description: &'static str,
|
||||||
|
}
|
||||||
|
|
||||||
|
let test_cases = vec![
|
||||||
|
TestCase {
|
||||||
|
inlay_text: "Hello👋World",
|
||||||
|
highlight_range: 0..7,
|
||||||
|
expected_highlighted: "Hello👋",
|
||||||
|
description: "Emoji boundary - rounds up to include full emoji",
|
||||||
|
},
|
||||||
|
TestCase {
|
||||||
|
inlay_text: "Test→End",
|
||||||
|
highlight_range: 0..5,
|
||||||
|
expected_highlighted: "Test→",
|
||||||
|
description: "Arrow boundary - rounds up to include full arrow",
|
||||||
|
},
|
||||||
|
TestCase {
|
||||||
|
inlay_text: "café",
|
||||||
|
highlight_range: 0..4,
|
||||||
|
expected_highlighted: "café",
|
||||||
|
description: "Accented char boundary - rounds up to include full é",
|
||||||
|
},
|
||||||
|
TestCase {
|
||||||
|
inlay_text: "🎨🎭🎪",
|
||||||
|
highlight_range: 0..5,
|
||||||
|
expected_highlighted: "🎨🎭",
|
||||||
|
description: "Multiple emojis - partial highlight",
|
||||||
|
},
|
||||||
|
TestCase {
|
||||||
|
inlay_text: "普通话",
|
||||||
|
highlight_range: 0..4,
|
||||||
|
expected_highlighted: "普通",
|
||||||
|
description: "Chinese characters - partial highlight",
|
||||||
|
},
|
||||||
|
TestCase {
|
||||||
|
inlay_text: "Hello",
|
||||||
|
highlight_range: 0..2,
|
||||||
|
expected_highlighted: "He",
|
||||||
|
description: "ASCII only - no adjustment needed",
|
||||||
|
},
|
||||||
|
TestCase {
|
||||||
|
inlay_text: "👋",
|
||||||
|
highlight_range: 0..1,
|
||||||
|
expected_highlighted: "👋",
|
||||||
|
description: "Single emoji - partial byte range includes whole char",
|
||||||
|
},
|
||||||
|
TestCase {
|
||||||
|
inlay_text: "Test",
|
||||||
|
highlight_range: 0..0,
|
||||||
|
expected_highlighted: "",
|
||||||
|
description: "Empty range",
|
||||||
|
},
|
||||||
|
TestCase {
|
||||||
|
inlay_text: "🎨ABC",
|
||||||
|
highlight_range: 2..5,
|
||||||
|
expected_highlighted: "A",
|
||||||
|
description: "Range starting mid-emoji skips the emoji",
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
for test_case in test_cases {
|
||||||
|
let buffer = MultiBuffer::build_simple("test", cx);
|
||||||
|
let (mut inlay_map, _) = InlayMap::new(buffer.read(cx).snapshot(cx));
|
||||||
|
let position = buffer.read(cx).snapshot(cx).anchor_before(Point::new(0, 2));
|
||||||
|
|
||||||
|
let inlay = Inlay {
|
||||||
|
id: InlayId::Hint(0),
|
||||||
|
position,
|
||||||
|
text: text::Rope::from(test_case.inlay_text),
|
||||||
|
color: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let (inlay_snapshot, _) = inlay_map.splice(&[], vec![inlay]);
|
||||||
|
let inlay_highlights = create_inlay_highlights(
|
||||||
|
InlayId::Hint(0),
|
||||||
|
test_case.highlight_range.clone(),
|
||||||
|
position,
|
||||||
|
);
|
||||||
|
|
||||||
|
let highlights = crate::display_map::Highlights {
|
||||||
|
text_highlights: None,
|
||||||
|
inlay_highlights: Some(&inlay_highlights),
|
||||||
|
styles: crate::display_map::HighlightStyles::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let chunks: Vec<_> = inlay_snapshot
|
||||||
|
.chunks(
|
||||||
|
InlayOffset(0)..InlayOffset(inlay_snapshot.len().0),
|
||||||
|
false,
|
||||||
|
highlights,
|
||||||
|
)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Verify we got chunks and they total to the expected text
|
||||||
|
let full_text: String = chunks.iter().map(|c| c.chunk.text).collect();
|
||||||
|
assert_eq!(
|
||||||
|
full_text,
|
||||||
|
format!("te{}st", test_case.inlay_text),
|
||||||
|
"Full text mismatch for case: {}",
|
||||||
|
test_case.description
|
||||||
|
);
|
||||||
|
|
||||||
|
// Verify that the highlighted portion matches expectations
|
||||||
|
let highlighted_text: String = chunks
|
||||||
|
.iter()
|
||||||
|
.filter(|c| c.chunk.highlight_style.is_some() && c.chunk.is_inlay)
|
||||||
|
.map(|c| c.chunk.text)
|
||||||
|
.collect();
|
||||||
|
assert_eq!(
|
||||||
|
highlighted_text, test_case.expected_highlighted,
|
||||||
|
"Highlighted text mismatch for case: {} (text: '{}', range: {:?})",
|
||||||
|
test_case.description, test_case.inlay_text, test_case.highlight_range
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue