Reland invisibles (#19846)

Release Notes:

- Show invisibles in the editor

Relands #19298 

Trying to quantify a performance impact, it doesn't seem to impact much
visible in Instruments or in a micro-benchmark of Editor#layout_lines.
We're still taking a few hundred micro-seconds (+/- a lot) every time.
The ascii file has just ascii, where as the cc file has one control
character per line.

<img width="1055" alt="Screenshot 2024-10-28 at 12 14 53"
src="https://github.com/user-attachments/assets/1c382063-bb19-4e92-bbba-ed5e7c02309f">
<img width="1020" alt="Screenshot 2024-10-28 at 12 15 07"
src="https://github.com/user-attachments/assets/1789f65e-5f83-4c32-be47-7748c62c3703">
This commit is contained in:
Conrad Irwin 2024-10-28 20:27:09 -06:00 committed by GitHub
parent 85ff03cde0
commit 58e5d4ff02
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 319 additions and 16 deletions

View file

@ -21,6 +21,7 @@ mod block_map;
mod crease_map;
mod fold_map;
mod inlay_map;
pub(crate) mod invisibles;
mod tab_map;
mod wrap_map;
@ -42,6 +43,7 @@ use gpui::{
pub(crate) use inlay_map::Inlay;
use inlay_map::{InlayMap, InlaySnapshot};
pub use inlay_map::{InlayOffset, InlayPoint};
use invisibles::{is_invisible, replacement};
use language::{
language_settings::language_settings, ChunkRenderer, OffsetUtf16, Point,
Subscription as BufferSubscription,
@ -56,6 +58,7 @@ use std::{
any::TypeId,
borrow::Cow,
fmt::Debug,
iter,
num::NonZeroU32,
ops::{Add, Range, Sub},
sync::Arc,
@ -63,7 +66,7 @@ use std::{
use sum_tree::{Bias, TreeMap};
use tab_map::{TabMap, TabSnapshot};
use text::LineIndent;
use ui::WindowContext;
use ui::{div, px, IntoElement, ParentElement, Styled, WindowContext};
use wrap_map::{WrapMap, WrapSnapshot};
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
@ -461,6 +464,98 @@ pub struct HighlightedChunk<'a> {
pub renderer: Option<ChunkRenderer>,
}
impl<'a> HighlightedChunk<'a> {
fn highlight_invisibles(
self,
editor_style: &'a EditorStyle,
) -> impl Iterator<Item = Self> + 'a {
let mut chars = self.text.chars().peekable();
let mut text = self.text;
let style = self.style;
let is_tab = self.is_tab;
let renderer = self.renderer;
iter::from_fn(move || {
let mut prefix_len = 0;
while let Some(&ch) = chars.peek() {
if !is_invisible(ch) {
prefix_len += ch.len_utf8();
chars.next();
continue;
}
if prefix_len > 0 {
let (prefix, suffix) = text.split_at(prefix_len);
text = suffix;
return Some(HighlightedChunk {
text: prefix,
style,
is_tab,
renderer: renderer.clone(),
});
}
chars.next();
let (prefix, suffix) = text.split_at(ch.len_utf8());
text = suffix;
if let Some(replacement) = replacement(ch) {
let background = editor_style.status.hint_background;
let underline = editor_style.status.hint;
return Some(HighlightedChunk {
text: prefix,
style: None,
is_tab: false,
renderer: Some(ChunkRenderer {
render: Arc::new(move |_| {
div()
.child(replacement)
.bg(background)
.text_decoration_1()
.text_decoration_color(underline)
.into_any_element()
}),
constrain_width: false,
}),
});
} else {
let invisible_highlight = HighlightStyle {
background_color: Some(editor_style.status.hint_background),
underline: Some(UnderlineStyle {
color: Some(editor_style.status.hint),
thickness: px(1.),
wavy: false,
}),
..Default::default()
};
let invisible_style = if let Some(mut style) = style {
style.highlight(invisible_highlight);
style
} else {
invisible_highlight
};
return Some(HighlightedChunk {
text: prefix,
style: Some(invisible_style),
is_tab: false,
renderer: renderer.clone(),
});
}
}
if !text.is_empty() {
let remainder = text;
text = "";
Some(HighlightedChunk {
text: remainder,
style,
is_tab,
renderer: renderer.clone(),
})
} else {
None
}
})
}
}
#[derive(Clone)]
pub struct DisplaySnapshot {
pub buffer_snapshot: MultiBufferSnapshot,
@ -675,7 +770,7 @@ impl DisplaySnapshot {
suggestion: Some(editor_style.suggestions_style),
},
)
.map(|chunk| {
.flat_map(|chunk| {
let mut highlight_style = chunk
.syntax_highlight_id
.and_then(|id| id.style(&editor_style.syntax));
@ -718,6 +813,7 @@ impl DisplaySnapshot {
is_tab: chunk.is_tab,
renderer: chunk.renderer,
}
.highlight_invisibles(editor_style)
})
}

View file

@ -0,0 +1,129 @@
// Invisibility in a Unicode context is not well defined, so we have to guess.
//
// We highlight all ASCII control codes, and unicode whitespace because they are likely
// confused with an ASCII space in a programming context (U+0020).
//
// We also highlight the handful of blank non-space characters:
// U+2800 BRAILLE PATTERN BLANK - Category: So
// U+115F HANGUL CHOSEONG FILLER - Category: Lo
// U+1160 HANGUL CHOSEONG FILLER - Category: Lo
// U+3164 HANGUL FILLER - Category: Lo
// U+FFA0 HALFWIDTH HANGUL FILLER - Category: Lo
// U+FFFC OBJECT REPLACEMENT CHARACTER - Category: So
//
// For the rest of Unicode, invisibility happens for two reasons:
// * A Format character (like a byte order mark or right-to-left override)
// * An invisible Nonspacing Mark character (like U+034F, or variation selectors)
//
// We don't consider unassigned codepoints invisible as the font renderer already shows
// a replacement character in that case (and there are a *lot* of them)
//
// Control characters are mostly fine to highlight; except:
// * U+E0020..=U+E007F are used in emoji flags. We don't highlight them right now, but we could if we tightened our heuristics.
// * U+200D is used to join characters. We highlight this but don't replace it. As our font system ignores mid-glyph highlights this mostly works to highlight unexpected uses.
//
// Nonspacing marks are handled like U+200D. This means that mid-glyph we ignore them, but
// probably causes issues with end-of-glyph usage.
//
// ref: https://invisible-characters.com
// ref: https://www.compart.com/en/unicode/category/Cf
// ref: https://gist.github.com/ConradIrwin/f759e1fc29267143c4c7895aa495dca5?h=1
// ref: https://unicode.org/Public/emoji/13.0/emoji-test.txt
// https://github.com/bits/UTF-8-Unicode-Test-Documents/blob/master/UTF-8_sequence_separated/utf8_sequence_0-0x10ffff_assigned_including-unprintable-asis.txt
pub fn is_invisible(c: char) -> bool {
if c <= '\u{1f}' {
c != '\t' && c != '\n' && c != '\r'
} else if c >= '\u{7f}' {
c <= '\u{9f}'
|| (c.is_whitespace() && c != IDEOGRAPHIC_SPACE)
|| contains(c, &FORMAT)
|| contains(c, &OTHER)
} else {
false
}
}
// ASCII control characters have fancy unicode glyphs, everything else
// is replaced by a space - unless it is used in combining characters in
// which case we need to leave it in the string.
pub(crate) fn replacement(c: char) -> Option<&'static str> {
if c <= '\x1f' {
Some(C0_SYMBOLS[c as usize])
} else if c == '\x7f' {
Some(DEL)
} else if contains(c, &PRESERVE) {
None
} else {
Some("\u{2007}") // fixed width space
}
}
// IDEOGRAPHIC SPACE is common alongside Chinese and other wide character sets.
// We don't highlight this for now (as it already shows up wide in the editor),
// but could if we tracked state in the classifier.
const IDEOGRAPHIC_SPACE: char = '\u{3000}';
const C0_SYMBOLS: &'static [&'static str] = &[
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "",
];
const DEL: &'static str = "";
// generated using ucd-generate: ucd-generate general-category --include Format --chars ucd-16.0.0
pub const FORMAT: &'static [(char, char)] = &[
('\u{ad}', '\u{ad}'),
('\u{600}', '\u{605}'),
('\u{61c}', '\u{61c}'),
('\u{6dd}', '\u{6dd}'),
('\u{70f}', '\u{70f}'),
('\u{890}', '\u{891}'),
('\u{8e2}', '\u{8e2}'),
('\u{180e}', '\u{180e}'),
('\u{200b}', '\u{200f}'),
('\u{202a}', '\u{202e}'),
('\u{2060}', '\u{2064}'),
('\u{2066}', '\u{206f}'),
('\u{feff}', '\u{feff}'),
('\u{fff9}', '\u{fffb}'),
('\u{110bd}', '\u{110bd}'),
('\u{110cd}', '\u{110cd}'),
('\u{13430}', '\u{1343f}'),
('\u{1bca0}', '\u{1bca3}'),
('\u{1d173}', '\u{1d17a}'),
('\u{e0001}', '\u{e0001}'),
('\u{e0020}', '\u{e007f}'),
];
// hand-made base on https://invisible-characters.com (Excluding Cf)
pub const OTHER: &'static [(char, char)] = &[
('\u{034f}', '\u{034f}'),
('\u{115F}', '\u{1160}'),
('\u{17b4}', '\u{17b5}'),
('\u{180b}', '\u{180d}'),
('\u{2800}', '\u{2800}'),
('\u{3164}', '\u{3164}'),
('\u{fe00}', '\u{fe0d}'),
('\u{ffa0}', '\u{ffa0}'),
('\u{fffc}', '\u{fffc}'),
('\u{e0100}', '\u{e01ef}'),
];
// a subset of FORMAT/OTHER that may appear within glyphs
const PRESERVE: &'static [(char, char)] = &[
('\u{034f}', '\u{034f}'),
('\u{200d}', '\u{200d}'),
('\u{17b4}', '\u{17b5}'),
('\u{180b}', '\u{180d}'),
('\u{e0061}', '\u{e007a}'),
('\u{e007f}', '\u{e007f}'),
];
fn contains(c: char, list: &[(char, char)]) -> bool {
for (start, end) in list {
if c < *start {
return false;
}
if c <= *end {
return true;
}
}
false
}

View file

@ -1,5 +1,5 @@
use crate::{
display_map::{InlayOffset, ToDisplayPoint},
display_map::{invisibles::is_invisible, InlayOffset, ToDisplayPoint},
hover_links::{InlayHighlight, RangeInEditor},
scroll::ScrollAmount,
Anchor, AnchorRangeExt, DisplayPoint, DisplayRow, Editor, EditorSettings, EditorSnapshot,
@ -11,7 +11,7 @@ use gpui::{
StyleRefinement, Styled, Task, TextStyleRefinement, View, ViewContext,
};
use itertools::Itertools;
use language::{DiagnosticEntry, Language, LanguageRegistry};
use language::{Diagnostic, DiagnosticEntry, Language, LanguageRegistry};
use lsp::DiagnosticSeverity;
use markdown::{Markdown, MarkdownStyle};
use multi_buffer::ToOffset;
@ -259,7 +259,7 @@ fn show_hover(
}
// If there's a diagnostic, assign it on the hover state and notify
let local_diagnostic = snapshot
let mut local_diagnostic = snapshot
.buffer_snapshot
.diagnostics_in_range::<_, usize>(anchor..anchor, false)
// Find the entry with the most specific range
@ -280,6 +280,41 @@ fn show_hover(
range: entry.range.to_anchors(&snapshot.buffer_snapshot),
})
});
if let Some(invisible) = snapshot
.buffer_snapshot
.chars_at(anchor)
.next()
.filter(|&c| is_invisible(c))
{
let after = snapshot.buffer_snapshot.anchor_after(
anchor.to_offset(&snapshot.buffer_snapshot) + invisible.len_utf8(),
);
local_diagnostic = Some(DiagnosticEntry {
diagnostic: Diagnostic {
severity: DiagnosticSeverity::HINT,
message: format!("Unicode character U+{:02X}", invisible as u32),
..Default::default()
},
range: anchor..after,
})
} else if let Some(invisible) = snapshot
.buffer_snapshot
.reversed_chars_at(anchor)
.next()
.filter(|&c| is_invisible(c))
{
let before = snapshot.buffer_snapshot.anchor_before(
anchor.to_offset(&snapshot.buffer_snapshot) - invisible.len_utf8(),
);
local_diagnostic = Some(DiagnosticEntry {
diagnostic: Diagnostic {
severity: DiagnosticSeverity::HINT,
message: format!("Unicode character U+{:02X}", invisible as u32),
..Default::default()
},
range: before..anchor,
})
}
let diagnostic_popover = if let Some(local_diagnostic) = local_diagnostic {
let text = match local_diagnostic.diagnostic.source {

View file

@ -1,6 +1,7 @@
use crate::{
black, fill, point, px, size, Bounds, Hsla, LineLayout, Pixels, Point, Result, SharedString,
StrikethroughStyle, UnderlineStyle, WindowContext, WrapBoundary, WrappedLineLayout,
black, fill, point, px, size, Bounds, Half, Hsla, LineLayout, Pixels, Point, Result,
SharedString, StrikethroughStyle, UnderlineStyle, WindowContext, WrapBoundary,
WrappedLineLayout,
};
use derive_more::{Deref, DerefMut};
use smallvec::SmallVec;
@ -129,8 +130,9 @@ fn paint_line(
let text_system = cx.text_system().clone();
let mut glyph_origin = origin;
let mut prev_glyph_position = Point::default();
let mut max_glyph_size = size(px(0.), px(0.));
for (run_ix, run) in layout.runs.iter().enumerate() {
let max_glyph_size = text_system.bounding_box(run.font_id, layout.font_size).size;
max_glyph_size = text_system.bounding_box(run.font_id, layout.font_size).size;
for (glyph_ix, glyph) in run.glyphs.iter().enumerate() {
glyph_origin.x += glyph.position.x - prev_glyph_position.x;
@ -139,6 +141,9 @@ fn paint_line(
wraps.next();
if let Some((background_origin, background_color)) = current_background.as_mut()
{
if glyph_origin.x == background_origin.x {
background_origin.x -= max_glyph_size.width.half()
}
cx.paint_quad(fill(
Bounds {
origin: *background_origin,
@ -150,6 +155,9 @@ fn paint_line(
background_origin.y += line_height;
}
if let Some((underline_origin, underline_style)) = current_underline.as_mut() {
if glyph_origin.x == underline_origin.x {
underline_origin.x -= max_glyph_size.width.half();
};
cx.paint_underline(
*underline_origin,
glyph_origin.x - underline_origin.x,
@ -161,6 +169,9 @@ fn paint_line(
if let Some((strikethrough_origin, strikethrough_style)) =
current_strikethrough.as_mut()
{
if glyph_origin.x == strikethrough_origin.x {
strikethrough_origin.x -= max_glyph_size.width.half();
};
cx.paint_strikethrough(
*strikethrough_origin,
glyph_origin.x - strikethrough_origin.x,
@ -179,7 +190,18 @@ fn paint_line(
let mut finished_underline: Option<(Point<Pixels>, UnderlineStyle)> = None;
let mut finished_strikethrough: Option<(Point<Pixels>, StrikethroughStyle)> = None;
if glyph.index >= run_end {
if let Some(style_run) = decoration_runs.next() {
let mut style_run = decoration_runs.next();
// ignore style runs that apply to a partial glyph
while let Some(run) = style_run {
if glyph.index < run_end + (run.len as usize) {
break;
}
run_end += run.len as usize;
style_run = decoration_runs.next();
}
if let Some(style_run) = style_run {
if let Some((_, background_color)) = &mut current_background {
if style_run.background_color.as_ref() != Some(background_color) {
finished_background = current_background.take();
@ -239,17 +261,24 @@ fn paint_line(
}
}
if let Some((background_origin, background_color)) = finished_background {
if let Some((mut background_origin, background_color)) = finished_background {
let mut width = glyph_origin.x - background_origin.x;
if background_origin.x == glyph_origin.x {
background_origin.x -= max_glyph_size.width.half();
};
cx.paint_quad(fill(
Bounds {
origin: background_origin,
size: size(glyph_origin.x - background_origin.x, line_height),
size: size(width, line_height),
},
background_color,
));
}
if let Some((underline_origin, underline_style)) = finished_underline {
if let Some((mut underline_origin, underline_style)) = finished_underline {
if underline_origin.x == glyph_origin.x {
underline_origin.x -= max_glyph_size.width.half();
};
cx.paint_underline(
underline_origin,
glyph_origin.x - underline_origin.x,
@ -257,7 +286,12 @@ fn paint_line(
);
}
if let Some((strikethrough_origin, strikethrough_style)) = finished_strikethrough {
if let Some((mut strikethrough_origin, strikethrough_style)) =
finished_strikethrough
{
if strikethrough_origin.x == glyph_origin.x {
strikethrough_origin.x -= max_glyph_size.width.half();
};
cx.paint_strikethrough(
strikethrough_origin,
glyph_origin.x - strikethrough_origin.x,
@ -299,7 +333,10 @@ fn paint_line(
last_line_end_x -= glyph.position.x;
}
if let Some((background_origin, background_color)) = current_background.take() {
if let Some((mut background_origin, background_color)) = current_background.take() {
if last_line_end_x == background_origin.x {
background_origin.x -= max_glyph_size.width.half()
};
cx.paint_quad(fill(
Bounds {
origin: background_origin,
@ -309,7 +346,10 @@ fn paint_line(
));
}
if let Some((underline_start, underline_style)) = current_underline.take() {
if let Some((mut underline_start, underline_style)) = current_underline.take() {
if last_line_end_x == underline_start.x {
underline_start.x -= max_glyph_size.width.half()
};
cx.paint_underline(
underline_start,
last_line_end_x - underline_start.x,
@ -317,7 +357,10 @@ fn paint_line(
);
}
if let Some((strikethrough_start, strikethrough_style)) = current_strikethrough.take() {
if let Some((mut strikethrough_start, strikethrough_style)) = current_strikethrough.take() {
if last_line_end_x == strikethrough_start.x {
strikethrough_start.x -= max_glyph_size.width.half()
};
cx.paint_strikethrough(
strikethrough_start,
last_line_end_x - strikethrough_start.x,