From 30f3efe6976ca1e6b13eb605f17c4a5be13e6bd2 Mon Sep 17 00:00:00 2001 From: Michael Sloan Date: Fri, 9 May 2025 13:38:03 +0200 Subject: [PATCH] Revert unnecessary "fix handling of unicode when counting codeblock lines" + document (#30368) After merging #30364 I realized that the fix was unnecessary, and that the code was more efficient before. UTF-8 never uses bytes in the standard 0-127 ASCII range as part of a multi-byte char, so matching ASCII bytes gives the same result as matching chars. So this reverts that change and documents why the code is valid. Release Notes: - N/A --- crates/markdown/src/markdown.rs | 1 + crates/markdown/src/parser.rs | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/markdown/src/markdown.rs b/crates/markdown/src/markdown.rs index 8ef88901f3..998a180e63 100644 --- a/crates/markdown/src/markdown.rs +++ b/crates/markdown/src/markdown.rs @@ -223,6 +223,7 @@ impl Markdown { } pub fn escape(s: &str) -> Cow { + // Valid to use bytes since multi-byte UTF-8 doesn't use ASCII chars. let count = s .bytes() .filter(|c| *c == b'\n' || c.is_ascii_punctuation()) diff --git a/crates/markdown/src/parser.rs b/crates/markdown/src/parser.rs index e5e5847289..debfe5245f 100644 --- a/crates/markdown/src/parser.rs +++ b/crates/markdown/src/parser.rs @@ -79,9 +79,10 @@ pub fn parse_markdown( let content_range = content_range.start + range.start..content_range.end + range.start; + // Valid to use bytes since multi-byte UTF-8 doesn't use ASCII chars. let line_count = text[content_range.clone()] - .chars() - .filter(|c| *c == '\n') + .bytes() + .filter(|c| *c == b'\n') .count(); let metadata = CodeBlockMetadata { content_range,