diff --git a/crates/editor/src/hover_popover.rs b/crates/editor/src/hover_popover.rs index 1d322118b6..f5126a6c3e 100644 --- a/crates/editor/src/hover_popover.rs +++ b/crates/editor/src/hover_popover.rs @@ -951,8 +951,10 @@ mod tests { for (range, event) in slice.iter() { match event { - MarkdownEvent::Text(parsed) => rendered_text.push_str(parsed), - MarkdownEvent::Code => rendered_text.push_str(&text[range.clone()]), + MarkdownEvent::SubstitutedText(parsed) => rendered_text.push_str(parsed), + MarkdownEvent::Text | MarkdownEvent::Code => { + rendered_text.push_str(&text[range.clone()]) + } _ => {} } } diff --git a/crates/markdown/src/markdown.rs b/crates/markdown/src/markdown.rs index c8da8dcd64..6db397aea5 100644 --- a/crates/markdown/src/markdown.rs +++ b/crates/markdown/src/markdown.rs @@ -868,8 +868,11 @@ impl Element for MarkdownElement { } _ => log::debug!("unsupported markdown tag end: {:?}", tag), }, - MarkdownEvent::Text(parsed) => { - builder.push_text(parsed, range.start); + MarkdownEvent::Text => { + builder.push_text(&parsed_markdown.source[range.clone()], range.start); + } + MarkdownEvent::SubstitutedText(text) => { + builder.push_text(text, range.start); } MarkdownEvent::Code => { builder.push_text_style(self.style.inline_code.clone()); diff --git a/crates/markdown/src/parser.rs b/crates/markdown/src/parser.rs index b4c81493d4..e5877f3b16 100644 --- a/crates/markdown/src/parser.rs +++ b/crates/markdown/src/parser.rs @@ -1,8 +1,13 @@ use gpui::SharedString; use linkify::LinkFinder; pub use pulldown_cmark::TagEnd as MarkdownTagEnd; -use pulldown_cmark::{Alignment, HeadingLevel, LinkType, MetadataBlockKind, Options, Parser}; -use std::{collections::HashSet, ops::Range}; +use pulldown_cmark::{ + Alignment, HeadingLevel, InlineStr, LinkType, MetadataBlockKind, Options, Parser, +}; +use std::{ + collections::HashSet, + ops::{Deref, Range}, +}; const PARSE_OPTIONS: Options = Options::ENABLE_TABLES .union(Options::ENABLE_FOOTNOTES) @@ -49,49 +54,60 @@ pub fn parse_markdown(text: &str) -> (Vec<(Range, MarkdownEvent)>, HashSe events.push((range, MarkdownEvent::End(tag))); } pulldown_cmark::Event::Text(parsed) => { - // Automatically detect links in text if we're not already within a markdown - // link. - let mut parsed = parsed.as_ref(); - if !within_link { - let mut finder = LinkFinder::new(); - finder.kinds(&[linkify::LinkKind::Url]); - let text_range = range.clone(); - for link in finder.links(&text[text_range.clone()]) { - let link_range = - text_range.start + link.start()..text_range.start + link.end(); - - if link_range.start > range.start { - let (text, tail) = parsed.split_at(link_range.start - range.start); - events.push(( - range.start..link_range.start, - MarkdownEvent::Text(SharedString::new(text)), - )); - parsed = tail; - } - - events.push(( - link_range.clone(), - MarkdownEvent::Start(MarkdownTag::Link { - link_type: LinkType::Autolink, - dest_url: SharedString::from(link.as_str().to_string()), - title: SharedString::default(), - id: SharedString::default(), - }), - )); - - let (link_text, tail) = parsed.split_at(link_range.end - link_range.start); - events.push(( - link_range.clone(), - MarkdownEvent::Text(SharedString::new(link_text)), - )); - events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link))); - - range.start = link_range.end; - parsed = tail; + // `parsed` will share bytes with the input unless a substitution like handling of + // HTML entities or smart punctuation has occurred. When these substitutions occur, + // `parsed` only consists of the result of a single substitution. + if !cow_str_points_inside(&parsed, text) { + // Attempt to detect cases where the assumptions here are not valid or the + // behavior has changed. + if parsed.len() > 4 { + log::error!( + "Bug in markdown parser. \ + pulldown_cmark::Event::Text expected to a substituted HTML entity, \ + but it was longer than expected.\n\ + Source: {}\n\ + Parsed: {}", + &text[range.clone()], + parsed + ); + } + events.push((range, MarkdownEvent::SubstitutedText(parsed.into()))); + } else { + // Automatically detect links in text if not already within a markdown link. + if !within_link { + let mut finder = LinkFinder::new(); + finder.kinds(&[linkify::LinkKind::Url]); + let text_range = range.clone(); + for link in finder.links(&text[text_range.clone()]) { + let link_range = + text_range.start + link.start()..text_range.start + link.end(); + + if link_range.start > range.start { + events.push((range.start..link_range.start, MarkdownEvent::Text)); + } + + events.push(( + link_range.clone(), + MarkdownEvent::Start(MarkdownTag::Link { + link_type: LinkType::Autolink, + dest_url: SharedString::from(link.as_str().to_string()), + title: SharedString::default(), + id: SharedString::default(), + }), + )); + + events.push((link_range.clone(), MarkdownEvent::Text)); + events.push(( + link_range.clone(), + MarkdownEvent::End(MarkdownTagEnd::Link), + )); + + range.start = link_range.end; + } + } + if range.start < range.end { + events.push((range, MarkdownEvent::Text)); } - } - if range.start < range.end { - events.push((range, MarkdownEvent::Text(SharedString::new(parsed)))); } } pulldown_cmark::Event::Code(_) => { @@ -116,7 +132,7 @@ pub fn parse_markdown(text: &str) -> (Vec<(Range, MarkdownEvent)>, HashSe (events, languages) } -pub fn parse_links_only(mut text: &str) -> Vec<(Range, MarkdownEvent)> { +pub fn parse_links_only(text: &str) -> Vec<(Range, MarkdownEvent)> { let mut events = Vec::new(); let mut finder = LinkFinder::new(); finder.kinds(&[linkify::LinkKind::Url]); @@ -128,15 +144,9 @@ pub fn parse_links_only(mut text: &str) -> Vec<(Range, MarkdownEvent)> { let link_range = link.start()..link.end(); if link_range.start > text_range.start { - let (head, tail) = text.split_at(link_range.start - text_range.start); - events.push(( - text_range.start..link_range.start, - MarkdownEvent::Text(SharedString::new(head)), - )); - text = tail; + events.push((text_range.start..link_range.start, MarkdownEvent::Text)); } - let (link_text, tail) = text.split_at(link_range.end - link_range.start); events.push(( link_range.clone(), MarkdownEvent::Start(MarkdownTag::Link { @@ -146,18 +156,14 @@ pub fn parse_links_only(mut text: &str) -> Vec<(Range, MarkdownEvent)> { id: SharedString::default(), }), )); - events.push(( - link_range.clone(), - MarkdownEvent::Text(SharedString::new(link_text)), - )); + events.push((link_range.clone(), MarkdownEvent::Text)); events.push((link_range.clone(), MarkdownEvent::End(MarkdownTagEnd::Link))); text_range.start = link_range.end; - text = tail; } if text_range.end > text_range.start { - events.push((text_range, MarkdownEvent::Text(SharedString::new(text)))); + events.push((text_range, MarkdownEvent::Text)); } events @@ -173,8 +179,11 @@ pub enum MarkdownEvent { Start(MarkdownTag), /// End of a tagged element. End(MarkdownTagEnd), - /// A text node. - Text(SharedString), + /// Text that uses the associated range from the mardown source. + Text, + /// Text that differs from the markdown source - typically due to substitution of HTML entities + /// and smart punctuation. + SubstitutedText(CompactStr), /// An inline code node. Code, /// An HTML node. @@ -365,8 +374,71 @@ impl From> for MarkdownTag { } } +/// Represents either an owned or inline string. Motivation for this is to make `SubstitutedText` +/// more efficient - it fits within a `pulldown_cmark::InlineStr` in all known cases. +/// +/// Same as `pulldown_cmark::CowStr` but without the `Borrow` case. +#[derive(Clone, Debug)] +pub enum CompactStr { + Boxed(Box), + Inlined(InlineStr), +} + +impl Deref for CompactStr { + type Target = str; + + fn deref(&self) -> &str { + match self { + CompactStr::Boxed(b) => b, + CompactStr::Inlined(i) => i, + } + } +} + +impl From<&str> for CompactStr { + fn from(s: &str) -> Self { + if let Ok(inlined) = s.try_into() { + CompactStr::Inlined(inlined) + } else { + CompactStr::Boxed(s.into()) + } + } +} + +impl From> for CompactStr { + fn from(cow_str: pulldown_cmark::CowStr) -> Self { + match cow_str { + pulldown_cmark::CowStr::Boxed(b) => CompactStr::Boxed(b), + pulldown_cmark::CowStr::Borrowed(b) => b.into(), + pulldown_cmark::CowStr::Inlined(i) => CompactStr::Inlined(i), + } + } +} + +impl PartialEq for CompactStr { + fn eq(&self, other: &Self) -> bool { + self.deref() == other.deref() + } +} + +fn cow_str_points_inside(substring: &pulldown_cmark::CowStr, container: &str) -> bool { + match substring { + pulldown_cmark::CowStr::Boxed(b) => str_points_inside(b, container), + pulldown_cmark::CowStr::Borrowed(b) => str_points_inside(b, container), + pulldown_cmark::CowStr::Inlined(_) => false, + } +} + +fn str_points_inside(substring: &str, container: &str) -> bool { + let substring_ptr = substring.as_ptr(); + let container_ptr = container.as_ptr(); + unsafe { substring_ptr >= container_ptr && substring_ptr < container_ptr.add(container.len()) } +} + #[cfg(test)] mod tests { + use super::MarkdownEvent::*; + use super::MarkdownTag::*; use super::*; const UNWANTED_OPTIONS: Options = Options::ENABLE_YAML_STYLE_METADATA_BLOCKS @@ -387,4 +459,64 @@ mod tests { Options::empty() ); } + + #[test] + fn test_plain_urls_and_escaped_text() { + assert_eq!( + parse_markdown("   https://some.url some \\`►\\` text"), + ( + vec![ + (0..51, Start(Paragraph)), + (0..6, SubstitutedText("\u{a0}".into())), + (6..12, SubstitutedText("\u{a0}".into())), + (12..13, Text), + ( + 13..29, + Start(Link { + link_type: LinkType::Autolink, + dest_url: "https://some.url".into(), + title: "".into(), + id: "".into(), + }) + ), + (13..29, Text), + (13..29, End(MarkdownTagEnd::Link)), + (29..35, Text), + (36..37, Text), // Escaped backtick + (37..44, SubstitutedText("►".into())), + (45..46, Text), // Escaped backtick + (46..51, Text), + (0..51, End(MarkdownTagEnd::Paragraph)) + ], + HashSet::new() + ) + ); + } + + #[test] + fn test_smart_punctuation() { + assert_eq!( + parse_markdown("-- --- ... \"double quoted\" 'single quoted'"), + ( + vec![ + (0..42, Start(Paragraph)), + (0..2, SubstitutedText("–".into())), + (2..3, Text), + (3..6, SubstitutedText("—".into())), + (6..7, Text), + (7..10, SubstitutedText("…".into())), + (10..11, Text), + (11..12, SubstitutedText("“".into())), + (12..25, Text), + (25..26, SubstitutedText("”".into())), + (26..27, Text), + (27..28, SubstitutedText("‘".into())), + (28..41, Text), + (41..42, SubstitutedText("’".into())), + (0..42, End(MarkdownTagEnd::Paragraph)) + ], + HashSet::new() + ) + ) + } }