// crates/util/src/markdown.rs
//
// Helpers for producing Markdown text from arbitrary strings.
//
// Integration notes for the change that introduces this module:
//
// * `crates/util/src/util.rs` exposes it with `pub mod markdown;`.
// * `crates/repl/src/outputs/table.rs` imports `util::markdown::MarkdownString` and, in
//   `create_clipboard_content`, maps every cell through `MarkdownString::escape(..).0` before
//   joining the row with " | ". This replaces that file's private `escape_markdown` helper,
//   which escaped `|`, `*`, `_`, `` ` ``, `[`, `]`, `<` and `>`. Because cells are joined with
//   pipes, `escape` below must keep escaping `|`.

use std::fmt::{Display, Formatter};

/// Markdown text.
///
/// The wrapped `String` is already valid Markdown source; use [`MarkdownString::escape`] or
/// [`MarkdownString::inline_code`] to build one from untrusted plain text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownString(pub String);

impl Display for MarkdownString {
    /// Writes the Markdown source verbatim.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}

impl MarkdownString {
    /// Escapes markdown special characters so that `text` renders as literal text.
    ///
    /// Most special characters are prefixed with a backslash. `<` and `>` are replaced with the
    /// HTML entities `&lt;` and `&gt;` instead, since Markdown supports HTML entities and this
    /// allows the result to be used directly in HTML.
    ///
    /// Also escapes the following markdown extensions:
    ///
    /// * `^` for superscripts
    /// * `$` for inline math
    /// * `~` for strikethrough
    /// * `|` for table cell separators
    ///
    /// Escape of some characters is unnecessary because while they are involved in markdown
    /// syntax, the other characters involved are escaped:
    ///
    /// * `!`, `]`, `(`, and `)` are used in link syntax, but `[` is escaped so these are parsed as
    ///   plaintext.
    ///
    /// * `;` is used in HTML entity syntax, but `&` is escaped, so they are parsed as plaintext.
    ///
    /// TODO: There is one escape this doesn't do currently. Period after numbers at the start of
    /// the line (`[0-9]*\.`) should also be escaped to avoid it being interpreted as a list item.
    pub fn escape(text: &str) -> Self {
        // Every escape only ever grows the text, so the input length is a good lower bound.
        let mut escaped = String::with_capacity(text.len());
        for c in text.chars() {
            match c {
                // Always escaped.
                '\\' | '`' | '*' | '_' | '[' | '^' | '$' | '~' | '&'
                // Escaped so the result can be used inside table cells, which are delimited
                // by pipes.
                | '|'
                // TODO: these only need to be escaped when they are the first non-whitespace
                // character of the line of a block. There should probably be both an
                // `escape_block` which does this and an `escape_inline` method which does not
                // escape these.
                | '#' | '+' | '=' | '-' => {
                    escaped.push('\\');
                    escaped.push(c);
                }
                // Escaped since `<` is used in opening HTML tags.
                '<' => escaped.push_str("&lt;"),
                // Escaped since `>` is used for blockquotes.
                '>' => escaped.push_str("&gt;"),
                _ => escaped.push(c),
            }
        }
        Self(escaped)
    }

    /// Returns markdown for inline code (wrapped in backticks), handling code that contains
    /// backticks and spaces. All whitespace is treated as a single space character. For text that
    /// does not contain whitespace other than ' ', this escaping roundtrips through
    /// pulldown-cmark.
    ///
    /// When used in tables, `|` should be escaped like `\|` in the text provided to this function.
    pub fn inline_code(text: &str) -> Self {
        // Apache License 2.0, same as this crate.
        //
        // Copied from `pulldown-cmark-to-cmark-20.0.0` with modifications:
        //
        // * Handling of all whitespace. pulldown-cmark-to-cmark is anticipating
        //   `Code` events parsed by pulldown-cmark.
        //
        // * Direct return of string.
        //
        // https://github.com/Byron/pulldown-cmark-to-cmark/blob/3c850de2d3d1d79f19ca5f375e1089a653cf3ff7/src/lib.rs#L290

        // Every whitespace character (tabs, newlines, ...) becomes a plain space.
        let normalized: String = text
            .chars()
            .map(|c| if c.is_whitespace() { ' ' } else { c })
            .collect();

        // When inline code has leading and trailing ' ' characters, additional space is needed
        // to escape it, unless all characters are space.
        if normalized.chars().all(|c| c == ' ') {
            return Self(format!("`{normalized}`"));
        }

        // More backticks are needed to delimit the inline code than the maximum number of
        // backticks in a consecutive run.
        let backticks = "`".repeat(count_max_consecutive_chars(&normalized, '`') + 1);
        let space = match normalized.as_bytes() {
            // Space needed to separate a backtick in the code from the delimiter.
            [b'`', ..] | [.., b'`'] => " ",
            // Space needed to escape the inner spaces, since one leading and one trailing
            // space are stripped when both are present.
            [b' ', .., b' '] => " ",
            // No space needed.
            _ => "",
        };
        Self(format!("{backticks}{space}{normalized}{space}{backticks}"))
    }
}

// Adapted from `pulldown-cmark-to-cmark-20.0.0` with changed names and a simplified loop.
// https://github.com/Byron/pulldown-cmark-to-cmark/blob/3c850de2d3d1d79f19ca5f375e1089a653cf3ff7/src/lib.rs#L1063
// Apache License 2.0, same as this code.
/// Returns the length of the longest run of consecutive `search` characters in `text`, or 0 when
/// `search` does not occur.
fn count_max_consecutive_chars(text: &str, search: char) -> usize {
    let mut max_count = 0;
    let mut cur_count = 0;

    for ch in text.chars() {
        if ch == search {
            cur_count += 1;
            max_count = max_count.max(cur_count);
        } else {
            cur_count = 0;
        }
    }
    max_count
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_markdown_string_escape() {
        let input = r#"
        # Heading

        Another heading
        ===

        Another heading variant
        ---

        Paragraph with [link](https://example.com) and `code`, *emphasis*, and ~strikethrough~.

        ```
        code block
        ```

        List with varying leaders:
          - Item 1
          * Item 2
          + Item 3

        Some math:  $`\sqrt{3x-1}+(1+x)^2`$

        HTML entity: &nbsp;
        "#;

        let expected = r#"
        \# Heading

        Another heading
        \=\=\=

        Another heading variant
        \-\-\-

        Paragraph with \[link](https://example.com) and \`code\`, \*emphasis\*, and \~strikethrough\~.

        \`\`\`
        code block
        \`\`\`

        List with varying leaders:
          \- Item 1
          \* Item 2
          \+ Item 3

        Some math:  \$\`\\sqrt{3x\-1}\+(1\+x)\^2\`\$

        HTML entity: \&nbsp;
        "#;

        assert_eq!(MarkdownString::escape(input).0, expected);
    }

    #[test]
    fn test_markdown_string_escape_html_and_table_syntax() {
        assert_eq!(
            MarkdownString::escape("<b>bold</b>").0,
            "&lt;b&gt;bold&lt;/b&gt;"
        );
        assert_eq!(MarkdownString::escape("> quote").0, "&gt; quote");
        assert_eq!(MarkdownString::escape("a | b").0, "a \\| b");
        assert_eq!(MarkdownString::escape("").0, "");
    }

    #[test]
    fn test_markdown_string_inline_code() {
        assert_eq!(MarkdownString::inline_code(" ").0, "` `");
        assert_eq!(MarkdownString::inline_code("text").0, "`text`");
        assert_eq!(MarkdownString::inline_code("text ").0, "`text `");
        assert_eq!(MarkdownString::inline_code(" text ").0, "`  text  `");
        assert_eq!(MarkdownString::inline_code("`").0, "`` ` ``");
        assert_eq!(MarkdownString::inline_code("``").0, "``` `` ```");
        assert_eq!(MarkdownString::inline_code("`text`").0, "`` `text` ``");
        assert_eq!(
            MarkdownString::inline_code("some `text` no leading or trailing backticks").0,
            "``some `text` no leading or trailing backticks``"
        );
    }

    #[test]
    fn test_count_max_consecutive_chars() {
        assert_eq!(
            count_max_consecutive_chars("``a```b``", '`'),
            3,
            "the highest seen consecutive segment of backticks counts"
        );
        assert_eq!(
            count_max_consecutive_chars("```a``b`", '`'),
            3,
            "it can't be downgraded later"
        );
    }
}