markdown: Track code block metadata in parser (#28543)
This lets us avoid scanning the code block content for newlines on every frame in `active_thread`.

Release Notes:

- N/A
This commit is contained in:
parent
73305ce45e
commit
44cb8e582b
3 changed files with 247 additions and 203 deletions
|
@ -18,6 +18,7 @@ use gpui::{
|
|||
TextStyleRefinement, actions, point, quad,
|
||||
};
|
||||
use language::{Language, LanguageRegistry, Rope};
|
||||
use parser::CodeBlockMetadata;
|
||||
use parser::{MarkdownEvent, MarkdownTag, MarkdownTagEnd, parse_links_only, parse_markdown};
|
||||
use pulldown_cmark::Alignment;
|
||||
use sum_tree::TreeMap;
|
||||
|
@ -99,10 +100,19 @@ pub enum CodeBlockRenderer {
|
|||
},
|
||||
}
|
||||
|
||||
pub type CodeBlockRenderFn =
|
||||
Arc<dyn Fn(&CodeBlockKind, &ParsedMarkdown, Range<usize>, &mut Window, &App) -> Div>;
|
||||
pub type CodeBlockRenderFn = Arc<
|
||||
dyn Fn(
|
||||
&CodeBlockKind,
|
||||
&ParsedMarkdown,
|
||||
Range<usize>,
|
||||
CodeBlockMetadata,
|
||||
&mut Window,
|
||||
&App,
|
||||
) -> Div,
|
||||
>;
|
||||
|
||||
pub type CodeBlockTransformFn = Arc<dyn Fn(AnyDiv, Range<usize>, &mut Window, &App) -> AnyDiv>;
|
||||
pub type CodeBlockTransformFn =
|
||||
Arc<dyn Fn(AnyDiv, Range<usize>, CodeBlockMetadata, &mut Window, &App) -> AnyDiv>;
|
||||
|
||||
actions!(markdown, [Copy, CopyAsMarkdown]);
|
||||
|
||||
|
@ -603,6 +613,8 @@ impl Element for MarkdownElement {
|
|||
0
|
||||
};
|
||||
|
||||
let mut current_code_block_metadata = None;
|
||||
|
||||
for (range, event) in parsed_markdown.events.iter() {
|
||||
match event {
|
||||
MarkdownEvent::Start(tag) => {
|
||||
|
@ -641,7 +653,7 @@ impl Element for MarkdownElement {
|
|||
markdown_end,
|
||||
);
|
||||
}
|
||||
MarkdownTag::CodeBlock(kind) => {
|
||||
MarkdownTag::CodeBlock { kind, metadata } => {
|
||||
let language = match kind {
|
||||
CodeBlockKind::Fenced => None,
|
||||
CodeBlockKind::FencedLang(language) => {
|
||||
|
@ -654,6 +666,8 @@ impl Element for MarkdownElement {
|
|||
_ => None,
|
||||
};
|
||||
|
||||
current_code_block_metadata = Some(metadata.clone());
|
||||
|
||||
let is_indented = matches!(kind, CodeBlockKind::Indented);
|
||||
|
||||
match (&self.code_block_renderer, is_indented) {
|
||||
|
@ -686,8 +700,14 @@ impl Element for MarkdownElement {
|
|||
builder.push_div(code_block, range, markdown_end);
|
||||
}
|
||||
(CodeBlockRenderer::Custom { render, .. }, _) => {
|
||||
let parent_container =
|
||||
render(kind, &parsed_markdown, range.clone(), window, cx);
|
||||
let parent_container = render(
|
||||
kind,
|
||||
&parsed_markdown,
|
||||
range.clone(),
|
||||
metadata.clone(),
|
||||
window,
|
||||
cx,
|
||||
);
|
||||
|
||||
builder.push_div(parent_container, range, markdown_end);
|
||||
|
||||
|
@ -852,12 +872,22 @@ impl Element for MarkdownElement {
|
|||
builder.pop_text_style();
|
||||
}
|
||||
|
||||
let metadata = current_code_block_metadata.take();
|
||||
|
||||
if let CodeBlockRenderer::Custom {
|
||||
transform: Some(modify),
|
||||
transform: Some(transform),
|
||||
..
|
||||
} = &self.code_block_renderer
|
||||
{
|
||||
builder.modify_current_div(|el| modify(el, range.clone(), window, cx));
|
||||
builder.modify_current_div(|el| {
|
||||
transform(
|
||||
el,
|
||||
range.clone(),
|
||||
metadata.clone().unwrap_or_default(),
|
||||
window,
|
||||
cx,
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
if matches!(
|
||||
|
@ -866,9 +896,13 @@ impl Element for MarkdownElement {
|
|||
) {
|
||||
builder.flush_text();
|
||||
builder.modify_current_div(|el| {
|
||||
let code =
|
||||
without_fences(parsed_markdown.source()[range.clone()].trim())
|
||||
.to_string();
|
||||
let content_range = parser::extract_code_block_content_range(
|
||||
parsed_markdown.source()[range.clone()].trim(),
|
||||
);
|
||||
let content_range = content_range.start + range.start
|
||||
..content_range.end + range.start;
|
||||
|
||||
let code = parsed_markdown.source()[content_range].to_string();
|
||||
let codeblock = render_copy_code_block_button(
|
||||
range.end,
|
||||
code,
|
||||
|
@ -1507,43 +1541,3 @@ impl RenderedText {
|
|||
.find(|link| link.source_range.contains(&source_index))
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the code inside a markdown code block, without its ``` fences.
///
/// Indented blocks (and any text containing no fence) are returned unchanged.
/// For fenced blocks, everything up to and including the first newline after the
/// opening fence is dropped (which also removes a language name, if present), and
/// the text is cut off at the last ``` that remains.
/// Used when copying code blocks to the clipboard.
pub fn without_fences(markdown: &str) -> &str {
    let after_opening = match markdown.find("```") {
        Some(fence_ix) => {
            let fenced = &markdown[fence_ix..];
            // Skip the rest of the fence line; if there is no newline, keep the
            // text starting at the fence so the closing search below handles it.
            fenced.split_once('\n').map_or(fenced, |(_, rest)| rest)
        }
        None => markdown,
    };

    match after_opening.rfind("```") {
        Some(closing_ix) => &after_opening[..closing_ix],
        None => after_opening,
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_without_fences() {
|
||||
let input = "```rust\nlet x = 5;\n```";
|
||||
assert_eq!(without_fences(input), "let x = 5;\n");
|
||||
|
||||
let input = " ```\nno language\n``` ";
|
||||
assert_eq!(without_fences(input), "no language\n");
|
||||
|
||||
let input = "plain text";
|
||||
assert_eq!(without_fences(input), "plain text");
|
||||
|
||||
let input = "```python\nprint('hello')\nprint('world')\n```";
|
||||
assert_eq!(without_fences(input), "print('hello')\nprint('world')\n");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -65,11 +65,33 @@ pub fn parse_markdown(
|
|||
within_metadata = true;
|
||||
MarkdownTag::MetadataBlock(kind)
|
||||
}
|
||||
pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Indented) => {
|
||||
MarkdownTag::CodeBlock {
|
||||
kind: CodeBlockKind::Indented,
|
||||
metadata: CodeBlockMetadata {
|
||||
content_range: range.start + 1..range.end + 1,
|
||||
line_count: 1,
|
||||
},
|
||||
}
|
||||
}
|
||||
pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(
|
||||
ref info,
|
||||
)) => {
|
||||
let content_range = extract_code_block_content_range(&text[range.clone()]);
|
||||
let content_range =
|
||||
content_range.start + range.start..content_range.end + range.start;
|
||||
|
||||
let line_count = text[content_range.clone()]
|
||||
.bytes()
|
||||
.filter(|c| *c == b'\n')
|
||||
.count();
|
||||
let metadata = CodeBlockMetadata {
|
||||
content_range,
|
||||
line_count,
|
||||
};
|
||||
|
||||
let info = info.trim();
|
||||
MarkdownTag::CodeBlock(if info.is_empty() {
|
||||
let kind = if info.is_empty() {
|
||||
CodeBlockKind::Fenced
|
||||
// Languages should never contain a slash, and PathRanges always should.
|
||||
// (Models are told to specify them relative to a workspace root.)
|
||||
|
@ -81,9 +103,68 @@ pub fn parse_markdown(
|
|||
let language = SharedString::from(info.to_string());
|
||||
language_names.insert(language.clone());
|
||||
CodeBlockKind::FencedLang(language)
|
||||
})
|
||||
};
|
||||
|
||||
MarkdownTag::CodeBlock { kind, metadata }
|
||||
}
|
||||
pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
|
||||
pulldown_cmark::Tag::Heading {
|
||||
level,
|
||||
id,
|
||||
classes,
|
||||
attrs,
|
||||
} => {
|
||||
let id = id.map(|id| SharedString::from(id.into_string()));
|
||||
let classes = classes
|
||||
.into_iter()
|
||||
.map(|c| SharedString::from(c.into_string()))
|
||||
.collect();
|
||||
let attrs = attrs
|
||||
.into_iter()
|
||||
.map(|(key, value)| {
|
||||
(
|
||||
SharedString::from(key.into_string()),
|
||||
value.map(|v| SharedString::from(v.into_string())),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
MarkdownTag::Heading {
|
||||
level,
|
||||
id,
|
||||
classes,
|
||||
attrs,
|
||||
}
|
||||
}
|
||||
pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
|
||||
pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
|
||||
pulldown_cmark::Tag::Item => MarkdownTag::Item,
|
||||
pulldown_cmark::Tag::FootnoteDefinition(label) => {
|
||||
MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
|
||||
}
|
||||
pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
|
||||
pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
|
||||
pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
|
||||
pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
|
||||
pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
|
||||
pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
|
||||
pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
|
||||
pulldown_cmark::Tag::Image {
|
||||
link_type,
|
||||
dest_url,
|
||||
title,
|
||||
id,
|
||||
} => MarkdownTag::Image {
|
||||
link_type,
|
||||
dest_url: SharedString::from(dest_url.into_string()),
|
||||
title: SharedString::from(title.into_string()),
|
||||
id: SharedString::from(id.into_string()),
|
||||
},
|
||||
pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
|
||||
pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
|
||||
pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
|
||||
pulldown_cmark::Tag::DefinitionListDefinition => {
|
||||
MarkdownTag::DefinitionListDefinition
|
||||
}
|
||||
tag => tag.into(),
|
||||
};
|
||||
events.push((range, MarkdownEvent::Start(tag)))
|
||||
}
|
||||
|
@ -252,7 +333,10 @@ pub enum MarkdownTag {
|
|||
BlockQuote,
|
||||
|
||||
/// A code block.
|
||||
CodeBlock(CodeBlockKind),
|
||||
CodeBlock {
|
||||
kind: CodeBlockKind,
|
||||
metadata: CodeBlockMetadata,
|
||||
},
|
||||
|
||||
/// A HTML block.
|
||||
HtmlBlock,
|
||||
|
@ -323,96 +407,26 @@ pub enum CodeBlockKind {
|
|||
FencedSrc(PathWithRange),
|
||||
}
|
||||
|
||||
impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
|
||||
fn from(tag: pulldown_cmark::Tag) -> Self {
|
||||
match tag {
|
||||
pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
|
||||
pulldown_cmark::Tag::Heading {
|
||||
level,
|
||||
id,
|
||||
classes,
|
||||
attrs,
|
||||
} => {
|
||||
let id = id.map(|id| SharedString::from(id.into_string()));
|
||||
let classes = classes
|
||||
.into_iter()
|
||||
.map(|c| SharedString::from(c.into_string()))
|
||||
.collect();
|
||||
let attrs = attrs
|
||||
.into_iter()
|
||||
.map(|(key, value)| {
|
||||
(
|
||||
SharedString::from(key.into_string()),
|
||||
value.map(|v| SharedString::from(v.into_string())),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
MarkdownTag::Heading {
|
||||
level,
|
||||
id,
|
||||
classes,
|
||||
attrs,
|
||||
}
|
||||
}
|
||||
pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
|
||||
pulldown_cmark::Tag::CodeBlock(kind) => match kind {
|
||||
pulldown_cmark::CodeBlockKind::Indented => {
|
||||
MarkdownTag::CodeBlock(CodeBlockKind::Indented)
|
||||
}
|
||||
pulldown_cmark::CodeBlockKind::Fenced(info) => {
|
||||
let info = info.trim();
|
||||
MarkdownTag::CodeBlock(if info.is_empty() {
|
||||
CodeBlockKind::Fenced
|
||||
} else if info.contains('/') {
|
||||
// Languages should never contain a slash, and PathRanges always should.
|
||||
// (Models are told to specify them relative to a workspace root.)
|
||||
CodeBlockKind::FencedSrc(PathWithRange::new(info))
|
||||
} else {
|
||||
CodeBlockKind::FencedLang(SharedString::from(info.to_string()))
|
||||
})
|
||||
}
|
||||
},
|
||||
pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
|
||||
pulldown_cmark::Tag::Item => MarkdownTag::Item,
|
||||
pulldown_cmark::Tag::FootnoteDefinition(label) => {
|
||||
MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
|
||||
}
|
||||
pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
|
||||
pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
|
||||
pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
|
||||
pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
|
||||
pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
|
||||
pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
|
||||
pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
|
||||
pulldown_cmark::Tag::Link {
|
||||
link_type,
|
||||
dest_url,
|
||||
title,
|
||||
id,
|
||||
} => MarkdownTag::Link {
|
||||
link_type,
|
||||
dest_url: SharedString::from(dest_url.into_string()),
|
||||
title: SharedString::from(title.into_string()),
|
||||
id: SharedString::from(id.into_string()),
|
||||
},
|
||||
pulldown_cmark::Tag::Image {
|
||||
link_type,
|
||||
dest_url,
|
||||
title,
|
||||
id,
|
||||
} => MarkdownTag::Image {
|
||||
link_type,
|
||||
dest_url: SharedString::from(dest_url.into_string()),
|
||||
title: SharedString::from(title.into_string()),
|
||||
id: SharedString::from(id.into_string()),
|
||||
},
|
||||
pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
|
||||
pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
|
||||
pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
|
||||
pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
|
||||
pulldown_cmark::Tag::DefinitionListDefinition => MarkdownTag::DefinitionListDefinition,
|
||||
/// Metadata about a code block, recorded by the parser and attached to `MarkdownTag::CodeBlock`.
#[derive(Default, Clone, Debug, PartialEq)]
|
||||
pub struct CodeBlockMetadata {
|
||||
/// Byte range of the block's content within the parsed source text.
pub content_range: Range<usize>,
|
||||
/// Line count for the content; for fenced blocks the parser counts the `\n` bytes in `content_range`.
pub line_count: usize,
|
||||
}
|
||||
|
||||
/// Returns the byte range of a code block's content within `text`.
///
/// If `text` starts with a ``` fence, the range begins after the first newline that
/// follows the fence (skipping an info string such as a language name), or directly
/// after the fence when there is no newline. If `text` also ends with ```, that
/// closing fence is excluded. Text without fences yields `0..text.len()`.
///
/// The returned range always satisfies `start <= end`, so it can be used to slice
/// `text` without panicking.
pub(crate) fn extract_code_block_content_range(text: &str) -> Range<usize> {
    let mut range = 0..text.len();
    if text.starts_with("```") {
        range.start += 3;

        if let Some(newline_ix) = text[range.clone()].find('\n') {
            range.start += newline_ix + 1;
        }
    }

    if !range.is_empty() && text.ends_with("```") {
        // The trailing backticks may overlap the opening fence (e.g. an unterminated
        // "````"), so clamp rather than letting `end` fall below `start`.
        range.end = (range.end - 3).max(range.start);
    }
    range
}
|
||||
|
||||
/// Represents either an owned or inline string. Motivation for this is to make `SubstitutedText`
|
||||
|
@ -570,4 +584,41 @@ mod tests {
|
|||
)
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_code_block_metadata() {
|
||||
assert_eq!(
|
||||
parse_markdown("```rust\nfn main() {\n let a = 1;\n}\n```"),
|
||||
(
|
||||
vec![
|
||||
(
|
||||
0..37,
|
||||
Start(CodeBlock {
|
||||
kind: CodeBlockKind::FencedLang("rust".into()),
|
||||
metadata: CodeBlockMetadata {
|
||||
content_range: 8..34,
|
||||
line_count: 3
|
||||
}
|
||||
})
|
||||
),
|
||||
(8..34, Text),
|
||||
(0..37, End(MarkdownTagEnd::CodeBlock)),
|
||||
],
|
||||
HashSet::from(["rust".into()]),
|
||||
HashSet::new()
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_code_block_content_range() {
|
||||
let input = "```rust\nlet x = 5;\n```";
|
||||
assert_eq!(extract_code_block_content_range(input), 8..19);
|
||||
|
||||
let input = "plain text";
|
||||
assert_eq!(extract_code_block_content_range(input), 0..10);
|
||||
|
||||
let input = "```python\nprint('hello')\nprint('world')\n```";
|
||||
assert_eq!(extract_code_block_content_range(input), 10..40);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue