markdown: Track code block metadata in parser (#28543)

This allows us to not scan the codeblock content for newlines on every
frame in `active_thread`

Release Notes:

- N/A
This commit is contained in:
Bennet Bo Fenner 2025-04-10 15:49:08 -06:00 committed by GitHub
parent 73305ce45e
commit 44cb8e582b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 247 additions and 203 deletions

View file

@ -22,12 +22,11 @@ use gpui::{
}; };
use language::{Buffer, LanguageRegistry}; use language::{Buffer, LanguageRegistry};
use language_model::{LanguageModelRegistry, LanguageModelToolUseId, Role}; use language_model::{LanguageModelRegistry, LanguageModelToolUseId, Role};
use markdown::parser::CodeBlockKind; use markdown::parser::{CodeBlockKind, CodeBlockMetadata};
use markdown::{Markdown, MarkdownElement, MarkdownStyle, ParsedMarkdown, without_fences}; use markdown::{Markdown, MarkdownElement, MarkdownStyle, ParsedMarkdown};
use project::ProjectItem as _; use project::ProjectItem as _;
use rope::Point; use rope::Point;
use settings::{Settings as _, update_settings_file}; use settings::{Settings as _, update_settings_file};
use std::ops::Range;
use std::path::Path; use std::path::Path;
use std::rc::Rc; use std::rc::Rc;
use std::sync::Arc; use std::sync::Arc;
@ -66,6 +65,8 @@ pub struct ActiveThread {
open_feedback_editors: HashMap<MessageId, Entity<Editor>>, open_feedback_editors: HashMap<MessageId, Entity<Editor>>,
} }
const MAX_UNCOLLAPSED_LINES_IN_CODE_BLOCK: usize = 5;
struct RenderedMessage { struct RenderedMessage {
language_registry: Arc<LanguageRegistry>, language_registry: Arc<LanguageRegistry>,
segments: Vec<RenderedMessageSegment>, segments: Vec<RenderedMessageSegment>,
@ -295,7 +296,7 @@ fn render_markdown_code_block(
ix: usize, ix: usize,
kind: &CodeBlockKind, kind: &CodeBlockKind,
parsed_markdown: &ParsedMarkdown, parsed_markdown: &ParsedMarkdown,
codeblock_range: Range<usize>, metadata: CodeBlockMetadata,
active_thread: Entity<ActiveThread>, active_thread: Entity<ActiveThread>,
workspace: WeakEntity<Workspace>, workspace: WeakEntity<Workspace>,
_window: &Window, _window: &Window,
@ -467,15 +468,6 @@ fn render_markdown_code_block(
.element_background .element_background
.blend(cx.theme().colors().editor_foreground.opacity(0.01)); .blend(cx.theme().colors().editor_foreground.opacity(0.01));
const CODE_FENCES_LINE_COUNT: usize = 2;
const MAX_COLLAPSED_LINES: usize = 5;
let line_count = parsed_markdown.source()[codeblock_range.clone()]
.bytes()
.filter(|c| *c == b'\n')
.count()
.saturating_sub(CODE_FENCES_LINE_COUNT - 1);
let codeblock_header = h_flex() let codeblock_header = h_flex()
.group("codeblock_header") .group("codeblock_header")
.p_1() .p_1()
@ -505,16 +497,14 @@ fn render_markdown_code_block(
.on_click({ .on_click({
let active_thread = active_thread.clone(); let active_thread = active_thread.clone();
let parsed_markdown = parsed_markdown.clone(); let parsed_markdown = parsed_markdown.clone();
let code_block_range = metadata.content_range.clone();
move |_event, _window, cx| { move |_event, _window, cx| {
active_thread.update(cx, |this, cx| { active_thread.update(cx, |this, cx| {
this.copied_code_block_ids.insert((message_id, ix)); this.copied_code_block_ids.insert((message_id, ix));
let code = without_fences( let code = parsed_markdown.source()[code_block_range.clone()]
&parsed_markdown.source()[codeblock_range.clone()], .to_string();
) cx.write_to_clipboard(ClipboardItem::new_string(code));
.to_string();
cx.write_to_clipboard(ClipboardItem::new_string(code.clone()));
cx.spawn(async move |this, cx| { cx.spawn(async move |this, cx| {
cx.background_executor() cx.background_executor()
@ -536,38 +526,41 @@ fn render_markdown_code_block(
}), }),
), ),
) )
.when(line_count > MAX_COLLAPSED_LINES, |header| { .when(
header.child( metadata.line_count > MAX_UNCOLLAPSED_LINES_IN_CODE_BLOCK,
IconButton::new( |header| {
("expand-collapse-code", ix), header.child(
if is_expanded { IconButton::new(
IconName::ChevronUp ("expand-collapse-code", ix),
if is_expanded {
IconName::ChevronUp
} else {
IconName::ChevronDown
},
)
.icon_color(Color::Muted)
.shape(ui::IconButtonShape::Square)
.tooltip(Tooltip::text(if is_expanded {
"Collapse Code"
} else { } else {
IconName::ChevronDown "Expand Code"
}, }))
.on_click({
let active_thread = active_thread.clone();
move |_event, _window, cx| {
active_thread.update(cx, |this, cx| {
let is_expanded = this
.expanded_code_blocks
.entry((message_id, ix))
.or_insert(false);
*is_expanded = !*is_expanded;
cx.notify();
});
}
}),
) )
.icon_color(Color::Muted) },
.shape(ui::IconButtonShape::Square) ),
.tooltip(Tooltip::text(if is_expanded {
"Collapse Code"
} else {
"Expand Code"
}))
.on_click({
let active_thread = active_thread.clone();
move |_event, _window, cx| {
active_thread.update(cx, |this, cx| {
let is_expanded = this
.expanded_code_blocks
.entry((message_id, ix))
.or_insert(false);
*is_expanded = !*is_expanded;
cx.notify();
});
}
}),
)
}),
); );
v_flex() v_flex()
@ -578,13 +571,16 @@ fn render_markdown_code_block(
.border_color(cx.theme().colors().border_variant) .border_color(cx.theme().colors().border_variant)
.bg(cx.theme().colors().editor_background) .bg(cx.theme().colors().editor_background)
.child(codeblock_header) .child(codeblock_header)
.when(line_count > MAX_COLLAPSED_LINES, |this| { .when(
if is_expanded { metadata.line_count > MAX_UNCOLLAPSED_LINES_IN_CODE_BLOCK,
this.h_full() |this| {
} else { if is_expanded {
this.max_h_40() this.h_full()
} } else {
}) this.max_h_40()
}
},
)
} }
fn open_markdown_link( fn open_markdown_link(
@ -1896,13 +1892,13 @@ impl ActiveThread {
render: Arc::new({ render: Arc::new({
let workspace = workspace.clone(); let workspace = workspace.clone();
let active_thread = cx.entity(); let active_thread = cx.entity();
move |kind, parsed_markdown, range, window, cx| { move |kind, parsed_markdown, range, metadata, window, cx| {
render_markdown_code_block( render_markdown_code_block(
message_id, message_id,
range.start, range.start,
kind, kind,
parsed_markdown, parsed_markdown,
range, metadata,
active_thread.clone(), active_thread.clone(),
workspace.clone(), workspace.clone(),
window, window,
@ -1912,7 +1908,7 @@ impl ActiveThread {
}), }),
transform: Some(Arc::new({ transform: Some(Arc::new({
let active_thread = cx.entity(); let active_thread = cx.entity();
move |el, range, _, cx| { move |el, range, metadata, _, cx| {
let is_expanded = active_thread let is_expanded = active_thread
.read(cx) .read(cx)
.expanded_code_blocks .expanded_code_blocks
@ -1920,7 +1916,10 @@ impl ActiveThread {
.copied() .copied()
.unwrap_or(false); .unwrap_or(false);
if is_expanded { if is_expanded
|| metadata.line_count
<= MAX_UNCOLLAPSED_LINES_IN_CODE_BLOCK
{
return el; return el;
} }
el.child( el.child(

View file

@ -18,6 +18,7 @@ use gpui::{
TextStyleRefinement, actions, point, quad, TextStyleRefinement, actions, point, quad,
}; };
use language::{Language, LanguageRegistry, Rope}; use language::{Language, LanguageRegistry, Rope};
use parser::CodeBlockMetadata;
use parser::{MarkdownEvent, MarkdownTag, MarkdownTagEnd, parse_links_only, parse_markdown}; use parser::{MarkdownEvent, MarkdownTag, MarkdownTagEnd, parse_links_only, parse_markdown};
use pulldown_cmark::Alignment; use pulldown_cmark::Alignment;
use sum_tree::TreeMap; use sum_tree::TreeMap;
@ -99,10 +100,19 @@ pub enum CodeBlockRenderer {
}, },
} }
pub type CodeBlockRenderFn = pub type CodeBlockRenderFn = Arc<
Arc<dyn Fn(&CodeBlockKind, &ParsedMarkdown, Range<usize>, &mut Window, &App) -> Div>; dyn Fn(
&CodeBlockKind,
&ParsedMarkdown,
Range<usize>,
CodeBlockMetadata,
&mut Window,
&App,
) -> Div,
>;
pub type CodeBlockTransformFn = Arc<dyn Fn(AnyDiv, Range<usize>, &mut Window, &App) -> AnyDiv>; pub type CodeBlockTransformFn =
Arc<dyn Fn(AnyDiv, Range<usize>, CodeBlockMetadata, &mut Window, &App) -> AnyDiv>;
actions!(markdown, [Copy, CopyAsMarkdown]); actions!(markdown, [Copy, CopyAsMarkdown]);
@ -603,6 +613,8 @@ impl Element for MarkdownElement {
0 0
}; };
let mut current_code_block_metadata = None;
for (range, event) in parsed_markdown.events.iter() { for (range, event) in parsed_markdown.events.iter() {
match event { match event {
MarkdownEvent::Start(tag) => { MarkdownEvent::Start(tag) => {
@ -641,7 +653,7 @@ impl Element for MarkdownElement {
markdown_end, markdown_end,
); );
} }
MarkdownTag::CodeBlock(kind) => { MarkdownTag::CodeBlock { kind, metadata } => {
let language = match kind { let language = match kind {
CodeBlockKind::Fenced => None, CodeBlockKind::Fenced => None,
CodeBlockKind::FencedLang(language) => { CodeBlockKind::FencedLang(language) => {
@ -654,6 +666,8 @@ impl Element for MarkdownElement {
_ => None, _ => None,
}; };
current_code_block_metadata = Some(metadata.clone());
let is_indented = matches!(kind, CodeBlockKind::Indented); let is_indented = matches!(kind, CodeBlockKind::Indented);
match (&self.code_block_renderer, is_indented) { match (&self.code_block_renderer, is_indented) {
@ -686,8 +700,14 @@ impl Element for MarkdownElement {
builder.push_div(code_block, range, markdown_end); builder.push_div(code_block, range, markdown_end);
} }
(CodeBlockRenderer::Custom { render, .. }, _) => { (CodeBlockRenderer::Custom { render, .. }, _) => {
let parent_container = let parent_container = render(
render(kind, &parsed_markdown, range.clone(), window, cx); kind,
&parsed_markdown,
range.clone(),
metadata.clone(),
window,
cx,
);
builder.push_div(parent_container, range, markdown_end); builder.push_div(parent_container, range, markdown_end);
@ -852,12 +872,22 @@ impl Element for MarkdownElement {
builder.pop_text_style(); builder.pop_text_style();
} }
let metadata = current_code_block_metadata.take();
if let CodeBlockRenderer::Custom { if let CodeBlockRenderer::Custom {
transform: Some(modify), transform: Some(transform),
.. ..
} = &self.code_block_renderer } = &self.code_block_renderer
{ {
builder.modify_current_div(|el| modify(el, range.clone(), window, cx)); builder.modify_current_div(|el| {
transform(
el,
range.clone(),
metadata.clone().unwrap_or_default(),
window,
cx,
)
});
} }
if matches!( if matches!(
@ -866,9 +896,13 @@ impl Element for MarkdownElement {
) { ) {
builder.flush_text(); builder.flush_text();
builder.modify_current_div(|el| { builder.modify_current_div(|el| {
let code = let content_range = parser::extract_code_block_content_range(
without_fences(parsed_markdown.source()[range.clone()].trim()) parsed_markdown.source()[range.clone()].trim(),
.to_string(); );
let content_range = content_range.start + range.start
..content_range.end + range.start;
let code = parsed_markdown.source()[content_range].to_string();
let codeblock = render_copy_code_block_button( let codeblock = render_copy_code_block_button(
range.end, range.end,
code, code,
@ -1507,43 +1541,3 @@ impl RenderedText {
.find(|link| link.source_range.contains(&source_index)) .find(|link| link.source_range.contains(&source_index))
} }
} }
/// Some markdown blocks are indented, and others have e.g. ```rust … ``` around them.
/// If this block is fenced with backticks, strip them off (and the language name).
/// We use this when copying code blocks to the clipboard.
pub fn without_fences(mut markdown: &str) -> &str {
if let Some(opening_backticks) = markdown.find("```") {
markdown = &markdown[opening_backticks..];
// Trim off the next newline. This also trims off a language name if it's there.
if let Some(newline) = markdown.find('\n') {
markdown = &markdown[newline + 1..];
}
};
if let Some(closing_backticks) = markdown.rfind("```") {
markdown = &markdown[..closing_backticks];
};
markdown
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_without_fences() {
let input = "```rust\nlet x = 5;\n```";
assert_eq!(without_fences(input), "let x = 5;\n");
let input = " ```\nno language\n``` ";
assert_eq!(without_fences(input), "no language\n");
let input = "plain text";
assert_eq!(without_fences(input), "plain text");
let input = "```python\nprint('hello')\nprint('world')\n```";
assert_eq!(without_fences(input), "print('hello')\nprint('world')\n");
}
}

View file

@ -65,11 +65,33 @@ pub fn parse_markdown(
within_metadata = true; within_metadata = true;
MarkdownTag::MetadataBlock(kind) MarkdownTag::MetadataBlock(kind)
} }
pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Indented) => {
MarkdownTag::CodeBlock {
kind: CodeBlockKind::Indented,
metadata: CodeBlockMetadata {
content_range: range.start + 1..range.end + 1,
line_count: 1,
},
}
}
pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced( pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(
ref info, ref info,
)) => { )) => {
let content_range = extract_code_block_content_range(&text[range.clone()]);
let content_range =
content_range.start + range.start..content_range.end + range.start;
let line_count = text[content_range.clone()]
.bytes()
.filter(|c| *c == b'\n')
.count();
let metadata = CodeBlockMetadata {
content_range,
line_count,
};
let info = info.trim(); let info = info.trim();
MarkdownTag::CodeBlock(if info.is_empty() { let kind = if info.is_empty() {
CodeBlockKind::Fenced CodeBlockKind::Fenced
// Languages should never contain a slash, and PathRanges always should. // Languages should never contain a slash, and PathRanges always should.
// (Models are told to specify them relative to a workspace root.) // (Models are told to specify them relative to a workspace root.)
@ -81,9 +103,68 @@ pub fn parse_markdown(
let language = SharedString::from(info.to_string()); let language = SharedString::from(info.to_string());
language_names.insert(language.clone()); language_names.insert(language.clone());
CodeBlockKind::FencedLang(language) CodeBlockKind::FencedLang(language)
}) };
MarkdownTag::CodeBlock { kind, metadata }
}
pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
pulldown_cmark::Tag::Heading {
level,
id,
classes,
attrs,
} => {
let id = id.map(|id| SharedString::from(id.into_string()));
let classes = classes
.into_iter()
.map(|c| SharedString::from(c.into_string()))
.collect();
let attrs = attrs
.into_iter()
.map(|(key, value)| {
(
SharedString::from(key.into_string()),
value.map(|v| SharedString::from(v.into_string())),
)
})
.collect();
MarkdownTag::Heading {
level,
id,
classes,
attrs,
}
}
pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
pulldown_cmark::Tag::Item => MarkdownTag::Item,
pulldown_cmark::Tag::FootnoteDefinition(label) => {
MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
}
pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
pulldown_cmark::Tag::Image {
link_type,
dest_url,
title,
id,
} => MarkdownTag::Image {
link_type,
dest_url: SharedString::from(dest_url.into_string()),
title: SharedString::from(title.into_string()),
id: SharedString::from(id.into_string()),
},
pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
pulldown_cmark::Tag::DefinitionListDefinition => {
MarkdownTag::DefinitionListDefinition
} }
tag => tag.into(),
}; };
events.push((range, MarkdownEvent::Start(tag))) events.push((range, MarkdownEvent::Start(tag)))
} }
@ -252,7 +333,10 @@ pub enum MarkdownTag {
BlockQuote, BlockQuote,
/// A code block. /// A code block.
CodeBlock(CodeBlockKind), CodeBlock {
kind: CodeBlockKind,
metadata: CodeBlockMetadata,
},
/// A HTML block. /// A HTML block.
HtmlBlock, HtmlBlock,
@ -323,96 +407,26 @@ pub enum CodeBlockKind {
FencedSrc(PathWithRange), FencedSrc(PathWithRange),
} }
impl From<pulldown_cmark::Tag<'_>> for MarkdownTag { #[derive(Default, Clone, Debug, PartialEq)]
fn from(tag: pulldown_cmark::Tag) -> Self { pub struct CodeBlockMetadata {
match tag { pub content_range: Range<usize>,
pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph, pub line_count: usize,
pulldown_cmark::Tag::Heading { }
level,
id, pub(crate) fn extract_code_block_content_range(text: &str) -> Range<usize> {
classes, let mut range = 0..text.len();
attrs, if text.starts_with("```") {
} => { range.start += 3;
let id = id.map(|id| SharedString::from(id.into_string()));
let classes = classes if let Some(newline_ix) = text[range.clone()].find('\n') {
.into_iter() range.start += newline_ix + 1;
.map(|c| SharedString::from(c.into_string()))
.collect();
let attrs = attrs
.into_iter()
.map(|(key, value)| {
(
SharedString::from(key.into_string()),
value.map(|v| SharedString::from(v.into_string())),
)
})
.collect();
MarkdownTag::Heading {
level,
id,
classes,
attrs,
}
}
pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
pulldown_cmark::Tag::CodeBlock(kind) => match kind {
pulldown_cmark::CodeBlockKind::Indented => {
MarkdownTag::CodeBlock(CodeBlockKind::Indented)
}
pulldown_cmark::CodeBlockKind::Fenced(info) => {
let info = info.trim();
MarkdownTag::CodeBlock(if info.is_empty() {
CodeBlockKind::Fenced
} else if info.contains('/') {
// Languages should never contain a slash, and PathRanges always should.
// (Models are told to specify them relative to a workspace root.)
CodeBlockKind::FencedSrc(PathWithRange::new(info))
} else {
CodeBlockKind::FencedLang(SharedString::from(info.to_string()))
})
}
},
pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
pulldown_cmark::Tag::Item => MarkdownTag::Item,
pulldown_cmark::Tag::FootnoteDefinition(label) => {
MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
}
pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
pulldown_cmark::Tag::Link {
link_type,
dest_url,
title,
id,
} => MarkdownTag::Link {
link_type,
dest_url: SharedString::from(dest_url.into_string()),
title: SharedString::from(title.into_string()),
id: SharedString::from(id.into_string()),
},
pulldown_cmark::Tag::Image {
link_type,
dest_url,
title,
id,
} => MarkdownTag::Image {
link_type,
dest_url: SharedString::from(dest_url.into_string()),
title: SharedString::from(title.into_string()),
id: SharedString::from(id.into_string()),
},
pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
pulldown_cmark::Tag::DefinitionListDefinition => MarkdownTag::DefinitionListDefinition,
} }
} }
if !range.is_empty() && text.ends_with("```") {
range.end -= 3;
}
range
} }
/// Represents either an owned or inline string. Motivation for this is to make `SubstitutedText` /// Represents either an owned or inline string. Motivation for this is to make `SubstitutedText`
@ -570,4 +584,41 @@ mod tests {
) )
) )
} }
#[test]
fn test_code_block_metadata() {
assert_eq!(
parse_markdown("```rust\nfn main() {\n let a = 1;\n}\n```"),
(
vec![
(
0..37,
Start(CodeBlock {
kind: CodeBlockKind::FencedLang("rust".into()),
metadata: CodeBlockMetadata {
content_range: 8..34,
line_count: 3
}
})
),
(8..34, Text),
(0..37, End(MarkdownTagEnd::CodeBlock)),
],
HashSet::from(["rust".into()]),
HashSet::new()
)
)
}
#[test]
fn test_extract_code_block_content_range() {
let input = "```rust\nlet x = 5;\n```";
assert_eq!(extract_code_block_content_range(input), 8..19);
let input = "plain text";
assert_eq!(extract_code_block_content_range(input), 0..10);
let input = "```python\nprint('hello')\nprint('world')\n```";
assert_eq!(extract_code_block_content_range(input), 10..40);
}
} }