markdown: Track code block metadata in parser (#28543)

This allows us to not scan the codeblock content for newlines on every
frame in `active_thread`

Release Notes:

- N/A
This commit is contained in:
Bennet Bo Fenner 2025-04-10 15:49:08 -06:00 committed by GitHub
parent 73305ce45e
commit 44cb8e582b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 247 additions and 203 deletions

View file

@ -22,12 +22,11 @@ use gpui::{
};
use language::{Buffer, LanguageRegistry};
use language_model::{LanguageModelRegistry, LanguageModelToolUseId, Role};
use markdown::parser::CodeBlockKind;
use markdown::{Markdown, MarkdownElement, MarkdownStyle, ParsedMarkdown, without_fences};
use markdown::parser::{CodeBlockKind, CodeBlockMetadata};
use markdown::{Markdown, MarkdownElement, MarkdownStyle, ParsedMarkdown};
use project::ProjectItem as _;
use rope::Point;
use settings::{Settings as _, update_settings_file};
use std::ops::Range;
use std::path::Path;
use std::rc::Rc;
use std::sync::Arc;
@ -66,6 +65,8 @@ pub struct ActiveThread {
open_feedback_editors: HashMap<MessageId, Entity<Editor>>,
}
const MAX_UNCOLLAPSED_LINES_IN_CODE_BLOCK: usize = 5;
struct RenderedMessage {
language_registry: Arc<LanguageRegistry>,
segments: Vec<RenderedMessageSegment>,
@ -295,7 +296,7 @@ fn render_markdown_code_block(
ix: usize,
kind: &CodeBlockKind,
parsed_markdown: &ParsedMarkdown,
codeblock_range: Range<usize>,
metadata: CodeBlockMetadata,
active_thread: Entity<ActiveThread>,
workspace: WeakEntity<Workspace>,
_window: &Window,
@ -467,15 +468,6 @@ fn render_markdown_code_block(
.element_background
.blend(cx.theme().colors().editor_foreground.opacity(0.01));
const CODE_FENCES_LINE_COUNT: usize = 2;
const MAX_COLLAPSED_LINES: usize = 5;
let line_count = parsed_markdown.source()[codeblock_range.clone()]
.bytes()
.filter(|c| *c == b'\n')
.count()
.saturating_sub(CODE_FENCES_LINE_COUNT - 1);
let codeblock_header = h_flex()
.group("codeblock_header")
.p_1()
@ -505,16 +497,14 @@ fn render_markdown_code_block(
.on_click({
let active_thread = active_thread.clone();
let parsed_markdown = parsed_markdown.clone();
let code_block_range = metadata.content_range.clone();
move |_event, _window, cx| {
active_thread.update(cx, |this, cx| {
this.copied_code_block_ids.insert((message_id, ix));
let code = without_fences(
&parsed_markdown.source()[codeblock_range.clone()],
)
.to_string();
cx.write_to_clipboard(ClipboardItem::new_string(code.clone()));
let code = parsed_markdown.source()[code_block_range.clone()]
.to_string();
cx.write_to_clipboard(ClipboardItem::new_string(code));
cx.spawn(async move |this, cx| {
cx.background_executor()
@ -536,38 +526,41 @@ fn render_markdown_code_block(
}),
),
)
.when(line_count > MAX_COLLAPSED_LINES, |header| {
header.child(
IconButton::new(
("expand-collapse-code", ix),
if is_expanded {
IconName::ChevronUp
.when(
metadata.line_count > MAX_UNCOLLAPSED_LINES_IN_CODE_BLOCK,
|header| {
header.child(
IconButton::new(
("expand-collapse-code", ix),
if is_expanded {
IconName::ChevronUp
} else {
IconName::ChevronDown
},
)
.icon_color(Color::Muted)
.shape(ui::IconButtonShape::Square)
.tooltip(Tooltip::text(if is_expanded {
"Collapse Code"
} else {
IconName::ChevronDown
},
"Expand Code"
}))
.on_click({
let active_thread = active_thread.clone();
move |_event, _window, cx| {
active_thread.update(cx, |this, cx| {
let is_expanded = this
.expanded_code_blocks
.entry((message_id, ix))
.or_insert(false);
*is_expanded = !*is_expanded;
cx.notify();
});
}
}),
)
.icon_color(Color::Muted)
.shape(ui::IconButtonShape::Square)
.tooltip(Tooltip::text(if is_expanded {
"Collapse Code"
} else {
"Expand Code"
}))
.on_click({
let active_thread = active_thread.clone();
move |_event, _window, cx| {
active_thread.update(cx, |this, cx| {
let is_expanded = this
.expanded_code_blocks
.entry((message_id, ix))
.or_insert(false);
*is_expanded = !*is_expanded;
cx.notify();
});
}
}),
)
}),
},
),
);
v_flex()
@ -578,13 +571,16 @@ fn render_markdown_code_block(
.border_color(cx.theme().colors().border_variant)
.bg(cx.theme().colors().editor_background)
.child(codeblock_header)
.when(line_count > MAX_COLLAPSED_LINES, |this| {
if is_expanded {
this.h_full()
} else {
this.max_h_40()
}
})
.when(
metadata.line_count > MAX_UNCOLLAPSED_LINES_IN_CODE_BLOCK,
|this| {
if is_expanded {
this.h_full()
} else {
this.max_h_40()
}
},
)
}
fn open_markdown_link(
@ -1896,13 +1892,13 @@ impl ActiveThread {
render: Arc::new({
let workspace = workspace.clone();
let active_thread = cx.entity();
move |kind, parsed_markdown, range, window, cx| {
move |kind, parsed_markdown, range, metadata, window, cx| {
render_markdown_code_block(
message_id,
range.start,
kind,
parsed_markdown,
range,
metadata,
active_thread.clone(),
workspace.clone(),
window,
@ -1912,7 +1908,7 @@ impl ActiveThread {
}),
transform: Some(Arc::new({
let active_thread = cx.entity();
move |el, range, _, cx| {
move |el, range, metadata, _, cx| {
let is_expanded = active_thread
.read(cx)
.expanded_code_blocks
@ -1920,7 +1916,10 @@ impl ActiveThread {
.copied()
.unwrap_or(false);
if is_expanded {
if is_expanded
|| metadata.line_count
<= MAX_UNCOLLAPSED_LINES_IN_CODE_BLOCK
{
return el;
}
el.child(

View file

@ -18,6 +18,7 @@ use gpui::{
TextStyleRefinement, actions, point, quad,
};
use language::{Language, LanguageRegistry, Rope};
use parser::CodeBlockMetadata;
use parser::{MarkdownEvent, MarkdownTag, MarkdownTagEnd, parse_links_only, parse_markdown};
use pulldown_cmark::Alignment;
use sum_tree::TreeMap;
@ -99,10 +100,19 @@ pub enum CodeBlockRenderer {
},
}
pub type CodeBlockRenderFn =
Arc<dyn Fn(&CodeBlockKind, &ParsedMarkdown, Range<usize>, &mut Window, &App) -> Div>;
pub type CodeBlockRenderFn = Arc<
dyn Fn(
&CodeBlockKind,
&ParsedMarkdown,
Range<usize>,
CodeBlockMetadata,
&mut Window,
&App,
) -> Div,
>;
pub type CodeBlockTransformFn = Arc<dyn Fn(AnyDiv, Range<usize>, &mut Window, &App) -> AnyDiv>;
pub type CodeBlockTransformFn =
Arc<dyn Fn(AnyDiv, Range<usize>, CodeBlockMetadata, &mut Window, &App) -> AnyDiv>;
actions!(markdown, [Copy, CopyAsMarkdown]);
@ -603,6 +613,8 @@ impl Element for MarkdownElement {
0
};
let mut current_code_block_metadata = None;
for (range, event) in parsed_markdown.events.iter() {
match event {
MarkdownEvent::Start(tag) => {
@ -641,7 +653,7 @@ impl Element for MarkdownElement {
markdown_end,
);
}
MarkdownTag::CodeBlock(kind) => {
MarkdownTag::CodeBlock { kind, metadata } => {
let language = match kind {
CodeBlockKind::Fenced => None,
CodeBlockKind::FencedLang(language) => {
@ -654,6 +666,8 @@ impl Element for MarkdownElement {
_ => None,
};
current_code_block_metadata = Some(metadata.clone());
let is_indented = matches!(kind, CodeBlockKind::Indented);
match (&self.code_block_renderer, is_indented) {
@ -686,8 +700,14 @@ impl Element for MarkdownElement {
builder.push_div(code_block, range, markdown_end);
}
(CodeBlockRenderer::Custom { render, .. }, _) => {
let parent_container =
render(kind, &parsed_markdown, range.clone(), window, cx);
let parent_container = render(
kind,
&parsed_markdown,
range.clone(),
metadata.clone(),
window,
cx,
);
builder.push_div(parent_container, range, markdown_end);
@ -852,12 +872,22 @@ impl Element for MarkdownElement {
builder.pop_text_style();
}
let metadata = current_code_block_metadata.take();
if let CodeBlockRenderer::Custom {
transform: Some(modify),
transform: Some(transform),
..
} = &self.code_block_renderer
{
builder.modify_current_div(|el| modify(el, range.clone(), window, cx));
builder.modify_current_div(|el| {
transform(
el,
range.clone(),
metadata.clone().unwrap_or_default(),
window,
cx,
)
});
}
if matches!(
@ -866,9 +896,13 @@ impl Element for MarkdownElement {
) {
builder.flush_text();
builder.modify_current_div(|el| {
let code =
without_fences(parsed_markdown.source()[range.clone()].trim())
.to_string();
let content_range = parser::extract_code_block_content_range(
parsed_markdown.source()[range.clone()].trim(),
);
let content_range = content_range.start + range.start
..content_range.end + range.start;
let code = parsed_markdown.source()[content_range].to_string();
let codeblock = render_copy_code_block_button(
range.end,
code,
@ -1507,43 +1541,3 @@ impl RenderedText {
.find(|link| link.source_range.contains(&source_index))
}
}
/// Some markdown blocks are indented, and others have e.g. ```rust … ``` around them.
/// If this block is fenced with backticks, strip them off (and the language name).
/// We use this when copying code blocks to the clipboard.
pub fn without_fences(mut markdown: &str) -> &str {
if let Some(opening_backticks) = markdown.find("```") {
markdown = &markdown[opening_backticks..];
// Trim off the next newline. This also trims off a language name if it's there.
if let Some(newline) = markdown.find('\n') {
markdown = &markdown[newline + 1..];
}
};
if let Some(closing_backticks) = markdown.rfind("```") {
markdown = &markdown[..closing_backticks];
};
markdown
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_without_fences() {
let input = "```rust\nlet x = 5;\n```";
assert_eq!(without_fences(input), "let x = 5;\n");
let input = " ```\nno language\n``` ";
assert_eq!(without_fences(input), "no language\n");
let input = "plain text";
assert_eq!(without_fences(input), "plain text");
let input = "```python\nprint('hello')\nprint('world')\n```";
assert_eq!(without_fences(input), "print('hello')\nprint('world')\n");
}
}

View file

@ -65,11 +65,33 @@ pub fn parse_markdown(
within_metadata = true;
MarkdownTag::MetadataBlock(kind)
}
pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Indented) => {
MarkdownTag::CodeBlock {
kind: CodeBlockKind::Indented,
metadata: CodeBlockMetadata {
content_range: range.start + 1..range.end + 1,
line_count: 1,
},
}
}
pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(
ref info,
)) => {
let content_range = extract_code_block_content_range(&text[range.clone()]);
let content_range =
content_range.start + range.start..content_range.end + range.start;
let line_count = text[content_range.clone()]
.bytes()
.filter(|c| *c == b'\n')
.count();
let metadata = CodeBlockMetadata {
content_range,
line_count,
};
let info = info.trim();
MarkdownTag::CodeBlock(if info.is_empty() {
let kind = if info.is_empty() {
CodeBlockKind::Fenced
// Languages should never contain a slash, and PathRanges always should.
// (Models are told to specify them relative to a workspace root.)
@ -81,9 +103,68 @@ pub fn parse_markdown(
let language = SharedString::from(info.to_string());
language_names.insert(language.clone());
CodeBlockKind::FencedLang(language)
})
};
MarkdownTag::CodeBlock { kind, metadata }
}
pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
pulldown_cmark::Tag::Heading {
level,
id,
classes,
attrs,
} => {
let id = id.map(|id| SharedString::from(id.into_string()));
let classes = classes
.into_iter()
.map(|c| SharedString::from(c.into_string()))
.collect();
let attrs = attrs
.into_iter()
.map(|(key, value)| {
(
SharedString::from(key.into_string()),
value.map(|v| SharedString::from(v.into_string())),
)
})
.collect();
MarkdownTag::Heading {
level,
id,
classes,
attrs,
}
}
pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
pulldown_cmark::Tag::Item => MarkdownTag::Item,
pulldown_cmark::Tag::FootnoteDefinition(label) => {
MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
}
pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
pulldown_cmark::Tag::Image {
link_type,
dest_url,
title,
id,
} => MarkdownTag::Image {
link_type,
dest_url: SharedString::from(dest_url.into_string()),
title: SharedString::from(title.into_string()),
id: SharedString::from(id.into_string()),
},
pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
pulldown_cmark::Tag::DefinitionListDefinition => {
MarkdownTag::DefinitionListDefinition
}
tag => tag.into(),
};
events.push((range, MarkdownEvent::Start(tag)))
}
@ -252,7 +333,10 @@ pub enum MarkdownTag {
BlockQuote,
/// A code block.
CodeBlock(CodeBlockKind),
CodeBlock {
kind: CodeBlockKind,
metadata: CodeBlockMetadata,
},
/// A HTML block.
HtmlBlock,
@ -323,96 +407,26 @@ pub enum CodeBlockKind {
FencedSrc(PathWithRange),
}
impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
fn from(tag: pulldown_cmark::Tag) -> Self {
match tag {
pulldown_cmark::Tag::Paragraph => MarkdownTag::Paragraph,
pulldown_cmark::Tag::Heading {
level,
id,
classes,
attrs,
} => {
let id = id.map(|id| SharedString::from(id.into_string()));
let classes = classes
.into_iter()
.map(|c| SharedString::from(c.into_string()))
.collect();
let attrs = attrs
.into_iter()
.map(|(key, value)| {
(
SharedString::from(key.into_string()),
value.map(|v| SharedString::from(v.into_string())),
)
})
.collect();
MarkdownTag::Heading {
level,
id,
classes,
attrs,
}
}
pulldown_cmark::Tag::BlockQuote(_kind) => MarkdownTag::BlockQuote,
pulldown_cmark::Tag::CodeBlock(kind) => match kind {
pulldown_cmark::CodeBlockKind::Indented => {
MarkdownTag::CodeBlock(CodeBlockKind::Indented)
}
pulldown_cmark::CodeBlockKind::Fenced(info) => {
let info = info.trim();
MarkdownTag::CodeBlock(if info.is_empty() {
CodeBlockKind::Fenced
} else if info.contains('/') {
// Languages should never contain a slash, and PathRanges always should.
// (Models are told to specify them relative to a workspace root.)
CodeBlockKind::FencedSrc(PathWithRange::new(info))
} else {
CodeBlockKind::FencedLang(SharedString::from(info.to_string()))
})
}
},
pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
pulldown_cmark::Tag::Item => MarkdownTag::Item,
pulldown_cmark::Tag::FootnoteDefinition(label) => {
MarkdownTag::FootnoteDefinition(SharedString::from(label.to_string()))
}
pulldown_cmark::Tag::Table(alignments) => MarkdownTag::Table(alignments),
pulldown_cmark::Tag::TableHead => MarkdownTag::TableHead,
pulldown_cmark::Tag::TableRow => MarkdownTag::TableRow,
pulldown_cmark::Tag::TableCell => MarkdownTag::TableCell,
pulldown_cmark::Tag::Emphasis => MarkdownTag::Emphasis,
pulldown_cmark::Tag::Strong => MarkdownTag::Strong,
pulldown_cmark::Tag::Strikethrough => MarkdownTag::Strikethrough,
pulldown_cmark::Tag::Link {
link_type,
dest_url,
title,
id,
} => MarkdownTag::Link {
link_type,
dest_url: SharedString::from(dest_url.into_string()),
title: SharedString::from(title.into_string()),
id: SharedString::from(id.into_string()),
},
pulldown_cmark::Tag::Image {
link_type,
dest_url,
title,
id,
} => MarkdownTag::Image {
link_type,
dest_url: SharedString::from(dest_url.into_string()),
title: SharedString::from(title.into_string()),
id: SharedString::from(id.into_string()),
},
pulldown_cmark::Tag::HtmlBlock => MarkdownTag::HtmlBlock,
pulldown_cmark::Tag::MetadataBlock(kind) => MarkdownTag::MetadataBlock(kind),
pulldown_cmark::Tag::DefinitionList => MarkdownTag::DefinitionList,
pulldown_cmark::Tag::DefinitionListTitle => MarkdownTag::DefinitionListTitle,
pulldown_cmark::Tag::DefinitionListDefinition => MarkdownTag::DefinitionListDefinition,
#[derive(Default, Clone, Debug, PartialEq)]
pub struct CodeBlockMetadata {
pub content_range: Range<usize>,
pub line_count: usize,
}
pub(crate) fn extract_code_block_content_range(text: &str) -> Range<usize> {
let mut range = 0..text.len();
if text.starts_with("```") {
range.start += 3;
if let Some(newline_ix) = text[range.clone()].find('\n') {
range.start += newline_ix + 1;
}
}
if !range.is_empty() && text.ends_with("```") {
range.end -= 3;
}
range
}
/// Represents either an owned or inline string. Motivation for this is to make `SubstitutedText`
@ -570,4 +584,41 @@ mod tests {
)
)
}
#[test]
fn test_code_block_metadata() {
assert_eq!(
parse_markdown("```rust\nfn main() {\n let a = 1;\n}\n```"),
(
vec![
(
0..37,
Start(CodeBlock {
kind: CodeBlockKind::FencedLang("rust".into()),
metadata: CodeBlockMetadata {
content_range: 8..34,
line_count: 3
}
})
),
(8..34, Text),
(0..37, End(MarkdownTagEnd::CodeBlock)),
],
HashSet::from(["rust".into()]),
HashSet::new()
)
)
}
#[test]
fn test_extract_code_block_content_range() {
let input = "```rust\nlet x = 5;\n```";
assert_eq!(extract_code_block_content_range(input), 8..19);
let input = "plain text";
assert_eq!(extract_code_block_content_range(input), 0..10);
let input = "```python\nprint('hello')\nprint('world')\n```";
assert_eq!(extract_code_block_content_range(input), 10..40);
}
}