From fa90b3a98688eff6b3dcfcbd6eade70ff4b66e00 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Mon, 7 Apr 2025 11:01:34 -0400 Subject: [PATCH] Link to cited code blocks (#28217) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Screenshot 2025-04-06 at 9 59 41 PM Release Notes: - Agent panel now shows links to relevant source code files above code blocks. --- Cargo.lock | 2 + assets/prompts/assistant_system_prompt.hbs | 101 ++++++++++ crates/markdown/Cargo.toml | 2 + crates/markdown/src/markdown.rs | 136 +++++++++++-- crates/markdown/src/parser.rs | 91 +++++++-- crates/markdown/src/path_range.rs | 222 +++++++++++++++++++++ 6 files changed, 522 insertions(+), 32 deletions(-) create mode 100644 crates/markdown/src/path_range.rs diff --git a/Cargo.lock b/Cargo.lock index 0e1bca8a90..cd082c8bce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8355,6 +8355,7 @@ dependencies = [ "anyhow", "assets", "env_logger 0.11.8", + "file_icons", "gpui", "language", "languages", @@ -8366,6 +8367,7 @@ dependencies = [ "theme", "ui", "util", + "workspace", "workspace-hack", ] diff --git a/assets/prompts/assistant_system_prompt.hbs b/assets/prompts/assistant_system_prompt.hbs index c2b2e8f7a9..86f63eff41 100644 --- a/assets/prompts/assistant_system_prompt.hbs +++ b/assets/prompts/assistant_system_prompt.hbs @@ -9,6 +9,7 @@ You should only perform actions that modify the user's system if explicitly requ - If the user asks a question about how to accomplish a task, provide guidance or information, and use read-only tools (e.g., search) to assist. You may suggest potential actions, but do not directly modify the user’s system without explicit instruction. - If the user clearly requests that you perform an action, carry out the action directly without explaining why you are doing so. + + The user has opened a project that contains the following root directories/files. Whenever you specify a path in the project, it must be a relative path which begins with one of these root directories/files: {{#each worktrees}} diff --git a/crates/markdown/Cargo.toml b/crates/markdown/Cargo.toml index f87d347162..d4c9b87938 100644 --- a/crates/markdown/Cargo.toml +++ b/crates/markdown/Cargo.toml @@ -20,6 +20,7 @@ test-support = [ [dependencies] anyhow.workspace = true +file_icons.workspace = true gpui.workspace = true language.workspace = true linkify.workspace = true @@ -28,6 +29,7 @@ pulldown-cmark.workspace = true theme.workspace = true ui.workspace = true util.workspace = true +workspace.workspace = true workspace-hack.workspace = true [dev-dependencies] diff --git a/crates/markdown/src/markdown.rs b/crates/markdown/src/markdown.rs index 6db397aea5..902ac352d9 100644 --- a/crates/markdown/src/markdown.rs +++ b/crates/markdown/src/markdown.rs @@ -1,9 +1,12 @@ pub mod parser; +mod path_range; +use file_icons::FileIcons; use std::collections::{HashMap, HashSet}; use std::iter; use std::mem; use std::ops::Range; +use std::path::PathBuf; use std::rc::Rc; use std::sync::Arc; use std::time::Duration; @@ -19,8 +22,9 @@ use language::{Language, LanguageRegistry, Rope}; use parser::{MarkdownEvent, MarkdownTag, MarkdownTagEnd, parse_links_only, parse_markdown}; use pulldown_cmark::Alignment; use theme::SyntaxTheme; -use ui::{Tooltip, prelude::*}; +use ui::{ButtonLike, Tooltip, prelude::*}; use util::{ResultExt, TryFutureExt}; +use workspace::Workspace; use crate::parser::CodeBlockKind; @@ -210,13 +214,15 @@ impl Markdown { return anyhow::Ok(ParsedMarkdown { events: Arc::from(parse_links_only(source.as_ref())), source, - languages: HashMap::default(), + languages_by_name: HashMap::default(), + languages_by_path: HashMap::default(), }); } - let (events, language_names) = parse_markdown(&source); - let mut languages = HashMap::with_capacity(language_names.len()); - for name in language_names { - if let Some(registry) = language_registry.as_ref() { + let (events, language_names, paths) = parse_markdown(&source); + let mut languages_by_name = HashMap::with_capacity(language_names.len()); + let mut languages_by_path = HashMap::with_capacity(paths.len()); + if let Some(registry) = language_registry.as_ref() { + for name in language_names { let language = if !name.is_empty() { registry.language_for_name(&name) } else if let Some(fallback) = &fallback { @@ -225,14 +231,21 @@ impl Markdown { continue; }; if let Ok(language) = language.await { - languages.insert(name, language); + languages_by_name.insert(name, language); + } + } + + for path in paths { + if let Ok(language) = registry.language_for_file_path(&path).await { + languages_by_path.insert(path, language); } } } anyhow::Ok(ParsedMarkdown { source, events: Arc::from(events), - languages, + languages_by_name, + languages_by_path, }) }); @@ -308,7 +321,8 @@ impl Selection { pub struct ParsedMarkdown { source: SharedString, events: Arc<[(Range, MarkdownEvent)]>, - languages: HashMap>, + languages_by_name: HashMap>, + languages_by_path: HashMap>, } impl ParsedMarkdown { @@ -574,7 +588,9 @@ impl Element for MarkdownElement { } else { 0 }; - for (range, event) in parsed_markdown.events.iter() { + + let code_citation_id = SharedString::from("code-citation-link"); + for (index, (range, event)) in parsed_markdown.events.iter().enumerate() { match event { MarkdownEvent::Start(tag) => { match tag { @@ -613,10 +629,102 @@ impl Element for MarkdownElement { ); } MarkdownTag::CodeBlock(kind) => { - let language = if let CodeBlockKind::Fenced(language) = kind { - parsed_markdown.languages.get(language).cloned() - } else { - None + let language = match kind { + CodeBlockKind::Fenced => None, + CodeBlockKind::FencedLang(language) => { + parsed_markdown.languages_by_name.get(language).cloned() + } + CodeBlockKind::FencedSrc(path_range) => { + // If the path actually exists in the project, render a link to it. + if let Some(project_path) = + window.root::().flatten().and_then(|workspace| { + workspace + .read(cx) + .project() + .read(cx) + .find_project_path(&path_range.path, cx) + }) + { + builder.flush_text(); + + builder.push_div( + div().relative().w_full(), + range, + markdown_end, + ); + + builder.modify_current_div(|el| { + let file_icon = + FileIcons::get_icon(&project_path.path, cx) + .map(|path| { + Icon::from_path(path) + .color(Color::Muted) + .into_any_element() + }) + .unwrap_or_else(|| { + IconButton::new( + "file-path-icon", + IconName::File, + ) + .shape(ui::IconButtonShape::Square) + .into_any_element() + }); + + el.child( + ButtonLike::new(ElementId::NamedInteger( + code_citation_id.clone(), + index, + )) + .child( + div() + .mb_1() + .flex() + .items_center() + .gap_1() + .child(file_icon) + .child( + Label::new( + project_path + .path + .display() + .to_string(), + ) + .color(Color::Muted) + .underline(), + ), + ) + .on_click({ + let click_path = project_path.clone(); + move |_, window, cx| { + if let Some(workspace) = + window.root::().flatten() + { + workspace.update(cx, |workspace, cx| { + workspace + .open_path( + click_path.clone(), + None, + true, + window, + cx, + ) + .detach_and_log_err(cx); + }) + } + } + }), + ) + }); + + builder.pop_div(); + } + + parsed_markdown + .languages_by_path + .get(&path_range.path) + .cloned() + } + _ => None, }; // This is a parent container that we can position the copy button inside. diff --git a/crates/markdown/src/parser.rs b/crates/markdown/src/parser.rs index e5877f3b16..f649920be0 100644 --- a/crates/markdown/src/parser.rs +++ b/crates/markdown/src/parser.rs @@ -7,8 +7,11 @@ use pulldown_cmark::{ use std::{ collections::HashSet, ops::{Deref, Range}, + path::PathBuf, }; +use crate::path_range::PathRange; + const PARSE_OPTIONS: Options = Options::ENABLE_TABLES .union(Options::ENABLE_FOOTNOTES) .union(Options::ENABLE_STRIKETHROUGH) @@ -19,9 +22,16 @@ const PARSE_OPTIONS: Options = Options::ENABLE_TABLES .union(Options::ENABLE_OLD_FOOTNOTES) .union(Options::ENABLE_GFM); -pub fn parse_markdown(text: &str) -> (Vec<(Range, MarkdownEvent)>, HashSet) { +pub fn parse_markdown( + text: &str, +) -> ( + Vec<(Range, MarkdownEvent)>, + HashSet, + HashSet, +) { let mut events = Vec::new(); - let mut languages = HashSet::new(); + let mut language_names = HashSet::new(); + let mut language_paths = HashSet::new(); let mut within_link = false; let mut within_metadata = false; for (pulldown_event, mut range) in Parser::new_ext(text, PARSE_OPTIONS).into_offset_iter() { @@ -35,17 +45,46 @@ pub fn parse_markdown(text: &str) -> (Vec<(Range, MarkdownEvent)>, HashSe } match pulldown_event { pulldown_cmark::Event::Start(tag) => { - match tag { - pulldown_cmark::Tag::Link { .. } => within_link = true, - pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true, - pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced( - ref language, - )) => { - languages.insert(SharedString::from(language.to_string())); + let tag = match tag { + pulldown_cmark::Tag::Link { + link_type, + dest_url, + title, + id, + } => { + within_link = true; + MarkdownTag::Link { + link_type, + dest_url: SharedString::from(dest_url.into_string()), + title: SharedString::from(title.into_string()), + id: SharedString::from(id.into_string()), + } } - _ => {} - } - events.push((range, MarkdownEvent::Start(tag.into()))) + pulldown_cmark::Tag::MetadataBlock(kind) => { + within_metadata = true; + MarkdownTag::MetadataBlock(kind) + } + pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced( + ref info, + )) => { + let info = info.trim(); + MarkdownTag::CodeBlock(if info.is_empty() { + CodeBlockKind::Fenced + // Languages should never contain a slash, and PathRanges always should. + // (Models are told to specify them relative to a workspace root.) + } else if info.contains('/') { + let path_range = PathRange::new(info); + language_paths.insert(path_range.path.clone()); + CodeBlockKind::FencedSrc(path_range) + } else { + let language = SharedString::from(info.to_string()); + language_names.insert(language.clone()); + CodeBlockKind::FencedLang(language) + }) + } + tag => tag.into(), + }; + events.push((range, MarkdownEvent::Start(tag))) } pulldown_cmark::Event::End(tag) => { if let pulldown_cmark::TagEnd::Link = tag { @@ -129,7 +168,7 @@ pub fn parse_markdown(text: &str) -> (Vec<(Range, MarkdownEvent)>, HashSe pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {} } } - (events, languages) + (events, language_names, language_paths) } pub fn parse_links_only(text: &str) -> Vec<(Range, MarkdownEvent)> { @@ -287,8 +326,13 @@ pub enum MarkdownTag { #[derive(Clone, Debug, PartialEq)] pub enum CodeBlockKind { Indented, - /// The value contained in the tag describes the language of the code, which may be empty. - Fenced(SharedString), + /// "Fenced" means "surrounded by triple backticks." + /// There can optionally be either a language after the backticks (like in traditional Markdown) + /// or, if an agent is specifying a path for a source location in the project, it can be a PathRange, + /// e.g. ```path/to/foo.rs#L123-456 instead of ```rust + Fenced, + FencedLang(SharedString), + FencedSrc(PathRange), } impl From> for MarkdownTag { @@ -327,9 +371,18 @@ impl From> for MarkdownTag { pulldown_cmark::CodeBlockKind::Indented => { MarkdownTag::CodeBlock(CodeBlockKind::Indented) } - pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock( - CodeBlockKind::Fenced(SharedString::from(info.into_string())), - ), + pulldown_cmark::CodeBlockKind::Fenced(info) => { + let info = info.trim(); + MarkdownTag::CodeBlock(if info.is_empty() { + CodeBlockKind::Fenced + } else if info.contains('/') { + // Languages should never contain a slash, and PathRanges always should. + // (Models are told to specify them relative to a workspace root.) + CodeBlockKind::FencedSrc(PathRange::new(info)) + } else { + CodeBlockKind::FencedLang(SharedString::from(info.to_string())) + }) + } }, pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number), pulldown_cmark::Tag::Item => MarkdownTag::Item, @@ -488,6 +541,7 @@ mod tests { (46..51, Text), (0..51, End(MarkdownTagEnd::Paragraph)) ], + HashSet::new(), HashSet::new() ) ); @@ -515,6 +569,7 @@ mod tests { (41..42, SubstitutedText("’".into())), (0..42, End(MarkdownTagEnd::Paragraph)) ], + HashSet::new(), HashSet::new() ) ) diff --git a/crates/markdown/src/path_range.rs b/crates/markdown/src/path_range.rs new file mode 100644 index 0000000000..7e7700a58f --- /dev/null +++ b/crates/markdown/src/path_range.rs @@ -0,0 +1,222 @@ +use std::{ops::Range, path::PathBuf}; + +#[derive(Debug, Clone, PartialEq)] +pub struct PathRange { + pub path: PathBuf, + pub range: Option>, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct LineCol { + pub line: u32, + pub col: Option, +} + +impl LineCol { + pub fn new(str: impl AsRef) -> Option { + let str = str.as_ref(); + match str.split_once(':') { + Some((line, col)) => match (line.parse::(), col.parse::()) { + (Ok(line), Ok(col)) => Some(Self { + line, + col: Some(col), + }), + _ => None, + }, + None => match str.parse::() { + Ok(line) => Some(Self { line, col: None }), + Err(_) => None, + }, + } + } +} + +impl PathRange { + pub fn new(str: impl AsRef) -> Self { + let str = str.as_ref(); + // Sometimes the model will include a language at the start, + // e.g. "```rust zed/crates/markdown/src/markdown.rs#L1" + // We just discard that. + let str = match str.trim_end().rfind(' ') { + Some(space) => &str[space + 1..], + None => str.trim_start(), + }; + + match str.rsplit_once('#') { + Some((path, after_hash)) => { + // Be tolerant to the model omitting the "L" prefix, lowercasing it, + // or including it more than once. + let after_hash = after_hash.replace(['L', 'l'], ""); + + let range = { + let mut iter = after_hash.split('-').flat_map(LineCol::new); + iter.next() + .map(|start| iter.next().map(|end| start..end).unwrap_or(start..start)) + }; + + Self { + path: PathBuf::from(path), + range, + } + } + None => Self { + path: str.into(), + range: None, + }, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_linecol_parsing() { + let line_col = LineCol::new("10:5"); + assert_eq!( + line_col, + Some(LineCol { + line: 10, + col: Some(5) + }) + ); + + let line_only = LineCol::new("42"); + assert_eq!( + line_only, + Some(LineCol { + line: 42, + col: None + }) + ); + + assert_eq!(LineCol::new(""), None); + assert_eq!(LineCol::new("not a number"), None); + assert_eq!(LineCol::new("10:not a number"), None); + assert_eq!(LineCol::new("not:5"), None); + } + + #[test] + fn test_pathrange_parsing() { + let path_range = PathRange::new("file.rs#L10-L20"); + assert_eq!(path_range.path, PathBuf::from("file.rs")); + assert!(path_range.range.is_some()); + if let Some(range) = path_range.range { + assert_eq!(range.start.line, 10); + assert_eq!(range.start.col, None); + assert_eq!(range.end.line, 20); + assert_eq!(range.end.col, None); + } + + let single_line = PathRange::new("file.rs#L15"); + assert_eq!(single_line.path, PathBuf::from("file.rs")); + assert!(single_line.range.is_some()); + if let Some(range) = single_line.range { + assert_eq!(range.start.line, 15); + assert_eq!(range.end.line, 15); + } + + let no_range = PathRange::new("file.rs"); + assert_eq!(no_range.path, PathBuf::from("file.rs")); + assert!(no_range.range.is_none()); + + let lowercase = PathRange::new("file.rs#l5-l10"); + assert_eq!(lowercase.path, PathBuf::from("file.rs")); + assert!(lowercase.range.is_some()); + if let Some(range) = lowercase.range { + assert_eq!(range.start.line, 5); + assert_eq!(range.end.line, 10); + } + + let complex = PathRange::new("src/path/to/file.rs#L100"); + assert_eq!(complex.path, PathBuf::from("src/path/to/file.rs")); + assert!(complex.range.is_some()); + } + + #[test] + fn test_pathrange_from_str() { + let with_range = PathRange::new("file.rs#L10-L20"); + assert!(with_range.range.is_some()); + assert_eq!(with_range.path, PathBuf::from("file.rs")); + + let without_range = PathRange::new("file.rs"); + assert!(without_range.range.is_none()); + + let single_line = PathRange::new("file.rs#L15"); + assert!(single_line.range.is_some()); + } + + #[test] + fn test_pathrange_leading_text_trimming() { + let with_language = PathRange::new("```rust file.rs#L10"); + assert_eq!(with_language.path, PathBuf::from("file.rs")); + assert!(with_language.range.is_some()); + if let Some(range) = with_language.range { + assert_eq!(range.start.line, 10); + } + + let with_spaces = PathRange::new("``` file.rs#L10-L20"); + assert_eq!(with_spaces.path, PathBuf::from("file.rs")); + assert!(with_spaces.range.is_some()); + + let with_words = PathRange::new("```rust code example file.rs#L15:10"); + assert_eq!(with_words.path, PathBuf::from("file.rs")); + assert!(with_words.range.is_some()); + if let Some(range) = with_words.range { + assert_eq!(range.start.line, 15); + assert_eq!(range.start.col, Some(10)); + } + + let with_whitespace = PathRange::new(" file.rs#L5"); + assert_eq!(with_whitespace.path, PathBuf::from("file.rs")); + assert!(with_whitespace.range.is_some()); + + let no_leading = PathRange::new("file.rs#L10"); + assert_eq!(no_leading.path, PathBuf::from("file.rs")); + assert!(no_leading.range.is_some()); + } + + #[test] + fn test_pathrange_with_line_and_column() { + let line_and_col = PathRange::new("file.rs#L10:5"); + assert_eq!(line_and_col.path, PathBuf::from("file.rs")); + assert!(line_and_col.range.is_some()); + if let Some(range) = line_and_col.range { + assert_eq!(range.start.line, 10); + assert_eq!(range.start.col, Some(5)); + assert_eq!(range.end.line, 10); + assert_eq!(range.end.col, Some(5)); + } + + let full_range = PathRange::new("file.rs#L10:5-L20:15"); + assert_eq!(full_range.path, PathBuf::from("file.rs")); + assert!(full_range.range.is_some()); + if let Some(range) = full_range.range { + assert_eq!(range.start.line, 10); + assert_eq!(range.start.col, Some(5)); + assert_eq!(range.end.line, 20); + assert_eq!(range.end.col, Some(15)); + } + + let mixed_range1 = PathRange::new("file.rs#L10:5-L20"); + assert_eq!(mixed_range1.path, PathBuf::from("file.rs")); + assert!(mixed_range1.range.is_some()); + if let Some(range) = mixed_range1.range { + assert_eq!(range.start.line, 10); + assert_eq!(range.start.col, Some(5)); + assert_eq!(range.end.line, 20); + assert_eq!(range.end.col, None); + } + + let mixed_range2 = PathRange::new("file.rs#L10-L20:15"); + assert_eq!(mixed_range2.path, PathBuf::from("file.rs")); + assert!(mixed_range2.range.is_some()); + if let Some(range) = mixed_range2.range { + assert_eq!(range.start.line, 10); + assert_eq!(range.start.col, None); + assert_eq!(range.end.line, 20); + assert_eq!(range.end.col, Some(15)); + } + } +}