diff --git a/Cargo.lock b/Cargo.lock index 94dd63c66c..e210baa814 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5117,12 +5117,6 @@ dependencies = [ "zlog", ] -[[package]] -name = "ego-tree" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8" - [[package]] name = "either" version = "1.15.0" @@ -6335,15 +6329,6 @@ dependencies = [ "thread_local", ] -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] - [[package]] name = "generator" version = "0.8.5" @@ -6378,15 +6363,6 @@ dependencies = [ "windows-targets 0.48.5", ] -[[package]] -name = "getopts" -version = "0.2.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cba6ae63eb948698e300f645f87c70f76630d505f23b8907cf1e193ee85048c1" -dependencies = [ - "unicode-width 0.2.0", -] - [[package]] name = "getrandom" version = "0.2.15" @@ -7913,18 +7889,7 @@ dependencies = [ "log", "mac", "markup5ever 0.16.1", - "match_token 0.1.0", -] - -[[package]] -name = "html5ever" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55d958c2f74b664487a2035fe1dadb032c48718a03b63f3ab0b8537db8549ed4" -dependencies = [ - "log", - "markup5ever 0.35.0", - "match_token 0.35.0", + "match_token", ] [[package]] @@ -9965,12 +9930,13 @@ dependencies = [ "editor", "fs", "gpui", + "html5ever 0.27.0", "language", "linkify", "log", + "markup5ever_rcdom", "pretty_assertions", "pulldown-cmark 0.12.2", - "scraper", "settings", "theme", "ui", @@ -10004,17 +9970,6 @@ dependencies = [ "web_atoms", ] -[[package]] -name = "markup5ever" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "311fe69c934650f8f19652b3946075f0fc41ad8757dbb68f1ca14e7900ecc1c3" -dependencies = [ - "log", - "tendril", - "web_atoms", -] - [[package]] name = "markup5ever_rcdom" version = "0.3.0" @@ -10038,17 +9993,6 @@ dependencies = [ "syn 2.0.101", ] -[[package]] -name = "match_token" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac84fd3f360fcc43dc5f5d186f02a94192761a080e8bc58621ad4d12296a58cf" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - [[package]] name = "matchers" version = "0.1.0" @@ -14460,21 +14404,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "scraper" -version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5f3a24d916e78954af99281a455168d4a9515d65eca99a18da1b813689c4ad9" -dependencies = [ - "cssparser", - "ego-tree", - "getopts", - "html5ever 0.35.0", - "precomputed-hash", - "selectors", - "tendril", -] - [[package]] name = "scratch" version = "1.0.8" @@ -14719,25 +14648,6 @@ dependencies = [ "libc", ] -[[package]] -name = "selectors" -version = "0.31.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5685b6ae43bfcf7d2e7dfcfb5d8e8f61b46442c902531e41a32a9a8bf0ee0fb6" -dependencies = [ - "bitflags 2.9.0", - "cssparser", - "derive_more 2.0.1", - "fxhash", - "log", - "new_debug_unreachable", - "phf", - "phf_codegen", - "precomputed-hash", - "servo_arc", - "smallvec", -] - [[package]] name = "self_cell" version = "1.2.0" @@ -14924,15 +14834,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "servo_arc" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "204ea332803bd95a0b60388590d59cf6468ec9becf626e2451f1d26a1d972de4" -dependencies = [ - "stable_deref_trait", -] - [[package]] name = "session" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 2d5a57ca3e..b13795e1e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -574,7 +574,6 @@ rustls = { version = "0.23.26" } rustls-platform-verifier = "0.5.0" scap = { git = "https://github.com/zed-industries/scap", rev = "808aa5c45b41e8f44729d02e38fd00a2fe2722e7", default-features = false } schemars = { version = "1.0", features = ["indexmap2"] } -scraper = "0.24.0" semver = "1.0" serde = { version = "1.0", features = ["derive", "rc"] } serde_derive = { version = "1.0", features = ["deserialize_in_place"] } diff --git a/crates/markdown_preview/Cargo.toml b/crates/markdown_preview/Cargo.toml index 0a9d228de2..55646cdcf4 100644 --- a/crates/markdown_preview/Cargo.toml +++ b/crates/markdown_preview/Cargo.toml @@ -21,12 +21,13 @@ collections.workspace = true editor.workspace = true fs.workspace = true gpui.workspace = true +html5ever.workspace = true language.workspace = true linkify.workspace = true log.workspace = true +markup5ever_rcdom.workspace = true pretty_assertions.workspace = true pulldown-cmark.workspace = true -scraper.workspace = true settings.workspace = true theme.workspace = true ui.workspace = true diff --git a/crates/markdown_preview/src/markdown_parser.rs b/crates/markdown_preview/src/markdown_parser.rs index 0562ae4932..9e8e179560 100644 --- a/crates/markdown_preview/src/markdown_parser.rs +++ b/crates/markdown_preview/src/markdown_parser.rs @@ -2,9 +2,11 @@ use crate::markdown_elements::*; use async_recursion::async_recursion; use collections::FxHashMap; use gpui::FontWeight; +use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink}; use language::LanguageRegistry; +use markup5ever_rcdom::RcDom; use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd}; -use std::{ops::Range, path::PathBuf, sync::Arc, vec}; +use std::{cell::RefCell, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec}; use ui::{px, relative}; pub async fn parse_markdown( @@ -757,10 +759,19 @@ impl<'a> MarkdownParser<'a> { let source_range = source_range.clone(); match current { Event::Html(html) => { - let fragment = scraper::Html::parse_fragment(html); + let mut cursor = std::io::Cursor::new(html.as_bytes()); + let Some(dom) = parse_document(RcDom::default(), ParseOpts::default()) + .from_utf8() + .read_from(&mut cursor) + .ok() + else { + self.cursor += 1; + continue; + }; + self.cursor += 1; - elements.extend(self.parse_html_image(fragment, source_range)); + self.parse_html_node(source_range, &dom.document, &mut elements); } Event::End(TagEnd::CodeBlock) => { self.cursor += 1; @@ -775,6 +786,92 @@ impl<'a> MarkdownParser<'a> { elements } + fn attr_value( + attrs: &RefCell>, + name: html5ever::LocalName, + ) -> Option { + attrs.borrow().iter().find_map(|attr| { + if attr.name.local == name { + Some(attr.value.to_string()) + } else { + None + } + }) + } + + fn parse_html_node( + &self, + source_range: Range, + node: &Rc, + elements: &mut Vec, + ) { + match &node.data { + markup5ever_rcdom::NodeData::Document => { + self.consume_children(source_range, node, elements); + } + markup5ever_rcdom::NodeData::Doctype { .. } => {} + markup5ever_rcdom::NodeData::Text { contents } => { + elements.push(ParsedMarkdownElement::Paragraph(vec![ + MarkdownParagraphChunk::Text(ParsedMarkdownText { + source_range, + contents: contents.borrow().to_string(), + highlights: Vec::default(), + region_ranges: Vec::default(), + regions: Vec::default(), + }), + ])); + } + markup5ever_rcdom::NodeData::Comment { .. } => {} + markup5ever_rcdom::NodeData::Element { name, attrs, .. } => { + if local_name!("img") == name.local { + let Some(src) = Self::attr_value(attrs, local_name!("src")) else { + return; + }; + + let Some(mut image) = Image::identify( + src.to_string(), + source_range, + self.file_location_directory.clone(), + ) else { + return; + }; + + if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) { + image.set_alt_text(alt.to_string().into()); + } + + if let Some(width) = Self::attr_value(attrs, local_name!("width")) + .and_then(|width| Self::parse_length(&width)) + { + image.set_width(width); + } + + if let Some(height) = Self::attr_value(attrs, local_name!("height")) + .and_then(|height| Self::parse_length(&height)) + { + image.set_height(height); + } + + elements.push(ParsedMarkdownElement::Image(image)); + } else { + self.consume_children(source_range, node, elements); + } + } + markup5ever_rcdom::NodeData::ProcessingInstruction { .. } => {} + } + } + + fn consume_children( + &self, + source_range: Range, + node: &Rc, + elements: &mut Vec, + ) { + for node in node.children.borrow().iter() { + self.parse_html_node(source_range.clone(), node, elements); + } + } + /// Parses the width/height attribute value of an html element (e.g. img element) fn parse_length(value: &str) -> Option { if value.ends_with("px") { @@ -797,51 +894,6 @@ impl<'a> MarkdownParser<'a> { .map(|value| px(value).into()) } } - - fn parse_html_image( - &self, - html: scraper::Html, - source_range: Range, - ) -> Vec { - let mut images = Vec::new(); - let selector = scraper::Selector::parse("img").unwrap(); - - for element in html.select(&selector) { - let Some(src) = element.attr("src") else { - continue; - }; - - let Some(mut image) = Image::identify( - src.to_string(), - source_range.clone(), - self.file_location_directory.clone(), - ) else { - continue; - }; - - if let Some(alt) = element.attr("alt") { - image.set_alt_text(alt.to_string().into()); - } - - if let Some(width) = element - .attr("width") - .and_then(|width| Self::parse_length(width)) - { - image.set_width(width); - } - - if let Some(height) = element - .attr("height") - .and_then(|height| Self::parse_length(height)) - { - image.set_height(height); - } - - images.push(ParsedMarkdownElement::Image(image)); - } - - images - } } #[cfg(test)]