Refactoring so we can support other HTML types as well
This commit is contained in:
parent
03487fff5b
commit
85321152cf
4 changed files with 105 additions and 152 deletions
105
Cargo.lock
generated
105
Cargo.lock
generated
|
@ -5117,12 +5117,6 @@ dependencies = [
|
|||
"zlog",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ego-tree"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8"
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.15.0"
|
||||
|
@ -6335,15 +6329,6 @@ dependencies = [
|
|||
"thread_local",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fxhash"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generator"
|
||||
version = "0.8.5"
|
||||
|
@ -6378,15 +6363,6 @@ dependencies = [
|
|||
"windows-targets 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getopts"
|
||||
version = "0.2.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cba6ae63eb948698e300f645f87c70f76630d505f23b8907cf1e193ee85048c1"
|
||||
dependencies = [
|
||||
"unicode-width 0.2.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.15"
|
||||
|
@ -7913,18 +7889,7 @@ dependencies = [
|
|||
"log",
|
||||
"mac",
|
||||
"markup5ever 0.16.1",
|
||||
"match_token 0.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.35.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "55d958c2f74b664487a2035fe1dadb032c48718a03b63f3ab0b8537db8549ed4"
|
||||
dependencies = [
|
||||
"log",
|
||||
"markup5ever 0.35.0",
|
||||
"match_token 0.35.0",
|
||||
"match_token",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -9965,12 +9930,13 @@ dependencies = [
|
|||
"editor",
|
||||
"fs",
|
||||
"gpui",
|
||||
"html5ever 0.27.0",
|
||||
"language",
|
||||
"linkify",
|
||||
"log",
|
||||
"markup5ever_rcdom",
|
||||
"pretty_assertions",
|
||||
"pulldown-cmark 0.12.2",
|
||||
"scraper",
|
||||
"settings",
|
||||
"theme",
|
||||
"ui",
|
||||
|
@ -10004,17 +9970,6 @@ dependencies = [
|
|||
"web_atoms",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "markup5ever"
|
||||
version = "0.35.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "311fe69c934650f8f19652b3946075f0fc41ad8757dbb68f1ca14e7900ecc1c3"
|
||||
dependencies = [
|
||||
"log",
|
||||
"tendril",
|
||||
"web_atoms",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "markup5ever_rcdom"
|
||||
version = "0.3.0"
|
||||
|
@ -10038,17 +9993,6 @@ dependencies = [
|
|||
"syn 2.0.101",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "match_token"
|
||||
version = "0.35.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac84fd3f360fcc43dc5f5d186f02a94192761a080e8bc58621ad4d12296a58cf"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.101",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matchers"
|
||||
version = "0.1.0"
|
||||
|
@ -14460,21 +14404,6 @@ version = "1.2.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "scraper"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5f3a24d916e78954af99281a455168d4a9515d65eca99a18da1b813689c4ad9"
|
||||
dependencies = [
|
||||
"cssparser",
|
||||
"ego-tree",
|
||||
"getopts",
|
||||
"html5ever 0.35.0",
|
||||
"precomputed-hash",
|
||||
"selectors",
|
||||
"tendril",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scratch"
|
||||
version = "1.0.8"
|
||||
|
@ -14719,25 +14648,6 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "selectors"
|
||||
version = "0.31.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5685b6ae43bfcf7d2e7dfcfb5d8e8f61b46442c902531e41a32a9a8bf0ee0fb6"
|
||||
dependencies = [
|
||||
"bitflags 2.9.0",
|
||||
"cssparser",
|
||||
"derive_more 2.0.1",
|
||||
"fxhash",
|
||||
"log",
|
||||
"new_debug_unreachable",
|
||||
"phf",
|
||||
"phf_codegen",
|
||||
"precomputed-hash",
|
||||
"servo_arc",
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "self_cell"
|
||||
version = "1.2.0"
|
||||
|
@ -14924,15 +14834,6 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "servo_arc"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "204ea332803bd95a0b60388590d59cf6468ec9becf626e2451f1d26a1d972de4"
|
||||
dependencies = [
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "session"
|
||||
version = "0.1.0"
|
||||
|
|
|
@ -574,7 +574,6 @@ rustls = { version = "0.23.26" }
|
|||
rustls-platform-verifier = "0.5.0"
|
||||
scap = { git = "https://github.com/zed-industries/scap", rev = "808aa5c45b41e8f44729d02e38fd00a2fe2722e7", default-features = false }
|
||||
schemars = { version = "1.0", features = ["indexmap2"] }
|
||||
scraper = "0.24.0"
|
||||
semver = "1.0"
|
||||
serde = { version = "1.0", features = ["derive", "rc"] }
|
||||
serde_derive = { version = "1.0", features = ["deserialize_in_place"] }
|
||||
|
|
|
@ -21,12 +21,13 @@ collections.workspace = true
|
|||
editor.workspace = true
|
||||
fs.workspace = true
|
||||
gpui.workspace = true
|
||||
html5ever.workspace = true
|
||||
language.workspace = true
|
||||
linkify.workspace = true
|
||||
log.workspace = true
|
||||
markup5ever_rcdom.workspace = true
|
||||
pretty_assertions.workspace = true
|
||||
pulldown-cmark.workspace = true
|
||||
scraper.workspace = true
|
||||
settings.workspace = true
|
||||
theme.workspace = true
|
||||
ui.workspace = true
|
||||
|
|
|
@ -2,9 +2,11 @@ use crate::markdown_elements::*;
|
|||
use async_recursion::async_recursion;
|
||||
use collections::FxHashMap;
|
||||
use gpui::FontWeight;
|
||||
use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
|
||||
use language::LanguageRegistry;
|
||||
use markup5ever_rcdom::RcDom;
|
||||
use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
|
||||
use std::{ops::Range, path::PathBuf, sync::Arc, vec};
|
||||
use std::{cell::RefCell, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec};
|
||||
use ui::{px, relative};
|
||||
|
||||
pub async fn parse_markdown(
|
||||
|
@ -757,10 +759,19 @@ impl<'a> MarkdownParser<'a> {
|
|||
let source_range = source_range.clone();
|
||||
match current {
|
||||
Event::Html(html) => {
|
||||
let fragment = scraper::Html::parse_fragment(html);
|
||||
let mut cursor = std::io::Cursor::new(html.as_bytes());
|
||||
let Some(dom) = parse_document(RcDom::default(), ParseOpts::default())
|
||||
.from_utf8()
|
||||
.read_from(&mut cursor)
|
||||
.ok()
|
||||
else {
|
||||
self.cursor += 1;
|
||||
continue;
|
||||
};
|
||||
|
||||
self.cursor += 1;
|
||||
|
||||
elements.extend(self.parse_html_image(fragment, source_range));
|
||||
self.parse_html_node(source_range, &dom.document, &mut elements);
|
||||
}
|
||||
Event::End(TagEnd::CodeBlock) => {
|
||||
self.cursor += 1;
|
||||
|
@ -775,6 +786,92 @@ impl<'a> MarkdownParser<'a> {
|
|||
elements
|
||||
}
|
||||
|
||||
fn attr_value(
|
||||
attrs: &RefCell<Vec<html5ever::Attribute>>,
|
||||
name: html5ever::LocalName,
|
||||
) -> Option<String> {
|
||||
attrs.borrow().iter().find_map(|attr| {
|
||||
if attr.name.local == name {
|
||||
Some(attr.value.to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_html_node(
|
||||
&self,
|
||||
source_range: Range<usize>,
|
||||
node: &Rc<markup5ever_rcdom::Node>,
|
||||
elements: &mut Vec<ParsedMarkdownElement>,
|
||||
) {
|
||||
match &node.data {
|
||||
markup5ever_rcdom::NodeData::Document => {
|
||||
self.consume_children(source_range, node, elements);
|
||||
}
|
||||
markup5ever_rcdom::NodeData::Doctype { .. } => {}
|
||||
markup5ever_rcdom::NodeData::Text { contents } => {
|
||||
elements.push(ParsedMarkdownElement::Paragraph(vec![
|
||||
MarkdownParagraphChunk::Text(ParsedMarkdownText {
|
||||
source_range,
|
||||
contents: contents.borrow().to_string(),
|
||||
highlights: Vec::default(),
|
||||
region_ranges: Vec::default(),
|
||||
regions: Vec::default(),
|
||||
}),
|
||||
]));
|
||||
}
|
||||
markup5ever_rcdom::NodeData::Comment { .. } => {}
|
||||
markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
|
||||
if local_name!("img") == name.local {
|
||||
let Some(src) = Self::attr_value(attrs, local_name!("src")) else {
|
||||
return;
|
||||
};
|
||||
|
||||
let Some(mut image) = Image::identify(
|
||||
src.to_string(),
|
||||
source_range,
|
||||
self.file_location_directory.clone(),
|
||||
) else {
|
||||
return;
|
||||
};
|
||||
|
||||
if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
|
||||
image.set_alt_text(alt.to_string().into());
|
||||
}
|
||||
|
||||
if let Some(width) = Self::attr_value(attrs, local_name!("width"))
|
||||
.and_then(|width| Self::parse_length(&width))
|
||||
{
|
||||
image.set_width(width);
|
||||
}
|
||||
|
||||
if let Some(height) = Self::attr_value(attrs, local_name!("height"))
|
||||
.and_then(|height| Self::parse_length(&height))
|
||||
{
|
||||
image.set_height(height);
|
||||
}
|
||||
|
||||
elements.push(ParsedMarkdownElement::Image(image));
|
||||
} else {
|
||||
self.consume_children(source_range, node, elements);
|
||||
}
|
||||
}
|
||||
markup5ever_rcdom::NodeData::ProcessingInstruction { .. } => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn consume_children(
|
||||
&self,
|
||||
source_range: Range<usize>,
|
||||
node: &Rc<markup5ever_rcdom::Node>,
|
||||
elements: &mut Vec<ParsedMarkdownElement>,
|
||||
) {
|
||||
for node in node.children.borrow().iter() {
|
||||
self.parse_html_node(source_range.clone(), node, elements);
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses the width/height attribute value of an html element (e.g. img element)
|
||||
fn parse_length(value: &str) -> Option<ui::DefiniteLength> {
|
||||
if value.ends_with("px") {
|
||||
|
@ -797,51 +894,6 @@ impl<'a> MarkdownParser<'a> {
|
|||
.map(|value| px(value).into())
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_html_image(
|
||||
&self,
|
||||
html: scraper::Html,
|
||||
source_range: Range<usize>,
|
||||
) -> Vec<ParsedMarkdownElement> {
|
||||
let mut images = Vec::new();
|
||||
let selector = scraper::Selector::parse("img").unwrap();
|
||||
|
||||
for element in html.select(&selector) {
|
||||
let Some(src) = element.attr("src") else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let Some(mut image) = Image::identify(
|
||||
src.to_string(),
|
||||
source_range.clone(),
|
||||
self.file_location_directory.clone(),
|
||||
) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
if let Some(alt) = element.attr("alt") {
|
||||
image.set_alt_text(alt.to_string().into());
|
||||
}
|
||||
|
||||
if let Some(width) = element
|
||||
.attr("width")
|
||||
.and_then(|width| Self::parse_length(width))
|
||||
{
|
||||
image.set_width(width);
|
||||
}
|
||||
|
||||
if let Some(height) = element
|
||||
.attr("height")
|
||||
.and_then(|height| Self::parse_length(height))
|
||||
{
|
||||
image.set_height(height);
|
||||
}
|
||||
|
||||
images.push(ParsedMarkdownElement::Image(image));
|
||||
}
|
||||
|
||||
images
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue