Refactoring so we can support other HTML types as well
This commit is contained in:
parent
03487fff5b
commit
85321152cf
4 changed files with 105 additions and 152 deletions
105
Cargo.lock
generated
105
Cargo.lock
generated
|
@ -5117,12 +5117,6 @@ dependencies = [
|
||||||
"zlog",
|
"zlog",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "ego-tree"
|
|
||||||
version = "0.10.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "either"
|
name = "either"
|
||||||
version = "1.15.0"
|
version = "1.15.0"
|
||||||
|
@ -6335,15 +6329,6 @@ dependencies = [
|
||||||
"thread_local",
|
"thread_local",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "fxhash"
|
|
||||||
version = "0.2.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
|
|
||||||
dependencies = [
|
|
||||||
"byteorder",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "generator"
|
name = "generator"
|
||||||
version = "0.8.5"
|
version = "0.8.5"
|
||||||
|
@ -6378,15 +6363,6 @@ dependencies = [
|
||||||
"windows-targets 0.48.5",
|
"windows-targets 0.48.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "getopts"
|
|
||||||
version = "0.2.23"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "cba6ae63eb948698e300f645f87c70f76630d505f23b8907cf1e193ee85048c1"
|
|
||||||
dependencies = [
|
|
||||||
"unicode-width 0.2.0",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "getrandom"
|
name = "getrandom"
|
||||||
version = "0.2.15"
|
version = "0.2.15"
|
||||||
|
@ -7913,18 +7889,7 @@ dependencies = [
|
||||||
"log",
|
"log",
|
||||||
"mac",
|
"mac",
|
||||||
"markup5ever 0.16.1",
|
"markup5ever 0.16.1",
|
||||||
"match_token 0.1.0",
|
"match_token",
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "html5ever"
|
|
||||||
version = "0.35.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "55d958c2f74b664487a2035fe1dadb032c48718a03b63f3ab0b8537db8549ed4"
|
|
||||||
dependencies = [
|
|
||||||
"log",
|
|
||||||
"markup5ever 0.35.0",
|
|
||||||
"match_token 0.35.0",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -9965,12 +9930,13 @@ dependencies = [
|
||||||
"editor",
|
"editor",
|
||||||
"fs",
|
"fs",
|
||||||
"gpui",
|
"gpui",
|
||||||
|
"html5ever 0.27.0",
|
||||||
"language",
|
"language",
|
||||||
"linkify",
|
"linkify",
|
||||||
"log",
|
"log",
|
||||||
|
"markup5ever_rcdom",
|
||||||
"pretty_assertions",
|
"pretty_assertions",
|
||||||
"pulldown-cmark 0.12.2",
|
"pulldown-cmark 0.12.2",
|
||||||
"scraper",
|
|
||||||
"settings",
|
"settings",
|
||||||
"theme",
|
"theme",
|
||||||
"ui",
|
"ui",
|
||||||
|
@ -10004,17 +9970,6 @@ dependencies = [
|
||||||
"web_atoms",
|
"web_atoms",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "markup5ever"
|
|
||||||
version = "0.35.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "311fe69c934650f8f19652b3946075f0fc41ad8757dbb68f1ca14e7900ecc1c3"
|
|
||||||
dependencies = [
|
|
||||||
"log",
|
|
||||||
"tendril",
|
|
||||||
"web_atoms",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "markup5ever_rcdom"
|
name = "markup5ever_rcdom"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
|
@ -10038,17 +9993,6 @@ dependencies = [
|
||||||
"syn 2.0.101",
|
"syn 2.0.101",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "match_token"
|
|
||||||
version = "0.35.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ac84fd3f360fcc43dc5f5d186f02a94192761a080e8bc58621ad4d12296a58cf"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"syn 2.0.101",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "matchers"
|
name = "matchers"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
@ -14460,21 +14404,6 @@ version = "1.2.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "scraper"
|
|
||||||
version = "0.24.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e5f3a24d916e78954af99281a455168d4a9515d65eca99a18da1b813689c4ad9"
|
|
||||||
dependencies = [
|
|
||||||
"cssparser",
|
|
||||||
"ego-tree",
|
|
||||||
"getopts",
|
|
||||||
"html5ever 0.35.0",
|
|
||||||
"precomputed-hash",
|
|
||||||
"selectors",
|
|
||||||
"tendril",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "scratch"
|
name = "scratch"
|
||||||
version = "1.0.8"
|
version = "1.0.8"
|
||||||
|
@ -14719,25 +14648,6 @@ dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "selectors"
|
|
||||||
version = "0.31.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "5685b6ae43bfcf7d2e7dfcfb5d8e8f61b46442c902531e41a32a9a8bf0ee0fb6"
|
|
||||||
dependencies = [
|
|
||||||
"bitflags 2.9.0",
|
|
||||||
"cssparser",
|
|
||||||
"derive_more 2.0.1",
|
|
||||||
"fxhash",
|
|
||||||
"log",
|
|
||||||
"new_debug_unreachable",
|
|
||||||
"phf",
|
|
||||||
"phf_codegen",
|
|
||||||
"precomputed-hash",
|
|
||||||
"servo_arc",
|
|
||||||
"smallvec",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "self_cell"
|
name = "self_cell"
|
||||||
version = "1.2.0"
|
version = "1.2.0"
|
||||||
|
@ -14924,15 +14834,6 @@ dependencies = [
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "servo_arc"
|
|
||||||
version = "0.4.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "204ea332803bd95a0b60388590d59cf6468ec9becf626e2451f1d26a1d972de4"
|
|
||||||
dependencies = [
|
|
||||||
"stable_deref_trait",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "session"
|
name = "session"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
|
|
@ -574,7 +574,6 @@ rustls = { version = "0.23.26" }
|
||||||
rustls-platform-verifier = "0.5.0"
|
rustls-platform-verifier = "0.5.0"
|
||||||
scap = { git = "https://github.com/zed-industries/scap", rev = "808aa5c45b41e8f44729d02e38fd00a2fe2722e7", default-features = false }
|
scap = { git = "https://github.com/zed-industries/scap", rev = "808aa5c45b41e8f44729d02e38fd00a2fe2722e7", default-features = false }
|
||||||
schemars = { version = "1.0", features = ["indexmap2"] }
|
schemars = { version = "1.0", features = ["indexmap2"] }
|
||||||
scraper = "0.24.0"
|
|
||||||
semver = "1.0"
|
semver = "1.0"
|
||||||
serde = { version = "1.0", features = ["derive", "rc"] }
|
serde = { version = "1.0", features = ["derive", "rc"] }
|
||||||
serde_derive = { version = "1.0", features = ["deserialize_in_place"] }
|
serde_derive = { version = "1.0", features = ["deserialize_in_place"] }
|
||||||
|
|
|
@ -21,12 +21,13 @@ collections.workspace = true
|
||||||
editor.workspace = true
|
editor.workspace = true
|
||||||
fs.workspace = true
|
fs.workspace = true
|
||||||
gpui.workspace = true
|
gpui.workspace = true
|
||||||
|
html5ever.workspace = true
|
||||||
language.workspace = true
|
language.workspace = true
|
||||||
linkify.workspace = true
|
linkify.workspace = true
|
||||||
log.workspace = true
|
log.workspace = true
|
||||||
|
markup5ever_rcdom.workspace = true
|
||||||
pretty_assertions.workspace = true
|
pretty_assertions.workspace = true
|
||||||
pulldown-cmark.workspace = true
|
pulldown-cmark.workspace = true
|
||||||
scraper.workspace = true
|
|
||||||
settings.workspace = true
|
settings.workspace = true
|
||||||
theme.workspace = true
|
theme.workspace = true
|
||||||
ui.workspace = true
|
ui.workspace = true
|
||||||
|
|
|
@ -2,9 +2,11 @@ use crate::markdown_elements::*;
|
||||||
use async_recursion::async_recursion;
|
use async_recursion::async_recursion;
|
||||||
use collections::FxHashMap;
|
use collections::FxHashMap;
|
||||||
use gpui::FontWeight;
|
use gpui::FontWeight;
|
||||||
|
use html5ever::{ParseOpts, local_name, parse_document, tendril::TendrilSink};
|
||||||
use language::LanguageRegistry;
|
use language::LanguageRegistry;
|
||||||
|
use markup5ever_rcdom::RcDom;
|
||||||
use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
|
use pulldown_cmark::{Alignment, Event, Options, Parser, Tag, TagEnd};
|
||||||
use std::{ops::Range, path::PathBuf, sync::Arc, vec};
|
use std::{cell::RefCell, ops::Range, path::PathBuf, rc::Rc, sync::Arc, vec};
|
||||||
use ui::{px, relative};
|
use ui::{px, relative};
|
||||||
|
|
||||||
pub async fn parse_markdown(
|
pub async fn parse_markdown(
|
||||||
|
@ -757,10 +759,19 @@ impl<'a> MarkdownParser<'a> {
|
||||||
let source_range = source_range.clone();
|
let source_range = source_range.clone();
|
||||||
match current {
|
match current {
|
||||||
Event::Html(html) => {
|
Event::Html(html) => {
|
||||||
let fragment = scraper::Html::parse_fragment(html);
|
let mut cursor = std::io::Cursor::new(html.as_bytes());
|
||||||
|
let Some(dom) = parse_document(RcDom::default(), ParseOpts::default())
|
||||||
|
.from_utf8()
|
||||||
|
.read_from(&mut cursor)
|
||||||
|
.ok()
|
||||||
|
else {
|
||||||
|
self.cursor += 1;
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
self.cursor += 1;
|
self.cursor += 1;
|
||||||
|
|
||||||
elements.extend(self.parse_html_image(fragment, source_range));
|
self.parse_html_node(source_range, &dom.document, &mut elements);
|
||||||
}
|
}
|
||||||
Event::End(TagEnd::CodeBlock) => {
|
Event::End(TagEnd::CodeBlock) => {
|
||||||
self.cursor += 1;
|
self.cursor += 1;
|
||||||
|
@ -775,6 +786,92 @@ impl<'a> MarkdownParser<'a> {
|
||||||
elements
|
elements
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn attr_value(
|
||||||
|
attrs: &RefCell<Vec<html5ever::Attribute>>,
|
||||||
|
name: html5ever::LocalName,
|
||||||
|
) -> Option<String> {
|
||||||
|
attrs.borrow().iter().find_map(|attr| {
|
||||||
|
if attr.name.local == name {
|
||||||
|
Some(attr.value.to_string())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_html_node(
|
||||||
|
&self,
|
||||||
|
source_range: Range<usize>,
|
||||||
|
node: &Rc<markup5ever_rcdom::Node>,
|
||||||
|
elements: &mut Vec<ParsedMarkdownElement>,
|
||||||
|
) {
|
||||||
|
match &node.data {
|
||||||
|
markup5ever_rcdom::NodeData::Document => {
|
||||||
|
self.consume_children(source_range, node, elements);
|
||||||
|
}
|
||||||
|
markup5ever_rcdom::NodeData::Doctype { .. } => {}
|
||||||
|
markup5ever_rcdom::NodeData::Text { contents } => {
|
||||||
|
elements.push(ParsedMarkdownElement::Paragraph(vec![
|
||||||
|
MarkdownParagraphChunk::Text(ParsedMarkdownText {
|
||||||
|
source_range,
|
||||||
|
contents: contents.borrow().to_string(),
|
||||||
|
highlights: Vec::default(),
|
||||||
|
region_ranges: Vec::default(),
|
||||||
|
regions: Vec::default(),
|
||||||
|
}),
|
||||||
|
]));
|
||||||
|
}
|
||||||
|
markup5ever_rcdom::NodeData::Comment { .. } => {}
|
||||||
|
markup5ever_rcdom::NodeData::Element { name, attrs, .. } => {
|
||||||
|
if local_name!("img") == name.local {
|
||||||
|
let Some(src) = Self::attr_value(attrs, local_name!("src")) else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
let Some(mut image) = Image::identify(
|
||||||
|
src.to_string(),
|
||||||
|
source_range,
|
||||||
|
self.file_location_directory.clone(),
|
||||||
|
) else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(alt) = Self::attr_value(attrs, local_name!("alt")) {
|
||||||
|
image.set_alt_text(alt.to_string().into());
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(width) = Self::attr_value(attrs, local_name!("width"))
|
||||||
|
.and_then(|width| Self::parse_length(&width))
|
||||||
|
{
|
||||||
|
image.set_width(width);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(height) = Self::attr_value(attrs, local_name!("height"))
|
||||||
|
.and_then(|height| Self::parse_length(&height))
|
||||||
|
{
|
||||||
|
image.set_height(height);
|
||||||
|
}
|
||||||
|
|
||||||
|
elements.push(ParsedMarkdownElement::Image(image));
|
||||||
|
} else {
|
||||||
|
self.consume_children(source_range, node, elements);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
markup5ever_rcdom::NodeData::ProcessingInstruction { .. } => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn consume_children(
|
||||||
|
&self,
|
||||||
|
source_range: Range<usize>,
|
||||||
|
node: &Rc<markup5ever_rcdom::Node>,
|
||||||
|
elements: &mut Vec<ParsedMarkdownElement>,
|
||||||
|
) {
|
||||||
|
for node in node.children.borrow().iter() {
|
||||||
|
self.parse_html_node(source_range.clone(), node, elements);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Parses the width/height attribute value of an html element (e.g. img element)
|
/// Parses the width/height attribute value of an html element (e.g. img element)
|
||||||
fn parse_length(value: &str) -> Option<ui::DefiniteLength> {
|
fn parse_length(value: &str) -> Option<ui::DefiniteLength> {
|
||||||
if value.ends_with("px") {
|
if value.ends_with("px") {
|
||||||
|
@ -797,51 +894,6 @@ impl<'a> MarkdownParser<'a> {
|
||||||
.map(|value| px(value).into())
|
.map(|value| px(value).into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_html_image(
|
|
||||||
&self,
|
|
||||||
html: scraper::Html,
|
|
||||||
source_range: Range<usize>,
|
|
||||||
) -> Vec<ParsedMarkdownElement> {
|
|
||||||
let mut images = Vec::new();
|
|
||||||
let selector = scraper::Selector::parse("img").unwrap();
|
|
||||||
|
|
||||||
for element in html.select(&selector) {
|
|
||||||
let Some(src) = element.attr("src") else {
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
|
|
||||||
let Some(mut image) = Image::identify(
|
|
||||||
src.to_string(),
|
|
||||||
source_range.clone(),
|
|
||||||
self.file_location_directory.clone(),
|
|
||||||
) else {
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(alt) = element.attr("alt") {
|
|
||||||
image.set_alt_text(alt.to_string().into());
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(width) = element
|
|
||||||
.attr("width")
|
|
||||||
.and_then(|width| Self::parse_length(width))
|
|
||||||
{
|
|
||||||
image.set_width(width);
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(height) = element
|
|
||||||
.attr("height")
|
|
||||||
.and_then(|height| Self::parse_length(height))
|
|
||||||
{
|
|
||||||
image.set_height(height);
|
|
||||||
}
|
|
||||||
|
|
||||||
images.push(ParsedMarkdownElement::Image(image));
|
|
||||||
}
|
|
||||||
|
|
||||||
images
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue