Make HTML to Markdown conversion more pluggable (#12653)
This PR overhauls the HTML to Markdown conversion functionality in order to make it more pluggable. This will ultimately allow for supporting a variety of different HTML input structures (both natively and via extensions). As part of this, the `rustdoc_to_markdown` crate has been renamed to `html_to_markdown`. The `MarkdownWriter` now accepts a list of trait objects that can be used to drive the conversion of the HTML into Markdown. Right now we have some generic handler implementations for going from plain HTML elements to their Markdown equivalents, as well as some rustdoc-specific ones. Release Notes: - N/A
This commit is contained in:
parent
1c617474fe
commit
2d9479667f
15 changed files with 671 additions and 320 deletions
135
crates/html_to_markdown/src/markdown.rs
Normal file
135
crates/html_to_markdown/src/markdown.rs
Normal file
|
@ -0,0 +1,135 @@
|
|||
use crate::html_element::HtmlElement;
|
||||
use crate::markdown_writer::{HandleTag, MarkdownWriter, StartTagOutcome};
|
||||
|
||||
pub struct ParagraphHandler;
|
||||
|
||||
impl HandleTag for ParagraphHandler {
|
||||
fn should_handle(&self, _tag: &str) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn handle_tag_start(
|
||||
&mut self,
|
||||
tag: &HtmlElement,
|
||||
writer: &mut MarkdownWriter,
|
||||
) -> StartTagOutcome {
|
||||
if tag.is_inline() && writer.is_inside("p") {
|
||||
if let Some(parent) = writer.current_element_stack().iter().last() {
|
||||
if !parent.is_inline() {
|
||||
if !(writer.markdown.ends_with(' ') || writer.markdown.ends_with('\n')) {
|
||||
writer.push_str(" ");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match tag.tag.as_str() {
|
||||
"p" => writer.push_blank_line(),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
StartTagOutcome::Continue
|
||||
}
|
||||
}
|
||||
|
||||
pub struct HeadingHandler;
|
||||
|
||||
impl HandleTag for HeadingHandler {
|
||||
fn should_handle(&self, tag: &str) -> bool {
|
||||
match tag {
|
||||
"h1" | "h2" | "h3" | "h4" | "h5" | "h6" => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_tag_start(
|
||||
&mut self,
|
||||
tag: &HtmlElement,
|
||||
writer: &mut MarkdownWriter,
|
||||
) -> StartTagOutcome {
|
||||
match tag.tag.as_str() {
|
||||
"h1" => writer.push_str("\n\n# "),
|
||||
"h2" => writer.push_str("\n\n## "),
|
||||
"h3" => writer.push_str("\n\n### "),
|
||||
"h4" => writer.push_str("\n\n#### "),
|
||||
"h5" => writer.push_str("\n\n##### "),
|
||||
"h6" => writer.push_str("\n\n###### "),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
StartTagOutcome::Continue
|
||||
}
|
||||
|
||||
fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
|
||||
match tag.tag.as_str() {
|
||||
"h1" | "h2" | "h3" | "h4" | "h5" | "h6" => writer.push_blank_line(),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ListHandler;
|
||||
|
||||
impl HandleTag for ListHandler {
|
||||
fn should_handle(&self, tag: &str) -> bool {
|
||||
match tag {
|
||||
"ul" | "ol" | "li" => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_tag_start(
|
||||
&mut self,
|
||||
tag: &HtmlElement,
|
||||
writer: &mut MarkdownWriter,
|
||||
) -> StartTagOutcome {
|
||||
match tag.tag.as_str() {
|
||||
"ul" | "ol" => writer.push_newline(),
|
||||
"li" => writer.push_str("- "),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
StartTagOutcome::Continue
|
||||
}
|
||||
|
||||
fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
|
||||
match tag.tag.as_str() {
|
||||
"ul" | "ol" => writer.push_newline(),
|
||||
"li" => writer.push_newline(),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StyledTextHandler;
|
||||
|
||||
impl HandleTag for StyledTextHandler {
|
||||
fn should_handle(&self, tag: &str) -> bool {
|
||||
match tag {
|
||||
"strong" | "em" => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_tag_start(
|
||||
&mut self,
|
||||
tag: &HtmlElement,
|
||||
writer: &mut MarkdownWriter,
|
||||
) -> StartTagOutcome {
|
||||
match tag.tag.as_str() {
|
||||
"strong" => writer.push_str("**"),
|
||||
"em" => writer.push_str("_"),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
StartTagOutcome::Continue
|
||||
}
|
||||
|
||||
fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
|
||||
match tag.tag.as_str() {
|
||||
"strong" => writer.push_str("**"),
|
||||
"em" => writer.push_str("_"),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue