html_to_markdown: Move TableHandler out of rustdoc (#12697)
This PR moves the `TableHandler` (previously named `RustdocTableHandler`) out of the `rustdoc` module and into the general `markdown` module, as it doesn't contain anything specific to rustdoc.

Release Notes:

- N/A
This commit is contained in:
parent
071270fe88
commit
f3460d440c
3 changed files with 86 additions and 84 deletions
|
@ -16,7 +16,9 @@ use html5ever::tendril::TendrilSink;
|
||||||
use html5ever::tree_builder::TreeBuilderOpts;
|
use html5ever::tree_builder::TreeBuilderOpts;
|
||||||
use markup5ever_rcdom::RcDom;
|
use markup5ever_rcdom::RcDom;
|
||||||
|
|
||||||
use crate::markdown::{HeadingHandler, ListHandler, ParagraphHandler, StyledTextHandler};
|
use crate::markdown::{
|
||||||
|
HeadingHandler, ListHandler, ParagraphHandler, StyledTextHandler, TableHandler,
|
||||||
|
};
|
||||||
use crate::markdown_writer::{HandleTag, MarkdownWriter};
|
use crate::markdown_writer::{HandleTag, MarkdownWriter};
|
||||||
|
|
||||||
/// Converts the provided HTML to Markdown.
|
/// Converts the provided HTML to Markdown.
|
||||||
|
@ -27,11 +29,11 @@ pub fn convert_html_to_markdown(html: impl Read) -> Result<String> {
|
||||||
Box::new(ParagraphHandler),
|
Box::new(ParagraphHandler),
|
||||||
Box::new(HeadingHandler),
|
Box::new(HeadingHandler),
|
||||||
Box::new(ListHandler),
|
Box::new(ListHandler),
|
||||||
|
Box::new(TableHandler::new()),
|
||||||
Box::new(StyledTextHandler),
|
Box::new(StyledTextHandler),
|
||||||
Box::new(structure::rustdoc::RustdocChromeRemover),
|
Box::new(structure::rustdoc::RustdocChromeRemover),
|
||||||
Box::new(structure::rustdoc::RustdocHeadingHandler),
|
Box::new(structure::rustdoc::RustdocHeadingHandler),
|
||||||
Box::new(structure::rustdoc::RustdocCodeHandler),
|
Box::new(structure::rustdoc::RustdocCodeHandler),
|
||||||
Box::new(structure::rustdoc::RustdocTableHandler::new()),
|
|
||||||
Box::new(structure::rustdoc::RustdocItemHandler),
|
Box::new(structure::rustdoc::RustdocItemHandler),
|
||||||
];
|
];
|
||||||
|
|
||||||
|
@ -51,11 +53,11 @@ pub fn convert_rustdoc_to_markdown(html: impl Read) -> Result<String> {
|
||||||
Box::new(ParagraphHandler),
|
Box::new(ParagraphHandler),
|
||||||
Box::new(HeadingHandler),
|
Box::new(HeadingHandler),
|
||||||
Box::new(ListHandler),
|
Box::new(ListHandler),
|
||||||
|
Box::new(TableHandler::new()),
|
||||||
Box::new(StyledTextHandler),
|
Box::new(StyledTextHandler),
|
||||||
Box::new(structure::rustdoc::RustdocChromeRemover),
|
Box::new(structure::rustdoc::RustdocChromeRemover),
|
||||||
Box::new(structure::rustdoc::RustdocHeadingHandler),
|
Box::new(structure::rustdoc::RustdocHeadingHandler),
|
||||||
Box::new(structure::rustdoc::RustdocCodeHandler),
|
Box::new(structure::rustdoc::RustdocCodeHandler),
|
||||||
Box::new(structure::rustdoc::RustdocTableHandler::new()),
|
|
||||||
Box::new(structure::rustdoc::RustdocItemHandler),
|
Box::new(structure::rustdoc::RustdocItemHandler),
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|
|
@ -101,6 +101,87 @@ impl HandleTag for ListHandler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct TableHandler {
|
||||||
|
/// The number of columns in the current `<table>`.
|
||||||
|
current_table_columns: usize,
|
||||||
|
is_first_th: bool,
|
||||||
|
is_first_td: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TableHandler {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
current_table_columns: 0,
|
||||||
|
is_first_th: true,
|
||||||
|
is_first_td: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl HandleTag for TableHandler {
|
||||||
|
fn should_handle(&self, tag: &str) -> bool {
|
||||||
|
match tag {
|
||||||
|
"table" | "thead" | "tbody" | "tr" | "th" | "td" => true,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle_tag_start(
|
||||||
|
&mut self,
|
||||||
|
tag: &HtmlElement,
|
||||||
|
writer: &mut MarkdownWriter,
|
||||||
|
) -> StartTagOutcome {
|
||||||
|
match tag.tag.as_str() {
|
||||||
|
"thead" => writer.push_blank_line(),
|
||||||
|
"tr" => writer.push_newline(),
|
||||||
|
"th" => {
|
||||||
|
self.current_table_columns += 1;
|
||||||
|
if self.is_first_th {
|
||||||
|
self.is_first_th = false;
|
||||||
|
} else {
|
||||||
|
writer.push_str(" ");
|
||||||
|
}
|
||||||
|
writer.push_str("| ");
|
||||||
|
}
|
||||||
|
"td" => {
|
||||||
|
if self.is_first_td {
|
||||||
|
self.is_first_td = false;
|
||||||
|
} else {
|
||||||
|
writer.push_str(" ");
|
||||||
|
}
|
||||||
|
writer.push_str("| ");
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
StartTagOutcome::Continue
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
|
||||||
|
match tag.tag.as_str() {
|
||||||
|
"thead" => {
|
||||||
|
writer.push_newline();
|
||||||
|
for ix in 0..self.current_table_columns {
|
||||||
|
if ix > 0 {
|
||||||
|
writer.push_str(" ");
|
||||||
|
}
|
||||||
|
writer.push_str("| ---");
|
||||||
|
}
|
||||||
|
writer.push_str(" |");
|
||||||
|
self.is_first_th = true;
|
||||||
|
}
|
||||||
|
"tr" => {
|
||||||
|
writer.push_str(" |");
|
||||||
|
self.is_first_td = true;
|
||||||
|
}
|
||||||
|
"table" => {
|
||||||
|
self.current_table_columns = 0;
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct StyledTextHandler;
|
pub struct StyledTextHandler;
|
||||||
|
|
||||||
impl HandleTag for StyledTextHandler {
|
impl HandleTag for StyledTextHandler {
|
||||||
|
|
|
@ -96,87 +96,6 @@ impl HandleTag for RustdocCodeHandler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct RustdocTableHandler {
|
|
||||||
/// The number of columns in the current `<table>`.
|
|
||||||
current_table_columns: usize,
|
|
||||||
is_first_th: bool,
|
|
||||||
is_first_td: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl RustdocTableHandler {
|
|
||||||
pub fn new() -> Self {
|
|
||||||
Self {
|
|
||||||
current_table_columns: 0,
|
|
||||||
is_first_th: true,
|
|
||||||
is_first_td: true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl HandleTag for RustdocTableHandler {
|
|
||||||
fn should_handle(&self, tag: &str) -> bool {
|
|
||||||
match tag {
|
|
||||||
"table" | "thead" | "tbody" | "tr" | "th" | "td" => true,
|
|
||||||
_ => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn handle_tag_start(
|
|
||||||
&mut self,
|
|
||||||
tag: &HtmlElement,
|
|
||||||
writer: &mut MarkdownWriter,
|
|
||||||
) -> StartTagOutcome {
|
|
||||||
match tag.tag.as_str() {
|
|
||||||
"thead" => writer.push_blank_line(),
|
|
||||||
"tr" => writer.push_newline(),
|
|
||||||
"th" => {
|
|
||||||
self.current_table_columns += 1;
|
|
||||||
if self.is_first_th {
|
|
||||||
self.is_first_th = false;
|
|
||||||
} else {
|
|
||||||
writer.push_str(" ");
|
|
||||||
}
|
|
||||||
writer.push_str("| ");
|
|
||||||
}
|
|
||||||
"td" => {
|
|
||||||
if self.is_first_td {
|
|
||||||
self.is_first_td = false;
|
|
||||||
} else {
|
|
||||||
writer.push_str(" ");
|
|
||||||
}
|
|
||||||
writer.push_str("| ");
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
|
|
||||||
StartTagOutcome::Continue
|
|
||||||
}
|
|
||||||
|
|
||||||
fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
|
|
||||||
match tag.tag.as_str() {
|
|
||||||
"thead" => {
|
|
||||||
writer.push_newline();
|
|
||||||
for ix in 0..self.current_table_columns {
|
|
||||||
if ix > 0 {
|
|
||||||
writer.push_str(" ");
|
|
||||||
}
|
|
||||||
writer.push_str("| ---");
|
|
||||||
}
|
|
||||||
writer.push_str(" |");
|
|
||||||
self.is_first_th = true;
|
|
||||||
}
|
|
||||||
"tr" => {
|
|
||||||
writer.push_str(" |");
|
|
||||||
self.is_first_td = true;
|
|
||||||
}
|
|
||||||
"table" => {
|
|
||||||
self.current_table_columns = 0;
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const RUSTDOC_ITEM_NAME_CLASS: &str = "item-name";
|
const RUSTDOC_ITEM_NAME_CLASS: &str = "item-name";
|
||||||
|
|
||||||
pub struct RustdocItemHandler;
|
pub struct RustdocItemHandler;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue