Add tag handler for collecting crate items from rustdoc output (#12903)
This PR adds a tag handler for collecting crate items from rustdoc's HTML output. This will serve as the foundation for getting more insight into a crate's contents.

Release Notes:

- N/A
This commit is contained in:
parent
57b87be3a0
commit
8ccd2a0c99
8 changed files with 237 additions and 75 deletions
|
@ -5,7 +5,9 @@ pub mod markdown;
|
|||
mod markdown_writer;
|
||||
pub mod structure;
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::io::Read;
|
||||
use std::rc::Rc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use html5ever::driver::ParseOpts;
|
||||
|
@ -19,13 +21,11 @@ use crate::markdown::{
|
|||
};
|
||||
use crate::markdown_writer::MarkdownWriter;
|
||||
|
||||
pub use crate::markdown_writer::HandleTag;
|
||||
pub use crate::markdown_writer::{HandleTag, TagHandler};
|
||||
use crate::structure::rustdoc::RustdocItem;
|
||||
|
||||
/// Converts the provided HTML to Markdown.
|
||||
pub fn convert_html_to_markdown(
|
||||
html: impl Read,
|
||||
handlers: Vec<Box<dyn HandleTag>>,
|
||||
) -> Result<String> {
|
||||
pub fn convert_html_to_markdown(html: impl Read, handlers: &mut Vec<TagHandler>) -> Result<String> {
|
||||
let dom = parse_html(html).context("failed to parse HTML")?;
|
||||
|
||||
let markdown_writer = MarkdownWriter::new();
|
||||
|
@ -37,21 +37,32 @@ pub fn convert_html_to_markdown(
|
|||
}
|
||||
|
||||
/// Converts the provided rustdoc HTML to Markdown.
|
||||
pub fn convert_rustdoc_to_markdown(html: impl Read) -> Result<String> {
|
||||
convert_html_to_markdown(
|
||||
html,
|
||||
vec![
|
||||
Box::new(ParagraphHandler),
|
||||
Box::new(HeadingHandler),
|
||||
Box::new(ListHandler),
|
||||
Box::new(TableHandler::new()),
|
||||
Box::new(StyledTextHandler),
|
||||
Box::new(structure::rustdoc::RustdocChromeRemover),
|
||||
Box::new(structure::rustdoc::RustdocHeadingHandler),
|
||||
Box::new(structure::rustdoc::RustdocCodeHandler),
|
||||
Box::new(structure::rustdoc::RustdocItemHandler),
|
||||
],
|
||||
)
|
||||
pub fn convert_rustdoc_to_markdown(html: impl Read) -> Result<(String, Vec<RustdocItem>)> {
|
||||
let item_collector = Rc::new(RefCell::new(structure::rustdoc::RustdocItemCollector::new()));
|
||||
|
||||
let mut handlers: Vec<TagHandler> = vec![
|
||||
Rc::new(RefCell::new(ParagraphHandler)),
|
||||
Rc::new(RefCell::new(HeadingHandler)),
|
||||
Rc::new(RefCell::new(ListHandler)),
|
||||
Rc::new(RefCell::new(TableHandler::new())),
|
||||
Rc::new(RefCell::new(StyledTextHandler)),
|
||||
Rc::new(RefCell::new(structure::rustdoc::RustdocChromeRemover)),
|
||||
Rc::new(RefCell::new(structure::rustdoc::RustdocHeadingHandler)),
|
||||
Rc::new(RefCell::new(structure::rustdoc::RustdocCodeHandler)),
|
||||
Rc::new(RefCell::new(structure::rustdoc::RustdocItemHandler)),
|
||||
item_collector.clone(),
|
||||
];
|
||||
|
||||
let markdown = convert_html_to_markdown(html, &mut handlers)?;
|
||||
|
||||
let items = item_collector
|
||||
.borrow()
|
||||
.items
|
||||
.values()
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok((markdown, items))
|
||||
}
|
||||
|
||||
fn parse_html(mut html: impl Read) -> Result<RcDom> {
|
||||
|
@ -77,6 +88,20 @@ mod tests {
|
|||
|
||||
use super::*;
|
||||
|
||||
fn rustdoc_handlers() -> Vec<TagHandler> {
|
||||
vec![
|
||||
Rc::new(RefCell::new(ParagraphHandler)),
|
||||
Rc::new(RefCell::new(HeadingHandler)),
|
||||
Rc::new(RefCell::new(ListHandler)),
|
||||
Rc::new(RefCell::new(TableHandler::new())),
|
||||
Rc::new(RefCell::new(StyledTextHandler)),
|
||||
Rc::new(RefCell::new(structure::rustdoc::RustdocChromeRemover)),
|
||||
Rc::new(RefCell::new(structure::rustdoc::RustdocHeadingHandler)),
|
||||
Rc::new(RefCell::new(structure::rustdoc::RustdocCodeHandler)),
|
||||
Rc::new(RefCell::new(structure::rustdoc::RustdocItemHandler)),
|
||||
]
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_main_heading_buttons_get_removed() {
|
||||
let html = indoc! {r##"
|
||||
|
@ -93,7 +118,7 @@ mod tests {
|
|||
.trim();
|
||||
|
||||
assert_eq!(
|
||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
||||
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||
expected
|
||||
)
|
||||
}
|
||||
|
@ -113,7 +138,7 @@ mod tests {
|
|||
.trim();
|
||||
|
||||
assert_eq!(
|
||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
||||
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||
expected
|
||||
)
|
||||
}
|
||||
|
@ -159,7 +184,7 @@ mod tests {
|
|||
.trim();
|
||||
|
||||
assert_eq!(
|
||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
||||
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||
expected
|
||||
)
|
||||
}
|
||||
|
@ -178,7 +203,7 @@ mod tests {
|
|||
.trim();
|
||||
|
||||
assert_eq!(
|
||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
||||
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||
expected
|
||||
)
|
||||
}
|
||||
|
@ -220,7 +245,7 @@ mod tests {
|
|||
.trim();
|
||||
|
||||
assert_eq!(
|
||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
||||
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||
expected
|
||||
)
|
||||
}
|
||||
|
@ -252,7 +277,7 @@ mod tests {
|
|||
.trim();
|
||||
|
||||
assert_eq!(
|
||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
||||
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||
expected
|
||||
)
|
||||
}
|
||||
|
@ -288,7 +313,7 @@ mod tests {
|
|||
.trim();
|
||||
|
||||
assert_eq!(
|
||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
||||
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||
expected
|
||||
)
|
||||
}
|
||||
|
@ -342,7 +367,7 @@ mod tests {
|
|||
.trim();
|
||||
|
||||
assert_eq!(
|
||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
||||
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||
expected
|
||||
)
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue