Add tag handler for collecting crate items from rustdoc output (#12903)
This PR adds a tag handler for collecting crate items from rustdoc's HTML output. This will serve as the foundation for getting more insight into a crate's contents. Release Notes: - N/A
This commit is contained in:
parent
57b87be3a0
commit
8ccd2a0c99
8 changed files with 237 additions and 75 deletions
|
@ -1,3 +1,6 @@
|
|||
use indexmap::IndexMap;
|
||||
use strum::{EnumIter, IntoEnumIterator};
|
||||
|
||||
use crate::html_element::HtmlElement;
|
||||
use crate::markdown_writer::{HandleTag, HandlerOutcome, MarkdownWriter, StartTagOutcome};
|
||||
|
||||
|
@ -203,3 +206,132 @@ impl HandleTag for RustdocChromeRemover {
|
|||
StartTagOutcome::Continue
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, EnumIter)]
|
||||
pub enum RustdocItemKind {
|
||||
Mod,
|
||||
Macro,
|
||||
Struct,
|
||||
Enum,
|
||||
Constant,
|
||||
Trait,
|
||||
Function,
|
||||
TypeAlias,
|
||||
AttributeMacro,
|
||||
DeriveMacro,
|
||||
}
|
||||
|
||||
impl RustdocItemKind {
|
||||
const fn class(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Mod => "mod",
|
||||
Self::Macro => "macro",
|
||||
Self::Struct => "struct",
|
||||
Self::Enum => "enum",
|
||||
Self::Constant => "constant",
|
||||
Self::Trait => "trait",
|
||||
Self::Function => "fn",
|
||||
Self::TypeAlias => "type",
|
||||
Self::AttributeMacro => "attr",
|
||||
Self::DeriveMacro => "derive",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RustdocItem {
|
||||
pub kind: RustdocItemKind,
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
impl RustdocItem {
|
||||
pub fn url_path(&self) -> String {
|
||||
let name = &self.name;
|
||||
match self.kind {
|
||||
RustdocItemKind::Mod => format!("{name}/index.html"),
|
||||
RustdocItemKind::Macro
|
||||
| RustdocItemKind::Struct
|
||||
| RustdocItemKind::Enum
|
||||
| RustdocItemKind::Constant
|
||||
| RustdocItemKind::Trait
|
||||
| RustdocItemKind::Function
|
||||
| RustdocItemKind::TypeAlias
|
||||
| RustdocItemKind::AttributeMacro
|
||||
| RustdocItemKind::DeriveMacro => {
|
||||
format!("{kind}.{name}.html", kind = self.kind.class())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RustdocItemCollector {
|
||||
pub items: IndexMap<(RustdocItemKind, String), RustdocItem>,
|
||||
}
|
||||
|
||||
impl RustdocItemCollector {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
items: IndexMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_item(tag: &HtmlElement) -> Option<RustdocItem> {
|
||||
if tag.tag.as_str() != "a" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let href = tag.attr("href")?;
|
||||
if href == "#" {
|
||||
return None;
|
||||
}
|
||||
|
||||
for kind in RustdocItemKind::iter() {
|
||||
if tag.has_class(kind.class()) {
|
||||
let name = href
|
||||
.trim_start_matches(&format!("{}.", kind.class()))
|
||||
.trim_end_matches("/index.html")
|
||||
.trim_end_matches(".html");
|
||||
|
||||
return Some(RustdocItem {
|
||||
kind,
|
||||
name: name.to_owned(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl HandleTag for RustdocItemCollector {
|
||||
fn should_handle(&self, tag: &str) -> bool {
|
||||
tag == "a"
|
||||
}
|
||||
|
||||
fn handle_tag_start(
|
||||
&mut self,
|
||||
tag: &HtmlElement,
|
||||
writer: &mut MarkdownWriter,
|
||||
) -> StartTagOutcome {
|
||||
match tag.tag.as_str() {
|
||||
"a" => {
|
||||
let is_reexport = writer.current_element_stack().iter().any(|element| {
|
||||
if let Some(id) = element.attr("id") {
|
||||
id.starts_with("reexport.")
|
||||
} else {
|
||||
false
|
||||
}
|
||||
});
|
||||
|
||||
if !is_reexport {
|
||||
if let Some(item) = Self::parse_item(tag) {
|
||||
self.items.insert((item.kind, item.name.clone()), item);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
StartTagOutcome::Continue
|
||||
}
|
||||
}
|
||||
|
|
|
@ -144,20 +144,23 @@ impl HandleTag for WikipediaCodeHandler {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::cell::RefCell;
|
||||
use std::rc::Rc;
|
||||
|
||||
use indoc::indoc;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use crate::{convert_html_to_markdown, markdown};
|
||||
use crate::{convert_html_to_markdown, markdown, TagHandler};
|
||||
|
||||
use super::*;
|
||||
|
||||
fn wikipedia_handlers() -> Vec<Box<dyn HandleTag>> {
|
||||
fn wikipedia_handlers() -> Vec<TagHandler> {
|
||||
vec![
|
||||
Box::new(markdown::ParagraphHandler),
|
||||
Box::new(markdown::HeadingHandler),
|
||||
Box::new(markdown::ListHandler),
|
||||
Box::new(markdown::StyledTextHandler),
|
||||
Box::new(WikipediaChromeRemover),
|
||||
Rc::new(RefCell::new(markdown::ParagraphHandler)),
|
||||
Rc::new(RefCell::new(markdown::HeadingHandler)),
|
||||
Rc::new(RefCell::new(markdown::ListHandler)),
|
||||
Rc::new(RefCell::new(markdown::StyledTextHandler)),
|
||||
Rc::new(RefCell::new(WikipediaChromeRemover)),
|
||||
]
|
||||
}
|
||||
|
||||
|
@ -173,7 +176,7 @@ mod tests {
|
|||
.trim();
|
||||
|
||||
assert_eq!(
|
||||
convert_html_to_markdown(html.as_bytes(), wikipedia_handlers()).unwrap(),
|
||||
convert_html_to_markdown(html.as_bytes(), &mut wikipedia_handlers()).unwrap(),
|
||||
expected
|
||||
)
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue