Add tag handler for collecting crate items from rustdoc output (#12903)
This PR adds a tag handler for collecting crate items from rustdoc's HTML output. This will serve as the foundation for getting more insight into a crate's contents.

Release Notes:

- N/A
This commit is contained in:
parent
57b87be3a0
commit
8ccd2a0c99
8 changed files with 237 additions and 75 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -5072,10 +5072,12 @@ version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"html5ever",
|
"html5ever",
|
||||||
|
"indexmap 1.9.3",
|
||||||
"indoc",
|
"indoc",
|
||||||
"markup5ever_rcdom",
|
"markup5ever_rcdom",
|
||||||
"pretty_assertions",
|
"pretty_assertions",
|
||||||
"regex",
|
"regex",
|
||||||
|
"strum",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
use std::cell::RefCell;
|
||||||
|
use std::rc::Rc;
|
||||||
use std::sync::atomic::AtomicBool;
|
use std::sync::atomic::AtomicBool;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
@ -5,7 +7,7 @@ use anyhow::{anyhow, bail, Context, Result};
|
||||||
use assistant_slash_command::{SlashCommand, SlashCommandOutput, SlashCommandOutputSection};
|
use assistant_slash_command::{SlashCommand, SlashCommandOutput, SlashCommandOutputSection};
|
||||||
use futures::AsyncReadExt;
|
use futures::AsyncReadExt;
|
||||||
use gpui::{AppContext, Task, WeakView};
|
use gpui::{AppContext, Task, WeakView};
|
||||||
use html_to_markdown::{convert_html_to_markdown, markdown, HandleTag};
|
use html_to_markdown::{convert_html_to_markdown, markdown, TagHandler};
|
||||||
use http::{AsyncBody, HttpClient, HttpClientWithUrl};
|
use http::{AsyncBody, HttpClient, HttpClientWithUrl};
|
||||||
use language::LspAdapterDelegate;
|
use language::LspAdapterDelegate;
|
||||||
use ui::{prelude::*, ButtonLike, ElevationIndex};
|
use ui::{prelude::*, ButtonLike, ElevationIndex};
|
||||||
|
@ -59,24 +61,26 @@ impl FetchSlashCommand {
|
||||||
|
|
||||||
match content_type {
|
match content_type {
|
||||||
ContentType::Html => {
|
ContentType::Html => {
|
||||||
let mut handlers: Vec<Box<dyn HandleTag>> = vec![
|
let mut handlers: Vec<TagHandler> = vec![
|
||||||
Box::new(markdown::ParagraphHandler),
|
Rc::new(RefCell::new(markdown::ParagraphHandler)),
|
||||||
Box::new(markdown::HeadingHandler),
|
Rc::new(RefCell::new(markdown::HeadingHandler)),
|
||||||
Box::new(markdown::ListHandler),
|
Rc::new(RefCell::new(markdown::ListHandler)),
|
||||||
Box::new(markdown::TableHandler::new()),
|
Rc::new(RefCell::new(markdown::TableHandler::new())),
|
||||||
Box::new(markdown::StyledTextHandler),
|
Rc::new(RefCell::new(markdown::StyledTextHandler)),
|
||||||
];
|
];
|
||||||
if url.contains("wikipedia.org") {
|
if url.contains("wikipedia.org") {
|
||||||
use html_to_markdown::structure::wikipedia;
|
use html_to_markdown::structure::wikipedia;
|
||||||
|
|
||||||
handlers.push(Box::new(wikipedia::WikipediaChromeRemover));
|
handlers.push(Rc::new(RefCell::new(wikipedia::WikipediaChromeRemover)));
|
||||||
handlers.push(Box::new(wikipedia::WikipediaInfoboxHandler));
|
handlers.push(Rc::new(RefCell::new(wikipedia::WikipediaInfoboxHandler)));
|
||||||
handlers.push(Box::new(wikipedia::WikipediaCodeHandler::new()));
|
handlers.push(Rc::new(
|
||||||
|
RefCell::new(wikipedia::WikipediaCodeHandler::new()),
|
||||||
|
));
|
||||||
} else {
|
} else {
|
||||||
handlers.push(Box::new(markdown::CodeHandler));
|
handlers.push(Rc::new(RefCell::new(markdown::CodeHandler)));
|
||||||
}
|
}
|
||||||
|
|
||||||
convert_html_to_markdown(&body[..], handlers)
|
convert_html_to_markdown(&body[..], &mut handlers)
|
||||||
}
|
}
|
||||||
ContentType::Plaintext => Ok(std::str::from_utf8(&body)?.to_owned()),
|
ContentType::Plaintext => Ok(std::str::from_utf8(&body)?.to_owned()),
|
||||||
ContentType::Json => {
|
ContentType::Json => {
|
||||||
|
|
|
@ -42,10 +42,9 @@ impl RustdocSlashCommand {
|
||||||
local_cargo_doc_path.push("index.html");
|
local_cargo_doc_path.push("index.html");
|
||||||
|
|
||||||
if let Ok(contents) = fs.load(&local_cargo_doc_path).await {
|
if let Ok(contents) = fs.load(&local_cargo_doc_path).await {
|
||||||
return Ok((
|
let (markdown, _items) = convert_rustdoc_to_markdown(contents.as_bytes())?;
|
||||||
RustdocSource::Local,
|
|
||||||
convert_rustdoc_to_markdown(contents.as_bytes())?,
|
return Ok((RustdocSource::Local, markdown));
|
||||||
));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -78,10 +77,9 @@ impl RustdocSlashCommand {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((
|
let (markdown, _items) = convert_rustdoc_to_markdown(&body[..])?;
|
||||||
RustdocSource::DocsDotRs,
|
|
||||||
convert_rustdoc_to_markdown(&body[..])?,
|
Ok((RustdocSource::DocsDotRs, markdown))
|
||||||
))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn path_to_cargo_toml(project: Model<Project>, cx: &mut AppContext) -> Option<Arc<Path>> {
|
fn path_to_cargo_toml(project: Model<Project>, cx: &mut AppContext) -> Option<Arc<Path>> {
|
||||||
|
|
|
@ -14,8 +14,10 @@ path = "src/html_to_markdown.rs"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
html5ever.workspace = true
|
html5ever.workspace = true
|
||||||
|
indexmap.workspace = true
|
||||||
markup5ever_rcdom.workspace = true
|
markup5ever_rcdom.workspace = true
|
||||||
regex.workspace = true
|
regex.workspace = true
|
||||||
|
strum.workspace = true
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
indoc.workspace = true
|
indoc.workspace = true
|
||||||
|
|
|
@ -5,7 +5,9 @@ pub mod markdown;
|
||||||
mod markdown_writer;
|
mod markdown_writer;
|
||||||
pub mod structure;
|
pub mod structure;
|
||||||
|
|
||||||
|
use std::cell::RefCell;
|
||||||
use std::io::Read;
|
use std::io::Read;
|
||||||
|
use std::rc::Rc;
|
||||||
|
|
||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
use html5ever::driver::ParseOpts;
|
use html5ever::driver::ParseOpts;
|
||||||
|
@ -19,13 +21,11 @@ use crate::markdown::{
|
||||||
};
|
};
|
||||||
use crate::markdown_writer::MarkdownWriter;
|
use crate::markdown_writer::MarkdownWriter;
|
||||||
|
|
||||||
pub use crate::markdown_writer::HandleTag;
|
pub use crate::markdown_writer::{HandleTag, TagHandler};
|
||||||
|
use crate::structure::rustdoc::RustdocItem;
|
||||||
|
|
||||||
/// Converts the provided HTML to Markdown.
|
/// Converts the provided HTML to Markdown.
|
||||||
pub fn convert_html_to_markdown(
|
pub fn convert_html_to_markdown(html: impl Read, handlers: &mut Vec<TagHandler>) -> Result<String> {
|
||||||
html: impl Read,
|
|
||||||
handlers: Vec<Box<dyn HandleTag>>,
|
|
||||||
) -> Result<String> {
|
|
||||||
let dom = parse_html(html).context("failed to parse HTML")?;
|
let dom = parse_html(html).context("failed to parse HTML")?;
|
||||||
|
|
||||||
let markdown_writer = MarkdownWriter::new();
|
let markdown_writer = MarkdownWriter::new();
|
||||||
|
@ -37,21 +37,32 @@ pub fn convert_html_to_markdown(
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Converts the provided rustdoc HTML to Markdown.
|
/// Converts the provided rustdoc HTML to Markdown.
|
||||||
pub fn convert_rustdoc_to_markdown(html: impl Read) -> Result<String> {
|
pub fn convert_rustdoc_to_markdown(html: impl Read) -> Result<(String, Vec<RustdocItem>)> {
|
||||||
convert_html_to_markdown(
|
let item_collector = Rc::new(RefCell::new(structure::rustdoc::RustdocItemCollector::new()));
|
||||||
html,
|
|
||||||
vec![
|
let mut handlers: Vec<TagHandler> = vec![
|
||||||
Box::new(ParagraphHandler),
|
Rc::new(RefCell::new(ParagraphHandler)),
|
||||||
Box::new(HeadingHandler),
|
Rc::new(RefCell::new(HeadingHandler)),
|
||||||
Box::new(ListHandler),
|
Rc::new(RefCell::new(ListHandler)),
|
||||||
Box::new(TableHandler::new()),
|
Rc::new(RefCell::new(TableHandler::new())),
|
||||||
Box::new(StyledTextHandler),
|
Rc::new(RefCell::new(StyledTextHandler)),
|
||||||
Box::new(structure::rustdoc::RustdocChromeRemover),
|
Rc::new(RefCell::new(structure::rustdoc::RustdocChromeRemover)),
|
||||||
Box::new(structure::rustdoc::RustdocHeadingHandler),
|
Rc::new(RefCell::new(structure::rustdoc::RustdocHeadingHandler)),
|
||||||
Box::new(structure::rustdoc::RustdocCodeHandler),
|
Rc::new(RefCell::new(structure::rustdoc::RustdocCodeHandler)),
|
||||||
Box::new(structure::rustdoc::RustdocItemHandler),
|
Rc::new(RefCell::new(structure::rustdoc::RustdocItemHandler)),
|
||||||
],
|
item_collector.clone(),
|
||||||
)
|
];
|
||||||
|
|
||||||
|
let markdown = convert_html_to_markdown(html, &mut handlers)?;
|
||||||
|
|
||||||
|
let items = item_collector
|
||||||
|
.borrow()
|
||||||
|
.items
|
||||||
|
.values()
|
||||||
|
.cloned()
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
Ok((markdown, items))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_html(mut html: impl Read) -> Result<RcDom> {
|
fn parse_html(mut html: impl Read) -> Result<RcDom> {
|
||||||
|
@ -77,6 +88,20 @@ mod tests {
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
fn rustdoc_handlers() -> Vec<TagHandler> {
|
||||||
|
vec![
|
||||||
|
Rc::new(RefCell::new(ParagraphHandler)),
|
||||||
|
Rc::new(RefCell::new(HeadingHandler)),
|
||||||
|
Rc::new(RefCell::new(ListHandler)),
|
||||||
|
Rc::new(RefCell::new(TableHandler::new())),
|
||||||
|
Rc::new(RefCell::new(StyledTextHandler)),
|
||||||
|
Rc::new(RefCell::new(structure::rustdoc::RustdocChromeRemover)),
|
||||||
|
Rc::new(RefCell::new(structure::rustdoc::RustdocHeadingHandler)),
|
||||||
|
Rc::new(RefCell::new(structure::rustdoc::RustdocCodeHandler)),
|
||||||
|
Rc::new(RefCell::new(structure::rustdoc::RustdocItemHandler)),
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_main_heading_buttons_get_removed() {
|
fn test_main_heading_buttons_get_removed() {
|
||||||
let html = indoc! {r##"
|
let html = indoc! {r##"
|
||||||
|
@ -93,7 +118,7 @@ mod tests {
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||||
expected
|
expected
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -113,7 +138,7 @@ mod tests {
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||||
expected
|
expected
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -159,7 +184,7 @@ mod tests {
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||||
expected
|
expected
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -178,7 +203,7 @@ mod tests {
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||||
expected
|
expected
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -220,7 +245,7 @@ mod tests {
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||||
expected
|
expected
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -252,7 +277,7 @@ mod tests {
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||||
expected
|
expected
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -288,7 +313,7 @@ mod tests {
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||||
expected
|
expected
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -342,7 +367,7 @@ mod tests {
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
convert_rustdoc_to_markdown(html.as_bytes()).unwrap(),
|
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||||
expected
|
expected
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
|
use std::cell::RefCell;
|
||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
|
use std::rc::Rc;
|
||||||
use std::sync::OnceLock;
|
use std::sync::OnceLock;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
@ -22,6 +24,8 @@ pub enum StartTagOutcome {
|
||||||
Skip,
|
Skip,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A reference-counted, interior-mutable handle to any [`HandleTag`] implementation.
pub type TagHandler = Rc<RefCell<dyn HandleTag>>;
|
||||||
|
|
||||||
pub struct MarkdownWriter {
|
pub struct MarkdownWriter {
|
||||||
current_element_stack: VecDeque<HtmlElement>,
|
current_element_stack: VecDeque<HtmlElement>,
|
||||||
pub(crate) markdown: String,
|
pub(crate) markdown: String,
|
||||||
|
@ -60,12 +64,8 @@ impl MarkdownWriter {
|
||||||
self.push_str("\n\n");
|
self.push_str("\n\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn run(
|
pub fn run(mut self, root_node: &Handle, handlers: &mut Vec<TagHandler>) -> Result<String> {
|
||||||
mut self,
|
self.visit_node(&root_node, handlers)?;
|
||||||
root_node: &Handle,
|
|
||||||
mut handlers: Vec<Box<dyn HandleTag>>,
|
|
||||||
) -> Result<String> {
|
|
||||||
self.visit_node(&root_node, &mut handlers)?;
|
|
||||||
Ok(Self::prettify_markdown(self.markdown))
|
Ok(Self::prettify_markdown(self.markdown))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -76,7 +76,7 @@ impl MarkdownWriter {
|
||||||
markdown.trim().to_string()
|
markdown.trim().to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn visit_node(&mut self, node: &Handle, handlers: &mut [Box<dyn HandleTag>]) -> Result<()> {
|
fn visit_node(&mut self, node: &Handle, handlers: &mut [TagHandler]) -> Result<()> {
|
||||||
let mut current_element = None;
|
let mut current_element = None;
|
||||||
|
|
||||||
match node.data {
|
match node.data {
|
||||||
|
@ -128,14 +128,10 @@ impl MarkdownWriter {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn start_tag(
|
fn start_tag(&mut self, tag: &HtmlElement, handlers: &mut [TagHandler]) -> StartTagOutcome {
|
||||||
&mut self,
|
|
||||||
tag: &HtmlElement,
|
|
||||||
handlers: &mut [Box<dyn HandleTag>],
|
|
||||||
) -> StartTagOutcome {
|
|
||||||
for handler in handlers {
|
for handler in handlers {
|
||||||
if handler.should_handle(tag.tag.as_str()) {
|
if handler.borrow().should_handle(tag.tag.as_str()) {
|
||||||
match handler.handle_tag_start(tag, self) {
|
match handler.borrow_mut().handle_tag_start(tag, self) {
|
||||||
StartTagOutcome::Continue => {}
|
StartTagOutcome::Continue => {}
|
||||||
StartTagOutcome::Skip => return StartTagOutcome::Skip,
|
StartTagOutcome::Skip => return StartTagOutcome::Skip,
|
||||||
}
|
}
|
||||||
|
@ -145,17 +141,17 @@ impl MarkdownWriter {
|
||||||
StartTagOutcome::Continue
|
StartTagOutcome::Continue
|
||||||
}
|
}
|
||||||
|
|
||||||
fn end_tag(&mut self, tag: &HtmlElement, handlers: &mut [Box<dyn HandleTag>]) {
|
fn end_tag(&mut self, tag: &HtmlElement, handlers: &mut [TagHandler]) {
|
||||||
for handler in handlers {
|
for handler in handlers {
|
||||||
if handler.should_handle(tag.tag.as_str()) {
|
if handler.borrow().should_handle(tag.tag.as_str()) {
|
||||||
handler.handle_tag_end(tag, self);
|
handler.borrow_mut().handle_tag_end(tag, self);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn visit_text(&mut self, text: String, handlers: &mut [Box<dyn HandleTag>]) -> Result<()> {
|
fn visit_text(&mut self, text: String, handlers: &mut [TagHandler]) -> Result<()> {
|
||||||
for handler in handlers {
|
for handler in handlers {
|
||||||
match handler.handle_text(&text, self) {
|
match handler.borrow_mut().handle_text(&text, self) {
|
||||||
HandlerOutcome::Handled => return Ok(()),
|
HandlerOutcome::Handled => return Ok(()),
|
||||||
HandlerOutcome::NoOp => {}
|
HandlerOutcome::NoOp => {}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
use indexmap::IndexMap;
|
||||||
|
use strum::{EnumIter, IntoEnumIterator};
|
||||||
|
|
||||||
use crate::html_element::HtmlElement;
|
use crate::html_element::HtmlElement;
|
||||||
use crate::markdown_writer::{HandleTag, HandlerOutcome, MarkdownWriter, StartTagOutcome};
|
use crate::markdown_writer::{HandleTag, HandlerOutcome, MarkdownWriter, StartTagOutcome};
|
||||||
|
|
||||||
|
@ -203,3 +206,132 @@ impl HandleTag for RustdocChromeRemover {
|
||||||
StartTagOutcome::Continue
|
StartTagOutcome::Continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, EnumIter)]
|
||||||
|
pub enum RustdocItemKind {
|
||||||
|
Mod,
|
||||||
|
Macro,
|
||||||
|
Struct,
|
||||||
|
Enum,
|
||||||
|
Constant,
|
||||||
|
Trait,
|
||||||
|
Function,
|
||||||
|
TypeAlias,
|
||||||
|
AttributeMacro,
|
||||||
|
DeriveMacro,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RustdocItemKind {
|
||||||
|
const fn class(&self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Self::Mod => "mod",
|
||||||
|
Self::Macro => "macro",
|
||||||
|
Self::Struct => "struct",
|
||||||
|
Self::Enum => "enum",
|
||||||
|
Self::Constant => "constant",
|
||||||
|
Self::Trait => "trait",
|
||||||
|
Self::Function => "fn",
|
||||||
|
Self::TypeAlias => "type",
|
||||||
|
Self::AttributeMacro => "attr",
|
||||||
|
Self::DeriveMacro => "derive",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct RustdocItem {
|
||||||
|
pub kind: RustdocItemKind,
|
||||||
|
pub name: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RustdocItem {
|
||||||
|
pub fn url_path(&self) -> String {
|
||||||
|
let name = &self.name;
|
||||||
|
match self.kind {
|
||||||
|
RustdocItemKind::Mod => format!("{name}/index.html"),
|
||||||
|
RustdocItemKind::Macro
|
||||||
|
| RustdocItemKind::Struct
|
||||||
|
| RustdocItemKind::Enum
|
||||||
|
| RustdocItemKind::Constant
|
||||||
|
| RustdocItemKind::Trait
|
||||||
|
| RustdocItemKind::Function
|
||||||
|
| RustdocItemKind::TypeAlias
|
||||||
|
| RustdocItemKind::AttributeMacro
|
||||||
|
| RustdocItemKind::DeriveMacro => {
|
||||||
|
format!("{kind}.{name}.html", kind = self.kind.class())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct RustdocItemCollector {
|
||||||
|
pub items: IndexMap<(RustdocItemKind, String), RustdocItem>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RustdocItemCollector {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
items: IndexMap::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_item(tag: &HtmlElement) -> Option<RustdocItem> {
|
||||||
|
if tag.tag.as_str() != "a" {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let href = tag.attr("href")?;
|
||||||
|
if href == "#" {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
for kind in RustdocItemKind::iter() {
|
||||||
|
if tag.has_class(kind.class()) {
|
||||||
|
let name = href
|
||||||
|
.trim_start_matches(&format!("{}.", kind.class()))
|
||||||
|
.trim_end_matches("/index.html")
|
||||||
|
.trim_end_matches(".html");
|
||||||
|
|
||||||
|
return Some(RustdocItem {
|
||||||
|
kind,
|
||||||
|
name: name.to_owned(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl HandleTag for RustdocItemCollector {
|
||||||
|
fn should_handle(&self, tag: &str) -> bool {
|
||||||
|
tag == "a"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle_tag_start(
|
||||||
|
&mut self,
|
||||||
|
tag: &HtmlElement,
|
||||||
|
writer: &mut MarkdownWriter,
|
||||||
|
) -> StartTagOutcome {
|
||||||
|
match tag.tag.as_str() {
|
||||||
|
"a" => {
|
||||||
|
let is_reexport = writer.current_element_stack().iter().any(|element| {
|
||||||
|
if let Some(id) = element.attr("id") {
|
||||||
|
id.starts_with("reexport.")
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if !is_reexport {
|
||||||
|
if let Some(item) = Self::parse_item(tag) {
|
||||||
|
self.items.insert((item.kind, item.name.clone()), item);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
StartTagOutcome::Continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -144,20 +144,23 @@ impl HandleTag for WikipediaCodeHandler {
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use std::cell::RefCell;
|
||||||
|
use std::rc::Rc;
|
||||||
|
|
||||||
use indoc::indoc;
|
use indoc::indoc;
|
||||||
use pretty_assertions::assert_eq;
|
use pretty_assertions::assert_eq;
|
||||||
|
|
||||||
use crate::{convert_html_to_markdown, markdown};
|
use crate::{convert_html_to_markdown, markdown, TagHandler};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
fn wikipedia_handlers() -> Vec<Box<dyn HandleTag>> {
|
fn wikipedia_handlers() -> Vec<TagHandler> {
|
||||||
vec![
|
vec![
|
||||||
Box::new(markdown::ParagraphHandler),
|
Rc::new(RefCell::new(markdown::ParagraphHandler)),
|
||||||
Box::new(markdown::HeadingHandler),
|
Rc::new(RefCell::new(markdown::HeadingHandler)),
|
||||||
Box::new(markdown::ListHandler),
|
Rc::new(RefCell::new(markdown::ListHandler)),
|
||||||
Box::new(markdown::StyledTextHandler),
|
Rc::new(RefCell::new(markdown::StyledTextHandler)),
|
||||||
Box::new(WikipediaChromeRemover),
|
Rc::new(RefCell::new(WikipediaChromeRemover)),
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -173,7 +176,7 @@ mod tests {
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
convert_html_to_markdown(html.as_bytes(), wikipedia_handlers()).unwrap(),
|
convert_html_to_markdown(html.as_bytes(), &mut wikipedia_handlers()).unwrap(),
|
||||||
expected
|
expected
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue