Add rustdoc_to_markdown crate (#12445)

This PR adds a new crate for converting rustdoc output to Markdown.

We're leveraging Servo's `html5ever` to parse the HTML content, and
then walking the DOM nodes to convert them to a Markdown string.

The Markdown output will continue to be refined, but it's in a place
where it should be reasonable.

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2024-05-29 16:05:16 -04:00 committed by GitHub
parent a22cd95f9d
commit 5bcb9ed017
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 420 additions and 17 deletions

View file

@ -0,0 +1,36 @@
//! Provides conversion from rustdoc's HTML output to Markdown.
#![deny(missing_docs)]
mod markdown_writer;
use anyhow::{Context, Result};
use html5ever::driver::ParseOpts;
use html5ever::parse_document;
use html5ever::tendril::TendrilSink;
use html5ever::tree_builder::TreeBuilderOpts;
use markup5ever_rcdom::RcDom;
use crate::markdown_writer::MarkdownWriter;
/// Converts the provided rustdoc HTML to Markdown.
///
/// The HTML is parsed into a DOM with `html5ever`, and the resulting document
/// node is then handed to a [`MarkdownWriter`] which produces the Markdown
/// string.
///
/// # Errors
///
/// Returns an error if the HTML bytes cannot be read into the parser, or if
/// the [`MarkdownWriter`] fails while converting the DOM to Markdown.
pub fn convert_rustdoc_to_markdown(html: &str) -> Result<String> {
    let parse_options = ParseOpts {
        tree_builder: TreeBuilderOpts {
            // Keep the `<!DOCTYPE ...>` declaration out of the tree that gets
            // passed to the writer.
            drop_doctype: true,
            ..Default::default()
        },
        ..Default::default()
    };

    let dom = parse_document(RcDom::default(), parse_options)
        .from_utf8()
        .read_from(&mut html.as_bytes())
        .context("failed to parse rustdoc HTML")?;

    // The context message names the actual target format (Markdown) so that
    // error chains point at the right stage of the conversion.
    MarkdownWriter::new()
        .run(&dom.document)
        .context("failed to convert rustdoc to Markdown")
}