Make HTML to Markdown conversion more pluggable (#12653)

This PR overhauls the HTML to Markdown conversion functionality in order
to make it more pluggable. This will ultimately allow for supporting a
variety of different HTML input structures (both natively and via
extensions).

As part of this, the `rustdoc_to_markdown` crate has been renamed to
`html_to_markdown`.

The `MarkdownWriter` now accepts a list of trait objects that can be
used to drive the conversion of the HTML into Markdown. Right now we
have some generic handler implementations for going from plain HTML
elements to their Markdown equivalents, as well as some rustdoc-specific
ones.

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2024-06-04 16:14:26 -04:00 committed by GitHub
parent 1c617474fe
commit 2d9479667f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 671 additions and 320 deletions

View file

@@ -0,0 +1,22 @@
# Cargo manifest for the `html_to_markdown` crate.
[package]
name = "html_to_markdown"
version = "0.1.0"
edition = "2021"
# Never publish this crate to a registry.
publish = false
license = "GPL-3.0-or-later"

# Inherit the lint configuration from the workspace manifest.
[lints]
workspace = true

# The library root is not the default `src/lib.rs`, so it is set explicitly.
[lib]
path = "src/html_to_markdown.rs"

# Every dependency below inherits its specification from the workspace
# manifest rather than declaring its own version here.
[dependencies]
anyhow.workspace = true
html5ever.workspace = true
markup5ever_rcdom.workspace = true
regex.workspace = true

# Used only when building tests, examples, and benchmarks.
[dev-dependencies]
indoc.workspace = true
pretty_assertions.workspace = true