rustdoc_to_markdown: Clean up heading spacing (#12456)
This PR cleans up the spacing around the Markdown headings in the output so that they are consistent. Release Notes: - N/A
This commit is contained in:
parent
08881828ce
commit
abec028e58
3 changed files with 28 additions and 7 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -8639,6 +8639,7 @@ dependencies = [
|
||||||
"html5ever",
|
"html5ever",
|
||||||
"indoc",
|
"indoc",
|
||||||
"markup5ever_rcdom",
|
"markup5ever_rcdom",
|
||||||
|
"regex",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
|
@ -15,6 +15,7 @@ path = "src/rustdoc_to_markdown.rs"
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
html5ever.workspace = true
|
html5ever.workspace = true
|
||||||
markup5ever_rcdom.workspace = true
|
markup5ever_rcdom.workspace = true
|
||||||
|
regex.workspace = true
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
indoc.workspace = true
|
indoc.workspace = true
|
||||||
|
|
|
@ -1,9 +1,21 @@
|
||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
|
use std::sync::OnceLock;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use html5ever::Attribute;
|
use html5ever::Attribute;
|
||||||
use markup5ever_rcdom::{Handle, NodeData};
|
use markup5ever_rcdom::{Handle, NodeData};
|
||||||
|
use regex::Regex;
|
||||||
|
|
||||||
|
fn empty_line_regex() -> &'static Regex {
|
||||||
|
static REGEX: OnceLock<Regex> = OnceLock::new();
|
||||||
|
REGEX.get_or_init(|| Regex::new(r"^\s*$").unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn more_than_three_newlines_regex() -> &'static Regex {
|
||||||
|
static REGEX: OnceLock<Regex> = OnceLock::new();
|
||||||
|
REGEX.get_or_init(|| Regex::new(r"\n{3,}").unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
struct HtmlElement {
|
struct HtmlElement {
|
||||||
|
@ -48,7 +60,14 @@ impl MarkdownWriter {
|
||||||
|
|
||||||
pub fn run(mut self, root_node: &Handle) -> Result<String> {
|
pub fn run(mut self, root_node: &Handle) -> Result<String> {
|
||||||
self.visit_node(&root_node)?;
|
self.visit_node(&root_node)?;
|
||||||
Ok(self.markdown.trim().to_string())
|
Ok(Self::prettify_markdown(self.markdown))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn prettify_markdown(markdown: String) -> String {
|
||||||
|
let markdown = empty_line_regex().replace_all(&markdown, "");
|
||||||
|
let markdown = more_than_three_newlines_regex().replace_all(&markdown, "\n\n");
|
||||||
|
|
||||||
|
markdown.trim().to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn visit_node(&mut self, node: &Handle) -> Result<()> {
|
fn visit_node(&mut self, node: &Handle) -> Result<()> {
|
||||||
|
@ -107,12 +126,12 @@ impl MarkdownWriter {
|
||||||
fn start_tag(&mut self, tag: &HtmlElement) -> StartTagOutcome {
|
fn start_tag(&mut self, tag: &HtmlElement) -> StartTagOutcome {
|
||||||
match tag.tag.as_str() {
|
match tag.tag.as_str() {
|
||||||
"head" | "script" | "nav" => return StartTagOutcome::Skip,
|
"head" | "script" | "nav" => return StartTagOutcome::Skip,
|
||||||
"h1" => self.push_str("\n# "),
|
"h1" => self.push_str("\n\n# "),
|
||||||
"h2" => self.push_str("\n## "),
|
"h2" => self.push_str("\n\n## "),
|
||||||
"h3" => self.push_str("\n### "),
|
"h3" => self.push_str("\n\n### "),
|
||||||
"h4" => self.push_str("\n#### "),
|
"h4" => self.push_str("\n\n#### "),
|
||||||
"h5" => self.push_str("\n##### "),
|
"h5" => self.push_str("\n\n##### "),
|
||||||
"h6" => self.push_str("\n###### "),
|
"h6" => self.push_str("\n\n###### "),
|
||||||
"code" => {
|
"code" => {
|
||||||
if !self.is_inside("pre") {
|
if !self.is_inside("pre") {
|
||||||
self.push_str("`")
|
self.push_str("`")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue