Add tag handler for collecting crate items from rustdoc output (#12903)

This PR adds a tag handler for collecting crate items from rustdoc's
HTML output.

This will serve as the foundation for getting more insight into a
crate's contents.

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2024-06-11 15:56:37 -04:00 committed by GitHub
parent 57b87be3a0
commit 8ccd2a0c99
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 237 additions and 75 deletions

View file

@ -1,3 +1,5 @@
use std::cell::RefCell;
use std::rc::Rc;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
@ -5,7 +7,7 @@ use anyhow::{anyhow, bail, Context, Result};
use assistant_slash_command::{SlashCommand, SlashCommandOutput, SlashCommandOutputSection};
use futures::AsyncReadExt;
use gpui::{AppContext, Task, WeakView};
use html_to_markdown::{convert_html_to_markdown, markdown, HandleTag};
use html_to_markdown::{convert_html_to_markdown, markdown, TagHandler};
use http::{AsyncBody, HttpClient, HttpClientWithUrl};
use language::LspAdapterDelegate;
use ui::{prelude::*, ButtonLike, ElevationIndex};
@ -59,24 +61,26 @@ impl FetchSlashCommand {
match content_type {
ContentType::Html => {
let mut handlers: Vec<Box<dyn HandleTag>> = vec![
Box::new(markdown::ParagraphHandler),
Box::new(markdown::HeadingHandler),
Box::new(markdown::ListHandler),
Box::new(markdown::TableHandler::new()),
Box::new(markdown::StyledTextHandler),
let mut handlers: Vec<TagHandler> = vec![
Rc::new(RefCell::new(markdown::ParagraphHandler)),
Rc::new(RefCell::new(markdown::HeadingHandler)),
Rc::new(RefCell::new(markdown::ListHandler)),
Rc::new(RefCell::new(markdown::TableHandler::new())),
Rc::new(RefCell::new(markdown::StyledTextHandler)),
];
if url.contains("wikipedia.org") {
use html_to_markdown::structure::wikipedia;
handlers.push(Box::new(wikipedia::WikipediaChromeRemover));
handlers.push(Box::new(wikipedia::WikipediaInfoboxHandler));
handlers.push(Box::new(wikipedia::WikipediaCodeHandler::new()));
handlers.push(Rc::new(RefCell::new(wikipedia::WikipediaChromeRemover)));
handlers.push(Rc::new(RefCell::new(wikipedia::WikipediaInfoboxHandler)));
handlers.push(Rc::new(
RefCell::new(wikipedia::WikipediaCodeHandler::new()),
));
} else {
handlers.push(Box::new(markdown::CodeHandler));
handlers.push(Rc::new(RefCell::new(markdown::CodeHandler)));
}
convert_html_to_markdown(&body[..], handlers)
convert_html_to_markdown(&body[..], &mut handlers)
}
ContentType::Plaintext => Ok(std::str::from_utf8(&body)?.to_owned()),
ContentType::Json => {

View file

@ -42,10 +42,9 @@ impl RustdocSlashCommand {
local_cargo_doc_path.push("index.html");
if let Ok(contents) = fs.load(&local_cargo_doc_path).await {
return Ok((
RustdocSource::Local,
convert_rustdoc_to_markdown(contents.as_bytes())?,
));
let (markdown, _items) = convert_rustdoc_to_markdown(contents.as_bytes())?;
return Ok((RustdocSource::Local, markdown));
}
}
@ -78,10 +77,9 @@ impl RustdocSlashCommand {
);
}
Ok((
RustdocSource::DocsDotRs,
convert_rustdoc_to_markdown(&body[..])?,
))
let (markdown, _items) = convert_rustdoc_to_markdown(&body[..])?;
Ok((RustdocSource::DocsDotRs, markdown))
}
fn path_to_cargo_toml(project: Model<Project>, cx: &mut AppContext) -> Option<Arc<Path>> {