assistant: Add /fetch slash command (#12645)

This PR adds a new `/fetch` slash command to the Assistant for fetching
the content of an arbitrary URL as Markdown.

Currently it's just using the same HTML to Markdown conversion that
`/rustdoc` uses, but I'll be working to refine the output to be more
widely useful.

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2024-06-04 11:56:23 -04:00 committed by GitHub
parent 910f668f4d
commit e4bb666eab
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 163 additions and 10 deletions

View file

@ -16,8 +16,31 @@ use markup5ever_rcdom::RcDom;
use crate::markdown_writer::MarkdownWriter;
/// Converts the provided HTML to Markdown.
///
/// The input is parsed with `parse_html`, and the resulting document is then
/// rendered by a freshly constructed `MarkdownWriter`.
///
/// # Errors
///
/// Returns an error (with added context) if the HTML cannot be parsed or if
/// the Markdown conversion fails.
pub fn convert_html_to_markdown(html: impl Read) -> Result<String> {
    let parsed = parse_html(html).context("failed to parse HTML")?;

    // Render directly from a temporary writer; a writer is only needed for
    // this single pass over the document.
    let rendered = MarkdownWriter::new()
        .run(&parsed.document)
        .context("failed to convert HTML to Markdown")?;

    Ok(rendered)
}
/// Converts the provided rustdoc HTML to Markdown.
///
/// The input is parsed with `parse_html`, and the resulting document is then
/// rendered by a freshly constructed `MarkdownWriter`.
///
/// # Errors
///
/// Returns an error (with added context) if the rustdoc HTML cannot be parsed
/// or if the Markdown conversion fails.
// NOTE(review): two signature lines follow; this looks like the pre-change
// (`mut html`) and post-change (`html`) signatures of a diff shown together —
// confirm which one belongs in the file.
pub fn convert_rustdoc_to_markdown(mut html: impl Read) -> Result<String> {
pub fn convert_rustdoc_to_markdown(html: impl Read) -> Result<String> {
let dom = parse_html(html).context("failed to parse rustdoc HTML")?;
let markdown_writer = MarkdownWriter::new();
let markdown = markdown_writer
.run(&dom.document)
.context("failed to convert rustdoc HTML to Markdown")?;
Ok(markdown)
}
fn parse_html(mut html: impl Read) -> Result<RcDom> {
let parse_options = ParseOpts {
tree_builder: TreeBuilderOpts {
drop_doctype: true,
@ -28,14 +51,9 @@ pub fn convert_rustdoc_to_markdown(mut html: impl Read) -> Result<String> {
let dom = parse_document(RcDom::default(), parse_options)
.from_utf8()
.read_from(&mut html)
.context("failed to parse rustdoc HTML")?;
.context("failed to parse HTML document")?;
let markdown_writer = MarkdownWriter::new();
let markdown = markdown_writer
.run(&dom.document)
.context("failed to convert rustdoc to HTML")?;
Ok(markdown)
Ok(dom)
}
#[cfg(test)]