ZIm/crates/indexed_docs/src/providers/rustdoc.rs
Marshall Bowers e3b0de5dda
assistant: Auto-suggest crates for /docs when using the docs.rs provider (#16169)
This PR improves the initial experience of using `/docs docs-rs` with an
empty index.

We now show a brief explainer of what is expected (a crate name) as well
as list some popular Rust crates to try:

<img width="540" alt="Screenshot 2024-08-13 at 12 25 39 PM"
src="https://github.com/user-attachments/assets/942de250-2901-45df-9e3e-52ff3b3fc517">

Release Notes:

- N/A
2024-08-13 12:37:13 -04:00

269 lines
8 KiB
Rust

mod item;
mod to_markdown;
use futures::future::BoxFuture;
pub use item::*;
pub use to_markdown::convert_rustdoc_to_markdown;
use std::path::PathBuf;
use std::sync::Arc;
use anyhow::{bail, Context, Result};
use async_trait::async_trait;
use collections::{HashSet, VecDeque};
use fs::Fs;
use futures::{AsyncReadExt, FutureExt};
use http_client::{AsyncBody, HttpClient, HttpClientWithUrl};
use crate::{IndexedDocsDatabase, IndexedDocsProvider, PackageName, ProviderId};
#[derive(Debug)]
struct RustdocItemWithHistory {
pub item: RustdocItem,
#[cfg(debug_assertions)]
pub history: Vec<String>,
}
pub struct LocalRustdocProvider {
fs: Arc<dyn Fs>,
cargo_workspace_root: PathBuf,
}
impl LocalRustdocProvider {
pub fn id() -> ProviderId {
ProviderId("rustdoc".into())
}
pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
Self {
fs,
cargo_workspace_root,
}
}
}
#[async_trait]
impl IndexedDocsProvider for LocalRustdocProvider {
fn id(&self) -> ProviderId {
Self::id()
}
fn database_path(&self) -> PathBuf {
paths::support_dir().join("docs/rust/rustdoc-db.1.mdb")
}
async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
index_rustdoc(package, database, {
move |crate_name, item| {
let fs = self.fs.clone();
let cargo_workspace_root = self.cargo_workspace_root.clone();
let crate_name = crate_name.clone();
let item = item.cloned();
async move {
let target_doc_path = cargo_workspace_root.join("target/doc");
let mut local_cargo_doc_path = target_doc_path.join(crate_name.as_ref());
if !fs.is_dir(&local_cargo_doc_path).await {
let cargo_doc_exists_at_all = fs.is_dir(&target_doc_path).await;
if cargo_doc_exists_at_all {
bail!(
"no docs directory for '{crate_name}'. if this is a valid crate name, try running `cargo doc`"
);
} else {
bail!("no cargo doc directory. run `cargo doc`");
}
}
if let Some(item) = item {
local_cargo_doc_path.push(item.url_path());
} else {
local_cargo_doc_path.push("index.html");
}
let Ok(contents) = fs.load(&local_cargo_doc_path).await else {
return Ok(None);
};
Ok(Some(contents))
}
.boxed()
}
})
.await
}
}
pub struct DocsDotRsProvider {
http_client: Arc<HttpClientWithUrl>,
}
impl DocsDotRsProvider {
/// The list of crates to auto-suggest for the docs.rs provider when
/// the index is empty.
///
/// List has been chosen loosely based on [this list](https://lib.rs/std) of
/// popular Rust libraries.
///
/// Keep this alphabetized.
pub const AUTO_SUGGESTED_CRATES: &'static [&'static str] = &[
"anyhow",
"axum",
"chrono",
"itertools",
"rand",
"regex",
"serde",
"strum",
"thiserror",
"tokio",
];
pub fn id() -> ProviderId {
ProviderId("docs-rs".into())
}
pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
Self { http_client }
}
}
#[async_trait]
impl IndexedDocsProvider for DocsDotRsProvider {
fn id(&self) -> ProviderId {
Self::id()
}
fn database_path(&self) -> PathBuf {
paths::support_dir().join("docs/rust/docs-rs-db.1.mdb")
}
async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
index_rustdoc(package, database, {
move |crate_name, item| {
let http_client = self.http_client.clone();
let crate_name = crate_name.clone();
let item = item.cloned();
async move {
let version = "latest";
let path = format!(
"{crate_name}/{version}/{crate_name}{item_path}",
item_path = item
.map(|item| format!("/{}", item.url_path()))
.unwrap_or_default()
);
let mut response = http_client
.get(
&format!("https://docs.rs/{path}"),
AsyncBody::default(),
true,
)
.await?;
let mut body = Vec::new();
response
.body_mut()
.read_to_end(&mut body)
.await
.context("error reading docs.rs response body")?;
if response.status().is_client_error() {
let text = String::from_utf8_lossy(body.as_slice());
bail!(
"status error {}, response: {text:?}",
response.status().as_u16()
);
}
Ok(Some(String::from_utf8(body)?))
}
.boxed()
}
})
.await
}
}
async fn index_rustdoc(
package: PackageName,
database: Arc<IndexedDocsDatabase>,
fetch_page: impl Fn(&PackageName, Option<&RustdocItem>) -> BoxFuture<'static, Result<Option<String>>>
+ Send
+ Sync,
) -> Result<()> {
let Some(package_root_content) = fetch_page(&package, None).await? else {
return Ok(());
};
let (crate_root_markdown, items) =
convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
database
.insert(package.to_string(), crate_root_markdown)
.await?;
let mut seen_items = HashSet::from_iter(items.clone());
let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
item,
#[cfg(debug_assertions)]
history: Vec::new(),
}));
while let Some(item_with_history) = items_to_visit.pop_front() {
let item = &item_with_history.item;
let Some(result) = fetch_page(&package, Some(&item)).await.with_context(|| {
#[cfg(debug_assertions)]
{
format!(
"failed to fetch {item:?}: {history:?}",
history = item_with_history.history
)
}
#[cfg(not(debug_assertions))]
{
format!("failed to fetch {item:?}")
}
})?
else {
continue;
};
let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
database
.insert(format!("{package}::{}", item.display()), markdown)
.await?;
let parent_item = item;
for mut item in referenced_items {
if seen_items.contains(&item) {
continue;
}
seen_items.insert(item.clone());
item.path.extend(parent_item.path.clone());
match parent_item.kind {
RustdocItemKind::Mod => {
item.path.push(parent_item.name.clone());
}
_ => {}
}
items_to_visit.push_back(RustdocItemWithHistory {
#[cfg(debug_assertions)]
history: {
let mut history = item_with_history.history.clone();
history.push(item.url_path());
history
},
item,
});
}
}
Ok(())
}