rustdoc: Index crates progressively (#13011)
This PR updates the rustdoc indexing to be more progressive. Rather than waiting until we've crawled the entire crate to begin writing to the database, we instead start writing the docs as we go. This makes it so you can start getting completions while the indexing is still running. Release Notes: - N/A
This commit is contained in:
parent
0705fb9b97
commit
6181ac6bad
4 changed files with 31 additions and 40 deletions
232
crates/rustdoc/src/indexer.rs
Normal file
232
crates/rustdoc/src/indexer.rs
Normal file
|
@ -0,0 +1,232 @@
|
|||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use collections::{HashSet, VecDeque};
|
||||
use fs::Fs;
|
||||
use futures::AsyncReadExt;
|
||||
use http::{AsyncBody, HttpClient, HttpClientWithUrl};
|
||||
|
||||
use crate::{convert_rustdoc_to_markdown, RustdocDatabase, RustdocItem, RustdocItemKind};
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum RustdocSource {
|
||||
/// The docs were sourced from local `cargo doc` output.
|
||||
Local,
|
||||
/// The docs were sourced from `docs.rs`.
|
||||
DocsDotRs,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait RustdocProvider {
|
||||
async fn fetch_page(
|
||||
&self,
|
||||
crate_name: &str,
|
||||
item: Option<&RustdocItem>,
|
||||
) -> Result<Option<String>>;
|
||||
}
|
||||
|
||||
pub struct LocalProvider {
|
||||
fs: Arc<dyn Fs>,
|
||||
cargo_workspace_root: PathBuf,
|
||||
}
|
||||
|
||||
impl LocalProvider {
|
||||
pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
|
||||
Self {
|
||||
fs,
|
||||
cargo_workspace_root,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl RustdocProvider for LocalProvider {
|
||||
async fn fetch_page(
|
||||
&self,
|
||||
crate_name: &str,
|
||||
item: Option<&RustdocItem>,
|
||||
) -> Result<Option<String>> {
|
||||
let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
|
||||
local_cargo_doc_path.push(&crate_name);
|
||||
if let Some(item) = item {
|
||||
local_cargo_doc_path.push(item.url_path());
|
||||
} else {
|
||||
local_cargo_doc_path.push("index.html");
|
||||
}
|
||||
|
||||
println!("Fetching {}", local_cargo_doc_path.display());
|
||||
|
||||
let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
Ok(Some(contents))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocsDotRsProvider {
|
||||
http_client: Arc<HttpClientWithUrl>,
|
||||
}
|
||||
|
||||
impl DocsDotRsProvider {
|
||||
pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
|
||||
Self { http_client }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl RustdocProvider for DocsDotRsProvider {
|
||||
async fn fetch_page(
|
||||
&self,
|
||||
crate_name: &str,
|
||||
item: Option<&RustdocItem>,
|
||||
) -> Result<Option<String>> {
|
||||
let version = "latest";
|
||||
let path = format!(
|
||||
"{crate_name}/{version}/{crate_name}{item_path}",
|
||||
item_path = item
|
||||
.map(|item| format!("/{}", item.url_path()))
|
||||
.unwrap_or_default()
|
||||
);
|
||||
|
||||
println!("Fetching {}", &format!("https://docs.rs/{path}"));
|
||||
|
||||
let mut response = self
|
||||
.http_client
|
||||
.get(
|
||||
&format!("https://docs.rs/{path}"),
|
||||
AsyncBody::default(),
|
||||
true,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut body = Vec::new();
|
||||
response
|
||||
.body_mut()
|
||||
.read_to_end(&mut body)
|
||||
.await
|
||||
.context("error reading docs.rs response body")?;
|
||||
|
||||
if response.status().is_client_error() {
|
||||
let text = String::from_utf8_lossy(body.as_slice());
|
||||
bail!(
|
||||
"status error {}, response: {text:?}",
|
||||
response.status().as_u16()
|
||||
);
|
||||
}
|
||||
|
||||
Ok(Some(String::from_utf8(body)?))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct RustdocItemWithHistory {
|
||||
pub item: RustdocItem,
|
||||
#[cfg(debug_assertions)]
|
||||
pub history: Vec<String>,
|
||||
}
|
||||
|
||||
pub(crate) struct RustdocIndexer {
|
||||
database: Arc<RustdocDatabase>,
|
||||
provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
|
||||
}
|
||||
|
||||
impl RustdocIndexer {
|
||||
pub fn new(
|
||||
database: Arc<RustdocDatabase>,
|
||||
provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
|
||||
) -> Self {
|
||||
Self { database, provider }
|
||||
}
|
||||
|
||||
/// Indexes the crate with the given name.
|
||||
pub async fn index(&self, crate_name: String) -> Result<()> {
|
||||
let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let (crate_root_markdown, items) =
|
||||
convert_rustdoc_to_markdown(crate_root_content.as_bytes())?;
|
||||
|
||||
self.database
|
||||
.insert(crate_name.clone(), None, crate_root_markdown)
|
||||
.await?;
|
||||
|
||||
let mut seen_items = HashSet::from_iter(items.clone());
|
||||
let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
|
||||
VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
|
||||
item,
|
||||
#[cfg(debug_assertions)]
|
||||
history: Vec::new(),
|
||||
}));
|
||||
|
||||
while let Some(item_with_history) = items_to_visit.pop_front() {
|
||||
let item = &item_with_history.item;
|
||||
|
||||
println!("Visiting {:?} {:?} {}", &item.kind, &item.path, &item.name);
|
||||
|
||||
let Some(result) = self
|
||||
.provider
|
||||
.fetch_page(&crate_name, Some(&item))
|
||||
.await
|
||||
.with_context(|| {
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
format!(
|
||||
"failed to fetch {item:?}: {history:?}",
|
||||
history = item_with_history.history
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
{
|
||||
format!("failed to fetch {item:?}")
|
||||
}
|
||||
})?
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
|
||||
|
||||
self.database
|
||||
.insert(
|
||||
format!("{crate_name}::{}", item.display()),
|
||||
Some(item),
|
||||
markdown,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let parent_item = item;
|
||||
for mut item in referenced_items {
|
||||
if seen_items.contains(&item) {
|
||||
continue;
|
||||
}
|
||||
|
||||
seen_items.insert(item.clone());
|
||||
|
||||
item.path.extend(parent_item.path.clone());
|
||||
match parent_item.kind {
|
||||
RustdocItemKind::Mod => {
|
||||
item.path.push(parent_item.name.clone());
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
items_to_visit.push_back(RustdocItemWithHistory {
|
||||
#[cfg(debug_assertions)]
|
||||
history: {
|
||||
let mut history = item_with_history.history.clone();
|
||||
history.push(item.url_path());
|
||||
history
|
||||
},
|
||||
item,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue