Allow extensions to define providers for indexing docs (#13755)
This PR provides extensions with the ability to define providers for indexing docs.

Release Notes:

- N/A
This commit is contained in:
parent
b7cb2381f2
commit
5c7a8f779a
19 changed files with 407 additions and 213 deletions
|
@ -1,4 +1,3 @@
|
|||
mod indexer;
|
||||
mod providers;
|
||||
mod registry;
|
||||
mod store;
|
||||
|
|
|
@ -1,122 +0,0 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use collections::{HashSet, VecDeque};
|
||||
|
||||
use crate::{
|
||||
convert_rustdoc_to_markdown, IndexedDocsDatabase, PackageName, RustdocItem, RustdocItemKind,
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
pub trait IndexedDocsProvider {
|
||||
async fn fetch_page(
|
||||
&self,
|
||||
package: &PackageName,
|
||||
item: Option<&RustdocItem>,
|
||||
) -> Result<Option<String>>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct RustdocItemWithHistory {
|
||||
pub item: RustdocItem,
|
||||
#[cfg(debug_assertions)]
|
||||
pub history: Vec<String>,
|
||||
}
|
||||
|
||||
pub(crate) struct DocsIndexer {
|
||||
database: Arc<IndexedDocsDatabase>,
|
||||
provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
|
||||
}
|
||||
|
||||
impl DocsIndexer {
|
||||
pub fn new(
|
||||
database: Arc<IndexedDocsDatabase>,
|
||||
provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
|
||||
) -> Self {
|
||||
Self { database, provider }
|
||||
}
|
||||
|
||||
/// Indexes the package with the given name.
|
||||
pub async fn index(&self, package: PackageName) -> Result<()> {
|
||||
let Some(package_root_content) = self.provider.fetch_page(&package, None).await? else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let (crate_root_markdown, items) =
|
||||
convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
|
||||
|
||||
self.database
|
||||
.insert(package.clone(), None, crate_root_markdown)
|
||||
.await?;
|
||||
|
||||
let mut seen_items = HashSet::from_iter(items.clone());
|
||||
let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
|
||||
VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
|
||||
item,
|
||||
#[cfg(debug_assertions)]
|
||||
history: Vec::new(),
|
||||
}));
|
||||
|
||||
while let Some(item_with_history) = items_to_visit.pop_front() {
|
||||
let item = &item_with_history.item;
|
||||
|
||||
let Some(result) = self
|
||||
.provider
|
||||
.fetch_page(&package, Some(&item))
|
||||
.await
|
||||
.with_context(|| {
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
format!(
|
||||
"failed to fetch {item:?}: {history:?}",
|
||||
history = item_with_history.history
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
{
|
||||
format!("failed to fetch {item:?}")
|
||||
}
|
||||
})?
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
|
||||
|
||||
self.database
|
||||
.insert(package.clone(), Some(item), markdown)
|
||||
.await?;
|
||||
|
||||
let parent_item = item;
|
||||
for mut item in referenced_items {
|
||||
if seen_items.contains(&item) {
|
||||
continue;
|
||||
}
|
||||
|
||||
seen_items.insert(item.clone());
|
||||
|
||||
item.path.extend(parent_item.path.clone());
|
||||
match parent_item.kind {
|
||||
RustdocItemKind::Mod => {
|
||||
item.path.push(parent_item.name.clone());
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
items_to_visit.push_back(RustdocItemWithHistory {
|
||||
#[cfg(debug_assertions)]
|
||||
history: {
|
||||
let mut history = item_with_history.history.clone();
|
||||
history.push(item.url_path());
|
||||
history
|
||||
},
|
||||
item,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
|
@ -9,12 +9,12 @@ use std::sync::Arc;
|
|||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use collections::{HashSet, VecDeque};
|
||||
use fs::Fs;
|
||||
use futures::AsyncReadExt;
|
||||
use http::{AsyncBody, HttpClient, HttpClientWithUrl};
|
||||
|
||||
use crate::indexer::IndexedDocsProvider;
|
||||
use crate::PackageName;
|
||||
use crate::{IndexedDocsDatabase, IndexedDocsProvider, PackageName, ProviderId};
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum RustdocSource {
|
||||
|
@ -26,6 +26,125 @@ pub enum RustdocSource {
|
|||
DocsDotRs,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct RustdocItemWithHistory {
|
||||
pub item: RustdocItem,
|
||||
#[cfg(debug_assertions)]
|
||||
pub history: Vec<String>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait RustdocProvider {
|
||||
async fn fetch_page(
|
||||
&self,
|
||||
package: &PackageName,
|
||||
item: Option<&RustdocItem>,
|
||||
) -> Result<Option<String>>;
|
||||
}
|
||||
|
||||
pub struct RustdocIndexer {
|
||||
provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
|
||||
}
|
||||
|
||||
impl RustdocIndexer {
|
||||
pub fn new(provider: Box<dyn RustdocProvider + Send + Sync + 'static>) -> Self {
|
||||
Self { provider }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl IndexedDocsProvider for RustdocIndexer {
|
||||
fn id(&self) -> ProviderId {
|
||||
ProviderId::rustdoc()
|
||||
}
|
||||
|
||||
fn database_path(&self) -> PathBuf {
|
||||
paths::support_dir().join("docs/rust/rustdoc-db.1.mdb")
|
||||
}
|
||||
|
||||
async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
|
||||
let Some(package_root_content) = self.provider.fetch_page(&package, None).await? else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let (crate_root_markdown, items) =
|
||||
convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
|
||||
|
||||
database
|
||||
.insert(package.to_string(), crate_root_markdown)
|
||||
.await?;
|
||||
|
||||
let mut seen_items = HashSet::from_iter(items.clone());
|
||||
let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
|
||||
VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
|
||||
item,
|
||||
#[cfg(debug_assertions)]
|
||||
history: Vec::new(),
|
||||
}));
|
||||
|
||||
while let Some(item_with_history) = items_to_visit.pop_front() {
|
||||
let item = &item_with_history.item;
|
||||
|
||||
let Some(result) = self
|
||||
.provider
|
||||
.fetch_page(&package, Some(&item))
|
||||
.await
|
||||
.with_context(|| {
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
format!(
|
||||
"failed to fetch {item:?}: {history:?}",
|
||||
history = item_with_history.history
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
{
|
||||
format!("failed to fetch {item:?}")
|
||||
}
|
||||
})?
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
|
||||
|
||||
database
|
||||
.insert(format!("{package}::{}", item.display()), markdown)
|
||||
.await?;
|
||||
|
||||
let parent_item = item;
|
||||
for mut item in referenced_items {
|
||||
if seen_items.contains(&item) {
|
||||
continue;
|
||||
}
|
||||
|
||||
seen_items.insert(item.clone());
|
||||
|
||||
item.path.extend(parent_item.path.clone());
|
||||
match parent_item.kind {
|
||||
RustdocItemKind::Mod => {
|
||||
item.path.push(parent_item.name.clone());
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
items_to_visit.push_back(RustdocItemWithHistory {
|
||||
#[cfg(debug_assertions)]
|
||||
history: {
|
||||
let mut history = item_with_history.history.clone();
|
||||
history.push(item.url_path());
|
||||
history
|
||||
},
|
||||
item,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct LocalProvider {
|
||||
fs: Arc<dyn Fs>,
|
||||
cargo_workspace_root: PathBuf,
|
||||
|
@ -41,7 +160,7 @@ impl LocalProvider {
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl IndexedDocsProvider for LocalProvider {
|
||||
impl RustdocProvider for LocalProvider {
|
||||
async fn fetch_page(
|
||||
&self,
|
||||
crate_name: &PackageName,
|
||||
|
@ -74,7 +193,7 @@ impl DocsDotRsProvider {
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl IndexedDocsProvider for DocsDotRsProvider {
|
||||
impl RustdocProvider for DocsDotRsProvider {
|
||||
async fn fetch_page(
|
||||
&self,
|
||||
crate_name: &PackageName,
|
||||
|
|
|
@ -4,7 +4,7 @@ use collections::HashMap;
|
|||
use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, UpdateGlobal};
|
||||
use parking_lot::RwLock;
|
||||
|
||||
use crate::{IndexedDocsStore, Provider, ProviderId};
|
||||
use crate::{IndexedDocsProvider, IndexedDocsStore, ProviderId};
|
||||
|
||||
/// Newtype around the shared [`IndexedDocsRegistry`].
// NOTE(review): presumably the value stored as the gpui global — confirm against the `Global` impl.
struct GlobalIndexedDocsRegistry(Arc<IndexedDocsRegistry>);
|
||||
|
||||
|
@ -34,9 +34,12 @@ impl IndexedDocsRegistry {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn register_provider(&self, provider: Provider) {
|
||||
pub fn register_provider(
|
||||
&self,
|
||||
provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
|
||||
) {
|
||||
self.stores_by_provider.write().insert(
|
||||
provider.id.clone(),
|
||||
provider.id(),
|
||||
Arc::new(IndexedDocsStore::new(provider, self.executor.clone())),
|
||||
);
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ use std::sync::atomic::AtomicBool;
|
|||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use async_trait::async_trait;
|
||||
use collections::HashMap;
|
||||
use derive_more::{Deref, Display};
|
||||
use futures::future::{self, BoxFuture, Shared};
|
||||
|
@ -15,11 +16,10 @@ use parking_lot::RwLock;
|
|||
use serde::{Deserialize, Serialize};
|
||||
use util::ResultExt;
|
||||
|
||||
use crate::indexer::{DocsIndexer, IndexedDocsProvider};
|
||||
use crate::{IndexedDocsRegistry, RustdocItem};
|
||||
use crate::IndexedDocsRegistry;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
|
||||
pub struct ProviderId(Arc<str>);
|
||||
pub struct ProviderId(pub Arc<str>);
|
||||
|
||||
impl ProviderId {
|
||||
pub fn rustdoc() -> Self {
|
||||
|
@ -27,20 +27,6 @@ impl ProviderId {
|
|||
}
|
||||
}
|
||||
|
||||
pub struct Provider {
|
||||
pub id: ProviderId,
|
||||
pub database_path: PathBuf,
|
||||
}
|
||||
|
||||
impl Provider {
|
||||
pub fn rustdoc() -> Self {
|
||||
Self {
|
||||
id: ProviderId("rustdoc".into()),
|
||||
database_path: paths::support_dir().join("docs/rust/rustdoc-db.1.mdb"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The name of a package.
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
|
||||
pub struct PackageName(Arc<str>);
|
||||
|
@ -51,11 +37,24 @@ impl From<&str> for PackageName {
|
|||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait IndexedDocsProvider {
|
||||
/// Returns the ID of this provider.
|
||||
fn id(&self) -> ProviderId;
|
||||
|
||||
/// Returns the path to the database for this provider.
|
||||
fn database_path(&self) -> PathBuf;
|
||||
|
||||
/// Indexes the package with the given name.
|
||||
async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()>;
|
||||
}
|
||||
|
||||
/// A store for indexed docs.
|
||||
pub struct IndexedDocsStore {
|
||||
executor: BackgroundExecutor,
|
||||
database_future:
|
||||
Shared<BoxFuture<'static, Result<Arc<IndexedDocsDatabase>, Arc<anyhow::Error>>>>,
|
||||
provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
|
||||
indexing_tasks_by_package:
|
||||
RwLock<HashMap<PackageName, Shared<Task<Result<(), Arc<anyhow::Error>>>>>>,
|
||||
}
|
||||
|
@ -68,11 +67,15 @@ impl IndexedDocsStore {
|
|||
.ok_or_else(|| anyhow!("no indexed docs store found for {provider}"))
|
||||
}
|
||||
|
||||
pub fn new(provider: Provider, executor: BackgroundExecutor) -> Self {
|
||||
pub fn new(
|
||||
provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
|
||||
executor: BackgroundExecutor,
|
||||
) -> Self {
|
||||
let database_future = executor
|
||||
.spawn({
|
||||
let executor = executor.clone();
|
||||
async move { IndexedDocsDatabase::new(provider.database_path, executor) }
|
||||
let database_path = provider.database_path();
|
||||
async move { IndexedDocsDatabase::new(database_path, executor) }
|
||||
})
|
||||
.then(|result| future::ready(result.map(Arc::new).map_err(Arc::new)))
|
||||
.boxed()
|
||||
|
@ -81,6 +84,7 @@ impl IndexedDocsStore {
|
|||
Self {
|
||||
executor,
|
||||
database_future,
|
||||
provider,
|
||||
indexing_tasks_by_package: RwLock::new(HashMap::default()),
|
||||
}
|
||||
}
|
||||
|
@ -95,18 +99,23 @@ impl IndexedDocsStore {
|
|||
package: PackageName,
|
||||
item_path: Option<String>,
|
||||
) -> Result<MarkdownDocs> {
|
||||
let item_path = if let Some(item_path) = item_path {
|
||||
format!("{package}::{item_path}")
|
||||
} else {
|
||||
package.to_string()
|
||||
};
|
||||
|
||||
self.database_future
|
||||
.clone()
|
||||
.await
|
||||
.map_err(|err| anyhow!(err))?
|
||||
.load(package, item_path)
|
||||
.load(item_path)
|
||||
.await
|
||||
}
|
||||
|
||||
pub fn index(
|
||||
self: Arc<Self>,
|
||||
package: PackageName,
|
||||
provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
|
||||
) -> Shared<Task<Result<(), Arc<anyhow::Error>>>> {
|
||||
if let Some(existing_task) = self.indexing_tasks_by_package.read().get(&package) {
|
||||
return existing_task.clone();
|
||||
|
@ -132,9 +141,7 @@ impl IndexedDocsStore {
|
|||
.clone()
|
||||
.await
|
||||
.map_err(|err| anyhow!(err))?;
|
||||
let indexer = DocsIndexer::new(database, provider);
|
||||
|
||||
indexer.index(package.clone()).await
|
||||
this.provider.index(package, database).await
|
||||
};
|
||||
|
||||
index_task.await.map_err(Arc::new)
|
||||
|
@ -192,7 +199,7 @@ impl IndexedDocsStore {
|
|||
#[derive(Debug, PartialEq, Eq, Clone, Display, Serialize, Deserialize)]
|
||||
pub struct MarkdownDocs(pub String);
|
||||
|
||||
pub(crate) struct IndexedDocsDatabase {
|
||||
pub struct IndexedDocsDatabase {
|
||||
executor: BackgroundExecutor,
|
||||
env: heed::Env,
|
||||
entries: Database<SerdeBincode<String>, SerdeBincode<MarkdownDocs>>,
|
||||
|
@ -237,44 +244,25 @@ impl IndexedDocsDatabase {
|
|||
})
|
||||
}
|
||||
|
||||
pub fn load(
|
||||
&self,
|
||||
package: PackageName,
|
||||
item_path: Option<String>,
|
||||
) -> Task<Result<MarkdownDocs>> {
|
||||
pub fn load(&self, key: String) -> Task<Result<MarkdownDocs>> {
|
||||
let env = self.env.clone();
|
||||
let entries = self.entries;
|
||||
let item_path = if let Some(item_path) = item_path {
|
||||
format!("{package}::{item_path}")
|
||||
} else {
|
||||
package.to_string()
|
||||
};
|
||||
|
||||
self.executor.spawn(async move {
|
||||
let txn = env.read_txn()?;
|
||||
entries
|
||||
.get(&txn, &item_path)?
|
||||
.ok_or_else(|| anyhow!("no docs found for {item_path}"))
|
||||
.get(&txn, &key)?
|
||||
.ok_or_else(|| anyhow!("no docs found for {key}"))
|
||||
})
|
||||
}
|
||||
|
||||
pub fn insert(
|
||||
&self,
|
||||
package: PackageName,
|
||||
item: Option<&RustdocItem>,
|
||||
docs: String,
|
||||
) -> Task<Result<()>> {
|
||||
pub fn insert(&self, key: String, docs: String) -> Task<Result<()>> {
|
||||
let env = self.env.clone();
|
||||
let entries = self.entries;
|
||||
let (item_path, entry) = if let Some(item) = item {
|
||||
(format!("{package}::{}", item.display()), MarkdownDocs(docs))
|
||||
} else {
|
||||
(package.to_string(), MarkdownDocs(docs))
|
||||
};
|
||||
|
||||
self.executor.spawn(async move {
|
||||
let mut txn = env.write_txn()?;
|
||||
entries.put(&mut txn, &item_path, &entry)?;
|
||||
entries.put(&mut txn, &key, &MarkdownDocs(docs))?;
|
||||
txn.commit()?;
|
||||
Ok(())
|
||||
})
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue