Start work on genericizing /rustdoc
(#13745)
This PR begins the process of making the backing infrastructure for the `/rustdoc` command more generic such that it can be applied to additional documentation providers. In this PR we:

- Rename the `rustdoc` crate to `indexed_docs` as a more general-purpose name
- Start moving rustdoc-specific functionality into `indexed_docs::providers::rustdoc`
- Add an `IndexedDocsRegistry` to hold multiple `IndexedDocsStore`s (one per provider)

We haven't yet removed the rustdoc-specific bits in the `DocsIndexer`. That will follow soon.

Release Notes:

- N/A
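As a sketch of where this is heading, the snippet below shows how a caller (for example, the `/rustdoc` slash command) could look up the store for a particular provider once the registry is in place. It assumes only the `IndexedDocsStore::try_global` and `ProviderId::rustdoc` API introduced in `store.rs` below; the helper function and the crate-root re-exports are hypothetical.

```rust
use std::sync::Arc;

use anyhow::Result;
use gpui::AppContext;
// Assumes these types are re-exported from the crate root; in this PR they
// are defined in `indexed_docs::store`.
use indexed_docs::{IndexedDocsStore, ProviderId};

// Hypothetical helper: each provider gets its own `IndexedDocsStore`, held by
// the global `IndexedDocsRegistry` and looked up by `ProviderId`.
fn rustdoc_store(cx: &AppContext) -> Result<Arc<IndexedDocsStore>> {
    IndexedDocsStore::try_global(ProviderId::rustdoc(), cx)
}
```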
parent eab98eb9c9
commit 7460381285
18 changed files with 425 additions and 355 deletions
crates/indexed_docs/src/store.rs (new file, 282 additions)
@@ -0,0 +1,282 @@
use std::path::PathBuf;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;

use anyhow::{anyhow, Result};
use collections::HashMap;
use derive_more::{Deref, Display};
use futures::future::{self, BoxFuture, Shared};
use futures::FutureExt;
use fuzzy::StringMatchCandidate;
use gpui::{AppContext, BackgroundExecutor, Task};
use heed::types::SerdeBincode;
use heed::Database;
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use util::ResultExt;

use crate::indexer::{DocsIndexer, IndexedDocsProvider};
use crate::{IndexedDocsRegistry, RustdocItem};

/// The ID of a documentation provider.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
pub struct ProviderId(Arc<str>);

impl ProviderId {
    pub fn rustdoc() -> Self {
        Self("rustdoc".into())
    }
}

/// A documentation provider.
pub struct Provider {
    pub id: ProviderId,
    pub database_path: PathBuf,
}

impl Provider {
    pub fn rustdoc() -> Self {
        Self {
            id: ProviderId("rustdoc".into()),
            database_path: paths::support_dir().join("docs/rust/rustdoc-db.1.mdb"),
        }
    }
}

/// The name of a package.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
pub struct PackageName(Arc<str>);

impl From<&str> for PackageName {
    fn from(value: &str) -> Self {
        Self(value.into())
    }
}

/// A store for indexed docs.
pub struct IndexedDocsStore {
    executor: BackgroundExecutor,
    database_future:
        Shared<BoxFuture<'static, Result<Arc<IndexedDocsDatabase>, Arc<anyhow::Error>>>>,
    indexing_tasks_by_package:
        RwLock<HashMap<PackageName, Shared<Task<Result<(), Arc<anyhow::Error>>>>>>,
}

impl IndexedDocsStore {
    /// Returns the [`IndexedDocsStore`] for the given provider from the global
    /// [`IndexedDocsRegistry`], if one has been registered.
    pub fn try_global(provider: ProviderId, cx: &AppContext) -> Result<Arc<Self>> {
        let registry = IndexedDocsRegistry::global(cx);
        registry
            .get_provider_store(provider.clone())
            .ok_or_else(|| anyhow!("no indexed docs store found for {provider}"))
    }

    pub fn new(provider: Provider, executor: BackgroundExecutor) -> Self {
        // Open the provider's database on a background thread and share the
        // resulting future so all callers await the same handle.
        let database_future = executor
            .spawn({
                let executor = executor.clone();
                async move { IndexedDocsDatabase::new(provider.database_path, executor) }
            })
            .then(|result| future::ready(result.map(Arc::new).map_err(Arc::new)))
            .boxed()
            .shared();

        Self {
            executor,
            database_future,
            indexing_tasks_by_package: RwLock::new(HashMap::default()),
        }
    }

    /// Returns whether the package with the given name is currently being indexed.
    pub fn is_indexing(&self, package: &PackageName) -> bool {
        self.indexing_tasks_by_package.read().contains_key(package)
    }

    /// Loads the indexed docs for the given package, or for a specific item
    /// within it when `item_path` is provided.
    pub async fn load(
        &self,
        package: PackageName,
        item_path: Option<String>,
    ) -> Result<MarkdownDocs> {
        self.database_future
            .clone()
            .await
            .map_err(|err| anyhow!(err))?
            .load(package, item_path)
            .await
    }

    /// Indexes the docs for the given package using the given provider.
    ///
    /// If an indexing task for this package is already running, the existing
    /// task is returned instead of starting a new one.
    pub fn index(
        self: Arc<Self>,
        package: PackageName,
        provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
    ) -> Shared<Task<Result<(), Arc<anyhow::Error>>>> {
        if let Some(existing_task) = self.indexing_tasks_by_package.read().get(&package) {
            return existing_task.clone();
        }

        let indexing_task = self
            .executor
            .spawn({
                let this = self.clone();
                let package = package.clone();
                async move {
                    // Remove the task from the map when indexing finishes,
                    // regardless of whether it succeeded.
                    let _finally = util::defer({
                        let this = this.clone();
                        let package = package.clone();
                        move || {
                            this.indexing_tasks_by_package.write().remove(&package);
                        }
                    });

                    let index_task = async {
                        let database = this
                            .database_future
                            .clone()
                            .await
                            .map_err(|err| anyhow!(err))?;
                        let indexer = DocsIndexer::new(database, provider);

                        indexer.index(package.clone()).await
                    };

                    index_task.await.map_err(Arc::new)
                }
            })
            .shared();

        self.indexing_tasks_by_package
            .write()
            .insert(package, indexing_task.clone());

        indexing_task
    }

    /// Returns the indexed item paths that fuzzy-match the given query, up to
    /// 100 results.
    pub fn search(&self, query: String) -> Task<Vec<String>> {
        let executor = self.executor.clone();
        let database_future = self.database_future.clone();
        self.executor.spawn(async move {
            if query.is_empty() {
                return Vec::new();
            }

            let Some(database) = database_future.await.map_err(|err| anyhow!(err)).log_err() else {
                return Vec::new();
            };

            let Some(items) = database.keys().await.log_err() else {
                return Vec::new();
            };

            let candidates = items
                .iter()
                .enumerate()
                .map(|(ix, item_path)| StringMatchCandidate::new(ix, item_path.clone()))
                .collect::<Vec<_>>();

            let matches = fuzzy::match_strings(
                &candidates,
                &query,
                false,
                100,
                &AtomicBool::default(),
                executor,
            )
            .await;

            matches
                .into_iter()
                .map(|mat| items[mat.candidate_id].clone())
                .collect()
        })
    }
}

/// Docs rendered as Markdown.
#[derive(Debug, PartialEq, Eq, Clone, Display, Serialize, Deserialize)]
pub struct MarkdownDocs(pub String);

pub(crate) struct IndexedDocsDatabase {
    executor: BackgroundExecutor,
    env: heed::Env,
    entries: Database<SerdeBincode<String>, SerdeBincode<MarkdownDocs>>,
}

impl IndexedDocsDatabase {
    pub fn new(path: PathBuf, executor: BackgroundExecutor) -> Result<Self> {
        std::fs::create_dir_all(&path)?;

        const ONE_GB_IN_BYTES: usize = 1024 * 1024 * 1024;
        let env = unsafe {
            heed::EnvOpenOptions::new()
                .map_size(ONE_GB_IN_BYTES)
                .max_dbs(1)
                .open(path)?
        };

        let mut txn = env.write_txn()?;
        let entries = env.create_database(&mut txn, Some("rustdoc_entries"))?;
        txn.commit()?;

        Ok(Self {
            executor,
            env,
            entries,
        })
    }

    /// Returns all of the item paths stored in the database.
    pub fn keys(&self) -> Task<Result<Vec<String>>> {
        let env = self.env.clone();
        let entries = self.entries;

        self.executor.spawn(async move {
            let txn = env.read_txn()?;
            let mut iter = entries.iter(&txn)?;
            let mut keys = Vec::new();
            while let Some((key, _value)) = iter.next().transpose()? {
                keys.push(key);
            }

            Ok(keys)
        })
    }

    /// Loads the docs for the given package, or for a specific item within it
    /// when `item_path` is provided.
    pub fn load(
        &self,
        package: PackageName,
        item_path: Option<String>,
    ) -> Task<Result<MarkdownDocs>> {
        let env = self.env.clone();
        let entries = self.entries;
        let item_path = if let Some(item_path) = item_path {
            format!("{package}::{item_path}")
        } else {
            package.to_string()
        };

        self.executor.spawn(async move {
            let txn = env.read_txn()?;
            entries
                .get(&txn, &item_path)?
                .ok_or_else(|| anyhow!("no docs found for {item_path}"))
        })
    }

    /// Inserts the docs for the given package (or a specific item within it)
    /// into the database.
    pub fn insert(
        &self,
        package: PackageName,
        item: Option<&RustdocItem>,
        docs: String,
    ) -> Task<Result<()>> {
        let env = self.env.clone();
        let entries = self.entries;
        let (item_path, entry) = if let Some(item) = item {
            (format!("{package}::{}", item.display()), MarkdownDocs(docs))
        } else {
            (package.to_string(), MarkdownDocs(docs))
        };

        self.executor.spawn(async move {
            let mut txn = env.write_txn()?;
            entries.put(&mut txn, &item_path, &entry)?;
            txn.commit()?;
            Ok(())
        })
    }
}