Persist index for /rustdoc in LMDB (#12988)
This PR updates the `/rustdoc` command with persistence for the documented rustdoc items. Now when you run `/rustdoc --index <CRATE_NAME>` it will index the crate and store the results in LMDB. The documented items will then be read from the database when searching using `/rustdoc` and persist across restarts of Zed. Release Notes: - N/A
This commit is contained in:
parent
6c70a809ec
commit
85acc2be44
5 changed files with 174 additions and 38 deletions
4
Cargo.lock
generated
4
Cargo.lock
generated
|
@ -8718,13 +8718,15 @@ dependencies = [
|
||||||
"futures 0.3.28",
|
"futures 0.3.28",
|
||||||
"fuzzy",
|
"fuzzy",
|
||||||
"gpui",
|
"gpui",
|
||||||
|
"heed",
|
||||||
"html_to_markdown",
|
"html_to_markdown",
|
||||||
"http 0.1.0",
|
"http 0.1.0",
|
||||||
"indexmap 1.9.3",
|
"indexmap 1.9.3",
|
||||||
"indoc",
|
"indoc",
|
||||||
"parking_lot",
|
|
||||||
"pretty_assertions",
|
"pretty_assertions",
|
||||||
|
"serde",
|
||||||
"strum",
|
"strum",
|
||||||
|
"util",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
|
@ -124,10 +124,7 @@ impl SlashCommand for RustdocSlashCommand {
|
||||||
let store = RustdocStore::global(cx);
|
let store = RustdocStore::global(cx);
|
||||||
cx.background_executor().spawn(async move {
|
cx.background_executor().spawn(async move {
|
||||||
let items = store.search(query).await;
|
let items = store.search(query).await;
|
||||||
Ok(items
|
Ok(items)
|
||||||
.into_iter()
|
|
||||||
.map(|(crate_name, item)| format!("{crate_name}::{}", item.display()))
|
|
||||||
.collect())
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -228,7 +225,7 @@ impl SlashCommand for RustdocSlashCommand {
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
if let Ok(item_docs) = item_docs {
|
if let Ok(item_docs) = item_docs {
|
||||||
anyhow::Ok((RustdocSource::Local, item_docs))
|
anyhow::Ok((RustdocSource::Local, item_docs.docs().to_owned()))
|
||||||
} else {
|
} else {
|
||||||
Self::build_message(
|
Self::build_message(
|
||||||
fs,
|
fs,
|
||||||
|
|
|
@ -19,11 +19,13 @@ fs.workspace = true
|
||||||
futures.workspace = true
|
futures.workspace = true
|
||||||
fuzzy.workspace = true
|
fuzzy.workspace = true
|
||||||
gpui.workspace = true
|
gpui.workspace = true
|
||||||
|
heed.workspace = true
|
||||||
html_to_markdown.workspace = true
|
html_to_markdown.workspace = true
|
||||||
http.workspace = true
|
http.workspace = true
|
||||||
indexmap.workspace = true
|
indexmap.workspace = true
|
||||||
parking_lot.workspace = true
|
serde.workspace = true
|
||||||
strum.workspace = true
|
strum.workspace = true
|
||||||
|
util.workspace = true
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
indoc.workspace = true
|
indoc.workspace = true
|
||||||
|
|
|
@ -1,8 +1,12 @@
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
use strum::EnumIter;
|
use strum::EnumIter;
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, EnumIter)]
|
#[derive(
|
||||||
|
Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, Serialize, Deserialize, EnumIter,
|
||||||
|
)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
pub enum RustdocItemKind {
|
pub enum RustdocItemKind {
|
||||||
Mod,
|
Mod,
|
||||||
Macro,
|
Macro,
|
||||||
|
|
|
@ -1,14 +1,20 @@
|
||||||
|
use std::path::PathBuf;
|
||||||
use std::sync::atomic::AtomicBool;
|
use std::sync::atomic::AtomicBool;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use anyhow::{anyhow, Result};
|
use anyhow::{anyhow, Result};
|
||||||
use collections::HashMap;
|
use futures::future::{self, BoxFuture, Shared};
|
||||||
|
use futures::FutureExt;
|
||||||
use fuzzy::StringMatchCandidate;
|
use fuzzy::StringMatchCandidate;
|
||||||
use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, Task, UpdateGlobal};
|
use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, Task, UpdateGlobal};
|
||||||
use parking_lot::RwLock;
|
use heed::types::SerdeBincode;
|
||||||
|
use heed::Database;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use util::paths::SUPPORT_DIR;
|
||||||
|
use util::ResultExt;
|
||||||
|
|
||||||
use crate::crawler::{RustdocCrawler, RustdocProvider};
|
use crate::crawler::{RustdocCrawler, RustdocProvider};
|
||||||
use crate::RustdocItem;
|
use crate::{RustdocItem, RustdocItemKind};
|
||||||
|
|
||||||
struct GlobalRustdocStore(Arc<RustdocStore>);
|
struct GlobalRustdocStore(Arc<RustdocStore>);
|
||||||
|
|
||||||
|
@ -16,7 +22,7 @@ impl Global for GlobalRustdocStore {}
|
||||||
|
|
||||||
pub struct RustdocStore {
|
pub struct RustdocStore {
|
||||||
executor: BackgroundExecutor,
|
executor: BackgroundExecutor,
|
||||||
docs: Arc<RwLock<HashMap<(String, RustdocItem), String>>>,
|
database_future: Shared<BoxFuture<'static, Result<Arc<RustdocDatabase>, Arc<anyhow::Error>>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RustdocStore {
|
impl RustdocStore {
|
||||||
|
@ -32,26 +38,34 @@ impl RustdocStore {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new(executor: BackgroundExecutor) -> Self {
|
pub fn new(executor: BackgroundExecutor) -> Self {
|
||||||
|
let database_future = executor
|
||||||
|
.spawn({
|
||||||
|
let executor = executor.clone();
|
||||||
|
async move {
|
||||||
|
RustdocDatabase::new(SUPPORT_DIR.join("docs/rust/rustdoc-db.0.mdb"), executor)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.then(|result| future::ready(result.map(Arc::new).map_err(Arc::new)))
|
||||||
|
.boxed()
|
||||||
|
.shared();
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
executor,
|
executor,
|
||||||
docs: Arc::new(RwLock::new(HashMap::default())),
|
database_future,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn load(&self, crate_name: String, item_path: Option<String>) -> Task<Result<String>> {
|
pub async fn load(
|
||||||
let item_docs = self
|
&self,
|
||||||
.docs
|
crate_name: String,
|
||||||
.read()
|
item_path: Option<String>,
|
||||||
.iter()
|
) -> Result<RustdocDatabaseEntry> {
|
||||||
.find_map(|((item_crate_name, item), item_docs)| {
|
self.database_future
|
||||||
if item_crate_name == &crate_name && item_path == Some(item.display()) {
|
.clone()
|
||||||
Some(item_docs.clone())
|
.await
|
||||||
} else {
|
.map_err(|err| anyhow!(err))?
|
||||||
None
|
.load(crate_name, item_path)
|
||||||
}
|
.await
|
||||||
});
|
|
||||||
|
|
||||||
Task::ready(item_docs.ok_or_else(|| anyhow!("no docs found")))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn index(
|
pub fn index(
|
||||||
|
@ -59,42 +73,50 @@ impl RustdocStore {
|
||||||
crate_name: String,
|
crate_name: String,
|
||||||
provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
|
provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
|
||||||
) -> Task<Result<()>> {
|
) -> Task<Result<()>> {
|
||||||
let docs = self.docs.clone();
|
let database_future = self.database_future.clone();
|
||||||
self.executor.spawn(async move {
|
self.executor.spawn(async move {
|
||||||
let crawler = RustdocCrawler::new(provider);
|
let crawler = RustdocCrawler::new(provider);
|
||||||
|
|
||||||
println!("Indexing {crate_name}");
|
|
||||||
|
|
||||||
let Some(crate_docs) = crawler.crawl(crate_name.clone()).await? else {
|
let Some(crate_docs) = crawler.crawl(crate_name.clone()).await? else {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut lock = docs.write();
|
let database = database_future.await.map_err(|err| anyhow!(err))?;
|
||||||
|
|
||||||
|
database
|
||||||
|
.insert(crate_name.clone(), None, crate_docs.crate_root_markdown)
|
||||||
|
.await?;
|
||||||
|
|
||||||
for (item, item_docs) in crate_docs.items {
|
for (item, item_docs) in crate_docs.items {
|
||||||
lock.insert((crate_name.clone(), item), item_docs);
|
database
|
||||||
|
.insert(crate_name.clone(), Some(&item), item_docs)
|
||||||
|
.await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn search(&self, query: String) -> Task<Vec<(String, RustdocItem)>> {
|
pub fn search(&self, query: String) -> Task<Vec<String>> {
|
||||||
let executor = self.executor.clone();
|
let executor = self.executor.clone();
|
||||||
let docs = self.docs.read().clone();
|
let database_future = self.database_future.clone();
|
||||||
self.executor.spawn(async move {
|
self.executor.spawn(async move {
|
||||||
if query.is_empty() {
|
if query.is_empty() {
|
||||||
return Vec::new();
|
return Vec::new();
|
||||||
}
|
}
|
||||||
|
|
||||||
let items = docs.keys().collect::<Vec<_>>();
|
let Some(database) = database_future.await.map_err(|err| anyhow!(err)).log_err() else {
|
||||||
|
return Vec::new();
|
||||||
|
};
|
||||||
|
|
||||||
|
let Some(items) = database.keys().await.log_err() else {
|
||||||
|
return Vec::new();
|
||||||
|
};
|
||||||
|
|
||||||
let candidates = items
|
let candidates = items
|
||||||
.iter()
|
.iter()
|
||||||
.enumerate()
|
.enumerate()
|
||||||
.map(|(ix, (crate_name, item))| {
|
.map(|(ix, item_path)| StringMatchCandidate::new(ix, item_path.clone()))
|
||||||
StringMatchCandidate::new(ix, format!("{crate_name}::{}", item.display()))
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
let matches = fuzzy::match_strings(
|
let matches = fuzzy::match_strings(
|
||||||
|
@ -114,3 +136,112 @@ impl RustdocStore {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
pub enum RustdocDatabaseEntry {
|
||||||
|
Crate { docs: String },
|
||||||
|
Item { kind: RustdocItemKind, docs: String },
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RustdocDatabaseEntry {
|
||||||
|
pub fn docs(&self) -> &str {
|
||||||
|
match self {
|
||||||
|
Self::Crate { docs } | Self::Item { docs, .. } => &docs,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct RustdocDatabase {
|
||||||
|
executor: BackgroundExecutor,
|
||||||
|
env: heed::Env,
|
||||||
|
entries: Database<SerdeBincode<String>, SerdeBincode<RustdocDatabaseEntry>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RustdocDatabase {
|
||||||
|
pub fn new(path: PathBuf, executor: BackgroundExecutor) -> Result<Self> {
|
||||||
|
std::fs::create_dir_all(&path)?;
|
||||||
|
|
||||||
|
const ONE_GB_IN_BYTES: usize = 1024 * 1024 * 1024;
|
||||||
|
let env = unsafe {
|
||||||
|
heed::EnvOpenOptions::new()
|
||||||
|
.map_size(ONE_GB_IN_BYTES)
|
||||||
|
.max_dbs(1)
|
||||||
|
.open(path)?
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut txn = env.write_txn()?;
|
||||||
|
let entries = env.create_database(&mut txn, Some("rustdoc_entries"))?;
|
||||||
|
txn.commit()?;
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
executor,
|
||||||
|
env,
|
||||||
|
entries,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn keys(&self) -> Task<Result<Vec<String>>> {
|
||||||
|
let env = self.env.clone();
|
||||||
|
let entries = self.entries;
|
||||||
|
|
||||||
|
self.executor.spawn(async move {
|
||||||
|
let txn = env.read_txn()?;
|
||||||
|
let mut iter = entries.iter(&txn)?;
|
||||||
|
let mut keys = Vec::new();
|
||||||
|
while let Some((key, _value)) = iter.next().transpose()? {
|
||||||
|
keys.push(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(keys)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn load(
|
||||||
|
&self,
|
||||||
|
crate_name: String,
|
||||||
|
item_path: Option<String>,
|
||||||
|
) -> Task<Result<RustdocDatabaseEntry>> {
|
||||||
|
let env = self.env.clone();
|
||||||
|
let entries = self.entries;
|
||||||
|
let item_path = if let Some(item_path) = item_path {
|
||||||
|
format!("{crate_name}::{item_path}")
|
||||||
|
} else {
|
||||||
|
crate_name
|
||||||
|
};
|
||||||
|
|
||||||
|
self.executor.spawn(async move {
|
||||||
|
let txn = env.read_txn()?;
|
||||||
|
entries
|
||||||
|
.get(&txn, &item_path)?
|
||||||
|
.ok_or_else(|| anyhow!("no docs found for {item_path}"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn insert(
|
||||||
|
&self,
|
||||||
|
crate_name: String,
|
||||||
|
item: Option<&RustdocItem>,
|
||||||
|
docs: String,
|
||||||
|
) -> Task<Result<()>> {
|
||||||
|
let env = self.env.clone();
|
||||||
|
let entries = self.entries;
|
||||||
|
let (item_path, entry) = if let Some(item) = item {
|
||||||
|
(
|
||||||
|
format!("{crate_name}::{}", item.display()),
|
||||||
|
RustdocDatabaseEntry::Item {
|
||||||
|
kind: item.kind,
|
||||||
|
docs,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
(crate_name, RustdocDatabaseEntry::Crate { docs })
|
||||||
|
};
|
||||||
|
|
||||||
|
self.executor.spawn(async move {
|
||||||
|
let mut txn = env.write_txn()?;
|
||||||
|
entries.put(&mut txn, &item_path, &entry)?;
|
||||||
|
txn.commit()?;
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue