Add MVP for /rustdoc
using indexed docs (#12952)
This PR adds an MVP of retrieving docs using the `/rustdoc` command from an indexed set of docs. To try this out: 1. Build local docs using `cargo doc` 2. Index the docs for the crate you want to search using `/rustdoc --index <CRATE_NAME>` - Note: This may take a while, depending on the size of the crate 3. Search for docs using `/rustdoc my_crate::path::to::item` - You should get completions for the available items Here are some screenshots of it in action: <img width="640" alt="Screenshot 2024-06-12 at 6 19 20 PM" src="https://github.com/zed-industries/zed/assets/1486634/6c49bec9-d084-4dcb-a92c-1b4c557ee9ce"> <img width="636" alt="Screenshot 2024-06-12 at 6 52 56 PM" src="https://github.com/zed-industries/zed/assets/1486634/636a651c-7d02-48dc-b05c-931f33c49f9c"> Release Notes: - N/A
This commit is contained in:
parent
ec086945fc
commit
0ac9af94e0
8 changed files with 295 additions and 42 deletions
|
@ -17,9 +17,12 @@ async-trait.workspace = true
|
|||
collections.workspace = true
|
||||
fs.workspace = true
|
||||
futures.workspace = true
|
||||
fuzzy.workspace = true
|
||||
gpui.workspace = true
|
||||
html_to_markdown.workspace = true
|
||||
http.workspace = true
|
||||
indexmap.workspace = true
|
||||
parking_lot.workspace = true
|
||||
strum.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
|
|
|
@ -7,6 +7,7 @@ use collections::{HashSet, VecDeque};
|
|||
use fs::Fs;
|
||||
use futures::AsyncReadExt;
|
||||
use http::{AsyncBody, HttpClient, HttpClientWithUrl};
|
||||
use indexmap::IndexMap;
|
||||
|
||||
use crate::{convert_rustdoc_to_markdown, RustdocItem, RustdocItemKind};
|
||||
|
||||
|
@ -51,11 +52,12 @@ impl RustdocProvider for LocalProvider {
|
|||
let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
|
||||
local_cargo_doc_path.push(&crate_name);
|
||||
if let Some(item) = item {
|
||||
if !item.path.is_empty() {
|
||||
local_cargo_doc_path.push(item.path.join("/"));
|
||||
}
|
||||
local_cargo_doc_path.push(item.url_path());
|
||||
} else {
|
||||
local_cargo_doc_path.push("index.html");
|
||||
}
|
||||
local_cargo_doc_path.push("index.html");
|
||||
|
||||
println!("Fetching {}", local_cargo_doc_path.display());
|
||||
|
||||
let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
|
||||
return Ok(None);
|
||||
|
@ -120,12 +122,18 @@ impl RustdocProvider for DocsDotRsProvider {
|
|||
}
|
||||
}
|
||||
|
||||
pub struct RustdocItemWithHistory {
|
||||
#[derive(Debug)]
|
||||
struct RustdocItemWithHistory {
|
||||
pub item: RustdocItem,
|
||||
#[cfg(debug_assertions)]
|
||||
pub history: Vec<String>,
|
||||
}
|
||||
|
||||
pub struct CrateDocs {
|
||||
pub crate_root_markdown: String,
|
||||
pub items: IndexMap<RustdocItem, String>,
|
||||
}
|
||||
|
||||
pub struct RustdocCrawler {
|
||||
provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
|
||||
}
|
||||
|
@ -135,14 +143,16 @@ impl RustdocCrawler {
|
|||
Self { provider }
|
||||
}
|
||||
|
||||
pub async fn crawl(&self, crate_name: String) -> Result<Option<String>> {
|
||||
let Some(crate_index_content) = self.provider.fetch_page(&crate_name, None).await? else {
|
||||
pub async fn crawl(&self, crate_name: String) -> Result<Option<CrateDocs>> {
|
||||
let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let (_markdown, items) = convert_rustdoc_to_markdown(crate_index_content.as_bytes())?;
|
||||
let (crate_root_markdown, items) =
|
||||
convert_rustdoc_to_markdown(crate_root_content.as_bytes())?;
|
||||
|
||||
let mut seen_items = HashSet::default();
|
||||
let mut docs_by_item = IndexMap::new();
|
||||
let mut seen_items = HashSet::from_iter(items.clone());
|
||||
let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
|
||||
VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
|
||||
item,
|
||||
|
@ -152,6 +162,7 @@ impl RustdocCrawler {
|
|||
|
||||
while let Some(item_with_history) = items_to_visit.pop_front() {
|
||||
let item = &item_with_history.item;
|
||||
|
||||
println!("Visiting {:?} {:?} {}", &item.kind, &item.path, &item.name);
|
||||
|
||||
let Some(result) = self
|
||||
|
@ -176,23 +187,27 @@ impl RustdocCrawler {
|
|||
continue;
|
||||
};
|
||||
|
||||
let (_markdown, mut items) = convert_rustdoc_to_markdown(result.as_bytes())?;
|
||||
let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
|
||||
|
||||
seen_items.insert(item.clone());
|
||||
docs_by_item.insert(item.clone(), markdown);
|
||||
|
||||
for child in &mut items {
|
||||
child.path.extend(item.path.clone());
|
||||
match item.kind {
|
||||
let parent_item = item;
|
||||
for mut item in referenced_items {
|
||||
if seen_items.contains(&item) {
|
||||
continue;
|
||||
}
|
||||
|
||||
seen_items.insert(item.clone());
|
||||
|
||||
item.path.extend(parent_item.path.clone());
|
||||
match parent_item.kind {
|
||||
RustdocItemKind::Mod => {
|
||||
child.path.push(item.name.clone());
|
||||
item.path.push(parent_item.name.clone());
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let unseen_items = items
|
||||
.into_iter()
|
||||
.map(|item| RustdocItemWithHistory {
|
||||
items_to_visit.push_back(RustdocItemWithHistory {
|
||||
#[cfg(debug_assertions)]
|
||||
history: {
|
||||
let mut history = item_with_history.history.clone();
|
||||
|
@ -200,12 +215,13 @@ impl RustdocCrawler {
|
|||
history
|
||||
},
|
||||
item,
|
||||
})
|
||||
.filter(|item| !seen_items.contains(&item.item));
|
||||
|
||||
items_to_visit.extend(unseen_items);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some(String::new()))
|
||||
Ok(Some(CrateDocs {
|
||||
crate_root_markdown,
|
||||
items: docs_by_item,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,6 +43,13 @@ pub struct RustdocItem {
|
|||
}
|
||||
|
||||
impl RustdocItem {
|
||||
pub fn display(&self) -> String {
|
||||
let mut path_segments = self.path.clone();
|
||||
path_segments.push(self.name.clone());
|
||||
|
||||
path_segments.join("::")
|
||||
}
|
||||
|
||||
pub fn url_path(&self) -> String {
|
||||
let name = &self.name;
|
||||
let mut path_components = self.path.clone();
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
pub mod crawler;
|
||||
mod item;
|
||||
mod store;
|
||||
mod to_markdown;
|
||||
|
||||
pub use crate::item::*;
|
||||
pub use crate::store::*;
|
||||
pub use crate::to_markdown::convert_rustdoc_to_markdown;
|
||||
|
|
116
crates/rustdoc/src/store.rs
Normal file
116
crates/rustdoc/src/store.rs
Normal file
|
@ -0,0 +1,116 @@
|
|||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use collections::HashMap;
|
||||
use fuzzy::StringMatchCandidate;
|
||||
use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, Task, UpdateGlobal};
|
||||
use parking_lot::RwLock;
|
||||
|
||||
use crate::crawler::{RustdocCrawler, RustdocProvider};
|
||||
use crate::RustdocItem;
|
||||
|
||||
struct GlobalRustdocStore(Arc<RustdocStore>);
|
||||
|
||||
impl Global for GlobalRustdocStore {}
|
||||
|
||||
pub struct RustdocStore {
|
||||
executor: BackgroundExecutor,
|
||||
docs: Arc<RwLock<HashMap<(String, RustdocItem), String>>>,
|
||||
}
|
||||
|
||||
impl RustdocStore {
|
||||
pub fn global(cx: &AppContext) -> Arc<Self> {
|
||||
GlobalRustdocStore::global(cx).0.clone()
|
||||
}
|
||||
|
||||
pub fn init_global(cx: &mut AppContext) {
|
||||
GlobalRustdocStore::set_global(
|
||||
cx,
|
||||
GlobalRustdocStore(Arc::new(Self::new(cx.background_executor().clone()))),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn new(executor: BackgroundExecutor) -> Self {
|
||||
Self {
|
||||
executor,
|
||||
docs: Arc::new(RwLock::new(HashMap::default())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load(&self, crate_name: String, item_path: Option<String>) -> Task<Result<String>> {
|
||||
let item_docs = self
|
||||
.docs
|
||||
.read()
|
||||
.iter()
|
||||
.find_map(|((item_crate_name, item), item_docs)| {
|
||||
if item_crate_name == &crate_name && item_path == Some(item.display()) {
|
||||
Some(item_docs.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
Task::ready(item_docs.ok_or_else(|| anyhow!("no docs found")))
|
||||
}
|
||||
|
||||
pub fn index(
|
||||
&self,
|
||||
crate_name: String,
|
||||
provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
|
||||
) -> Task<Result<()>> {
|
||||
let docs = self.docs.clone();
|
||||
self.executor.spawn(async move {
|
||||
let crawler = RustdocCrawler::new(provider);
|
||||
|
||||
println!("Indexing {crate_name}");
|
||||
|
||||
let Some(crate_docs) = crawler.crawl(crate_name.clone()).await? else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let mut lock = docs.write();
|
||||
|
||||
for (item, item_docs) in crate_docs.items {
|
||||
lock.insert((crate_name.clone(), item), item_docs);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
pub fn search(&self, query: String) -> Task<Vec<(String, RustdocItem)>> {
|
||||
let executor = self.executor.clone();
|
||||
let docs = self.docs.read().clone();
|
||||
self.executor.spawn(async move {
|
||||
if query.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let items = docs.keys().collect::<Vec<_>>();
|
||||
|
||||
let candidates = items
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(ix, (crate_name, item))| {
|
||||
StringMatchCandidate::new(ix, format!("{crate_name}::{}", item.display()))
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let matches = fuzzy::match_strings(
|
||||
&candidates,
|
||||
&query,
|
||||
false,
|
||||
100,
|
||||
&AtomicBool::default(),
|
||||
executor,
|
||||
)
|
||||
.await;
|
||||
|
||||
matches
|
||||
.into_iter()
|
||||
.map(|mat| items[mat.candidate_id].clone())
|
||||
.collect()
|
||||
})
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue