Start work on genericizing /rustdoc
(#13745)
This PR begins the process of making the backing infrastructure for the `/rustdoc` command more generic, so that it can be applied to additional documentation providers.

In this PR we:

- Rename the `rustdoc` crate to `indexed_docs` as a more general-purpose name
- Start moving rustdoc-specific functionality into `indexed_docs::providers::rustdoc`
- Add an `IndexedDocsRegistry` to hold multiple `IndexedDocsStore`s (one per provider)

We haven't yet removed the rustdoc-specific bits in the `DocsIndexer`. That will follow soon.

Release Notes:

- N/A
This commit is contained in:
parent
eab98eb9c9
commit
7460381285
18 changed files with 425 additions and 355 deletions
35
crates/indexed_docs/Cargo.toml
Normal file
35
crates/indexed_docs/Cargo.toml
Normal file
|
@ -0,0 +1,35 @@
|
|||
[package]
|
||||
name = "indexed_docs"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
publish = false
|
||||
license = "GPL-3.0-or-later"
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[lib]
|
||||
path = "src/indexed_docs.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
async-trait.workspace = true
|
||||
collections.workspace = true
|
||||
derive_more.workspace = true
|
||||
fs.workspace = true
|
||||
futures.workspace = true
|
||||
fuzzy.workspace = true
|
||||
gpui.workspace = true
|
||||
heed.workspace = true
|
||||
html_to_markdown.workspace = true
|
||||
http.workspace = true
|
||||
indexmap.workspace = true
|
||||
parking_lot.workspace = true
|
||||
paths.workspace = true
|
||||
serde.workspace = true
|
||||
strum.workspace = true
|
||||
util.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
indoc.workspace = true
|
||||
pretty_assertions.workspace = true
|
1
crates/indexed_docs/LICENSE-GPL
Symbolic link
1
crates/indexed_docs/LICENSE-GPL
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../LICENSE-GPL
|
8
crates/indexed_docs/src/indexed_docs.rs
Normal file
8
crates/indexed_docs/src/indexed_docs.rs
Normal file
|
@ -0,0 +1,8 @@
|
|||
mod indexer;
|
||||
mod providers;
|
||||
mod registry;
|
||||
mod store;
|
||||
|
||||
pub use crate::providers::rustdoc::*;
|
||||
pub use crate::registry::*;
|
||||
pub use crate::store::*;
|
122
crates/indexed_docs/src/indexer.rs
Normal file
122
crates/indexed_docs/src/indexer.rs
Normal file
|
@ -0,0 +1,122 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use collections::{HashSet, VecDeque};
|
||||
|
||||
use crate::{
|
||||
convert_rustdoc_to_markdown, IndexedDocsDatabase, PackageName, RustdocItem, RustdocItemKind,
|
||||
};
|
||||
|
||||
#[async_trait]
|
||||
pub trait IndexedDocsProvider {
|
||||
async fn fetch_page(
|
||||
&self,
|
||||
package: &PackageName,
|
||||
item: Option<&RustdocItem>,
|
||||
) -> Result<Option<String>>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct RustdocItemWithHistory {
|
||||
pub item: RustdocItem,
|
||||
#[cfg(debug_assertions)]
|
||||
pub history: Vec<String>,
|
||||
}
|
||||
|
||||
pub(crate) struct DocsIndexer {
|
||||
database: Arc<IndexedDocsDatabase>,
|
||||
provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
|
||||
}
|
||||
|
||||
impl DocsIndexer {
|
||||
pub fn new(
|
||||
database: Arc<IndexedDocsDatabase>,
|
||||
provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
|
||||
) -> Self {
|
||||
Self { database, provider }
|
||||
}
|
||||
|
||||
/// Indexes the package with the given name.
|
||||
pub async fn index(&self, package: PackageName) -> Result<()> {
|
||||
let Some(package_root_content) = self.provider.fetch_page(&package, None).await? else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let (crate_root_markdown, items) =
|
||||
convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
|
||||
|
||||
self.database
|
||||
.insert(package.clone(), None, crate_root_markdown)
|
||||
.await?;
|
||||
|
||||
let mut seen_items = HashSet::from_iter(items.clone());
|
||||
let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
|
||||
VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
|
||||
item,
|
||||
#[cfg(debug_assertions)]
|
||||
history: Vec::new(),
|
||||
}));
|
||||
|
||||
while let Some(item_with_history) = items_to_visit.pop_front() {
|
||||
let item = &item_with_history.item;
|
||||
|
||||
let Some(result) = self
|
||||
.provider
|
||||
.fetch_page(&package, Some(&item))
|
||||
.await
|
||||
.with_context(|| {
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
format!(
|
||||
"failed to fetch {item:?}: {history:?}",
|
||||
history = item_with_history.history
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
{
|
||||
format!("failed to fetch {item:?}")
|
||||
}
|
||||
})?
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
|
||||
|
||||
self.database
|
||||
.insert(package.clone(), Some(item), markdown)
|
||||
.await?;
|
||||
|
||||
let parent_item = item;
|
||||
for mut item in referenced_items {
|
||||
if seen_items.contains(&item) {
|
||||
continue;
|
||||
}
|
||||
|
||||
seen_items.insert(item.clone());
|
||||
|
||||
item.path.extend(parent_item.path.clone());
|
||||
match parent_item.kind {
|
||||
RustdocItemKind::Mod => {
|
||||
item.path.push(parent_item.name.clone());
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
items_to_visit.push_back(RustdocItemWithHistory {
|
||||
#[cfg(debug_assertions)]
|
||||
history: {
|
||||
let mut history = item_with_history.history.clone();
|
||||
history.push(item.url_path());
|
||||
history
|
||||
},
|
||||
item,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
1
crates/indexed_docs/src/providers.rs
Normal file
1
crates/indexed_docs/src/providers.rs
Normal file
|
@ -0,0 +1 @@
|
|||
pub mod rustdoc;
|
117
crates/indexed_docs/src/providers/rustdoc.rs
Normal file
117
crates/indexed_docs/src/providers/rustdoc.rs
Normal file
|
@ -0,0 +1,117 @@
|
|||
mod item;
|
||||
mod to_markdown;
|
||||
|
||||
pub use item::*;
|
||||
pub use to_markdown::convert_rustdoc_to_markdown;
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use fs::Fs;
|
||||
use futures::AsyncReadExt;
|
||||
use http::{AsyncBody, HttpClient, HttpClientWithUrl};
|
||||
|
||||
use crate::indexer::IndexedDocsProvider;
|
||||
use crate::PackageName;
|
||||
|
||||
/// Identifies where a set of rustdoc pages came from.
#[derive(Debug, Clone, Copy)]
pub enum RustdocSource {
    /// The docs were sourced from Zed's rustdoc index.
    Index,
    /// The docs were sourced from local `cargo doc` output.
    Local,
    /// The docs were sourced from `docs.rs`.
    DocsDotRs,
}
|
||||
|
||||
pub struct LocalProvider {
|
||||
fs: Arc<dyn Fs>,
|
||||
cargo_workspace_root: PathBuf,
|
||||
}
|
||||
|
||||
impl LocalProvider {
|
||||
pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
|
||||
Self {
|
||||
fs,
|
||||
cargo_workspace_root,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl IndexedDocsProvider for LocalProvider {
|
||||
async fn fetch_page(
|
||||
&self,
|
||||
crate_name: &PackageName,
|
||||
item: Option<&RustdocItem>,
|
||||
) -> Result<Option<String>> {
|
||||
let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
|
||||
local_cargo_doc_path.push(crate_name.as_ref());
|
||||
if let Some(item) = item {
|
||||
local_cargo_doc_path.push(item.url_path());
|
||||
} else {
|
||||
local_cargo_doc_path.push("index.html");
|
||||
}
|
||||
|
||||
let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
Ok(Some(contents))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocsDotRsProvider {
|
||||
http_client: Arc<HttpClientWithUrl>,
|
||||
}
|
||||
|
||||
impl DocsDotRsProvider {
|
||||
pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
|
||||
Self { http_client }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl IndexedDocsProvider for DocsDotRsProvider {
|
||||
async fn fetch_page(
|
||||
&self,
|
||||
crate_name: &PackageName,
|
||||
item: Option<&RustdocItem>,
|
||||
) -> Result<Option<String>> {
|
||||
let version = "latest";
|
||||
let path = format!(
|
||||
"{crate_name}/{version}/{crate_name}{item_path}",
|
||||
item_path = item
|
||||
.map(|item| format!("/{}", item.url_path()))
|
||||
.unwrap_or_default()
|
||||
);
|
||||
|
||||
let mut response = self
|
||||
.http_client
|
||||
.get(
|
||||
&format!("https://docs.rs/{path}"),
|
||||
AsyncBody::default(),
|
||||
true,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut body = Vec::new();
|
||||
response
|
||||
.body_mut()
|
||||
.read_to_end(&mut body)
|
||||
.await
|
||||
.context("error reading docs.rs response body")?;
|
||||
|
||||
if response.status().is_client_error() {
|
||||
let text = String::from_utf8_lossy(body.as_slice());
|
||||
bail!(
|
||||
"status error {}, response: {text:?}",
|
||||
response.status().as_u16()
|
||||
);
|
||||
}
|
||||
|
||||
Ok(Some(String::from_utf8(body)?))
|
||||
}
|
||||
}
|
82
crates/indexed_docs/src/providers/rustdoc/item.rs
Normal file
82
crates/indexed_docs/src/providers/rustdoc/item.rs
Normal file
|
@ -0,0 +1,82 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use strum::EnumIter;
|
||||
|
||||
#[derive(
|
||||
Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, Serialize, Deserialize, EnumIter,
|
||||
)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum RustdocItemKind {
|
||||
Mod,
|
||||
Macro,
|
||||
Struct,
|
||||
Enum,
|
||||
Constant,
|
||||
Trait,
|
||||
Function,
|
||||
TypeAlias,
|
||||
AttributeMacro,
|
||||
DeriveMacro,
|
||||
}
|
||||
|
||||
impl RustdocItemKind {
|
||||
pub(crate) const fn class(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Mod => "mod",
|
||||
Self::Macro => "macro",
|
||||
Self::Struct => "struct",
|
||||
Self::Enum => "enum",
|
||||
Self::Constant => "constant",
|
||||
Self::Trait => "trait",
|
||||
Self::Function => "fn",
|
||||
Self::TypeAlias => "type",
|
||||
Self::AttributeMacro => "attr",
|
||||
Self::DeriveMacro => "derive",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
|
||||
pub struct RustdocItem {
|
||||
pub kind: RustdocItemKind,
|
||||
/// The item path, up until the name of the item.
|
||||
pub path: Vec<Arc<str>>,
|
||||
/// The name of the item.
|
||||
pub name: Arc<str>,
|
||||
}
|
||||
|
||||
impl RustdocItem {
|
||||
pub fn display(&self) -> String {
|
||||
let mut path_segments = self.path.clone();
|
||||
path_segments.push(self.name.clone());
|
||||
|
||||
path_segments.join("::")
|
||||
}
|
||||
|
||||
pub fn url_path(&self) -> String {
|
||||
let name = &self.name;
|
||||
let mut path_components = self.path.clone();
|
||||
|
||||
match self.kind {
|
||||
RustdocItemKind::Mod => {
|
||||
path_components.push(name.clone());
|
||||
path_components.push("index.html".into());
|
||||
}
|
||||
RustdocItemKind::Macro
|
||||
| RustdocItemKind::Struct
|
||||
| RustdocItemKind::Enum
|
||||
| RustdocItemKind::Constant
|
||||
| RustdocItemKind::Trait
|
||||
| RustdocItemKind::Function
|
||||
| RustdocItemKind::TypeAlias
|
||||
| RustdocItemKind::AttributeMacro
|
||||
| RustdocItemKind::DeriveMacro => {
|
||||
path_components
|
||||
.push(format!("{kind}.{name}.html", kind = self.kind.class()).into());
|
||||
}
|
||||
}
|
||||
|
||||
path_components.join("/")
|
||||
}
|
||||
}
|
627
crates/indexed_docs/src/providers/rustdoc/to_markdown.rs
Normal file
627
crates/indexed_docs/src/providers/rustdoc/to_markdown.rs
Normal file
|
@ -0,0 +1,627 @@
|
|||
use std::cell::RefCell;
|
||||
use std::io::Read;
|
||||
use std::rc::Rc;
|
||||
|
||||
use anyhow::Result;
|
||||
use html_to_markdown::markdown::{
|
||||
HeadingHandler, ListHandler, ParagraphHandler, StyledTextHandler, TableHandler,
|
||||
};
|
||||
use html_to_markdown::{
|
||||
convert_html_to_markdown, HandleTag, HandlerOutcome, HtmlElement, MarkdownWriter,
|
||||
StartTagOutcome, TagHandler,
|
||||
};
|
||||
use indexmap::IndexSet;
|
||||
use strum::IntoEnumIterator;
|
||||
|
||||
use crate::{RustdocItem, RustdocItemKind};
|
||||
|
||||
/// Converts the provided rustdoc HTML to Markdown.
|
||||
pub fn convert_rustdoc_to_markdown(html: impl Read) -> Result<(String, Vec<RustdocItem>)> {
|
||||
let item_collector = Rc::new(RefCell::new(RustdocItemCollector::new()));
|
||||
|
||||
let mut handlers: Vec<TagHandler> = vec![
|
||||
Rc::new(RefCell::new(ParagraphHandler)),
|
||||
Rc::new(RefCell::new(HeadingHandler)),
|
||||
Rc::new(RefCell::new(ListHandler)),
|
||||
Rc::new(RefCell::new(TableHandler::new())),
|
||||
Rc::new(RefCell::new(StyledTextHandler)),
|
||||
Rc::new(RefCell::new(RustdocChromeRemover)),
|
||||
Rc::new(RefCell::new(RustdocHeadingHandler)),
|
||||
Rc::new(RefCell::new(RustdocCodeHandler)),
|
||||
Rc::new(RefCell::new(RustdocItemHandler)),
|
||||
item_collector.clone(),
|
||||
];
|
||||
|
||||
let markdown = convert_html_to_markdown(html, &mut handlers)?;
|
||||
|
||||
let items = item_collector
|
||||
.borrow()
|
||||
.items
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok((markdown, items))
|
||||
}
|
||||
|
||||
pub struct RustdocHeadingHandler;
|
||||
|
||||
impl HandleTag for RustdocHeadingHandler {
|
||||
fn should_handle(&self, _tag: &str) -> bool {
|
||||
// We're only handling text, so we don't need to visit any tags.
|
||||
false
|
||||
}
|
||||
|
||||
fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
|
||||
if writer.is_inside("h1")
|
||||
|| writer.is_inside("h2")
|
||||
|| writer.is_inside("h3")
|
||||
|| writer.is_inside("h4")
|
||||
|| writer.is_inside("h5")
|
||||
|| writer.is_inside("h6")
|
||||
{
|
||||
let text = text
|
||||
.trim_matches(|char| char == '\n' || char == '\r')
|
||||
.replace('\n', " ");
|
||||
writer.push_str(&text);
|
||||
|
||||
return HandlerOutcome::Handled;
|
||||
}
|
||||
|
||||
HandlerOutcome::NoOp
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RustdocCodeHandler;
|
||||
|
||||
impl HandleTag for RustdocCodeHandler {
|
||||
fn should_handle(&self, tag: &str) -> bool {
|
||||
match tag {
|
||||
"pre" | "code" => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_tag_start(
|
||||
&mut self,
|
||||
tag: &HtmlElement,
|
||||
writer: &mut MarkdownWriter,
|
||||
) -> StartTagOutcome {
|
||||
match tag.tag() {
|
||||
"code" => {
|
||||
if !writer.is_inside("pre") {
|
||||
writer.push_str("`");
|
||||
}
|
||||
}
|
||||
"pre" => {
|
||||
let classes = tag.classes();
|
||||
let is_rust = classes.iter().any(|class| class == "rust");
|
||||
let language = is_rust
|
||||
.then(|| "rs")
|
||||
.or_else(|| {
|
||||
classes.iter().find_map(|class| {
|
||||
if let Some((_, language)) = class.split_once("language-") {
|
||||
Some(language.trim())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
.unwrap_or("");
|
||||
|
||||
writer.push_str(&format!("\n\n```{language}\n"));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
StartTagOutcome::Continue
|
||||
}
|
||||
|
||||
fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
|
||||
match tag.tag() {
|
||||
"code" => {
|
||||
if !writer.is_inside("pre") {
|
||||
writer.push_str("`");
|
||||
}
|
||||
}
|
||||
"pre" => writer.push_str("\n```\n"),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
|
||||
if writer.is_inside("pre") {
|
||||
writer.push_str(&text);
|
||||
return HandlerOutcome::Handled;
|
||||
}
|
||||
|
||||
HandlerOutcome::NoOp
|
||||
}
|
||||
}
|
||||
|
||||
const RUSTDOC_ITEM_NAME_CLASS: &str = "item-name";
|
||||
|
||||
pub struct RustdocItemHandler;
|
||||
|
||||
impl RustdocItemHandler {
|
||||
/// Returns whether we're currently inside of an `.item-name` element, which
|
||||
/// rustdoc uses to display Rust items in a list.
|
||||
fn is_inside_item_name(writer: &MarkdownWriter) -> bool {
|
||||
writer
|
||||
.current_element_stack()
|
||||
.iter()
|
||||
.any(|element| element.has_class(RUSTDOC_ITEM_NAME_CLASS))
|
||||
}
|
||||
}
|
||||
|
||||
impl HandleTag for RustdocItemHandler {
|
||||
fn should_handle(&self, tag: &str) -> bool {
|
||||
match tag {
|
||||
"div" | "span" => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_tag_start(
|
||||
&mut self,
|
||||
tag: &HtmlElement,
|
||||
writer: &mut MarkdownWriter,
|
||||
) -> StartTagOutcome {
|
||||
match tag.tag() {
|
||||
"div" | "span" => {
|
||||
if Self::is_inside_item_name(writer) && tag.has_class("stab") {
|
||||
writer.push_str(" [");
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
StartTagOutcome::Continue
|
||||
}
|
||||
|
||||
fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
|
||||
match tag.tag() {
|
||||
"div" | "span" => {
|
||||
if tag.has_class(RUSTDOC_ITEM_NAME_CLASS) {
|
||||
writer.push_str(": ");
|
||||
}
|
||||
|
||||
if Self::is_inside_item_name(writer) && tag.has_class("stab") {
|
||||
writer.push_str("]");
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
|
||||
if Self::is_inside_item_name(writer)
|
||||
&& !writer.is_inside("span")
|
||||
&& !writer.is_inside("code")
|
||||
{
|
||||
writer.push_str(&format!("`{text}`"));
|
||||
return HandlerOutcome::Handled;
|
||||
}
|
||||
|
||||
HandlerOutcome::NoOp
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RustdocChromeRemover;
|
||||
|
||||
impl HandleTag for RustdocChromeRemover {
|
||||
fn should_handle(&self, tag: &str) -> bool {
|
||||
match tag {
|
||||
"head" | "script" | "nav" | "summary" | "button" | "a" | "div" | "span" => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_tag_start(
|
||||
&mut self,
|
||||
tag: &HtmlElement,
|
||||
_writer: &mut MarkdownWriter,
|
||||
) -> StartTagOutcome {
|
||||
match tag.tag() {
|
||||
"head" | "script" | "nav" => return StartTagOutcome::Skip,
|
||||
"summary" => {
|
||||
if tag.has_class("hideme") {
|
||||
return StartTagOutcome::Skip;
|
||||
}
|
||||
}
|
||||
"button" => {
|
||||
if tag.attr("id").as_deref() == Some("copy-path") {
|
||||
return StartTagOutcome::Skip;
|
||||
}
|
||||
}
|
||||
"a" => {
|
||||
if tag.has_any_classes(&["anchor", "doc-anchor", "src"]) {
|
||||
return StartTagOutcome::Skip;
|
||||
}
|
||||
}
|
||||
"div" | "span" => {
|
||||
if tag.has_any_classes(&["nav-container", "sidebar-elems", "out-of-band"]) {
|
||||
return StartTagOutcome::Skip;
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
}
|
||||
|
||||
StartTagOutcome::Continue
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RustdocItemCollector {
|
||||
pub items: IndexSet<RustdocItem>,
|
||||
}
|
||||
|
||||
impl RustdocItemCollector {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
items: IndexSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_item(tag: &HtmlElement) -> Option<RustdocItem> {
|
||||
if tag.tag() != "a" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let href = tag.attr("href")?;
|
||||
if href.starts_with('#') || href.starts_with("https://") || href.starts_with("../") {
|
||||
return None;
|
||||
}
|
||||
|
||||
for kind in RustdocItemKind::iter() {
|
||||
if tag.has_class(kind.class()) {
|
||||
let mut parts = href.trim_end_matches("/index.html").split('/');
|
||||
|
||||
if let Some(last_component) = parts.next_back() {
|
||||
let last_component = match last_component.split_once('#') {
|
||||
Some((component, _fragment)) => component,
|
||||
None => last_component,
|
||||
};
|
||||
|
||||
let name = last_component
|
||||
.trim_start_matches(&format!("{}.", kind.class()))
|
||||
.trim_end_matches(".html");
|
||||
|
||||
return Some(RustdocItem {
|
||||
kind,
|
||||
name: name.into(),
|
||||
path: parts.map(Into::into).collect(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl HandleTag for RustdocItemCollector {
|
||||
fn should_handle(&self, tag: &str) -> bool {
|
||||
tag == "a"
|
||||
}
|
||||
|
||||
fn handle_tag_start(
|
||||
&mut self,
|
||||
tag: &HtmlElement,
|
||||
writer: &mut MarkdownWriter,
|
||||
) -> StartTagOutcome {
|
||||
match tag.tag() {
|
||||
"a" => {
|
||||
let is_reexport = writer.current_element_stack().iter().any(|element| {
|
||||
if let Some(id) = element.attr("id") {
|
||||
id.starts_with("reexport.") || id.starts_with("method.")
|
||||
} else {
|
||||
false
|
||||
}
|
||||
});
|
||||
|
||||
if !is_reexport {
|
||||
if let Some(item) = Self::parse_item(tag) {
|
||||
self.items.insert(item);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
StartTagOutcome::Continue
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use html_to_markdown::{convert_html_to_markdown, TagHandler};
|
||||
use indoc::indoc;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::*;
|
||||
|
||||
fn rustdoc_handlers() -> Vec<TagHandler> {
|
||||
vec![
|
||||
Rc::new(RefCell::new(ParagraphHandler)),
|
||||
Rc::new(RefCell::new(HeadingHandler)),
|
||||
Rc::new(RefCell::new(ListHandler)),
|
||||
Rc::new(RefCell::new(TableHandler::new())),
|
||||
Rc::new(RefCell::new(StyledTextHandler)),
|
||||
Rc::new(RefCell::new(RustdocChromeRemover)),
|
||||
Rc::new(RefCell::new(RustdocHeadingHandler)),
|
||||
Rc::new(RefCell::new(RustdocCodeHandler)),
|
||||
Rc::new(RefCell::new(RustdocItemHandler)),
|
||||
]
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_main_heading_buttons_get_removed() {
|
||||
let html = indoc! {r##"
|
||||
<div class="main-heading">
|
||||
<h1>Crate <a class="mod" href="#">serde</a><button id="copy-path" title="Copy item path to clipboard">Copy item path</button></h1>
|
||||
<span class="out-of-band">
|
||||
<a class="src" href="../src/serde/lib.rs.html#1-340">source</a> · <button id="toggle-all-docs" title="collapse all docs">[<span>−</span>]</button>
|
||||
</span>
|
||||
</div>
|
||||
"##};
|
||||
let expected = indoc! {"
|
||||
# Crate serde
|
||||
"}
|
||||
.trim();
|
||||
|
||||
assert_eq!(
|
||||
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||
expected
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_paragraph() {
|
||||
let html = indoc! {r#"
|
||||
<p>In particular, the last point is what sets <code>axum</code> apart from other frameworks.
|
||||
<code>axum</code> doesn’t have its own middleware system but instead uses
|
||||
<a href="https://docs.rs/tower-service/0.3.2/x86_64-unknown-linux-gnu/tower_service/trait.Service.html" title="trait tower_service::Service"><code>tower::Service</code></a>. This means <code>axum</code> gets timeouts, tracing, compression,
|
||||
authorization, and more, for free. It also enables you to share middleware with
|
||||
applications written using <a href="http://crates.io/crates/hyper"><code>hyper</code></a> or <a href="http://crates.io/crates/tonic"><code>tonic</code></a>.</p>
|
||||
"#};
|
||||
let expected = indoc! {"
|
||||
In particular, the last point is what sets `axum` apart from other frameworks. `axum` doesn’t have its own middleware system but instead uses `tower::Service`. This means `axum` gets timeouts, tracing, compression, authorization, and more, for free. It also enables you to share middleware with applications written using `hyper` or `tonic`.
|
||||
"}
|
||||
.trim();
|
||||
|
||||
assert_eq!(
|
||||
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||
expected
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_multiple_paragraphs() {
|
||||
let html = indoc! {r##"
|
||||
<h2 id="serde"><a class="doc-anchor" href="#serde">§</a>Serde</h2>
|
||||
<p>Serde is a framework for <em><strong>ser</strong></em>ializing and <em><strong>de</strong></em>serializing Rust data
|
||||
structures efficiently and generically.</p>
|
||||
<p>The Serde ecosystem consists of data structures that know how to serialize
|
||||
and deserialize themselves along with data formats that know how to
|
||||
serialize and deserialize other things. Serde provides the layer by which
|
||||
these two groups interact with each other, allowing any supported data
|
||||
structure to be serialized and deserialized using any supported data format.</p>
|
||||
<p>See the Serde website <a href="https://serde.rs/">https://serde.rs/</a> for additional documentation and
|
||||
usage examples.</p>
|
||||
<h3 id="design"><a class="doc-anchor" href="#design">§</a>Design</h3>
|
||||
<p>Where many other languages rely on runtime reflection for serializing data,
|
||||
Serde is instead built on Rust’s powerful trait system. A data structure
|
||||
that knows how to serialize and deserialize itself is one that implements
|
||||
Serde’s <code>Serialize</code> and <code>Deserialize</code> traits (or uses Serde’s derive
|
||||
attribute to automatically generate implementations at compile time). This
|
||||
avoids any overhead of reflection or runtime type information. In fact in
|
||||
many situations the interaction between data structure and data format can
|
||||
be completely optimized away by the Rust compiler, leaving Serde
|
||||
serialization to perform the same speed as a handwritten serializer for the
|
||||
specific selection of data structure and data format.</p>
|
||||
"##};
|
||||
let expected = indoc! {"
|
||||
## Serde
|
||||
|
||||
Serde is a framework for _**ser**_ializing and _**de**_serializing Rust data structures efficiently and generically.
|
||||
|
||||
The Serde ecosystem consists of data structures that know how to serialize and deserialize themselves along with data formats that know how to serialize and deserialize other things. Serde provides the layer by which these two groups interact with each other, allowing any supported data structure to be serialized and deserialized using any supported data format.
|
||||
|
||||
See the Serde website https://serde.rs/ for additional documentation and usage examples.
|
||||
|
||||
### Design
|
||||
|
||||
Where many other languages rely on runtime reflection for serializing data, Serde is instead built on Rust’s powerful trait system. A data structure that knows how to serialize and deserialize itself is one that implements Serde’s `Serialize` and `Deserialize` traits (or uses Serde’s derive attribute to automatically generate implementations at compile time). This avoids any overhead of reflection or runtime type information. In fact in many situations the interaction between data structure and data format can be completely optimized away by the Rust compiler, leaving Serde serialization to perform the same speed as a handwritten serializer for the specific selection of data structure and data format.
|
||||
"}
|
||||
.trim();
|
||||
|
||||
assert_eq!(
|
||||
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||
expected
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_styled_text() {
|
||||
let html = indoc! {r#"
|
||||
<p>This text is <strong>bolded</strong>.</p>
|
||||
<p>This text is <em>italicized</em>.</p>
|
||||
"#};
|
||||
let expected = indoc! {"
|
||||
This text is **bolded**.
|
||||
|
||||
This text is _italicized_.
|
||||
"}
|
||||
.trim();
|
||||
|
||||
assert_eq!(
|
||||
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||
expected
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_rust_code_block() {
|
||||
let html = indoc! {r#"
|
||||
<pre class="rust rust-example-rendered"><code><span class="kw">use </span>axum::extract::{Path, Query, Json};
|
||||
<span class="kw">use </span>std::collections::HashMap;
|
||||
|
||||
<span class="comment">// `Path` gives you the path parameters and deserializes them.
|
||||
</span><span class="kw">async fn </span>path(Path(user_id): Path<u32>) {}
|
||||
|
||||
<span class="comment">// `Query` gives you the query parameters and deserializes them.
|
||||
</span><span class="kw">async fn </span>query(Query(params): Query<HashMap<String, String>>) {}
|
||||
|
||||
<span class="comment">// Buffer the request body and deserialize it as JSON into a
|
||||
// `serde_json::Value`. `Json` supports any type that implements
|
||||
// `serde::Deserialize`.
|
||||
</span><span class="kw">async fn </span>json(Json(payload): Json<serde_json::Value>) {}</code></pre>
|
||||
"#};
|
||||
let expected = indoc! {"
|
||||
```rs
|
||||
use axum::extract::{Path, Query, Json};
|
||||
use std::collections::HashMap;
|
||||
|
||||
// `Path` gives you the path parameters and deserializes them.
|
||||
async fn path(Path(user_id): Path<u32>) {}
|
||||
|
||||
// `Query` gives you the query parameters and deserializes them.
|
||||
async fn query(Query(params): Query<HashMap<String, String>>) {}
|
||||
|
||||
// Buffer the request body and deserialize it as JSON into a
|
||||
// `serde_json::Value`. `Json` supports any type that implements
|
||||
// `serde::Deserialize`.
|
||||
async fn json(Json(payload): Json<serde_json::Value>) {}
|
||||
```
|
||||
"}
|
||||
.trim();
|
||||
|
||||
assert_eq!(
|
||||
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||
expected
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_toml_code_block() {
|
||||
let html = indoc! {r##"
|
||||
<h2 id="required-dependencies"><a class="doc-anchor" href="#required-dependencies">§</a>Required dependencies</h2>
|
||||
<p>To use axum there are a few dependencies you have to pull in as well:</p>
|
||||
<div class="example-wrap"><pre class="language-toml"><code>[dependencies]
|
||||
axum = "<latest-version>"
|
||||
tokio = { version = "<latest-version>", features = ["full"] }
|
||||
tower = "<latest-version>"
|
||||
</code></pre></div>
|
||||
"##};
|
||||
let expected = indoc! {r#"
|
||||
## Required dependencies
|
||||
|
||||
To use axum there are a few dependencies you have to pull in as well:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
axum = "<latest-version>"
|
||||
tokio = { version = "<latest-version>", features = ["full"] }
|
||||
tower = "<latest-version>"
|
||||
|
||||
```
|
||||
"#}
|
||||
.trim();
|
||||
|
||||
assert_eq!(
|
||||
convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(),
|
||||
expected
|
||||
)
|
||||
}
|
||||
|
||||
/// Checks that a rustdoc item table (`<ul class="item-table">`) is rendered as a Markdown
/// bullet list, with each item's portability note placed in square brackets after its name.
#[test]
fn test_item_table() {
    let html = indoc! {r##"
        <h2 id="structs" class="section-header">Structs<a href="#structs" class="anchor">§</a></h2>
        <ul class="item-table">
        <li><div class="item-name"><a class="struct" href="struct.Error.html" title="struct axum::Error">Error</a></div><div class="desc docblock-short">Errors that can happen when using axum.</div></li>
        <li><div class="item-name"><a class="struct" href="struct.Extension.html" title="struct axum::Extension">Extension</a></div><div class="desc docblock-short">Extractor and response for extensions.</div></li>
        <li><div class="item-name"><a class="struct" href="struct.Form.html" title="struct axum::Form">Form</a><span class="stab portability" title="Available on crate feature `form` only"><code>form</code></span></div><div class="desc docblock-short">URL encoded extractor and response.</div></li>
        <li><div class="item-name"><a class="struct" href="struct.Json.html" title="struct axum::Json">Json</a><span class="stab portability" title="Available on crate feature `json` only"><code>json</code></span></div><div class="desc docblock-short">JSON Extractor / Response.</div></li>
        <li><div class="item-name"><a class="struct" href="struct.Router.html" title="struct axum::Router">Router</a></div><div class="desc docblock-short">The router type for composing handlers and services.</div></li></ul>
        <h2 id="functions" class="section-header">Functions<a href="#functions" class="anchor">§</a></h2>
        <ul class="item-table">
        <li><div class="item-name"><a class="fn" href="fn.serve.html" title="fn axum::serve">serve</a><span class="stab portability" title="Available on crate feature `tokio` and (crate features `http1` or `http2`) only"><code>tokio</code> and (<code>http1</code> or <code>http2</code>)</span></div><div class="desc docblock-short">Serve the service with the supplied listener.</div></li>
        </ul>
    "##};
    let expected = indoc! {r#"
        ## Structs

        - `Error`: Errors that can happen when using axum.
        - `Extension`: Extractor and response for extensions.
        - `Form` [`form`]: URL encoded extractor and response.
        - `Json` [`json`]: JSON Extractor / Response.
        - `Router`: The router type for composing handlers and services.

        ## Functions

        - `serve` [`tokio` and (`http1` or `http2`)]: Serve the service with the supplied listener.
    "#}
    .trim();

    // Run the conversion with the rustdoc-specific tag handlers and compare the whole output.
    let mut handlers = rustdoc_handlers();
    let markdown = convert_html_to_markdown(html.as_bytes(), &mut handlers).unwrap();

    assert_eq!(markdown, expected);
}
|
||||
|
||||
/// Checks that an HTML `<table>` is rendered as a pipe-delimited Markdown table, with inline
/// code preserved and links reduced to their text.
#[test]
fn test_table() {
    let html = indoc! {r##"
        <h2 id="feature-flags"><a class="doc-anchor" href="#feature-flags">§</a>Feature flags</h2>
        <p>axum uses a set of <a href="https://doc.rust-lang.org/cargo/reference/features.html#the-features-section">feature flags</a> to reduce the amount of compiled and
        optional dependencies.</p>
        <p>The following optional features are available:</p>
        <div><table><thead><tr><th>Name</th><th>Description</th><th>Default?</th></tr></thead><tbody>
        <tr><td><code>http1</code></td><td>Enables hyper’s <code>http1</code> feature</td><td>Yes</td></tr>
        <tr><td><code>http2</code></td><td>Enables hyper’s <code>http2</code> feature</td><td>No</td></tr>
        <tr><td><code>json</code></td><td>Enables the <a href="struct.Json.html" title="struct axum::Json"><code>Json</code></a> type and some similar convenience functionality</td><td>Yes</td></tr>
        <tr><td><code>macros</code></td><td>Enables optional utility macros</td><td>No</td></tr>
        <tr><td><code>matched-path</code></td><td>Enables capturing of every request’s router path and the <a href="extract/struct.MatchedPath.html" title="struct axum::extract::MatchedPath"><code>MatchedPath</code></a> extractor</td><td>Yes</td></tr>
        <tr><td><code>multipart</code></td><td>Enables parsing <code>multipart/form-data</code> requests with <a href="extract/struct.Multipart.html" title="struct axum::extract::Multipart"><code>Multipart</code></a></td><td>No</td></tr>
        <tr><td><code>original-uri</code></td><td>Enables capturing of every request’s original URI and the <a href="extract/struct.OriginalUri.html" title="struct axum::extract::OriginalUri"><code>OriginalUri</code></a> extractor</td><td>Yes</td></tr>
        <tr><td><code>tokio</code></td><td>Enables <code>tokio</code> as a dependency and <code>axum::serve</code>, <code>SSE</code> and <code>extract::connect_info</code> types.</td><td>Yes</td></tr>
        <tr><td><code>tower-log</code></td><td>Enables <code>tower</code>’s <code>log</code> feature</td><td>Yes</td></tr>
        <tr><td><code>tracing</code></td><td>Log rejections from built-in extractors</td><td>Yes</td></tr>
        <tr><td><code>ws</code></td><td>Enables WebSockets support via <a href="extract/ws/index.html" title="mod axum::extract::ws"><code>extract::ws</code></a></td><td>No</td></tr>
        <tr><td><code>form</code></td><td>Enables the <code>Form</code> extractor</td><td>Yes</td></tr>
        <tr><td><code>query</code></td><td>Enables the <code>Query</code> extractor</td><td>Yes</td></tr>
        </tbody></table>
    "##};
    let expected = indoc! {r#"
        ## Feature flags

        axum uses a set of feature flags to reduce the amount of compiled and optional dependencies.

        The following optional features are available:

        | Name | Description | Default? |
        | --- | --- | --- |
        | `http1` | Enables hyper’s `http1` feature | Yes |
        | `http2` | Enables hyper’s `http2` feature | No |
        | `json` | Enables the `Json` type and some similar convenience functionality | Yes |
        | `macros` | Enables optional utility macros | No |
        | `matched-path` | Enables capturing of every request’s router path and the `MatchedPath` extractor | Yes |
        | `multipart` | Enables parsing `multipart/form-data` requests with `Multipart` | No |
        | `original-uri` | Enables capturing of every request’s original URI and the `OriginalUri` extractor | Yes |
        | `tokio` | Enables `tokio` as a dependency and `axum::serve`, `SSE` and `extract::connect_info` types. | Yes |
        | `tower-log` | Enables `tower`’s `log` feature | Yes |
        | `tracing` | Log rejections from built-in extractors | Yes |
        | `ws` | Enables WebSockets support via `extract::ws` | No |
        | `form` | Enables the `Form` extractor | Yes |
        | `query` | Enables the `Query` extractor | Yes |
    "#}
    .trim();

    // Run the conversion with the rustdoc-specific tag handlers and compare the whole output.
    let mut handlers = rustdoc_handlers();
    let markdown = convert_html_to_markdown(html.as_bytes(), &mut handlers).unwrap();

    assert_eq!(markdown, expected);
}
|
||||
}
|
47
crates/indexed_docs/src/registry.rs
Normal file
47
crates/indexed_docs/src/registry.rs
Normal file
|
@ -0,0 +1,47 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use collections::HashMap;
|
||||
use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, UpdateGlobal};
|
||||
use parking_lot::RwLock;
|
||||
|
||||
use crate::{IndexedDocsStore, Provider, ProviderId};
|
||||
|
||||
struct GlobalIndexedDocsRegistry(Arc<IndexedDocsRegistry>);
|
||||
|
||||
impl Global for GlobalIndexedDocsRegistry {}
|
||||
|
||||
pub struct IndexedDocsRegistry {
|
||||
executor: BackgroundExecutor,
|
||||
stores_by_provider: RwLock<HashMap<ProviderId, Arc<IndexedDocsStore>>>,
|
||||
}
|
||||
|
||||
impl IndexedDocsRegistry {
|
||||
pub fn global(cx: &AppContext) -> Arc<Self> {
|
||||
GlobalIndexedDocsRegistry::global(cx).0.clone()
|
||||
}
|
||||
|
||||
pub fn init_global(cx: &mut AppContext) {
|
||||
GlobalIndexedDocsRegistry::set_global(
|
||||
cx,
|
||||
GlobalIndexedDocsRegistry(Arc::new(Self::new(cx.background_executor().clone()))),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn new(executor: BackgroundExecutor) -> Self {
|
||||
Self {
|
||||
executor,
|
||||
stores_by_provider: RwLock::new(HashMap::default()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn register_provider(&self, provider: Provider) {
|
||||
self.stores_by_provider.write().insert(
|
||||
provider.id.clone(),
|
||||
Arc::new(IndexedDocsStore::new(provider, self.executor.clone())),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn get_provider_store(&self, provider_id: ProviderId) -> Option<Arc<IndexedDocsStore>> {
|
||||
self.stores_by_provider.read().get(&provider_id).cloned()
|
||||
}
|
||||
}
|
282
crates/indexed_docs/src/store.rs
Normal file
282
crates/indexed_docs/src/store.rs
Normal file
|
@ -0,0 +1,282 @@
|
|||
use std::path::PathBuf;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use collections::HashMap;
|
||||
use derive_more::{Deref, Display};
|
||||
use futures::future::{self, BoxFuture, Shared};
|
||||
use futures::FutureExt;
|
||||
use fuzzy::StringMatchCandidate;
|
||||
use gpui::{AppContext, BackgroundExecutor, Task};
|
||||
use heed::types::SerdeBincode;
|
||||
use heed::Database;
|
||||
use parking_lot::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use util::ResultExt;
|
||||
|
||||
use crate::indexer::{DocsIndexer, IndexedDocsProvider};
|
||||
use crate::{IndexedDocsRegistry, RustdocItem};
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
|
||||
pub struct ProviderId(Arc<str>);
|
||||
|
||||
impl ProviderId {
|
||||
pub fn rustdoc() -> Self {
|
||||
Self("rustdoc".into())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Provider {
|
||||
pub id: ProviderId,
|
||||
pub database_path: PathBuf,
|
||||
}
|
||||
|
||||
impl Provider {
|
||||
pub fn rustdoc() -> Self {
|
||||
Self {
|
||||
id: ProviderId("rustdoc".into()),
|
||||
database_path: paths::support_dir().join("docs/rust/rustdoc-db.1.mdb"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The name of a package.
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
|
||||
pub struct PackageName(Arc<str>);
|
||||
|
||||
impl From<&str> for PackageName {
|
||||
fn from(value: &str) -> Self {
|
||||
Self(value.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// A store for indexed docs.
|
||||
pub struct IndexedDocsStore {
|
||||
executor: BackgroundExecutor,
|
||||
database_future:
|
||||
Shared<BoxFuture<'static, Result<Arc<IndexedDocsDatabase>, Arc<anyhow::Error>>>>,
|
||||
indexing_tasks_by_package:
|
||||
RwLock<HashMap<PackageName, Shared<Task<Result<(), Arc<anyhow::Error>>>>>>,
|
||||
}
|
||||
|
||||
impl IndexedDocsStore {
|
||||
pub fn try_global(provider: ProviderId, cx: &AppContext) -> Result<Arc<Self>> {
|
||||
let registry = IndexedDocsRegistry::global(cx);
|
||||
registry
|
||||
.get_provider_store(provider.clone())
|
||||
.ok_or_else(|| anyhow!("no indexed docs store found for {provider}"))
|
||||
}
|
||||
|
||||
pub fn new(provider: Provider, executor: BackgroundExecutor) -> Self {
|
||||
let database_future = executor
|
||||
.spawn({
|
||||
let executor = executor.clone();
|
||||
async move { IndexedDocsDatabase::new(provider.database_path, executor) }
|
||||
})
|
||||
.then(|result| future::ready(result.map(Arc::new).map_err(Arc::new)))
|
||||
.boxed()
|
||||
.shared();
|
||||
|
||||
Self {
|
||||
executor,
|
||||
database_future,
|
||||
indexing_tasks_by_package: RwLock::new(HashMap::default()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whether the package with the given name is currently being indexed.
|
||||
pub fn is_indexing(&self, package: &PackageName) -> bool {
|
||||
self.indexing_tasks_by_package.read().contains_key(package)
|
||||
}
|
||||
|
||||
pub async fn load(
|
||||
&self,
|
||||
package: PackageName,
|
||||
item_path: Option<String>,
|
||||
) -> Result<MarkdownDocs> {
|
||||
self.database_future
|
||||
.clone()
|
||||
.await
|
||||
.map_err(|err| anyhow!(err))?
|
||||
.load(package, item_path)
|
||||
.await
|
||||
}
|
||||
|
||||
pub fn index(
|
||||
self: Arc<Self>,
|
||||
package: PackageName,
|
||||
provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
|
||||
) -> Shared<Task<Result<(), Arc<anyhow::Error>>>> {
|
||||
if let Some(existing_task) = self.indexing_tasks_by_package.read().get(&package) {
|
||||
return existing_task.clone();
|
||||
}
|
||||
|
||||
let indexing_task = self
|
||||
.executor
|
||||
.spawn({
|
||||
let this = self.clone();
|
||||
let package = package.clone();
|
||||
async move {
|
||||
let _finally = util::defer({
|
||||
let this = this.clone();
|
||||
let package = package.clone();
|
||||
move || {
|
||||
this.indexing_tasks_by_package.write().remove(&package);
|
||||
}
|
||||
});
|
||||
|
||||
let index_task = async {
|
||||
let database = this
|
||||
.database_future
|
||||
.clone()
|
||||
.await
|
||||
.map_err(|err| anyhow!(err))?;
|
||||
let indexer = DocsIndexer::new(database, provider);
|
||||
|
||||
indexer.index(package.clone()).await
|
||||
};
|
||||
|
||||
index_task.await.map_err(Arc::new)
|
||||
}
|
||||
})
|
||||
.shared();
|
||||
|
||||
self.indexing_tasks_by_package
|
||||
.write()
|
||||
.insert(package, indexing_task.clone());
|
||||
|
||||
indexing_task
|
||||
}
|
||||
|
||||
pub fn search(&self, query: String) -> Task<Vec<String>> {
|
||||
let executor = self.executor.clone();
|
||||
let database_future = self.database_future.clone();
|
||||
self.executor.spawn(async move {
|
||||
if query.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let Some(database) = database_future.await.map_err(|err| anyhow!(err)).log_err() else {
|
||||
return Vec::new();
|
||||
};
|
||||
|
||||
let Some(items) = database.keys().await.log_err() else {
|
||||
return Vec::new();
|
||||
};
|
||||
|
||||
let candidates = items
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(ix, item_path)| StringMatchCandidate::new(ix, item_path.clone()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let matches = fuzzy::match_strings(
|
||||
&candidates,
|
||||
&query,
|
||||
false,
|
||||
100,
|
||||
&AtomicBool::default(),
|
||||
executor,
|
||||
)
|
||||
.await;
|
||||
|
||||
matches
|
||||
.into_iter()
|
||||
.map(|mat| items[mat.candidate_id].clone())
|
||||
.collect()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Display, Serialize, Deserialize)]
|
||||
pub struct MarkdownDocs(pub String);
|
||||
|
||||
pub(crate) struct IndexedDocsDatabase {
|
||||
executor: BackgroundExecutor,
|
||||
env: heed::Env,
|
||||
entries: Database<SerdeBincode<String>, SerdeBincode<MarkdownDocs>>,
|
||||
}
|
||||
|
||||
impl IndexedDocsDatabase {
|
||||
pub fn new(path: PathBuf, executor: BackgroundExecutor) -> Result<Self> {
|
||||
std::fs::create_dir_all(&path)?;
|
||||
|
||||
const ONE_GB_IN_BYTES: usize = 1024 * 1024 * 1024;
|
||||
let env = unsafe {
|
||||
heed::EnvOpenOptions::new()
|
||||
.map_size(ONE_GB_IN_BYTES)
|
||||
.max_dbs(1)
|
||||
.open(path)?
|
||||
};
|
||||
|
||||
let mut txn = env.write_txn()?;
|
||||
let entries = env.create_database(&mut txn, Some("rustdoc_entries"))?;
|
||||
txn.commit()?;
|
||||
|
||||
Ok(Self {
|
||||
executor,
|
||||
env,
|
||||
entries,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn keys(&self) -> Task<Result<Vec<String>>> {
|
||||
let env = self.env.clone();
|
||||
let entries = self.entries;
|
||||
|
||||
self.executor.spawn(async move {
|
||||
let txn = env.read_txn()?;
|
||||
let mut iter = entries.iter(&txn)?;
|
||||
let mut keys = Vec::new();
|
||||
while let Some((key, _value)) = iter.next().transpose()? {
|
||||
keys.push(key);
|
||||
}
|
||||
|
||||
Ok(keys)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn load(
|
||||
&self,
|
||||
package: PackageName,
|
||||
item_path: Option<String>,
|
||||
) -> Task<Result<MarkdownDocs>> {
|
||||
let env = self.env.clone();
|
||||
let entries = self.entries;
|
||||
let item_path = if let Some(item_path) = item_path {
|
||||
format!("{package}::{item_path}")
|
||||
} else {
|
||||
package.to_string()
|
||||
};
|
||||
|
||||
self.executor.spawn(async move {
|
||||
let txn = env.read_txn()?;
|
||||
entries
|
||||
.get(&txn, &item_path)?
|
||||
.ok_or_else(|| anyhow!("no docs found for {item_path}"))
|
||||
})
|
||||
}
|
||||
|
||||
pub fn insert(
|
||||
&self,
|
||||
package: PackageName,
|
||||
item: Option<&RustdocItem>,
|
||||
docs: String,
|
||||
) -> Task<Result<()>> {
|
||||
let env = self.env.clone();
|
||||
let entries = self.entries;
|
||||
let (item_path, entry) = if let Some(item) = item {
|
||||
(format!("{package}::{}", item.display()), MarkdownDocs(docs))
|
||||
} else {
|
||||
(package.to_string(), MarkdownDocs(docs))
|
||||
};
|
||||
|
||||
self.executor.spawn(async move {
|
||||
let mut txn = env.write_txn()?;
|
||||
entries.put(&mut txn, &item_path, &entry)?;
|
||||
txn.commit()?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue