From f17f63ec84424f772bfdb7c7998db598829596bf Mon Sep 17 00:00:00 2001 From: Marshall Bowers Date: Sat, 16 Aug 2025 15:00:31 -0400 Subject: [PATCH] Remove `/docs` slash command (#36325) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR removes the `/docs` slash command. We never fully shipped this—with it requiring explicit opt-in via a setting—and it doesn't seem like the feature is needed in an agentic world. Release Notes: - Removed the `/docs` slash command. --- Cargo.lock | 30 - Cargo.toml | 2 - assets/settings/default.json | 5 - crates/agent_ui/Cargo.toml | 1 - crates/agent_ui/src/agent_configuration.rs | 1 - crates/agent_ui/src/agent_ui.rs | 7 - crates/agent_ui/src/slash_command_settings.rs | 11 - crates/agent_ui/src/text_thread_editor.rs | 86 +-- crates/assistant_slash_commands/Cargo.toml | 1 - .../src/assistant_slash_commands.rs | 2 - .../src/docs_command.rs | 543 --------------- crates/extension/src/extension_host_proxy.rs | 34 - crates/extension/src/extension_manifest.rs | 7 - crates/extension_cli/src/main.rs | 4 - .../extension_compilation_benchmark.rs | 1 - .../extension_host/src/capability_granter.rs | 1 - crates/extension_host/src/extension_host.rs | 15 +- .../src/extension_store_test.rs | 3 - crates/indexed_docs/Cargo.toml | 38 -- crates/indexed_docs/LICENSE-GPL | 1 - .../src/extension_indexed_docs_provider.rs | 81 --- crates/indexed_docs/src/indexed_docs.rs | 16 - crates/indexed_docs/src/providers.rs | 1 - crates/indexed_docs/src/providers/rustdoc.rs | 291 --------- .../src/providers/rustdoc/item.rs | 82 --- .../src/providers/rustdoc/popular_crates.txt | 252 ------- .../src/providers/rustdoc/to_markdown.rs | 618 ------------------ crates/indexed_docs/src/registry.rs | 62 -- crates/indexed_docs/src/store.rs | 346 ---------- typos.toml | 3 - 30 files changed, 6 insertions(+), 2539 deletions(-) delete mode 100644 crates/assistant_slash_commands/src/docs_command.rs delete mode 100644 crates/indexed_docs/Cargo.toml delete mode 120000 crates/indexed_docs/LICENSE-GPL delete mode 100644 crates/indexed_docs/src/extension_indexed_docs_provider.rs delete mode 100644 crates/indexed_docs/src/indexed_docs.rs delete mode 100644 crates/indexed_docs/src/providers.rs delete mode 100644 crates/indexed_docs/src/providers/rustdoc.rs delete mode 100644 crates/indexed_docs/src/providers/rustdoc/item.rs delete mode 100644 crates/indexed_docs/src/providers/rustdoc/popular_crates.txt delete mode 100644 crates/indexed_docs/src/providers/rustdoc/to_markdown.rs delete mode 100644 crates/indexed_docs/src/registry.rs delete mode 100644 crates/indexed_docs/src/store.rs diff --git a/Cargo.lock b/Cargo.lock index 5100a63477..b4bf705eb9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -347,7 +347,6 @@ dependencies = [ "gpui", "html_to_markdown", "http_client", - "indexed_docs", "indoc", "inventory", "itertools 0.14.0", @@ -872,7 +871,6 @@ dependencies = [ "gpui", "html_to_markdown", "http_client", - "indexed_docs", "language", "pretty_assertions", "project", @@ -8383,34 +8381,6 @@ version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0263a3d970d5c054ed9312c0057b4f3bde9c0b33836d3637361d4a9e6e7a408" -[[package]] -name = "indexed_docs" -version = "0.1.0" -dependencies = [ - "anyhow", - "async-trait", - "cargo_metadata", - "collections", - "derive_more 0.99.19", - "extension", - "fs", - "futures 0.3.31", - "fuzzy", - "gpui", - "heed", - "html_to_markdown", - "http_client", - "indexmap", - "indoc", - "parking_lot", - "paths", - "pretty_assertions", - "serde", - "strum 0.27.1", - "util", - "workspace-hack", -] - [[package]] name = "indexmap" version = "2.9.0" diff --git a/Cargo.toml b/Cargo.toml index a94db953ab..b3105bd97c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,7 +81,6 @@ members = [ "crates/http_client_tls", "crates/icons", "crates/image_viewer", - "crates/indexed_docs", "crates/edit_prediction", "crates/edit_prediction_button", "crates/inspector_ui", @@ -305,7 +304,6 @@ http_client = { path = "crates/http_client" } http_client_tls = { path = "crates/http_client_tls" } icons = { path = "crates/icons" } image_viewer = { path = "crates/image_viewer" } -indexed_docs = { path = "crates/indexed_docs" } edit_prediction = { path = "crates/edit_prediction" } edit_prediction_button = { path = "crates/edit_prediction_button" } inspector_ui = { path = "crates/inspector_ui" } diff --git a/assets/settings/default.json b/assets/settings/default.json index ff000001b5..6a8b034268 100644 --- a/assets/settings/default.json +++ b/assets/settings/default.json @@ -887,11 +887,6 @@ }, // The settings for slash commands. "slash_commands": { - // Settings for the `/docs` slash command. - "docs": { - // Whether `/docs` is enabled. - "enabled": false - }, // Settings for the `/project` slash command. "project": { // Whether `/project` is enabled. diff --git a/crates/agent_ui/Cargo.toml b/crates/agent_ui/Cargo.toml index 13fd9d13c5..fbf8590e68 100644 --- a/crates/agent_ui/Cargo.toml +++ b/crates/agent_ui/Cargo.toml @@ -50,7 +50,6 @@ fuzzy.workspace = true gpui.workspace = true html_to_markdown.workspace = true http_client.workspace = true -indexed_docs.workspace = true indoc.workspace = true inventory.workspace = true itertools.workspace = true diff --git a/crates/agent_ui/src/agent_configuration.rs b/crates/agent_ui/src/agent_configuration.rs index 96558f1bea..4a2dd88c33 100644 --- a/crates/agent_ui/src/agent_configuration.rs +++ b/crates/agent_ui/src/agent_configuration.rs @@ -1035,7 +1035,6 @@ fn extension_only_provides_context_server(manifest: &ExtensionManifest) -> bool && manifest.grammars.is_empty() && manifest.language_servers.is_empty() && manifest.slash_commands.is_empty() - && manifest.indexed_docs_providers.is_empty() && manifest.snippets.is_none() && manifest.debug_locators.is_empty() } diff --git a/crates/agent_ui/src/agent_ui.rs b/crates/agent_ui/src/agent_ui.rs index 4f5f022593..f25b576886 100644 --- a/crates/agent_ui/src/agent_ui.rs +++ b/crates/agent_ui/src/agent_ui.rs @@ -242,7 +242,6 @@ pub fn init( client.telemetry().clone(), cx, ); - indexed_docs::init(cx); cx.observe_new(move |workspace, window, cx| { ConfigureContextServerModal::register(workspace, language_registry.clone(), window, cx) }) @@ -409,12 +408,6 @@ fn update_slash_commands_from_settings(cx: &mut App) { let slash_command_registry = SlashCommandRegistry::global(cx); let settings = SlashCommandSettings::get_global(cx); - if settings.docs.enabled { - slash_command_registry.register_command(assistant_slash_commands::DocsSlashCommand, true); - } else { - slash_command_registry.unregister_command(assistant_slash_commands::DocsSlashCommand); - } - if settings.cargo_workspace.enabled { slash_command_registry .register_command(assistant_slash_commands::CargoWorkspaceSlashCommand, true); diff --git a/crates/agent_ui/src/slash_command_settings.rs b/crates/agent_ui/src/slash_command_settings.rs index f254d00ec6..73e5622aa9 100644 --- a/crates/agent_ui/src/slash_command_settings.rs +++ b/crates/agent_ui/src/slash_command_settings.rs @@ -7,22 +7,11 @@ use settings::{Settings, SettingsSources}; /// Settings for slash commands. #[derive(Deserialize, Serialize, Debug, Default, Clone, JsonSchema)] pub struct SlashCommandSettings { - /// Settings for the `/docs` slash command. - #[serde(default)] - pub docs: DocsCommandSettings, /// Settings for the `/cargo-workspace` slash command. #[serde(default)] pub cargo_workspace: CargoWorkspaceCommandSettings, } -/// Settings for the `/docs` slash command. -#[derive(Deserialize, Serialize, Debug, Default, Clone, JsonSchema)] -pub struct DocsCommandSettings { - /// Whether `/docs` is enabled. - #[serde(default)] - pub enabled: bool, -} - /// Settings for the `/cargo-workspace` slash command. #[derive(Deserialize, Serialize, Debug, Default, Clone, JsonSchema)] pub struct CargoWorkspaceCommandSettings { diff --git a/crates/agent_ui/src/text_thread_editor.rs b/crates/agent_ui/src/text_thread_editor.rs index 2e3b4ed890..8c1e163eca 100644 --- a/crates/agent_ui/src/text_thread_editor.rs +++ b/crates/agent_ui/src/text_thread_editor.rs @@ -5,10 +5,7 @@ use crate::{ use agent_settings::{AgentSettings, CompletionMode}; use anyhow::Result; use assistant_slash_command::{SlashCommand, SlashCommandOutputSection, SlashCommandWorkingSet}; -use assistant_slash_commands::{ - DefaultSlashCommand, DocsSlashCommand, DocsSlashCommandArgs, FileSlashCommand, - selections_creases, -}; +use assistant_slash_commands::{DefaultSlashCommand, FileSlashCommand, selections_creases}; use client::{proto, zed_urls}; use collections::{BTreeSet, HashMap, HashSet, hash_map}; use editor::{ @@ -30,7 +27,6 @@ use gpui::{ StatefulInteractiveElement, Styled, Subscription, Task, Transformation, WeakEntity, actions, div, img, percentage, point, prelude::*, pulsating_between, size, }; -use indexed_docs::IndexedDocsStore; use language::{ BufferSnapshot, LspAdapterDelegate, ToOffset, language_settings::{SoftWrap, all_language_settings}, @@ -77,7 +73,7 @@ use crate::{slash_command::SlashCommandCompletionProvider, slash_command_picker} use assistant_context::{ AssistantContext, CacheStatus, Content, ContextEvent, ContextId, InvokedSlashCommandId, InvokedSlashCommandStatus, Message, MessageId, MessageMetadata, MessageStatus, - ParsedSlashCommand, PendingSlashCommandStatus, ThoughtProcessOutputSection, + PendingSlashCommandStatus, ThoughtProcessOutputSection, }; actions!( @@ -701,19 +697,7 @@ impl TextThreadEditor { } }; let render_trailer = { - let command = command.clone(); - move |row, _unfold, _window: &mut Window, cx: &mut App| { - // TODO: In the future we should investigate how we can expose - // this as a hook on the `SlashCommand` trait so that we don't - // need to special-case it here. - if command.name == DocsSlashCommand::NAME { - return render_docs_slash_command_trailer( - row, - command.clone(), - cx, - ); - } - + move |_row, _unfold, _window: &mut Window, _cx: &mut App| { Empty.into_any() } }; @@ -2398,70 +2382,6 @@ fn render_pending_slash_command_gutter_decoration( icon.into_any_element() } -fn render_docs_slash_command_trailer( - row: MultiBufferRow, - command: ParsedSlashCommand, - cx: &mut App, -) -> AnyElement { - if command.arguments.is_empty() { - return Empty.into_any(); - } - let args = DocsSlashCommandArgs::parse(&command.arguments); - - let Some(store) = args - .provider() - .and_then(|provider| IndexedDocsStore::try_global(provider, cx).ok()) - else { - return Empty.into_any(); - }; - - let Some(package) = args.package() else { - return Empty.into_any(); - }; - - let mut children = Vec::new(); - - if store.is_indexing(&package) { - children.push( - div() - .id(("crates-being-indexed", row.0)) - .child(Icon::new(IconName::ArrowCircle).with_animation( - "arrow-circle", - Animation::new(Duration::from_secs(4)).repeat(), - |icon, delta| icon.transform(Transformation::rotate(percentage(delta))), - )) - .tooltip({ - let package = package.clone(); - Tooltip::text(format!("Indexing {package}…")) - }) - .into_any_element(), - ); - } - - if let Some(latest_error) = store.latest_error_for_package(&package) { - children.push( - div() - .id(("latest-error", row.0)) - .child( - Icon::new(IconName::Warning) - .size(IconSize::Small) - .color(Color::Warning), - ) - .tooltip(Tooltip::text(format!("Failed to index: {latest_error}"))) - .into_any_element(), - ) - } - - let is_indexing = store.is_indexing(&package); - let latest_error = store.latest_error_for_package(&package); - - if !is_indexing && latest_error.is_none() { - return Empty.into_any(); - } - - h_flex().gap_2().children(children).into_any_element() -} - #[derive(Debug, Clone, Serialize, Deserialize)] struct CopyMetadata { creases: Vec, diff --git a/crates/assistant_slash_commands/Cargo.toml b/crates/assistant_slash_commands/Cargo.toml index f703a753f5..c054c3ced8 100644 --- a/crates/assistant_slash_commands/Cargo.toml +++ b/crates/assistant_slash_commands/Cargo.toml @@ -27,7 +27,6 @@ globset.workspace = true gpui.workspace = true html_to_markdown.workspace = true http_client.workspace = true -indexed_docs.workspace = true language.workspace = true project.workspace = true prompt_store.workspace = true diff --git a/crates/assistant_slash_commands/src/assistant_slash_commands.rs b/crates/assistant_slash_commands/src/assistant_slash_commands.rs index fa5dd8b683..fb00a91219 100644 --- a/crates/assistant_slash_commands/src/assistant_slash_commands.rs +++ b/crates/assistant_slash_commands/src/assistant_slash_commands.rs @@ -3,7 +3,6 @@ mod context_server_command; mod default_command; mod delta_command; mod diagnostics_command; -mod docs_command; mod fetch_command; mod file_command; mod now_command; @@ -18,7 +17,6 @@ pub use crate::context_server_command::*; pub use crate::default_command::*; pub use crate::delta_command::*; pub use crate::diagnostics_command::*; -pub use crate::docs_command::*; pub use crate::fetch_command::*; pub use crate::file_command::*; pub use crate::now_command::*; diff --git a/crates/assistant_slash_commands/src/docs_command.rs b/crates/assistant_slash_commands/src/docs_command.rs deleted file mode 100644 index bd87c72849..0000000000 --- a/crates/assistant_slash_commands/src/docs_command.rs +++ /dev/null @@ -1,543 +0,0 @@ -use std::path::Path; -use std::sync::Arc; -use std::sync::atomic::AtomicBool; -use std::time::Duration; - -use anyhow::{Context as _, Result, anyhow, bail}; -use assistant_slash_command::{ - ArgumentCompletion, SlashCommand, SlashCommandOutput, SlashCommandOutputSection, - SlashCommandResult, -}; -use gpui::{App, BackgroundExecutor, Entity, Task, WeakEntity}; -use indexed_docs::{ - DocsDotRsProvider, IndexedDocsRegistry, IndexedDocsStore, LocalRustdocProvider, PackageName, - ProviderId, -}; -use language::{BufferSnapshot, LspAdapterDelegate}; -use project::{Project, ProjectPath}; -use ui::prelude::*; -use util::{ResultExt, maybe}; -use workspace::Workspace; - -pub struct DocsSlashCommand; - -impl DocsSlashCommand { - pub const NAME: &'static str = "docs"; - - fn path_to_cargo_toml(project: Entity, cx: &mut App) -> Option> { - let worktree = project.read(cx).worktrees(cx).next()?; - let worktree = worktree.read(cx); - let entry = worktree.entry_for_path("Cargo.toml")?; - let path = ProjectPath { - worktree_id: worktree.id(), - path: entry.path.clone(), - }; - Some(Arc::from( - project.read(cx).absolute_path(&path, cx)?.as_path(), - )) - } - - /// Ensures that the indexed doc providers for Rust are registered. - /// - /// Ideally we would do this sooner, but we need to wait until we're able to - /// access the workspace so we can read the project. - fn ensure_rust_doc_providers_are_registered( - &self, - workspace: Option>, - cx: &mut App, - ) { - let indexed_docs_registry = IndexedDocsRegistry::global(cx); - if indexed_docs_registry - .get_provider_store(LocalRustdocProvider::id()) - .is_none() - { - let index_provider_deps = maybe!({ - let workspace = workspace - .as_ref() - .context("no workspace")? - .upgrade() - .context("workspace dropped")?; - let project = workspace.read(cx).project().clone(); - let fs = project.read(cx).fs().clone(); - let cargo_workspace_root = Self::path_to_cargo_toml(project, cx) - .and_then(|path| path.parent().map(|path| path.to_path_buf())) - .context("no Cargo workspace root found")?; - - anyhow::Ok((fs, cargo_workspace_root)) - }); - - if let Some((fs, cargo_workspace_root)) = index_provider_deps.log_err() { - indexed_docs_registry.register_provider(Box::new(LocalRustdocProvider::new( - fs, - cargo_workspace_root, - ))); - } - } - - if indexed_docs_registry - .get_provider_store(DocsDotRsProvider::id()) - .is_none() - { - let http_client = maybe!({ - let workspace = workspace - .as_ref() - .context("no workspace")? - .upgrade() - .context("workspace was dropped")?; - let project = workspace.read(cx).project().clone(); - anyhow::Ok(project.read(cx).client().http_client()) - }); - - if let Some(http_client) = http_client.log_err() { - indexed_docs_registry - .register_provider(Box::new(DocsDotRsProvider::new(http_client))); - } - } - } - - /// Runs just-in-time indexing for a given package, in case the slash command - /// is run without any entries existing in the index. - fn run_just_in_time_indexing( - store: Arc, - key: String, - package: PackageName, - executor: BackgroundExecutor, - ) -> Task<()> { - executor.clone().spawn(async move { - let (prefix, needs_full_index) = if let Some((prefix, _)) = key.split_once('*') { - // If we have a wildcard in the search, we want to wait until - // we've completely finished indexing so we get a full set of - // results for the wildcard. - (prefix.to_string(), true) - } else { - (key, false) - }; - - // If we already have some entries, we assume that we've indexed the package before - // and don't need to do it again. - let has_any_entries = store - .any_with_prefix(prefix.clone()) - .await - .unwrap_or_default(); - if has_any_entries { - return (); - }; - - let index_task = store.clone().index(package.clone()); - - if needs_full_index { - _ = index_task.await; - } else { - loop { - executor.timer(Duration::from_millis(200)).await; - - if store - .any_with_prefix(prefix.clone()) - .await - .unwrap_or_default() - || !store.is_indexing(&package) - { - break; - } - } - } - }) - } -} - -impl SlashCommand for DocsSlashCommand { - fn name(&self) -> String { - Self::NAME.into() - } - - fn description(&self) -> String { - "insert docs".into() - } - - fn menu_text(&self) -> String { - "Insert Documentation".into() - } - - fn requires_argument(&self) -> bool { - true - } - - fn complete_argument( - self: Arc, - arguments: &[String], - _cancel: Arc, - workspace: Option>, - _: &mut Window, - cx: &mut App, - ) -> Task>> { - self.ensure_rust_doc_providers_are_registered(workspace, cx); - - let indexed_docs_registry = IndexedDocsRegistry::global(cx); - let args = DocsSlashCommandArgs::parse(arguments); - let store = args - .provider() - .context("no docs provider specified") - .and_then(|provider| IndexedDocsStore::try_global(provider, cx)); - cx.background_spawn(async move { - fn build_completions(items: Vec) -> Vec { - items - .into_iter() - .map(|item| ArgumentCompletion { - label: item.clone().into(), - new_text: item.to_string(), - after_completion: assistant_slash_command::AfterCompletion::Run, - replace_previous_arguments: false, - }) - .collect() - } - - match args { - DocsSlashCommandArgs::NoProvider => { - let providers = indexed_docs_registry.list_providers(); - if providers.is_empty() { - return Ok(vec![ArgumentCompletion { - label: "No available docs providers.".into(), - new_text: String::new(), - after_completion: false.into(), - replace_previous_arguments: false, - }]); - } - - Ok(providers - .into_iter() - .map(|provider| ArgumentCompletion { - label: provider.to_string().into(), - new_text: provider.to_string(), - after_completion: false.into(), - replace_previous_arguments: false, - }) - .collect()) - } - DocsSlashCommandArgs::SearchPackageDocs { - provider, - package, - index, - } => { - let store = store?; - - if index { - // We don't need to hold onto this task, as the `IndexedDocsStore` will hold it - // until it completes. - drop(store.clone().index(package.as_str().into())); - } - - let suggested_packages = store.clone().suggest_packages().await?; - let search_results = store.search(package).await; - - let mut items = build_completions(search_results); - let workspace_crate_completions = suggested_packages - .into_iter() - .filter(|package_name| { - !items - .iter() - .any(|item| item.label.text() == package_name.as_ref()) - }) - .map(|package_name| ArgumentCompletion { - label: format!("{package_name} (unindexed)").into(), - new_text: format!("{package_name}"), - after_completion: true.into(), - replace_previous_arguments: false, - }) - .collect::>(); - items.extend(workspace_crate_completions); - - if items.is_empty() { - return Ok(vec![ArgumentCompletion { - label: format!( - "Enter a {package_term} name.", - package_term = package_term(&provider) - ) - .into(), - new_text: provider.to_string(), - after_completion: false.into(), - replace_previous_arguments: false, - }]); - } - - Ok(items) - } - DocsSlashCommandArgs::SearchItemDocs { item_path, .. } => { - let store = store?; - let items = store.search(item_path).await; - Ok(build_completions(items)) - } - } - }) - } - - fn run( - self: Arc, - arguments: &[String], - _context_slash_command_output_sections: &[SlashCommandOutputSection], - _context_buffer: BufferSnapshot, - _workspace: WeakEntity, - _delegate: Option>, - _: &mut Window, - cx: &mut App, - ) -> Task { - if arguments.is_empty() { - return Task::ready(Err(anyhow!("missing an argument"))); - }; - - let args = DocsSlashCommandArgs::parse(arguments); - let executor = cx.background_executor().clone(); - let task = cx.background_spawn({ - let store = args - .provider() - .context("no docs provider specified") - .and_then(|provider| IndexedDocsStore::try_global(provider, cx)); - async move { - let (provider, key) = match args.clone() { - DocsSlashCommandArgs::NoProvider => bail!("no docs provider specified"), - DocsSlashCommandArgs::SearchPackageDocs { - provider, package, .. - } => (provider, package), - DocsSlashCommandArgs::SearchItemDocs { - provider, - item_path, - .. - } => (provider, item_path), - }; - - if key.trim().is_empty() { - bail!( - "no {package_term} name provided", - package_term = package_term(&provider) - ); - } - - let store = store?; - - if let Some(package) = args.package() { - Self::run_just_in_time_indexing(store.clone(), key.clone(), package, executor) - .await; - } - - let (text, ranges) = if let Some((prefix, _)) = key.split_once('*') { - let docs = store.load_many_by_prefix(prefix.to_string()).await?; - - let mut text = String::new(); - let mut ranges = Vec::new(); - - for (key, docs) in docs { - let prev_len = text.len(); - - text.push_str(&docs.0); - text.push_str("\n"); - ranges.push((key, prev_len..text.len())); - text.push_str("\n"); - } - - (text, ranges) - } else { - let item_docs = store.load(key.clone()).await?; - let text = item_docs.to_string(); - let range = 0..text.len(); - - (text, vec![(key, range)]) - }; - - anyhow::Ok((provider, text, ranges)) - } - }); - - cx.foreground_executor().spawn(async move { - let (provider, text, ranges) = task.await?; - Ok(SlashCommandOutput { - text, - sections: ranges - .into_iter() - .map(|(key, range)| SlashCommandOutputSection { - range, - icon: IconName::FileDoc, - label: format!("docs ({provider}): {key}",).into(), - metadata: None, - }) - .collect(), - run_commands_in_text: false, - } - .to_event_stream()) - }) - } -} - -fn is_item_path_delimiter(char: char) -> bool { - !char.is_alphanumeric() && char != '-' && char != '_' -} - -#[derive(Debug, PartialEq, Clone)] -pub enum DocsSlashCommandArgs { - NoProvider, - SearchPackageDocs { - provider: ProviderId, - package: String, - index: bool, - }, - SearchItemDocs { - provider: ProviderId, - package: String, - item_path: String, - }, -} - -impl DocsSlashCommandArgs { - pub fn parse(arguments: &[String]) -> Self { - let Some(provider) = arguments - .get(0) - .cloned() - .filter(|arg| !arg.trim().is_empty()) - else { - return Self::NoProvider; - }; - let provider = ProviderId(provider.into()); - let Some(argument) = arguments.get(1) else { - return Self::NoProvider; - }; - - if let Some((package, rest)) = argument.split_once(is_item_path_delimiter) { - if rest.trim().is_empty() { - Self::SearchPackageDocs { - provider, - package: package.to_owned(), - index: true, - } - } else { - Self::SearchItemDocs { - provider, - package: package.to_owned(), - item_path: argument.to_owned(), - } - } - } else { - Self::SearchPackageDocs { - provider, - package: argument.to_owned(), - index: false, - } - } - } - - pub fn provider(&self) -> Option { - match self { - Self::NoProvider => None, - Self::SearchPackageDocs { provider, .. } | Self::SearchItemDocs { provider, .. } => { - Some(provider.clone()) - } - } - } - - pub fn package(&self) -> Option { - match self { - Self::NoProvider => None, - Self::SearchPackageDocs { package, .. } | Self::SearchItemDocs { package, .. } => { - Some(package.as_str().into()) - } - } - } -} - -/// Returns the term used to refer to a package. -fn package_term(provider: &ProviderId) -> &'static str { - if provider == &DocsDotRsProvider::id() || provider == &LocalRustdocProvider::id() { - return "crate"; - } - - "package" -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parse_docs_slash_command_args() { - assert_eq!( - DocsSlashCommandArgs::parse(&["".to_string()]), - DocsSlashCommandArgs::NoProvider - ); - assert_eq!( - DocsSlashCommandArgs::parse(&["rustdoc".to_string()]), - DocsSlashCommandArgs::NoProvider - ); - - assert_eq!( - DocsSlashCommandArgs::parse(&["rustdoc".to_string(), "".to_string()]), - DocsSlashCommandArgs::SearchPackageDocs { - provider: ProviderId("rustdoc".into()), - package: "".into(), - index: false - } - ); - assert_eq!( - DocsSlashCommandArgs::parse(&["gleam".to_string(), "".to_string()]), - DocsSlashCommandArgs::SearchPackageDocs { - provider: ProviderId("gleam".into()), - package: "".into(), - index: false - } - ); - - assert_eq!( - DocsSlashCommandArgs::parse(&["rustdoc".to_string(), "gpui".to_string()]), - DocsSlashCommandArgs::SearchPackageDocs { - provider: ProviderId("rustdoc".into()), - package: "gpui".into(), - index: false, - } - ); - assert_eq!( - DocsSlashCommandArgs::parse(&["gleam".to_string(), "gleam_stdlib".to_string()]), - DocsSlashCommandArgs::SearchPackageDocs { - provider: ProviderId("gleam".into()), - package: "gleam_stdlib".into(), - index: false - } - ); - - // Adding an item path delimiter indicates we can start indexing. - assert_eq!( - DocsSlashCommandArgs::parse(&["rustdoc".to_string(), "gpui:".to_string()]), - DocsSlashCommandArgs::SearchPackageDocs { - provider: ProviderId("rustdoc".into()), - package: "gpui".into(), - index: true, - } - ); - assert_eq!( - DocsSlashCommandArgs::parse(&["gleam".to_string(), "gleam_stdlib/".to_string()]), - DocsSlashCommandArgs::SearchPackageDocs { - provider: ProviderId("gleam".into()), - package: "gleam_stdlib".into(), - index: true - } - ); - - assert_eq!( - DocsSlashCommandArgs::parse(&[ - "rustdoc".to_string(), - "gpui::foo::bar::Baz".to_string() - ]), - DocsSlashCommandArgs::SearchItemDocs { - provider: ProviderId("rustdoc".into()), - package: "gpui".into(), - item_path: "gpui::foo::bar::Baz".into() - } - ); - assert_eq!( - DocsSlashCommandArgs::parse(&[ - "gleam".to_string(), - "gleam_stdlib/gleam/int".to_string() - ]), - DocsSlashCommandArgs::SearchItemDocs { - provider: ProviderId("gleam".into()), - package: "gleam_stdlib".into(), - item_path: "gleam_stdlib/gleam/int".into() - } - ); - } -} diff --git a/crates/extension/src/extension_host_proxy.rs b/crates/extension/src/extension_host_proxy.rs index 917739759f..6a24e3ba3f 100644 --- a/crates/extension/src/extension_host_proxy.rs +++ b/crates/extension/src/extension_host_proxy.rs @@ -28,7 +28,6 @@ pub struct ExtensionHostProxy { snippet_proxy: RwLock>>, slash_command_proxy: RwLock>>, context_server_proxy: RwLock>>, - indexed_docs_provider_proxy: RwLock>>, debug_adapter_provider_proxy: RwLock>>, } @@ -54,7 +53,6 @@ impl ExtensionHostProxy { snippet_proxy: RwLock::default(), slash_command_proxy: RwLock::default(), context_server_proxy: RwLock::default(), - indexed_docs_provider_proxy: RwLock::default(), debug_adapter_provider_proxy: RwLock::default(), } } @@ -87,14 +85,6 @@ impl ExtensionHostProxy { self.context_server_proxy.write().replace(Arc::new(proxy)); } - pub fn register_indexed_docs_provider_proxy( - &self, - proxy: impl ExtensionIndexedDocsProviderProxy, - ) { - self.indexed_docs_provider_proxy - .write() - .replace(Arc::new(proxy)); - } pub fn register_debug_adapter_proxy(&self, proxy: impl ExtensionDebugAdapterProviderProxy) { self.debug_adapter_provider_proxy .write() @@ -408,30 +398,6 @@ impl ExtensionContextServerProxy for ExtensionHostProxy { } } -pub trait ExtensionIndexedDocsProviderProxy: Send + Sync + 'static { - fn register_indexed_docs_provider(&self, extension: Arc, provider_id: Arc); - - fn unregister_indexed_docs_provider(&self, provider_id: Arc); -} - -impl ExtensionIndexedDocsProviderProxy for ExtensionHostProxy { - fn register_indexed_docs_provider(&self, extension: Arc, provider_id: Arc) { - let Some(proxy) = self.indexed_docs_provider_proxy.read().clone() else { - return; - }; - - proxy.register_indexed_docs_provider(extension, provider_id) - } - - fn unregister_indexed_docs_provider(&self, provider_id: Arc) { - let Some(proxy) = self.indexed_docs_provider_proxy.read().clone() else { - return; - }; - - proxy.unregister_indexed_docs_provider(provider_id) - } -} - pub trait ExtensionDebugAdapterProviderProxy: Send + Sync + 'static { fn register_debug_adapter( &self, diff --git a/crates/extension/src/extension_manifest.rs b/crates/extension/src/extension_manifest.rs index 5852b3e3fc..f5296198b0 100644 --- a/crates/extension/src/extension_manifest.rs +++ b/crates/extension/src/extension_manifest.rs @@ -84,8 +84,6 @@ pub struct ExtensionManifest { #[serde(default)] pub slash_commands: BTreeMap, SlashCommandManifestEntry>, #[serde(default)] - pub indexed_docs_providers: BTreeMap, IndexedDocsProviderEntry>, - #[serde(default)] pub snippets: Option, #[serde(default)] pub capabilities: Vec, @@ -195,9 +193,6 @@ pub struct SlashCommandManifestEntry { pub requires_argument: bool, } -#[derive(Clone, PartialEq, Eq, Debug, Deserialize, Serialize)] -pub struct IndexedDocsProviderEntry {} - #[derive(Clone, PartialEq, Eq, Debug, Deserialize, Serialize)] pub struct DebugAdapterManifestEntry { pub schema_path: Option, @@ -271,7 +266,6 @@ fn manifest_from_old_manifest( language_servers: Default::default(), context_servers: BTreeMap::default(), slash_commands: BTreeMap::default(), - indexed_docs_providers: BTreeMap::default(), snippets: None, capabilities: Vec::new(), debug_adapters: Default::default(), @@ -304,7 +298,6 @@ mod tests { language_servers: BTreeMap::default(), context_servers: BTreeMap::default(), slash_commands: BTreeMap::default(), - indexed_docs_providers: BTreeMap::default(), snippets: None, capabilities: vec![], debug_adapters: Default::default(), diff --git a/crates/extension_cli/src/main.rs b/crates/extension_cli/src/main.rs index ab4a9cddb0..d6c0501efd 100644 --- a/crates/extension_cli/src/main.rs +++ b/crates/extension_cli/src/main.rs @@ -144,10 +144,6 @@ fn extension_provides(manifest: &ExtensionManifest) -> BTreeSet ExtensionManifest { .collect(), context_servers: BTreeMap::default(), slash_commands: BTreeMap::default(), - indexed_docs_providers: BTreeMap::default(), snippets: None, capabilities: vec![ExtensionCapability::ProcessExec( extension::ProcessExecCapability { diff --git a/crates/extension_host/src/capability_granter.rs b/crates/extension_host/src/capability_granter.rs index c77e5ecba1..5a2093c1dd 100644 --- a/crates/extension_host/src/capability_granter.rs +++ b/crates/extension_host/src/capability_granter.rs @@ -108,7 +108,6 @@ mod tests { language_servers: BTreeMap::default(), context_servers: BTreeMap::default(), slash_commands: BTreeMap::default(), - indexed_docs_providers: BTreeMap::default(), snippets: None, capabilities: vec![], debug_adapters: Default::default(), diff --git a/crates/extension_host/src/extension_host.rs b/crates/extension_host/src/extension_host.rs index 67baf4e692..46deacfe69 100644 --- a/crates/extension_host/src/extension_host.rs +++ b/crates/extension_host/src/extension_host.rs @@ -16,9 +16,9 @@ pub use extension::ExtensionManifest; use extension::extension_builder::{CompileExtensionOptions, ExtensionBuilder}; use extension::{ ExtensionContextServerProxy, ExtensionDebugAdapterProviderProxy, ExtensionEvents, - ExtensionGrammarProxy, ExtensionHostProxy, ExtensionIndexedDocsProviderProxy, - ExtensionLanguageProxy, ExtensionLanguageServerProxy, ExtensionSlashCommandProxy, - ExtensionSnippetProxy, ExtensionThemeProxy, + ExtensionGrammarProxy, ExtensionHostProxy, ExtensionLanguageProxy, + ExtensionLanguageServerProxy, ExtensionSlashCommandProxy, ExtensionSnippetProxy, + ExtensionThemeProxy, }; use fs::{Fs, RemoveOptions}; use futures::future::join_all; @@ -1192,10 +1192,6 @@ impl ExtensionStore { for (command_name, _) in &extension.manifest.slash_commands { self.proxy.unregister_slash_command(command_name.clone()); } - for (provider_id, _) in &extension.manifest.indexed_docs_providers { - self.proxy - .unregister_indexed_docs_provider(provider_id.clone()); - } } self.wasm_extensions @@ -1399,11 +1395,6 @@ impl ExtensionStore { .register_context_server(extension.clone(), id.clone(), cx); } - for (provider_id, _provider) in &manifest.indexed_docs_providers { - this.proxy - .register_indexed_docs_provider(extension.clone(), provider_id.clone()); - } - for (debug_adapter, meta) in &manifest.debug_adapters { let mut path = root_dir.clone(); path.push(Path::new(manifest.id.as_ref())); diff --git a/crates/extension_host/src/extension_store_test.rs b/crates/extension_host/src/extension_store_test.rs index c31774c20d..347a610439 100644 --- a/crates/extension_host/src/extension_store_test.rs +++ b/crates/extension_host/src/extension_store_test.rs @@ -160,7 +160,6 @@ async fn test_extension_store(cx: &mut TestAppContext) { language_servers: BTreeMap::default(), context_servers: BTreeMap::default(), slash_commands: BTreeMap::default(), - indexed_docs_providers: BTreeMap::default(), snippets: None, capabilities: Vec::new(), debug_adapters: Default::default(), @@ -191,7 +190,6 @@ async fn test_extension_store(cx: &mut TestAppContext) { language_servers: BTreeMap::default(), context_servers: BTreeMap::default(), slash_commands: BTreeMap::default(), - indexed_docs_providers: BTreeMap::default(), snippets: None, capabilities: Vec::new(), debug_adapters: Default::default(), @@ -371,7 +369,6 @@ async fn test_extension_store(cx: &mut TestAppContext) { language_servers: BTreeMap::default(), context_servers: BTreeMap::default(), slash_commands: BTreeMap::default(), - indexed_docs_providers: BTreeMap::default(), snippets: None, capabilities: Vec::new(), debug_adapters: Default::default(), diff --git a/crates/indexed_docs/Cargo.toml b/crates/indexed_docs/Cargo.toml deleted file mode 100644 index eb269ad939..0000000000 --- a/crates/indexed_docs/Cargo.toml +++ /dev/null @@ -1,38 +0,0 @@ -[package] -name = "indexed_docs" -version = "0.1.0" -edition.workspace = true -publish.workspace = true -license = "GPL-3.0-or-later" - -[lints] -workspace = true - -[lib] -path = "src/indexed_docs.rs" - -[dependencies] -anyhow.workspace = true -async-trait.workspace = true -cargo_metadata.workspace = true -collections.workspace = true -derive_more.workspace = true -extension.workspace = true -fs.workspace = true -futures.workspace = true -fuzzy.workspace = true -gpui.workspace = true -heed.workspace = true -html_to_markdown.workspace = true -http_client.workspace = true -indexmap.workspace = true -parking_lot.workspace = true -paths.workspace = true -serde.workspace = true -strum.workspace = true -util.workspace = true -workspace-hack.workspace = true - -[dev-dependencies] -indoc.workspace = true -pretty_assertions.workspace = true diff --git a/crates/indexed_docs/LICENSE-GPL b/crates/indexed_docs/LICENSE-GPL deleted file mode 120000 index 89e542f750..0000000000 --- a/crates/indexed_docs/LICENSE-GPL +++ /dev/null @@ -1 +0,0 @@ -../../LICENSE-GPL \ No newline at end of file diff --git a/crates/indexed_docs/src/extension_indexed_docs_provider.rs b/crates/indexed_docs/src/extension_indexed_docs_provider.rs deleted file mode 100644 index c77ea4066d..0000000000 --- a/crates/indexed_docs/src/extension_indexed_docs_provider.rs +++ /dev/null @@ -1,81 +0,0 @@ -use std::path::PathBuf; -use std::sync::Arc; - -use anyhow::Result; -use async_trait::async_trait; -use extension::{Extension, ExtensionHostProxy, ExtensionIndexedDocsProviderProxy}; -use gpui::App; - -use crate::{ - IndexedDocsDatabase, IndexedDocsProvider, IndexedDocsRegistry, PackageName, ProviderId, -}; - -pub fn init(cx: &mut App) { - let proxy = ExtensionHostProxy::default_global(cx); - proxy.register_indexed_docs_provider_proxy(IndexedDocsRegistryProxy { - indexed_docs_registry: IndexedDocsRegistry::global(cx), - }); -} - -struct IndexedDocsRegistryProxy { - indexed_docs_registry: Arc, -} - -impl ExtensionIndexedDocsProviderProxy for IndexedDocsRegistryProxy { - fn register_indexed_docs_provider(&self, extension: Arc, provider_id: Arc) { - self.indexed_docs_registry - .register_provider(Box::new(ExtensionIndexedDocsProvider::new( - extension, - ProviderId(provider_id), - ))); - } - - fn unregister_indexed_docs_provider(&self, provider_id: Arc) { - self.indexed_docs_registry - .unregister_provider(&ProviderId(provider_id)); - } -} - -pub struct ExtensionIndexedDocsProvider { - extension: Arc, - id: ProviderId, -} - -impl ExtensionIndexedDocsProvider { - pub fn new(extension: Arc, id: ProviderId) -> Self { - Self { extension, id } - } -} - -#[async_trait] -impl IndexedDocsProvider for ExtensionIndexedDocsProvider { - fn id(&self) -> ProviderId { - self.id.clone() - } - - fn database_path(&self) -> PathBuf { - let mut database_path = PathBuf::from(self.extension.work_dir().as_ref()); - database_path.push("docs"); - database_path.push(format!("{}.0.mdb", self.id)); - - database_path - } - - async fn suggest_packages(&self) -> Result> { - let packages = self - .extension - .suggest_docs_packages(self.id.0.clone()) - .await?; - - Ok(packages - .into_iter() - .map(|package| PackageName::from(package.as_str())) - .collect()) - } - - async fn index(&self, package: PackageName, database: Arc) -> Result<()> { - self.extension - .index_docs(self.id.0.clone(), package.as_ref().into(), database) - .await - } -} diff --git a/crates/indexed_docs/src/indexed_docs.rs b/crates/indexed_docs/src/indexed_docs.rs deleted file mode 100644 index 97538329d4..0000000000 --- a/crates/indexed_docs/src/indexed_docs.rs +++ /dev/null @@ -1,16 +0,0 @@ -mod extension_indexed_docs_provider; -mod providers; -mod registry; -mod store; - -use gpui::App; - -pub use crate::extension_indexed_docs_provider::ExtensionIndexedDocsProvider; -pub use crate::providers::rustdoc::*; -pub use crate::registry::*; -pub use crate::store::*; - -pub fn init(cx: &mut App) { - IndexedDocsRegistry::init_global(cx); - extension_indexed_docs_provider::init(cx); -} diff --git a/crates/indexed_docs/src/providers.rs b/crates/indexed_docs/src/providers.rs deleted file mode 100644 index c6505a2ab6..0000000000 --- a/crates/indexed_docs/src/providers.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod rustdoc; diff --git a/crates/indexed_docs/src/providers/rustdoc.rs b/crates/indexed_docs/src/providers/rustdoc.rs deleted file mode 100644 index ac6dc3a10b..0000000000 --- a/crates/indexed_docs/src/providers/rustdoc.rs +++ /dev/null @@ -1,291 +0,0 @@ -mod item; -mod to_markdown; - -use cargo_metadata::MetadataCommand; -use futures::future::BoxFuture; -pub use item::*; -use parking_lot::RwLock; -pub use to_markdown::convert_rustdoc_to_markdown; - -use std::collections::BTreeSet; -use std::path::PathBuf; -use std::sync::{Arc, LazyLock}; -use std::time::{Duration, Instant}; - -use anyhow::{Context as _, Result, bail}; -use async_trait::async_trait; -use collections::{HashSet, VecDeque}; -use fs::Fs; -use futures::{AsyncReadExt, FutureExt}; -use http_client::{AsyncBody, HttpClient, HttpClientWithUrl}; - -use crate::{IndexedDocsDatabase, IndexedDocsProvider, PackageName, ProviderId}; - -#[derive(Debug)] -struct RustdocItemWithHistory { - pub item: RustdocItem, - #[cfg(debug_assertions)] - pub history: Vec, -} - -pub struct LocalRustdocProvider { - fs: Arc, - cargo_workspace_root: PathBuf, -} - -impl LocalRustdocProvider { - pub fn id() -> ProviderId { - ProviderId("rustdoc".into()) - } - - pub fn new(fs: Arc, cargo_workspace_root: PathBuf) -> Self { - Self { - fs, - cargo_workspace_root, - } - } -} - -#[async_trait] -impl IndexedDocsProvider for LocalRustdocProvider { - fn id(&self) -> ProviderId { - Self::id() - } - - fn database_path(&self) -> PathBuf { - paths::data_dir().join("docs/rust/rustdoc-db.1.mdb") - } - - async fn suggest_packages(&self) -> Result> { - static WORKSPACE_CRATES: LazyLock, Instant)>>> = - LazyLock::new(|| RwLock::new(None)); - - if let Some((crates, fetched_at)) = &*WORKSPACE_CRATES.read() { - if fetched_at.elapsed() < Duration::from_secs(300) { - return Ok(crates.iter().cloned().collect()); - } - } - - let workspace = MetadataCommand::new() - .manifest_path(self.cargo_workspace_root.join("Cargo.toml")) - .exec() - .context("failed to load cargo metadata")?; - - let workspace_crates = workspace - .packages - .into_iter() - .map(|package| PackageName::from(package.name.as_str())) - .collect::>(); - - *WORKSPACE_CRATES.write() = Some((workspace_crates.clone(), Instant::now())); - - Ok(workspace_crates.into_iter().collect()) - } - - async fn index(&self, package: PackageName, database: Arc) -> Result<()> { - index_rustdoc(package, database, { - move |crate_name, item| { - let fs = self.fs.clone(); - let cargo_workspace_root = self.cargo_workspace_root.clone(); - let crate_name = crate_name.clone(); - let item = item.cloned(); - async move { - let target_doc_path = cargo_workspace_root.join("target/doc"); - let mut local_cargo_doc_path = target_doc_path.join(crate_name.as_ref().replace('-', "_")); - - if !fs.is_dir(&local_cargo_doc_path).await { - let cargo_doc_exists_at_all = fs.is_dir(&target_doc_path).await; - if cargo_doc_exists_at_all { - bail!( - "no docs directory for '{crate_name}'. if this is a valid crate name, try running `cargo doc`" - ); - } else { - bail!("no cargo doc directory. run `cargo doc`"); - } - } - - if let Some(item) = item { - local_cargo_doc_path.push(item.url_path()); - } else { - local_cargo_doc_path.push("index.html"); - } - - let Ok(contents) = fs.load(&local_cargo_doc_path).await else { - return Ok(None); - }; - - Ok(Some(contents)) - } - .boxed() - } - }) - .await - } -} - -pub struct DocsDotRsProvider { - http_client: Arc, -} - -impl DocsDotRsProvider { - pub fn id() -> ProviderId { - ProviderId("docs-rs".into()) - } - - pub fn new(http_client: Arc) -> Self { - Self { http_client } - } -} - -#[async_trait] -impl IndexedDocsProvider for DocsDotRsProvider { - fn id(&self) -> ProviderId { - Self::id() - } - - fn database_path(&self) -> PathBuf { - paths::data_dir().join("docs/rust/docs-rs-db.1.mdb") - } - - async fn suggest_packages(&self) -> Result> { - static POPULAR_CRATES: LazyLock> = LazyLock::new(|| { - include_str!("./rustdoc/popular_crates.txt") - .lines() - .filter(|line| !line.starts_with('#')) - .map(|line| PackageName::from(line.trim())) - .collect() - }); - - Ok(POPULAR_CRATES.clone()) - } - - async fn index(&self, package: PackageName, database: Arc) -> Result<()> { - index_rustdoc(package, database, { - move |crate_name, item| { - let http_client = self.http_client.clone(); - let crate_name = crate_name.clone(); - let item = item.cloned(); - async move { - let version = "latest"; - let path = format!( - "{crate_name}/{version}/{crate_name}{item_path}", - item_path = item - .map(|item| format!("/{}", item.url_path())) - .unwrap_or_default() - ); - - let mut response = http_client - .get( - &format!("https://docs.rs/{path}"), - AsyncBody::default(), - true, - ) - .await?; - - let mut body = Vec::new(); - response - .body_mut() - .read_to_end(&mut body) - .await - .context("error reading docs.rs response body")?; - - if response.status().is_client_error() { - let text = String::from_utf8_lossy(body.as_slice()); - bail!( - "status error {}, response: {text:?}", - response.status().as_u16() - ); - } - - Ok(Some(String::from_utf8(body)?)) - } - .boxed() - } - }) - .await - } -} - -async fn index_rustdoc( - package: PackageName, - database: Arc, - fetch_page: impl Fn( - &PackageName, - Option<&RustdocItem>, - ) -> BoxFuture<'static, Result>> - + Send - + Sync, -) -> Result<()> { - let Some(package_root_content) = fetch_page(&package, None).await? else { - return Ok(()); - }; - - let (crate_root_markdown, items) = - convert_rustdoc_to_markdown(package_root_content.as_bytes())?; - - database - .insert(package.to_string(), crate_root_markdown) - .await?; - - let mut seen_items = HashSet::from_iter(items.clone()); - let mut items_to_visit: VecDeque = - VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory { - item, - #[cfg(debug_assertions)] - history: Vec::new(), - })); - - while let Some(item_with_history) = items_to_visit.pop_front() { - let item = &item_with_history.item; - - let Some(result) = fetch_page(&package, Some(item)).await.with_context(|| { - #[cfg(debug_assertions)] - { - format!( - "failed to fetch {item:?}: {history:?}", - history = item_with_history.history - ) - } - - #[cfg(not(debug_assertions))] - { - format!("failed to fetch {item:?}") - } - })? - else { - continue; - }; - - let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?; - - database - .insert(format!("{package}::{}", item.display()), markdown) - .await?; - - let parent_item = item; - for mut item in referenced_items { - if seen_items.contains(&item) { - continue; - } - - seen_items.insert(item.clone()); - - item.path.extend(parent_item.path.clone()); - if parent_item.kind == RustdocItemKind::Mod { - item.path.push(parent_item.name.clone()); - } - - items_to_visit.push_back(RustdocItemWithHistory { - #[cfg(debug_assertions)] - history: { - let mut history = item_with_history.history.clone(); - history.push(item.url_path()); - history - }, - item, - }); - } - } - - Ok(()) -} diff --git a/crates/indexed_docs/src/providers/rustdoc/item.rs b/crates/indexed_docs/src/providers/rustdoc/item.rs deleted file mode 100644 index 7d9023ef3e..0000000000 --- a/crates/indexed_docs/src/providers/rustdoc/item.rs +++ /dev/null @@ -1,82 +0,0 @@ -use std::sync::Arc; - -use serde::{Deserialize, Serialize}; -use strum::EnumIter; - -#[derive( - Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, Serialize, Deserialize, EnumIter, -)] -#[serde(rename_all = "snake_case")] -pub enum RustdocItemKind { - Mod, - Macro, - Struct, - Enum, - Constant, - Trait, - Function, - TypeAlias, - AttributeMacro, - DeriveMacro, -} - -impl RustdocItemKind { - pub(crate) const fn class(&self) -> &'static str { - match self { - Self::Mod => "mod", - Self::Macro => "macro", - Self::Struct => "struct", - Self::Enum => "enum", - Self::Constant => "constant", - Self::Trait => "trait", - Self::Function => "fn", - Self::TypeAlias => "type", - Self::AttributeMacro => "attr", - Self::DeriveMacro => "derive", - } - } -} - -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] -pub struct RustdocItem { - pub kind: RustdocItemKind, - /// The item path, up until the name of the item. - pub path: Vec>, - /// The name of the item. - pub name: Arc, -} - -impl RustdocItem { - pub fn display(&self) -> String { - let mut path_segments = self.path.clone(); - path_segments.push(self.name.clone()); - - path_segments.join("::") - } - - pub fn url_path(&self) -> String { - let name = &self.name; - let mut path_components = self.path.clone(); - - match self.kind { - RustdocItemKind::Mod => { - path_components.push(name.clone()); - path_components.push("index.html".into()); - } - RustdocItemKind::Macro - | RustdocItemKind::Struct - | RustdocItemKind::Enum - | RustdocItemKind::Constant - | RustdocItemKind::Trait - | RustdocItemKind::Function - | RustdocItemKind::TypeAlias - | RustdocItemKind::AttributeMacro - | RustdocItemKind::DeriveMacro => { - path_components - .push(format!("{kind}.{name}.html", kind = self.kind.class()).into()); - } - } - - path_components.join("/") - } -} diff --git a/crates/indexed_docs/src/providers/rustdoc/popular_crates.txt b/crates/indexed_docs/src/providers/rustdoc/popular_crates.txt deleted file mode 100644 index ce2c3d51d8..0000000000 --- a/crates/indexed_docs/src/providers/rustdoc/popular_crates.txt +++ /dev/null @@ -1,252 +0,0 @@ -# A list of the most popular Rust crates. -# Sourced from https://lib.rs/std. -serde -serde_json -syn -clap -thiserror -rand -log -tokio -anyhow -regex -quote -proc-macro2 -base64 -itertools -chrono -lazy_static -once_cell -libc -reqwest -futures -bitflags -tracing -url -bytes -toml -tempfile -uuid -indexmap -env_logger -num-traits -async-trait -sha2 -hex -tracing-subscriber -http -parking_lot -cfg-if -futures-util -cc -hashbrown -rayon -hyper -getrandom -semver -strum -flate2 -tokio-util -smallvec -criterion -paste -heck -rand_core -nom -rustls -nix -glob -time -byteorder -strum_macros -serde_yaml -wasm-bindgen -ahash -either -num_cpus -rand_chacha -prost -percent-encoding -pin-project-lite -tokio-stream -bincode -walkdir -bindgen -axum -windows-sys -futures-core -ring -digest -num-bigint -rustls-pemfile -serde_with -crossbeam-channel -tokio-rustls -hmac -fastrand -dirs -zeroize -socket2 -pin-project -tower -derive_more -memchr -toml_edit -static_assertions -pretty_assertions -js-sys -convert_case -unicode-width -pkg-config -itoa -colored -rustc-hash -darling -mime -web-sys -image -bytemuck -which -sha1 -dashmap -arrayvec -fnv -tonic -humantime -libloading -winapi -rustc_version -http-body -indoc -num -home -serde_urlencoded -http-body-util -unicode-segmentation -num-integer -webpki-roots -phf -futures-channel -indicatif -petgraph -ordered-float -strsim -zstd -console -encoding_rs -wasm-bindgen-futures -urlencoding -subtle -crc32fast -slab -rustix -predicates -spin -hyper-rustls -backtrace -rustversion -mio -scopeguard -proc-macro-error -hyper-util -ryu -prost-types -textwrap -memmap2 -zip -zerocopy -generic-array -tar -pyo3 -async-stream -quick-xml -memoffset -csv -crossterm -windows -num_enum -tokio-tungstenite -crossbeam-utils -async-channel -lru -aes -futures-lite -tracing-core -prettyplease -httparse -serde_bytes -tracing-log -tower-service -cargo_metadata -pest -mime_guess -tower-http -data-encoding -native-tls -prost-build -proptest -derivative -serial_test -libm -half -futures-io -bitvec -rustls-native-certs -ureq -object -anstyle -tonic-build -form_urlencoded -num-derive -pest_derive -schemars -proc-macro-crate -rstest -futures-executor -assert_cmd -termcolor -serde_repr -ctrlc -sha3 -clap_complete -flume -mockall -ipnet -aho-corasick -atty -signal-hook -async-std -filetime -num-complex -opentelemetry -cmake -arc-swap -derive_builder -async-recursion -dyn-clone -bumpalo -fs_extra -git2 -sysinfo -shlex -instant -approx -rmp-serde -rand_distr -rustls-pki-types -maplit -sqlx -blake3 -hyper-tls -dotenvy -jsonwebtoken -openssl-sys -crossbeam -camino -winreg -config -rsa -bit-vec -chrono-tz -async-lock -bstr diff --git a/crates/indexed_docs/src/providers/rustdoc/to_markdown.rs b/crates/indexed_docs/src/providers/rustdoc/to_markdown.rs deleted file mode 100644 index 87e3863728..0000000000 --- a/crates/indexed_docs/src/providers/rustdoc/to_markdown.rs +++ /dev/null @@ -1,618 +0,0 @@ -use std::cell::RefCell; -use std::io::Read; -use std::rc::Rc; - -use anyhow::Result; -use html_to_markdown::markdown::{ - HeadingHandler, ListHandler, ParagraphHandler, StyledTextHandler, TableHandler, -}; -use html_to_markdown::{ - HandleTag, HandlerOutcome, HtmlElement, MarkdownWriter, StartTagOutcome, TagHandler, - convert_html_to_markdown, -}; -use indexmap::IndexSet; -use strum::IntoEnumIterator; - -use crate::{RustdocItem, RustdocItemKind}; - -/// Converts the provided rustdoc HTML to Markdown. -pub fn convert_rustdoc_to_markdown(html: impl Read) -> Result<(String, Vec)> { - let item_collector = Rc::new(RefCell::new(RustdocItemCollector::new())); - - let mut handlers: Vec = vec![ - Rc::new(RefCell::new(ParagraphHandler)), - Rc::new(RefCell::new(HeadingHandler)), - Rc::new(RefCell::new(ListHandler)), - Rc::new(RefCell::new(TableHandler::new())), - Rc::new(RefCell::new(StyledTextHandler)), - Rc::new(RefCell::new(RustdocChromeRemover)), - Rc::new(RefCell::new(RustdocHeadingHandler)), - Rc::new(RefCell::new(RustdocCodeHandler)), - Rc::new(RefCell::new(RustdocItemHandler)), - item_collector.clone(), - ]; - - let markdown = convert_html_to_markdown(html, &mut handlers)?; - - let items = item_collector - .borrow() - .items - .iter() - .cloned() - .collect::>(); - - Ok((markdown, items)) -} - -pub struct RustdocHeadingHandler; - -impl HandleTag for RustdocHeadingHandler { - fn should_handle(&self, _tag: &str) -> bool { - // We're only handling text, so we don't need to visit any tags. - false - } - - fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome { - if writer.is_inside("h1") - || writer.is_inside("h2") - || writer.is_inside("h3") - || writer.is_inside("h4") - || writer.is_inside("h5") - || writer.is_inside("h6") - { - let text = text - .trim_matches(|char| char == '\n' || char == '\r') - .replace('\n', " "); - writer.push_str(&text); - - return HandlerOutcome::Handled; - } - - HandlerOutcome::NoOp - } -} - -pub struct RustdocCodeHandler; - -impl HandleTag for RustdocCodeHandler { - fn should_handle(&self, tag: &str) -> bool { - matches!(tag, "pre" | "code") - } - - fn handle_tag_start( - &mut self, - tag: &HtmlElement, - writer: &mut MarkdownWriter, - ) -> StartTagOutcome { - match tag.tag() { - "code" => { - if !writer.is_inside("pre") { - writer.push_str("`"); - } - } - "pre" => { - let classes = tag.classes(); - let is_rust = classes.iter().any(|class| class == "rust"); - let language = is_rust - .then_some("rs") - .or_else(|| { - classes.iter().find_map(|class| { - if let Some((_, language)) = class.split_once("language-") { - Some(language.trim()) - } else { - None - } - }) - }) - .unwrap_or(""); - - writer.push_str(&format!("\n\n```{language}\n")); - } - _ => {} - } - - StartTagOutcome::Continue - } - - fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) { - match tag.tag() { - "code" => { - if !writer.is_inside("pre") { - writer.push_str("`"); - } - } - "pre" => writer.push_str("\n```\n"), - _ => {} - } - } - - fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome { - if writer.is_inside("pre") { - writer.push_str(text); - return HandlerOutcome::Handled; - } - - HandlerOutcome::NoOp - } -} - -const RUSTDOC_ITEM_NAME_CLASS: &str = "item-name"; - -pub struct RustdocItemHandler; - -impl RustdocItemHandler { - /// Returns whether we're currently inside of an `.item-name` element, which - /// rustdoc uses to display Rust items in a list. - fn is_inside_item_name(writer: &MarkdownWriter) -> bool { - writer - .current_element_stack() - .iter() - .any(|element| element.has_class(RUSTDOC_ITEM_NAME_CLASS)) - } -} - -impl HandleTag for RustdocItemHandler { - fn should_handle(&self, tag: &str) -> bool { - matches!(tag, "div" | "span") - } - - fn handle_tag_start( - &mut self, - tag: &HtmlElement, - writer: &mut MarkdownWriter, - ) -> StartTagOutcome { - match tag.tag() { - "div" | "span" => { - if Self::is_inside_item_name(writer) && tag.has_class("stab") { - writer.push_str(" ["); - } - } - _ => {} - } - - StartTagOutcome::Continue - } - - fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) { - match tag.tag() { - "div" | "span" => { - if tag.has_class(RUSTDOC_ITEM_NAME_CLASS) { - writer.push_str(": "); - } - - if Self::is_inside_item_name(writer) && tag.has_class("stab") { - writer.push_str("]"); - } - } - _ => {} - } - } - - fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome { - if Self::is_inside_item_name(writer) - && !writer.is_inside("span") - && !writer.is_inside("code") - { - writer.push_str(&format!("`{text}`")); - return HandlerOutcome::Handled; - } - - HandlerOutcome::NoOp - } -} - -pub struct RustdocChromeRemover; - -impl HandleTag for RustdocChromeRemover { - fn should_handle(&self, tag: &str) -> bool { - matches!( - tag, - "head" | "script" | "nav" | "summary" | "button" | "a" | "div" | "span" - ) - } - - fn handle_tag_start( - &mut self, - tag: &HtmlElement, - _writer: &mut MarkdownWriter, - ) -> StartTagOutcome { - match tag.tag() { - "head" | "script" | "nav" => return StartTagOutcome::Skip, - "summary" => { - if tag.has_class("hideme") { - return StartTagOutcome::Skip; - } - } - "button" => { - if tag.attr("id").as_deref() == Some("copy-path") { - return StartTagOutcome::Skip; - } - } - "a" => { - if tag.has_any_classes(&["anchor", "doc-anchor", "src"]) { - return StartTagOutcome::Skip; - } - } - "div" | "span" => { - if tag.has_any_classes(&["nav-container", "sidebar-elems", "out-of-band"]) { - return StartTagOutcome::Skip; - } - } - - _ => {} - } - - StartTagOutcome::Continue - } -} - -pub struct RustdocItemCollector { - pub items: IndexSet, -} - -impl RustdocItemCollector { - pub fn new() -> Self { - Self { - items: IndexSet::new(), - } - } - - fn parse_item(tag: &HtmlElement) -> Option { - if tag.tag() != "a" { - return None; - } - - let href = tag.attr("href")?; - if href.starts_with('#') || href.starts_with("https://") || href.starts_with("../") { - return None; - } - - for kind in RustdocItemKind::iter() { - if tag.has_class(kind.class()) { - let mut parts = href.trim_end_matches("/index.html").split('/'); - - if let Some(last_component) = parts.next_back() { - let last_component = match last_component.split_once('#') { - Some((component, _fragment)) => component, - None => last_component, - }; - - let name = last_component - .trim_start_matches(&format!("{}.", kind.class())) - .trim_end_matches(".html"); - - return Some(RustdocItem { - kind, - name: name.into(), - path: parts.map(Into::into).collect(), - }); - } - } - } - - None - } -} - -impl HandleTag for RustdocItemCollector { - fn should_handle(&self, tag: &str) -> bool { - tag == "a" - } - - fn handle_tag_start( - &mut self, - tag: &HtmlElement, - writer: &mut MarkdownWriter, - ) -> StartTagOutcome { - if tag.tag() == "a" { - let is_reexport = writer.current_element_stack().iter().any(|element| { - if let Some(id) = element.attr("id") { - id.starts_with("reexport.") || id.starts_with("method.") - } else { - false - } - }); - - if !is_reexport { - if let Some(item) = Self::parse_item(tag) { - self.items.insert(item); - } - } - } - - StartTagOutcome::Continue - } -} - -#[cfg(test)] -mod tests { - use html_to_markdown::{TagHandler, convert_html_to_markdown}; - use indoc::indoc; - use pretty_assertions::assert_eq; - - use super::*; - - fn rustdoc_handlers() -> Vec { - vec![ - Rc::new(RefCell::new(ParagraphHandler)), - Rc::new(RefCell::new(HeadingHandler)), - Rc::new(RefCell::new(ListHandler)), - Rc::new(RefCell::new(TableHandler::new())), - Rc::new(RefCell::new(StyledTextHandler)), - Rc::new(RefCell::new(RustdocChromeRemover)), - Rc::new(RefCell::new(RustdocHeadingHandler)), - Rc::new(RefCell::new(RustdocCodeHandler)), - Rc::new(RefCell::new(RustdocItemHandler)), - ] - } - - #[test] - fn test_main_heading_buttons_get_removed() { - let html = indoc! {r##" -
-

Crate serde

- - source · - -
- "##}; - let expected = indoc! {" - # Crate serde - "} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } - - #[test] - fn test_single_paragraph() { - let html = indoc! {r#" -

In particular, the last point is what sets axum apart from other frameworks. - axum doesn’t have its own middleware system but instead uses - tower::Service. This means axum gets timeouts, tracing, compression, - authorization, and more, for free. It also enables you to share middleware with - applications written using hyper or tonic.

- "#}; - let expected = indoc! {" - In particular, the last point is what sets `axum` apart from other frameworks. `axum` doesn’t have its own middleware system but instead uses `tower::Service`. This means `axum` gets timeouts, tracing, compression, authorization, and more, for free. It also enables you to share middleware with applications written using `hyper` or `tonic`. - "} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } - - #[test] - fn test_multiple_paragraphs() { - let html = indoc! {r##" -

§Serde

-

Serde is a framework for serializing and deserializing Rust data - structures efficiently and generically.

-

The Serde ecosystem consists of data structures that know how to serialize - and deserialize themselves along with data formats that know how to - serialize and deserialize other things. Serde provides the layer by which - these two groups interact with each other, allowing any supported data - structure to be serialized and deserialized using any supported data format.

-

See the Serde website https://serde.rs/ for additional documentation and - usage examples.

-

§Design

-

Where many other languages rely on runtime reflection for serializing data, - Serde is instead built on Rust’s powerful trait system. A data structure - that knows how to serialize and deserialize itself is one that implements - Serde’s Serialize and Deserialize traits (or uses Serde’s derive - attribute to automatically generate implementations at compile time). This - avoids any overhead of reflection or runtime type information. In fact in - many situations the interaction between data structure and data format can - be completely optimized away by the Rust compiler, leaving Serde - serialization to perform the same speed as a handwritten serializer for the - specific selection of data structure and data format.

- "##}; - let expected = indoc! {" - ## Serde - - Serde is a framework for _**ser**_ializing and _**de**_serializing Rust data structures efficiently and generically. - - The Serde ecosystem consists of data structures that know how to serialize and deserialize themselves along with data formats that know how to serialize and deserialize other things. Serde provides the layer by which these two groups interact with each other, allowing any supported data structure to be serialized and deserialized using any supported data format. - - See the Serde website https://serde.rs/ for additional documentation and usage examples. - - ### Design - - Where many other languages rely on runtime reflection for serializing data, Serde is instead built on Rust’s powerful trait system. A data structure that knows how to serialize and deserialize itself is one that implements Serde’s `Serialize` and `Deserialize` traits (or uses Serde’s derive attribute to automatically generate implementations at compile time). This avoids any overhead of reflection or runtime type information. In fact in many situations the interaction between data structure and data format can be completely optimized away by the Rust compiler, leaving Serde serialization to perform the same speed as a handwritten serializer for the specific selection of data structure and data format. - "} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } - - #[test] - fn test_styled_text() { - let html = indoc! {r#" -

This text is bolded.

-

This text is italicized.

- "#}; - let expected = indoc! {" - This text is **bolded**. - - This text is _italicized_. - "} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } - - #[test] - fn test_rust_code_block() { - let html = indoc! {r#" -
use axum::extract::{Path, Query, Json};
-            use std::collections::HashMap;
-
-            // `Path` gives you the path parameters and deserializes them.
-            async fn path(Path(user_id): Path<u32>) {}
-
-            // `Query` gives you the query parameters and deserializes them.
-            async fn query(Query(params): Query<HashMap<String, String>>) {}
-
-            // Buffer the request body and deserialize it as JSON into a
-            // `serde_json::Value`. `Json` supports any type that implements
-            // `serde::Deserialize`.
-            async fn json(Json(payload): Json<serde_json::Value>) {}
- "#}; - let expected = indoc! {" - ```rs - use axum::extract::{Path, Query, Json}; - use std::collections::HashMap; - - // `Path` gives you the path parameters and deserializes them. - async fn path(Path(user_id): Path) {} - - // `Query` gives you the query parameters and deserializes them. - async fn query(Query(params): Query>) {} - - // Buffer the request body and deserialize it as JSON into a - // `serde_json::Value`. `Json` supports any type that implements - // `serde::Deserialize`. - async fn json(Json(payload): Json) {} - ``` - "} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } - - #[test] - fn test_toml_code_block() { - let html = indoc! {r##" -

§Required dependencies

-

To use axum there are a few dependencies you have to pull in as well:

-
[dependencies]
-            axum = "<latest-version>"
-            tokio = { version = "<latest-version>", features = ["full"] }
-            tower = "<latest-version>"
-            
- "##}; - let expected = indoc! {r#" - ## Required dependencies - - To use axum there are a few dependencies you have to pull in as well: - - ```toml - [dependencies] - axum = "" - tokio = { version = "", features = ["full"] } - tower = "" - - ``` - "#} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } - - #[test] - fn test_item_table() { - let html = indoc! {r##" -

Structs§

-
    -
  • Errors that can happen when using axum.
  • -
  • Extractor and response for extensions.
  • -
  • Formform
    URL encoded extractor and response.
  • -
  • Jsonjson
    JSON Extractor / Response.
  • -
  • The router type for composing handlers and services.
-

Functions§

-
    -
  • servetokio and (http1 or http2)
    Serve the service with the supplied listener.
  • -
- "##}; - let expected = indoc! {r#" - ## Structs - - - `Error`: Errors that can happen when using axum. - - `Extension`: Extractor and response for extensions. - - `Form` [`form`]: URL encoded extractor and response. - - `Json` [`json`]: JSON Extractor / Response. - - `Router`: The router type for composing handlers and services. - - ## Functions - - - `serve` [`tokio` and (`http1` or `http2`)]: Serve the service with the supplied listener. - "#} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } - - #[test] - fn test_table() { - let html = indoc! {r##" -

§Feature flags

-

axum uses a set of feature flags to reduce the amount of compiled and - optional dependencies.

-

The following optional features are available:

-
- - - - - - - - - - - - - -
NameDescriptionDefault?
http1Enables hyper’s http1 featureYes
http2Enables hyper’s http2 featureNo
jsonEnables the Json type and some similar convenience functionalityYes
macrosEnables optional utility macrosNo
matched-pathEnables capturing of every request’s router path and the MatchedPath extractorYes
multipartEnables parsing multipart/form-data requests with MultipartNo
original-uriEnables capturing of every request’s original URI and the OriginalUri extractorYes
tokioEnables tokio as a dependency and axum::serve, SSE and extract::connect_info types.Yes
tower-logEnables tower’s log featureYes
tracingLog rejections from built-in extractorsYes
wsEnables WebSockets support via extract::wsNo
formEnables the Form extractorYes
queryEnables the Query extractorYes
- "##}; - let expected = indoc! {r#" - ## Feature flags - - axum uses a set of feature flags to reduce the amount of compiled and optional dependencies. - - The following optional features are available: - - | Name | Description | Default? | - | --- | --- | --- | - | `http1` | Enables hyper’s `http1` feature | Yes | - | `http2` | Enables hyper’s `http2` feature | No | - | `json` | Enables the `Json` type and some similar convenience functionality | Yes | - | `macros` | Enables optional utility macros | No | - | `matched-path` | Enables capturing of every request’s router path and the `MatchedPath` extractor | Yes | - | `multipart` | Enables parsing `multipart/form-data` requests with `Multipart` | No | - | `original-uri` | Enables capturing of every request’s original URI and the `OriginalUri` extractor | Yes | - | `tokio` | Enables `tokio` as a dependency and `axum::serve`, `SSE` and `extract::connect_info` types. | Yes | - | `tower-log` | Enables `tower`’s `log` feature | Yes | - | `tracing` | Log rejections from built-in extractors | Yes | - | `ws` | Enables WebSockets support via `extract::ws` | No | - | `form` | Enables the `Form` extractor | Yes | - | `query` | Enables the `Query` extractor | Yes | - "#} - .trim(); - - assert_eq!( - convert_html_to_markdown(html.as_bytes(), &mut rustdoc_handlers()).unwrap(), - expected - ) - } -} diff --git a/crates/indexed_docs/src/registry.rs b/crates/indexed_docs/src/registry.rs deleted file mode 100644 index 6757cd9c1a..0000000000 --- a/crates/indexed_docs/src/registry.rs +++ /dev/null @@ -1,62 +0,0 @@ -use std::sync::Arc; - -use collections::HashMap; -use gpui::{App, BackgroundExecutor, Global, ReadGlobal, UpdateGlobal}; -use parking_lot::RwLock; - -use crate::{IndexedDocsProvider, IndexedDocsStore, ProviderId}; - -struct GlobalIndexedDocsRegistry(Arc); - -impl Global for GlobalIndexedDocsRegistry {} - -pub struct IndexedDocsRegistry { - executor: BackgroundExecutor, - stores_by_provider: RwLock>>, -} - -impl IndexedDocsRegistry { - pub fn global(cx: &App) -> Arc { - GlobalIndexedDocsRegistry::global(cx).0.clone() - } - - pub(crate) fn init_global(cx: &mut App) { - GlobalIndexedDocsRegistry::set_global( - cx, - GlobalIndexedDocsRegistry(Arc::new(Self::new(cx.background_executor().clone()))), - ); - } - - pub fn new(executor: BackgroundExecutor) -> Self { - Self { - executor, - stores_by_provider: RwLock::new(HashMap::default()), - } - } - - pub fn list_providers(&self) -> Vec { - self.stores_by_provider - .read() - .keys() - .cloned() - .collect::>() - } - - pub fn register_provider( - &self, - provider: Box, - ) { - self.stores_by_provider.write().insert( - provider.id(), - Arc::new(IndexedDocsStore::new(provider, self.executor.clone())), - ); - } - - pub fn unregister_provider(&self, provider_id: &ProviderId) { - self.stores_by_provider.write().remove(provider_id); - } - - pub fn get_provider_store(&self, provider_id: ProviderId) -> Option> { - self.stores_by_provider.read().get(&provider_id).cloned() - } -} diff --git a/crates/indexed_docs/src/store.rs b/crates/indexed_docs/src/store.rs deleted file mode 100644 index 1407078efa..0000000000 --- a/crates/indexed_docs/src/store.rs +++ /dev/null @@ -1,346 +0,0 @@ -use std::path::PathBuf; -use std::sync::Arc; -use std::sync::atomic::AtomicBool; - -use anyhow::{Context as _, Result, anyhow}; -use async_trait::async_trait; -use collections::HashMap; -use derive_more::{Deref, Display}; -use futures::FutureExt; -use futures::future::{self, BoxFuture, Shared}; -use fuzzy::StringMatchCandidate; -use gpui::{App, BackgroundExecutor, Task}; -use heed::Database; -use heed::types::SerdeBincode; -use parking_lot::RwLock; -use serde::{Deserialize, Serialize}; -use util::ResultExt; - -use crate::IndexedDocsRegistry; - -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)] -pub struct ProviderId(pub Arc); - -/// The name of a package. -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)] -pub struct PackageName(Arc); - -impl From<&str> for PackageName { - fn from(value: &str) -> Self { - Self(value.into()) - } -} - -#[async_trait] -pub trait IndexedDocsProvider { - /// Returns the ID of this provider. - fn id(&self) -> ProviderId; - - /// Returns the path to the database for this provider. - fn database_path(&self) -> PathBuf; - - /// Returns a list of packages as suggestions to be included in the search - /// results. - /// - /// This can be used to provide completions for known packages (e.g., from the - /// local project or a registry) before a package has been indexed. - async fn suggest_packages(&self) -> Result>; - - /// Indexes the package with the given name. - async fn index(&self, package: PackageName, database: Arc) -> Result<()>; -} - -/// A store for indexed docs. -pub struct IndexedDocsStore { - executor: BackgroundExecutor, - database_future: - Shared, Arc>>>, - provider: Box, - indexing_tasks_by_package: - RwLock>>>>>, - latest_errors_by_package: RwLock>>, -} - -impl IndexedDocsStore { - pub fn try_global(provider: ProviderId, cx: &App) -> Result> { - let registry = IndexedDocsRegistry::global(cx); - registry - .get_provider_store(provider.clone()) - .with_context(|| format!("no indexed docs store found for {provider}")) - } - - pub fn new( - provider: Box, - executor: BackgroundExecutor, - ) -> Self { - let database_future = executor - .spawn({ - let executor = executor.clone(); - let database_path = provider.database_path(); - async move { IndexedDocsDatabase::new(database_path, executor) } - }) - .then(|result| future::ready(result.map(Arc::new).map_err(Arc::new))) - .boxed() - .shared(); - - Self { - executor, - database_future, - provider, - indexing_tasks_by_package: RwLock::new(HashMap::default()), - latest_errors_by_package: RwLock::new(HashMap::default()), - } - } - - pub fn latest_error_for_package(&self, package: &PackageName) -> Option> { - self.latest_errors_by_package.read().get(package).cloned() - } - - /// Returns whether the package with the given name is currently being indexed. - pub fn is_indexing(&self, package: &PackageName) -> bool { - self.indexing_tasks_by_package.read().contains_key(package) - } - - pub async fn load(&self, key: String) -> Result { - self.database_future - .clone() - .await - .map_err(|err| anyhow!(err))? - .load(key) - .await - } - - pub async fn load_many_by_prefix(&self, prefix: String) -> Result> { - self.database_future - .clone() - .await - .map_err(|err| anyhow!(err))? - .load_many_by_prefix(prefix) - .await - } - - /// Returns whether any entries exist with the given prefix. - pub async fn any_with_prefix(&self, prefix: String) -> Result { - self.database_future - .clone() - .await - .map_err(|err| anyhow!(err))? - .any_with_prefix(prefix) - .await - } - - pub fn suggest_packages(self: Arc) -> Task>> { - let this = self.clone(); - self.executor - .spawn(async move { this.provider.suggest_packages().await }) - } - - pub fn index( - self: Arc, - package: PackageName, - ) -> Shared>>> { - if let Some(existing_task) = self.indexing_tasks_by_package.read().get(&package) { - return existing_task.clone(); - } - - let indexing_task = self - .executor - .spawn({ - let this = self.clone(); - let package = package.clone(); - async move { - let _finally = util::defer({ - let this = this.clone(); - let package = package.clone(); - move || { - this.indexing_tasks_by_package.write().remove(&package); - } - }); - - let index_task = { - let package = package.clone(); - async { - let database = this - .database_future - .clone() - .await - .map_err(|err| anyhow!(err))?; - this.provider.index(package, database).await - } - }; - - let result = index_task.await.map_err(Arc::new); - match &result { - Ok(_) => { - this.latest_errors_by_package.write().remove(&package); - } - Err(err) => { - this.latest_errors_by_package - .write() - .insert(package, err.to_string().into()); - } - } - - result - } - }) - .shared(); - - self.indexing_tasks_by_package - .write() - .insert(package, indexing_task.clone()); - - indexing_task - } - - pub fn search(&self, query: String) -> Task> { - let executor = self.executor.clone(); - let database_future = self.database_future.clone(); - self.executor.spawn(async move { - let Some(database) = database_future.await.map_err(|err| anyhow!(err)).log_err() else { - return Vec::new(); - }; - - let Some(items) = database.keys().await.log_err() else { - return Vec::new(); - }; - - let candidates = items - .iter() - .enumerate() - .map(|(ix, item_path)| StringMatchCandidate::new(ix, &item_path)) - .collect::>(); - - let matches = fuzzy::match_strings( - &candidates, - &query, - false, - true, - 100, - &AtomicBool::default(), - executor, - ) - .await; - - matches - .into_iter() - .map(|mat| items[mat.candidate_id].clone()) - .collect() - }) - } -} - -#[derive(Debug, PartialEq, Eq, Clone, Display, Serialize, Deserialize)] -pub struct MarkdownDocs(pub String); - -pub struct IndexedDocsDatabase { - executor: BackgroundExecutor, - env: heed::Env, - entries: Database, SerdeBincode>, -} - -impl IndexedDocsDatabase { - pub fn new(path: PathBuf, executor: BackgroundExecutor) -> Result { - std::fs::create_dir_all(&path)?; - - const ONE_GB_IN_BYTES: usize = 1024 * 1024 * 1024; - let env = unsafe { - heed::EnvOpenOptions::new() - .map_size(ONE_GB_IN_BYTES) - .max_dbs(1) - .open(path)? - }; - - let mut txn = env.write_txn()?; - let entries = env.create_database(&mut txn, Some("rustdoc_entries"))?; - txn.commit()?; - - Ok(Self { - executor, - env, - entries, - }) - } - - pub fn keys(&self) -> Task>> { - let env = self.env.clone(); - let entries = self.entries; - - self.executor.spawn(async move { - let txn = env.read_txn()?; - let mut iter = entries.iter(&txn)?; - let mut keys = Vec::new(); - while let Some((key, _value)) = iter.next().transpose()? { - keys.push(key); - } - - Ok(keys) - }) - } - - pub fn load(&self, key: String) -> Task> { - let env = self.env.clone(); - let entries = self.entries; - - self.executor.spawn(async move { - let txn = env.read_txn()?; - entries - .get(&txn, &key)? - .with_context(|| format!("no docs found for {key}")) - }) - } - - pub fn load_many_by_prefix(&self, prefix: String) -> Task>> { - let env = self.env.clone(); - let entries = self.entries; - - self.executor.spawn(async move { - let txn = env.read_txn()?; - let results = entries - .iter(&txn)? - .filter_map(|entry| { - let (key, value) = entry.ok()?; - if key.starts_with(&prefix) { - Some((key, value)) - } else { - None - } - }) - .collect::>(); - - Ok(results) - }) - } - - /// Returns whether any entries exist with the given prefix. - pub fn any_with_prefix(&self, prefix: String) -> Task> { - let env = self.env.clone(); - let entries = self.entries; - - self.executor.spawn(async move { - let txn = env.read_txn()?; - let any = entries - .iter(&txn)? - .any(|entry| entry.map_or(false, |(key, _value)| key.starts_with(&prefix))); - Ok(any) - }) - } - - pub fn insert(&self, key: String, docs: String) -> Task> { - let env = self.env.clone(); - let entries = self.entries; - - self.executor.spawn(async move { - let mut txn = env.write_txn()?; - entries.put(&mut txn, &key, &MarkdownDocs(docs))?; - txn.commit()?; - Ok(()) - }) - } -} - -impl extension::KeyValueStoreDelegate for IndexedDocsDatabase { - fn insert(&self, key: String, docs: String) -> Task> { - IndexedDocsDatabase::insert(&self, key, docs) - } -} diff --git a/typos.toml b/typos.toml index 336a829a44..e5f02b6415 100644 --- a/typos.toml +++ b/typos.toml @@ -16,9 +16,6 @@ extend-exclude = [ "crates/google_ai/src/supported_countries.rs", "crates/open_ai/src/supported_countries.rs", - # Some crate names are flagged as typos. - "crates/indexed_docs/src/providers/rustdoc/popular_crates.txt", - # Some mock data is flagged as typos. "crates/assistant_tools/src/web_search_tool.rs",