gleam: Extract to external repository (#19072)

This PR transfers the Gleam extension over to the @gleam-lang
organization:

https://github.com/gleam-lang/zed-gleam

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2024-10-11 10:05:46 -04:00 committed by GitHub
parent 79ed217e42
commit d976c5f1b6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 3 additions and 1324 deletions

24
Cargo.lock generated
View file

@ -395,7 +395,7 @@ dependencies = [
"gpui",
"handlebars 4.5.0",
"heed",
"html_to_markdown 0.1.0",
"html_to_markdown",
"http_client",
"indexed_docs",
"indoc",
@ -5461,18 +5461,6 @@ dependencies = [
"regex",
]
[[package]]
name = "html_to_markdown"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e608e8dd0939bfb6b516d96a5919751b835297a02230aecb88d2fc84ebebaa8a"
dependencies = [
"anyhow",
"html5ever",
"markup5ever_rcdom",
"regex",
]
[[package]]
name = "http"
version = "0.2.12"
@ -5776,7 +5764,7 @@ dependencies = [
"fuzzy",
"gpui",
"heed",
"html_to_markdown 0.1.0",
"html_to_markdown",
"http_client",
"indexmap 1.9.3",
"indoc",
@ -14591,14 +14579,6 @@ dependencies = [
"wit-bindgen",
]
[[package]]
name = "zed_gleam"
version = "0.2.0"
dependencies = [
"html_to_markdown 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"zed_extension_api 0.1.0",
]
[[package]]
name = "zed_glsl"
version = "0.1.0"

View file

@ -144,7 +144,6 @@ members = [
"extensions/elm",
"extensions/emmet",
"extensions/erlang",
"extensions/gleam",
"extensions/glsl",
"extensions/haskell",
"extensions/html",

View file

@ -1,6 +1,6 @@
# Gleam
Gleam support is available through the [Gleam extension](https://github.com/zed-industries/zed/tree/main/extensions/gleam). To learn about Gleam, see the [docs](https://gleam.run/documentation/) or check out the [`stdlib` reference](https://hexdocs.pm/gleam_stdlib/). The Gleam language server has a variety of features, including go-to definition, automatic imports, and [more](https://gleam.run/language-server/).
Gleam support is available through the [Gleam extension](https://github.com/gleam-lang/zed-gleam). To learn about Gleam, see the [docs](https://gleam.run/documentation/) or check out the [`stdlib` reference](https://hexdocs.pm/gleam_stdlib/). The Gleam language server has a variety of features, including go-to definition, automatic imports, and [more](https://gleam.run/language-server/).
- Tree Sitter: [gleam-lang/tree-sitter-gleam](https://github.com/gleam-lang/tree-sitter-gleam)
- Language Server: [gleam lsp](https://github.com/gleam-lang/gleam/tree/main/compiler-core/src/language_server)

View file

@ -1,17 +0,0 @@
[package]
name = "zed_gleam"
version = "0.2.0"
edition = "2021"
publish = false
license = "Apache-2.0"
[lints]
workspace = true
[lib]
path = "src/gleam.rs"
crate-type = ["cdylib"]
[dependencies]
html_to_markdown = "0.1.0"
zed_extension_api = "0.1.0"

View file

@ -1 +0,0 @@
../../LICENSE-APACHE

View file

@ -1,21 +0,0 @@
id = "gleam"
name = "Gleam"
description = "Gleam support."
version = "0.2.0"
schema_version = 1
authors = ["Marshall Bowers <elliott.codes@gmail.com>"]
repository = "https://github.com/zed-industries/zed"
[language_servers.gleam]
name = "Gleam LSP"
language = "Gleam"
[grammars.gleam]
repository = "https://github.com/gleam-lang/tree-sitter-gleam"
commit = "426e67087fd62be5f4533581b5916b2cf010fb5b"
[slash_commands.gleam-project]
description = "Returns information about the current Gleam project."
requires_argument = false
[indexed_docs_providers.gleam-hexdocs]

View file

@ -1,12 +0,0 @@
name = "Gleam"
grammar = "gleam"
path_suffixes = ["gleam"]
line_comments = ["// ", "/// "]
autoclose_before = ";:.,=}])>"
brackets = [
{ start = "{", end = "}", close = true, newline = true },
{ start = "[", end = "]", close = true, newline = true },
{ start = "(", end = ")", close = true, newline = true },
{ start = "\"", end = "\"", close = true, newline = false, not_in = ["string", "comment"] },
]
tab_size = 2

View file

@ -1,130 +0,0 @@
; Comments
(module_comment) @comment
(statement_comment) @comment
(comment) @comment
; Constants
(constant
name: (identifier) @constant)
; Variables
(identifier) @variable
(discard) @comment.unused
; Modules
(module) @module
(import alias: (identifier) @module)
(remote_type_identifier
module: (identifier) @module)
(remote_constructor_name
module: (identifier) @module)
((field_access
record: (identifier) @module
field: (label) @function)
(#is-not? local))
; Functions
(unqualified_import (identifier) @function)
(unqualified_import "type" (type_identifier) @type)
(unqualified_import (type_identifier) @constructor)
(function
name: (identifier) @function)
(external_function
name: (identifier) @function)
(function_parameter
name: (identifier) @variable.parameter)
((function_call
function: (identifier) @function)
(#is-not? local))
((binary_expression
operator: "|>"
right: (identifier) @function)
(#is-not? local))
; "Properties"
; Assumed to be intended to refer to a name for a field; something that comes
; before ":" or after "."
; e.g. record field names, tuple indices, names for named arguments, etc
(label) @property
(tuple_access
index: (integer) @property)
; Attributes
(attribute
"@" @attribute
name: (identifier) @attribute)
(attribute_value (identifier) @constant)
; Type names
(remote_type_identifier) @type
(type_identifier) @type
; Data constructors
(constructor_name) @constructor
; Literals
(string) @string
((escape_sequence) @warning
; Deprecated in v0.33.0-rc2:
(#eq? @warning "\\e"))
(escape_sequence) @string.escape
(bit_string_segment_option) @function.builtin
(integer) @number
(float) @number
; Reserved identifiers
; TODO: when tree-sitter supports `#any-of?` in the Rust bindings,
; refactor this to use `#any-of?` rather than `#match?`
((identifier) @warning
(#match? @warning "^(auto|delegate|derive|else|implement|macro|test|echo)$"))
; Keywords
[
(visibility_modifier) ; "pub"
(opacity_modifier) ; "opaque"
"as"
"assert"
"case"
"const"
; DEPRECATED: 'external' was removed in v0.30.
"external"
"fn"
"if"
"import"
"let"
"panic"
"todo"
"type"
"use"
] @keyword
; Operators
(binary_expression
operator: _ @operator)
(boolean_negation "!" @operator)
(integer_negation "-" @operator)
; Punctuation
[
"("
")"
"["
"]"
"{"
"}"
"<<"
">>"
] @punctuation.bracket
[
"."
","
;; Controversial -- maybe some are operators?
":"
"#"
"="
"->"
".."
"-"
"<-"
] @punctuation.delimiter

View file

@ -1,3 +0,0 @@
(_ "[" "]" @end) @indent
(_ "{" "}" @end) @indent
(_ "(" ")" @end) @indent

View file

@ -1,33 +0,0 @@
(external_type
(visibility_modifier)? @context
"type" @context
(type_name) @name) @item
(type_definition
(visibility_modifier)? @context
(opacity_modifier)? @context
"type" @context
(type_name) @name) @item
(data_constructor
(constructor_name) @name) @item
(data_constructor_argument
(label) @name) @item
(type_alias
(visibility_modifier)? @context
"type" @context
(type_name) @name) @item
(function
(visibility_modifier)? @context
"fn" @context
name: (_) @name) @item
(constant
(visibility_modifier)? @context
"const" @context
name: (_) @name) @item
(statement_comment) @annotation

View file

@ -1,25 +0,0 @@
; Functions with names ending in `_test`.
; This matches the standalone test style used by Startest and Gleeunit.
(
(
(function name: (_) @run
(#match? @run ".*_test$"))
) @gleam-test
(#set! tag gleam-test)
)
; `describe` API for Startest.
(
(function_call
function: (_) @name
(#any-of? @name "describe" "it")
arguments: (arguments
.
(argument
value: (string (quoted_content) @run)
)
)
)
(#set! tag gleam-test)
) @gleam-test

View file

@ -1,13 +0,0 @@
[
{
"label": "gleam test",
"command": "gleam",
"args": ["test"]
},
{
"label": "gleam test $ZED_SYMBOL",
"command": "gleam",
"args": ["test", "--", "--test-name-filter=$ZED_SYMBOL"],
"tags": ["gleam-test"]
}
]

View file

@ -1,581 +0,0 @@
# The list of Gleam packages.
# Sourced from `https://packages.gleam.run/packages.sqlite`.
act
adglent
ag_html
aham
akaridb
alanttest1
alpaca
amf0
amnesiac
antigone
apollo
aragorn2
arcana_signals
arctic
argamak
argus
argv
ask
asterix
atomic_array
aws4_request
bare_package1
bare_package_one
bare_package_two
based
based_pg
based_sqlite
beecrypt
bidict
bigben
bigi
binary_search
birdie
birl
biscotto
bison
blah
blask
bliss
bravo
bungle
bytesize
cactus
cake
carpenter
catppuccin
cave3dplus
cgi
chatbot
check_maybe_div_by_zero
chip
chomp
chrobot
chromatic
classify
cleam
collatz
colored
colours
comet
commonmark
conllu
context_fp_gleam
conversation
cors_builder
cosepo
cosmos
counter
crabbucket_pgo
crabbucket_redis
crossbar
css_select
cymbal
dahlia
dbots
decepticon
decipher
decode
dedent
defangle
defer_g
delay
dew
dig
discord_gleam
domu
dot_env
dotenv_gleam
dove
ecoji
edit_distance
efetch
email
embeds
emel
envoy
esgleam
espresso
espresso_pgo_wrapper
eval
event_hub
eventsourcing
eventsourcing_postgres
eventsourcing_sqlite
exception
exercism_test_runner
facet
facquest
falala
falcon
feather
fetch_event
ffmpeg
fibo
file_streams
filepath
filespy
finch_gleam
first_gleam_publish_package
flash
fluoresce
fmglee
fmt
for_the_crows
form_coder
formal
fp
fp2
fp2_gleam
fp_gl
fresnel
fswalk
functx
funtil
gacache
galant
gap
garnet_tool
gary
gbase32_clockwork
gcalc
gchess
gemqtt
gen_core_erlang
gen_gleam
geny
germinal
ggleam
gild
gild_frontend
gip
gjwt
gl
glacier
glacier_gleeunit
gladvent
glailglind
glam
glame
glaml
glance
glance_printer
glanoid
glare
glatch
glats
glatus
glcode
gleaf
gleam
gleam_bbmustache
gleam_bitwise
gleam_bson
gleam_community_ansi
gleam_community_colour
gleam_community_maths
gleam_community_path
gleam_cors
gleam_cowboy
gleam_cowboy_websockets
gleam_crypto
gleam_deno
gleam_dotenv
gleam_elli
gleam_email
gleam_erlang
gleam_erlexec
gleam_fetch
gleam_gun
gleam_hackney
gleam_hexpm
gleam_html
gleam_http
gleam_httpc
gleam_javascript
gleam_json
gleam_module_javascript_test
gleam_mongo
gleam_nodejs
gleam_os_mon
gleam_otp
gleam_package_interface
gleam_pgo
gleam_qs
gleam_sendgrid
gleam_stats
gleam_stdlib
gleam_synapses
gleam_tailwind
gleam_tcp
gleam_test
gleam_toml
gleam_xml
gleam_yaml
gleam_zlists
gleambox
gleamix
gleamql
gleamsver
gleamy_bench
gleamy_structures
gleamyshell
gleanix
glearray
gleastsq
gleative
gleb128
glector
gledis
gledo
gleebor
gleenix
gleepl
gleescript
gleesend
gleeunit
gleez
gleither
glemini
glemo
glemplate
glemtext
glen
glen_node
glency
glentities
glenv
glenvy
glerd
glerd_json
glerd_valid
glerm
gleroglero
glesha
glesha2
glevatar
glevenshtein
glex
glexec
glexer
glexif
glib
gliberapay
glibsql
gliew
glimiter
glimmer
glimt
gling
glint
glisbn
glisdigit
glisten
glistix_gleeunit
glistix_nix
glitch
glitter
glittr
glitzer
gliua
globe
glog
glome
gloml
glomp
gloom
glormat
gloss
glotel
glove
glow
glow_auth
glubs
glubsub
glucose
glue
gluid
gluon
gluple
glv8
glx
glychee
glyph
glyph_codegen
glzoneinfo
gmysql
go_over
gopenai
gpsd_json
gpxb
grammy
gramps
graph
grille_pain
gripe
gserde
gstripe
gsv
gtempo
gts
gtui
gu
gwitch
gwr
gwt
gxid
gzlib
halo
handles
hardcache
hello_joe
howdy
howdy_authentication_cookies
howdy_uuid
htmb
htmgrrrl
html_components
html_dsl
html_lustre_converter
html_parser
htmz
httpp
hug
humanise
hyphenation
ids
ieee_float
illustrious
immutable_lru
integer_complexity
ior
iox
iso_8859
ivy
jackson
jasper
javascript_dom_parser
jbs
jot
json_canvas
juno
justin
keccak_gleam
kick
kielet
kirala_bbmarkdown
kirala_l4u
kirala_markdown
kreator
libsql
lite_fs
logging
lotta
lumi
lustre
lustre_animation
lustre_carousel
lustre_dev_tools
lustre_hash_state
lustre_http
lustre_hx
lustre_limiter
lustre_routed
lustre_ssg
lustre_transition
lustre_ui
lustre_virtual_list
lustre_websocket
lzf_gleam
marceau
mat
meadow
melon
midas
migrant
mineflayer
minigen
mist
mockth
modem
monies
morse_code_translator
mote
mug
mumu
mungo
nakai
nanoworker
nbeet
nerf
nessie
nessie_cluster
ngs
nibble
nimiq_gleam
node_socket_client
node_tags
non_empty_list
novdom
novdom_dev_tools
novdom_testing
observatory
open_color
openfeature
opt_args_with_defs_for_gleam
oteap
outcome
outil
owoify_gleam
p5js_gleam
palindrome
panel
parallel_map
parser_gleam
party
parz
pb_lite
pears
peggy
phonetic_gleam
phony
phosphor_lustre
pickle
pika_id
pine
pink
plex_pin_auth
plinth
plunk
pngleam
pojo
pona
postgresql_protocol
pprint
prequel
pretty_diff
priorityq
prng
process_groups
process_waiter
processgroups
promgleam
psg
puddle
punycode
qcheck
qcheck_gleeunit_utils
qs
queryb
question
rad
rada
radiate
radish
radish_fork
ramble
ranged_int
ranger
rank
react_gleam
reactive_signal
ream
recursive
redraw
redraw_dom
ref
rememo
remote_data
render_md
repeatedly
rizzo
runetracer
scaffold_gleam
scriptorium
sequin
shakespeare
shamir
sheen
shellout
shimmer
showtime
signal
signal_pgo
simple_pubsub
simplifile
singularity
sketch
sketch_css
sketch_lustre
slackin
snag
snowgleam
sol
sparkle
spinner
sprinkle
sprocket
sqlight
squirrel
stacky
staff_ai
starmap
startest
stdin
stego
stoiridh_version
storch
stratus
string_format
sturnidae
sunny
surreal_gleam
survey
swen_jwt
systemd_status
tardis
tcpea
telega
temporary
term_size
testbldr
testcontainers_gleam
the_stars
tinyroute
tom
tote
translate
transparent_http
trie_again
trust
tubes
tulip
tupler
typed_headers
valid
validate_monadic
varasto
vindaloo
vleam
wasmify
weapp
webls
webmidi
wechat_dev_tools
wemote
wimp
wink
wisp
wisp_flash
wolf
worm
wp_tables
xmb
xmleam
xmlm
ygleam
youid
zeptomail
zip_list

View file

@ -1,249 +0,0 @@
mod hexdocs;
use std::fs;
use std::sync::LazyLock;
use zed::lsp::CompletionKind;
use zed::{
CodeLabel, CodeLabelSpan, KeyValueStore, LanguageServerId, SlashCommand, SlashCommandOutput,
SlashCommandOutputSection,
};
use zed_extension_api::{self as zed, Result};
/// State held by the Gleam extension between calls.
struct GleamExtension {
/// Path of the `gleam` binary recorded by `language_server_binary_path` after it
/// resolves a downloaded release; reused on later calls while the file still exists.
cached_binary_path: Option<String>,
}
impl GleamExtension {
    /// Returns the path of the `gleam` executable used to start the language server.
    ///
    /// Lookup order:
    /// 1. a `gleam` binary reported by `worktree.which`,
    /// 2. the binary path cached by a previous call, if that file still exists,
    /// 3. the latest non-pre-release GitHub release of `gleam-lang/gleam`, downloaded
    ///    into a `gleam-<version>` directory relative to the working directory.
    ///
    /// After a fresh download every other entry in the working directory is removed
    /// on a best-effort basis, so older downloaded versions do not accumulate.
    ///
    /// # Errors
    ///
    /// Returns an error when the release lookup fails, when no release asset matches
    /// the current platform, when the download fails, or when the working directory
    /// cannot be listed.
    fn language_server_binary_path(
        &mut self,
        language_server_id: &LanguageServerId,
        worktree: &zed::Worktree,
    ) -> Result<String> {
        if let Some(path) = worktree.which("gleam") {
            return Ok(path);
        }

        if let Some(path) = &self.cached_binary_path {
            if fs::metadata(path).is_ok_and(|stat| stat.is_file()) {
                return Ok(path.clone());
            }
        }

        zed::set_language_server_installation_status(
            language_server_id,
            &zed::LanguageServerInstallationStatus::CheckingForUpdate,
        );
        let release = zed::latest_github_release(
            "gleam-lang/gleam",
            zed::GithubReleaseOptions {
                require_assets: true,
                pre_release: false,
            },
        )?;

        let (platform, arch) = zed::current_platform();
        // Gleam ships Windows builds as `.zip` archives containing `gleam.exe`;
        // macOS and Linux builds are `.tar.gz` archives containing `gleam`.
        let (archive_extension, archive_type, binary_name) = match platform {
            zed::Os::Mac | zed::Os::Linux => {
                ("tar.gz", zed::DownloadedFileType::GzipTar, "gleam")
            }
            zed::Os::Windows => ("zip", zed::DownloadedFileType::Zip, "gleam.exe"),
        };
        let asset_name = format!(
            "gleam-{version}-{arch}-{os}.{archive_extension}",
            version = release.version,
            arch = match arch {
                zed::Architecture::Aarch64 => "aarch64",
                zed::Architecture::X86 => "x86",
                zed::Architecture::X8664 => "x86_64",
            },
            os = match platform {
                zed::Os::Mac => "apple-darwin",
                zed::Os::Linux => "unknown-linux-musl",
                zed::Os::Windows => "pc-windows-msvc",
            },
        );

        let asset = release
            .assets
            .iter()
            .find(|asset| asset.name == asset_name)
            .ok_or_else(|| format!("no asset found matching {:?}", asset_name))?;

        let version_dir = format!("gleam-{}", release.version);
        let binary_path = format!("{version_dir}/{binary_name}");

        // Only download when this version has not been unpacked yet.
        if !fs::metadata(&binary_path).is_ok_and(|stat| stat.is_file()) {
            zed::set_language_server_installation_status(
                language_server_id,
                &zed::LanguageServerInstallationStatus::Downloading,
            );

            zed::download_file(&asset.download_url, &version_dir, archive_type)
                .map_err(|e| format!("failed to download file: {e}"))?;

            let entries =
                fs::read_dir(".").map_err(|e| format!("failed to list working directory {e}"))?;
            for entry in entries {
                let entry = entry.map_err(|e| format!("failed to load directory entry {e}"))?;
                if entry.file_name().to_str() != Some(&version_dir) {
                    // Best-effort cleanup of older versions; a failure here must not
                    // prevent the freshly downloaded binary from being used.
                    fs::remove_dir_all(entry.path()).ok();
                }
            }
        }

        self.cached_binary_path = Some(binary_path.clone());
        Ok(binary_path)
    }
}
impl zed::Extension for GleamExtension {
    /// Creates the extension with no language server binary cached yet.
    fn new() -> Self {
        let cached_binary_path = None;
        Self { cached_binary_path }
    }

    /// Builds the command that launches the Gleam language server: the resolved
    /// `gleam` binary invoked with the single argument `lsp`.
    fn language_server_command(
        &mut self,
        language_server_id: &LanguageServerId,
        worktree: &zed::Worktree,
    ) -> Result<zed::Command> {
        let binary = self.language_server_binary_path(language_server_id, worktree)?;
        Ok(zed::Command {
            command: binary,
            args: vec![String::from("lsp")],
            env: Default::default(),
        })
    }

    /// Produces a label for a completion by rendering it as
    /// `let a: <type> = <name>` (plus `()` for functions and constructors) and
    /// selecting the name, the colon, and the type as the displayed spans.
    ///
    /// Returns `None` when the completion has no detail or no kind.
    fn label_for_completion(
        &self,
        _language_server_id: &LanguageServerId,
        completion: zed::lsp::Completion,
    ) -> Option<zed::CodeLabel> {
        let name = &completion.label;
        let ty = strip_newlines_from_detail(&completion.detail?);

        let prefix = "let a";
        let separator = ": ";
        let equals = " = ";
        let call = if matches!(
            completion.kind?,
            CompletionKind::Function | CompletionKind::Constructor
        ) {
            "()"
        } else {
            ""
        };

        // Byte offsets of each piece inside the rendered code string.
        let separator_start = prefix.len();
        let ty_start = separator_start + separator.len();
        let name_start = ty_start + ty.len() + equals.len();

        let code = format!("{prefix}{separator}{ty}{equals}{name}{call}");
        let spans = vec![
            CodeLabelSpan::code_range(name_start..name_start + name.len()),
            CodeLabelSpan::code_range(separator_start..separator_start + separator.len()),
            CodeLabelSpan::code_range(ty_start..ty_start + ty.len()),
        ];

        Some(CodeLabel {
            spans,
            filter_range: (0..name.len()).into(),
            code,
        })
    }

    /// Runs a slash command. Only `gleam-project` is recognised; it reports that the
    /// worktree is a Gleam project and appends `gleam.toml` when it can be read.
    fn run_slash_command(
        &self,
        command: SlashCommand,
        _args: Vec<String>,
        worktree: Option<&zed::Worktree>,
    ) -> Result<SlashCommandOutput, String> {
        let command_name = command.name.as_str();
        if command_name != "gleam-project" {
            return Err(format!("unknown slash command: \"{command_name}\""));
        }

        let worktree = worktree.ok_or("no worktree")?;

        let mut text = String::from("You are in a Gleam project.\n");
        if let Ok(gleam_toml) = worktree.read_text_file("gleam.toml") {
            text += "The `gleam.toml` is as follows:\n";
            text += &gleam_toml;
        }

        // A single section spanning the whole output.
        let section = SlashCommandOutputSection {
            range: (0..text.len()).into(),
            label: "gleam-project".to_string(),
        };
        Ok(SlashCommandOutput {
            sections: vec![section],
            text,
        })
    }

    /// Suggests package names for a docs provider. For `gleam-hexdocs` this is the
    /// bundled `packages.txt` list (lines starting with `#` are skipped); any other
    /// provider gets an empty list.
    fn suggest_docs_packages(&self, provider: String) -> Result<Vec<String>, String> {
        // Parsed once on first use and cloned for each request.
        static GLEAM_PACKAGES: LazyLock<Vec<String>> = LazyLock::new(|| {
            let mut packages = Vec::new();
            for line in include_str!("../packages.txt").lines() {
                if line.starts_with('#') {
                    continue;
                }
                packages.push(line.trim().to_owned());
            }
            packages
        });

        if provider == "gleam-hexdocs" {
            Ok(GLEAM_PACKAGES.clone())
        } else {
            Ok(Vec::new())
        }
    }

    /// Indexes documentation for `package`. The `gleam-hexdocs` provider delegates to
    /// `hexdocs::index`; any other provider is a no-op.
    fn index_docs(
        &self,
        provider: String,
        package: String,
        database: &KeyValueStore,
    ) -> Result<(), String> {
        if provider == "gleam-hexdocs" {
            hexdocs::index(package, database)
        } else {
            Ok(())
        }
    }
}
// Passes `GleamExtension` to the `register_extension!` macro from `zed_extension_api`.
// NOTE(review): presumably this exports the type as the extension's entry point — confirm
// against the `zed_extension_api` documentation.
zed::register_extension!(GleamExtension);
/// Collapses a multi-line completion detail into a single line.
///
/// The Gleam LSP can return types that span several lines, which renders badly
/// in the Zed completions menu. Line breaks and their indentation are removed,
/// a trailing comma before a closing line is dropped, and every remaining comma
/// is followed by exactly one space.
fn strip_newlines_from_detail(detail: &str) -> String {
    let flattened = detail
        .replace("->\n ", "-> ")
        .replace("\n ", "")
        .replace(",\n", "");

    // Re-join the comma-separated pieces with uniform ", " spacing.
    let mut single_line = String::with_capacity(flattened.len());
    for (index, piece) in flattened.split(',').enumerate() {
        if index > 0 {
            single_line.push_str(", ");
        }
        single_line.push_str(piece.trim());
    }
    single_line
}
#[cfg(test)]
mod tests {
    use crate::strip_newlines_from_detail;

    /// Each case pairs a detail string as returned by the Gleam LSP with the
    /// single-line form expected in the completions menu.
    #[test]
    fn test_strip_newlines_from_detail() {
        let cases = [
            (
                "fn(\n Selector(a),\n b,\n fn(Dynamic, Dynamic, Dynamic, Dynamic, Dynamic, Dynamic, Dynamic) -> a,\n) -> Selector(a)",
                "fn(Selector(a), b, fn(Dynamic, Dynamic, Dynamic, Dynamic, Dynamic, Dynamic, Dynamic) -> a) -> Selector(a)",
            ),
            (
                "fn(Selector(a), b, fn(Dynamic, Dynamic, Dynamic, Dynamic, Dynamic, Dynamic) -> a) ->\n Selector(a)",
                "fn(Selector(a), b, fn(Dynamic, Dynamic, Dynamic, Dynamic, Dynamic, Dynamic) -> a) -> Selector(a)",
            ),
            (
                "fn(\n Method,\n List(#(String, String)),\n a,\n Scheme,\n String,\n Option(Int),\n String,\n Option(String),\n) -> Request(a)",
                "fn(Method, List(#(String, String)), a, Scheme, String, Option(Int), String, Option(String)) -> Request(a)",
            ),
        ];

        for (detail, expected) in cases {
            assert_eq!(strip_newlines_from_detail(detail), expected);
        }
    }
}

View file

@ -1,215 +0,0 @@
use std::cell::RefCell;
use std::collections::BTreeSet;
use std::io::{self, Read};
use std::rc::Rc;
use html_to_markdown::markdown::{
HeadingHandler, ListHandler, ParagraphHandler, StyledTextHandler, TableHandler,
};
use html_to_markdown::{
convert_html_to_markdown, HandleTag, HandlerOutcome, HtmlElement, MarkdownWriter,
StartTagOutcome, TagHandler,
};
use zed_extension_api::{
http_client::{HttpMethod, HttpRequest, RedirectPolicy},
KeyValueStore, Result,
};
/// Indexes the hexdocs documentation of `package` into `database`.
///
/// Fetches `https://hexdocs.pm/{package}`, stores its Markdown under the package
/// name, then fetches every module page discovered on it and stores each one
/// under the key `"{module} ({package})"`.
///
/// # Errors
///
/// Returns the first error from building or sending a request, converting a page
/// to Markdown, or inserting into the database.
pub fn index(package: String, database: &KeyValueStore) -> Result<()> {
    let user_agent = (
        "User-Agent".to_string(),
        "Zed (Gleam Extension)".to_string(),
    );
    let headers = vec![user_agent];

    // Package landing page: yields the root Markdown and the list of modules.
    let root_url = format!("https://hexdocs.pm/{package}");
    let root_response = HttpRequest::builder()
        .method(HttpMethod::Get)
        .url(root_url)
        .headers(headers.clone())
        .redirect_policy(RedirectPolicy::FollowAll)
        .build()?
        .fetch()?;
    let mut root_html = io::Cursor::new(&root_response.body);
    let (root_markdown, module_names) = convert_hexdocs_to_markdown(&mut root_html)?;
    database.insert(&package, &root_markdown)?;

    // One page per module; modules found on those pages are ignored.
    for module_name in module_names {
        let module_url = format!("https://hexdocs.pm/{package}/{module_name}.html");
        let module_response = HttpRequest::builder()
            .method(HttpMethod::Get)
            .url(module_url)
            .headers(headers.clone())
            .redirect_policy(RedirectPolicy::FollowAll)
            .build()?
            .fetch()?;
        let mut module_html = io::Cursor::new(&module_response.body);
        let (module_markdown, _nested_modules) = convert_hexdocs_to_markdown(&mut module_html)?;

        let key = format!("{module_name} ({package})");
        database.insert(&key, &module_markdown)?;
    }

    Ok(())
}
pub fn convert_hexdocs_to_markdown(html: impl Read) -> Result<(String, Vec<String>)> {
let module_collector = Rc::new(RefCell::new(GleamModuleCollector::new()));
let mut handlers: Vec<TagHandler> = vec![
module_collector.clone(),
Rc::new(RefCell::new(GleamChromeRemover)),
Rc::new(RefCell::new(NavSkipper::new(ParagraphHandler))),
Rc::new(RefCell::new(NavSkipper::new(HeadingHandler))),
Rc::new(RefCell::new(NavSkipper::new(ListHandler))),
Rc::new(RefCell::new(NavSkipper::new(TableHandler::new()))),
Rc::new(RefCell::new(NavSkipper::new(StyledTextHandler))),
];
let markdown = convert_html_to_markdown(html, &mut handlers)
.map_err(|err| format!("failed to convert docs to Markdown {err}"))?;
let modules = module_collector
.borrow()
.modules
.iter()
.cloned()
.collect::<Vec<_>>();
Ok((markdown, modules))
}
/// A higher-order handler that skips all content from the `nav`.
///
/// We still need to traverse the `nav` for collecting information, but
/// we don't want to include any of its content in the resulting Markdown.
///
/// Outside of a `nav`, every call is forwarded unchanged to the wrapped handler.
pub struct NavSkipper<T: HandleTag> {
/// The wrapped handler that receives all events occurring outside of a `nav`.
handler: T,
}
impl<T: HandleTag> NavSkipper<T> {
pub fn new(handler: T) -> Self {
Self { handler }
}
}
impl<T: HandleTag> HandleTag for NavSkipper<T> {
    /// Handles `nav` itself plus every tag the wrapped handler handles.
    fn should_handle(&self, tag: &str) -> bool {
        matches!(tag, "nav") || self.handler.should_handle(tag)
    }

    /// Inside a `nav`, continues traversal without involving the wrapped handler;
    /// otherwise forwards to it.
    fn handle_tag_start(
        &mut self,
        tag: &HtmlElement,
        writer: &mut MarkdownWriter,
    ) -> StartTagOutcome {
        if writer.is_inside("nav") {
            StartTagOutcome::Continue
        } else {
            self.handler.handle_tag_start(tag, writer)
        }
    }

    /// Forwards the end tag to the wrapped handler unless the writer is inside a `nav`.
    fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
        if !writer.is_inside("nav") {
            self.handler.handle_tag_end(tag, writer)
        }
    }

    /// Inside a `nav`, reports the text as handled without passing it to the wrapped
    /// handler; otherwise forwards to it.
    fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
        if writer.is_inside("nav") {
            HandlerOutcome::Handled
        } else {
            self.handler.handle_text(text, writer)
        }
    }
}
/// A tag handler that skips page chrome: `head`, `script`, `style`, `svg`, `header`
/// and `footer` elements, as well as `a` elements that carry an `onclick` attribute.
pub struct GleamChromeRemover;
impl HandleTag for GleamChromeRemover {
    /// Handles the chrome elements and `a`, which needs an attribute check.
    fn should_handle(&self, tag: &str) -> bool {
        ["head", "script", "style", "svg", "header", "footer", "a"].contains(&tag)
    }

    /// Skips chrome elements outright, skips links that have an `onclick`
    /// attribute, and lets everything else continue.
    fn handle_tag_start(
        &mut self,
        tag: &HtmlElement,
        _writer: &mut MarkdownWriter,
    ) -> StartTagOutcome {
        let should_skip = match tag.tag() {
            "head" | "script" | "style" | "svg" | "header" | "footer" => true,
            "a" => tag.attr("onclick").is_some(),
            _ => false,
        };

        if should_skip {
            StartTagOutcome::Skip
        } else {
            StartTagOutcome::Continue
        }
    }
}
/// A tag handler that records module names from the links that follow the
/// "Modules" heading in a page's `nav`.
pub struct GleamModuleCollector {
/// Module names parsed from link `href`s, kept sorted and de-duplicated by the set.
modules: BTreeSet<String>,
/// Set once text equal to "Modules" has been seen inside an `h2` within a `nav`;
/// links are only collected after that point.
has_seen_modules_header: bool,
}
impl GleamModuleCollector {
    /// Creates a collector with no modules and the "Modules" heading not yet seen.
    pub fn new() -> Self {
        let modules = BTreeSet::new();
        Self {
            modules,
            has_seen_modules_header: false,
        }
    }

    /// Extracts a module name from an `a` element's `href`.
    ///
    /// Returns `None` for non-`a` tags, for links without an `href`, and for hrefs
    /// that start with `#`, `https://` or `../`. Otherwise the leading `./` and the
    /// trailing `.html` are trimmed off.
    fn parse_module(tag: &HtmlElement) -> Option<String> {
        if tag.tag() != "a" {
            return None;
        }

        let href = tag.attr("href")?;
        let is_not_a_module_link = ["#", "https://", "../"]
            .iter()
            .any(|prefix| href.starts_with(*prefix));
        if is_not_a_module_link {
            return None;
        }

        let without_prefix = href.trim_start_matches("./");
        let module_name = without_prefix.trim_end_matches(".html");
        Some(module_name.to_owned())
    }
}
impl HandleTag for GleamModuleCollector {
    /// Watches `h2` (for the "Modules" heading) and `a` (for module links).
    fn should_handle(&self, tag: &str) -> bool {
        tag == "h2" || tag == "a"
    }

    /// Records the module behind an `a` inside an `li`, once the "Modules" heading
    /// has been seen. Traversal always continues.
    fn handle_tag_start(
        &mut self,
        tag: &HtmlElement,
        writer: &mut MarkdownWriter,
    ) -> StartTagOutcome {
        let is_module_list_link =
            tag.tag() == "a" && self.has_seen_modules_header && writer.is_inside("li");
        if is_module_list_link {
            // `extend` with an `Option` inserts the name only when one was parsed.
            self.modules.extend(Self::parse_module(tag));
        }

        StartTagOutcome::Continue
    }

    /// Marks the "Modules" heading as seen when that exact text appears inside an
    /// `h2` within a `nav`. Never consumes the text.
    fn handle_text(&mut self, text: &str, writer: &mut MarkdownWriter) -> HandlerOutcome {
        let is_modules_heading =
            writer.is_inside("nav") && writer.is_inside("h2") && text == "Modules";
        self.has_seen_modules_header |= is_modules_heading;

        HandlerOutcome::NoOp
    }
}