From e630ff38c4f4099e4e9c8d926c6a75c3e364fc58 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Mon, 17 Jul 2023 16:29:25 -0400 Subject: [PATCH] add embedding treesitter queries for elixir --- Cargo.lock | 13 +- crates/vector_store/Cargo.toml | 1 + crates/vector_store/src/vector_store_tests.rs | 182 ++++++++++++++++++ crates/zed/src/languages/elixir/embedding.scm | 27 +++ 4 files changed, 222 insertions(+), 1 deletion(-) create mode 100644 crates/zed/src/languages/elixir/embedding.scm diff --git a/Cargo.lock b/Cargo.lock index 28a0e76d14..8fcca507d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7982,6 +7982,16 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "tree-sitter-elixir" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a9916f3e1c80b3c8aab8582604e97e8720cb9b893489b347cf999f80f9d469e" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "tree-sitter-elixir" version = "0.1.0" @@ -8519,6 +8529,7 @@ dependencies = [ "tiktoken-rs 0.5.0", "tree-sitter", "tree-sitter-cpp", + "tree-sitter-elixir 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter-rust", "tree-sitter-toml 0.20.0", "tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -9558,7 +9569,7 @@ dependencies = [ "tree-sitter-c", "tree-sitter-cpp", "tree-sitter-css", - "tree-sitter-elixir", + "tree-sitter-elixir 0.1.0 (git+https://github.com/elixir-lang/tree-sitter-elixir?rev=4ba9dab6e2602960d95b2b625f3386c27e08084e)", "tree-sitter-embedded-template", "tree-sitter-go", "tree-sitter-heex", diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml index 0009665e26..6808f6c630 100644 --- a/crates/vector_store/Cargo.toml +++ b/crates/vector_store/Cargo.toml @@ -55,3 +55,4 @@ tree-sitter-typescript = "*" tree-sitter-rust = "*" tree-sitter-toml = "*" tree-sitter-cpp = "*" +tree-sitter-elixir = "*" diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs index 3a9e1748c5..d55dfcfc71 100644 --- a/crates/vector_store/src/vector_store_tests.rs +++ b/crates/vector_store/src/vector_store_tests.rs @@ -342,6 +342,143 @@ async fn test_code_context_retrieval_javascript() { } } +#[gpui::test] +async fn test_code_context_retrieval_elixir() { + let language = elixir_lang(); + let mut retriever = CodeContextRetriever::new(); + + let text = r#" +defmodule File.Stream do + @moduledoc """ + Defines a `File.Stream` struct returned by `File.stream!/3`. + + The following fields are public: + + * `path` - the file path + * `modes` - the file modes + * `raw` - a boolean indicating if bin functions should be used + * `line_or_bytes` - if reading should read lines or a given number of bytes + * `node` - the node the file belongs to + + """ + + defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil + + @type t :: %__MODULE__{} + + @doc false + def __build__(path, modes, line_or_bytes) do + raw = :lists.keyfind(:encoding, 1, modes) == false + + modes = + case raw do + true -> + case :lists.keyfind(:read_ahead, 1, modes) do + {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)] + {:read_ahead, _} -> [:raw | modes] + false -> [:raw, :read_ahead | modes] + end + + false -> + modes + end + + %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()} + + end +"# + .unindent(); + + let parsed_files = retriever + .parse_file(Path::new("foo.ex"), &text, language) + .unwrap(); + + let test_documents = &[ + Document{ + name: "defmodule File.Stream".into(), + range: 0..1132, + content: r#" + The below code snippet is from file 'foo.ex' + + ```elixir + defmodule File.Stream do + @moduledoc """ + Defines a `File.Stream` struct returned by `File.stream!/3`. + + The following fields are public: + + * `path` - the file path + * `modes` - the file modes + * `raw` - a boolean indicating if bin functions should be used + * `line_or_bytes` - if reading should read lines or a given number of bytes + * `node` - the node the file belongs to + + """ + + defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil + + @type t :: %__MODULE__{} + + @doc false + def __build__(path, modes, line_or_bytes) do + raw = :lists.keyfind(:encoding, 1, modes) == false + + modes = + case raw do + true -> + case :lists.keyfind(:read_ahead, 1, modes) do + {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)] + {:read_ahead, _} -> [:raw | modes] + false -> [:raw, :read_ahead | modes] + end + + false -> + modes + end + + %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()} + + end + ```"#.unindent(), + embedding: vec![], + }, + Document { + name: "def __build__".into(), + range: 574..1132, + content: r#" +The below code snippet is from file 'foo.ex' + +```elixir +@doc false +def __build__(path, modes, line_or_bytes) do + raw = :lists.keyfind(:encoding, 1, modes) == false + + modes = + case raw do + true -> + case :lists.keyfind(:read_ahead, 1, modes) do + {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)] + {:read_ahead, _} -> [:raw | modes] + false -> [:raw, :read_ahead | modes] + end + + false -> + modes + end + + %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()} + + end +```"# + .unindent(), + embedding: vec![], + }]; + + for idx in 0..test_documents.len() { + assert_eq!(test_documents[idx], parsed_files[idx]); + } +} + #[gpui::test] async fn test_code_context_retrieval_cpp() { let language = cpp_lang(); @@ -850,3 +987,48 @@ fn cpp_lang() -> Arc { .unwrap(), ) } + +fn elixir_lang() -> Arc { + Arc::new( + Language::new( + LanguageConfig { + name: "Elixir".into(), + path_suffixes: vec!["rs".into()], + ..Default::default() + }, + Some(tree_sitter_elixir::language()), + ) + .with_embedding_query( + r#" + ( + (unary_operator + operator: "@" + operand: (call + target: (identifier) @unary + (#match? @unary "^(doc)$")) + ) @context + . + (call + target: (identifier) @name + (arguments + [ + (identifier) @name + (call + target: (identifier) @name) + (binary_operator + left: (call + target: (identifier) @name) + operator: "when") + ]) + (#match? @name "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @item + ) + + (call + target: (identifier) @name + (arguments (alias) @name) + (#match? @name "^(defmodule|defprotocol)$")) @item + "#, + ) + .unwrap(), + ) +} diff --git a/crates/zed/src/languages/elixir/embedding.scm b/crates/zed/src/languages/elixir/embedding.scm new file mode 100644 index 0000000000..16ad20746d --- /dev/null +++ b/crates/zed/src/languages/elixir/embedding.scm @@ -0,0 +1,27 @@ +( + (unary_operator + operator: "@" + operand: (call + target: (identifier) @unary + (#match? @unary "^(doc)$")) + ) @context + . + (call + target: (identifier) @name + (arguments + [ + (identifier) @name + (call + target: (identifier) @name) + (binary_operator + left: (call + target: (identifier) @name) + operator: "when") + ]) + (#match? @name "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @item + ) + + (call + target: (identifier) @name + (arguments (alias) @name) + (#match? @name "^(defmodule|defprotocol)$")) @item