add embedding treesitter queries for elixir

This commit is contained in:
KCaverly 2023-07-17 16:29:25 -04:00
parent f0bf60fded
commit e630ff38c4
4 changed files with 222 additions and 1 deletions

13
Cargo.lock generated
View file

@ -7982,6 +7982,16 @@ dependencies = [
"tree-sitter",
]
[[package]]
name = "tree-sitter-elixir"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a9916f3e1c80b3c8aab8582604e97e8720cb9b893489b347cf999f80f9d469e"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-elixir"
version = "0.1.0"
@ -8519,6 +8529,7 @@ dependencies = [
"tiktoken-rs 0.5.0",
"tree-sitter",
"tree-sitter-cpp",
"tree-sitter-elixir 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"tree-sitter-rust",
"tree-sitter-toml 0.20.0",
"tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)",
@ -9558,7 +9569,7 @@ dependencies = [
"tree-sitter-c",
"tree-sitter-cpp",
"tree-sitter-css",
"tree-sitter-elixir",
"tree-sitter-elixir 0.1.0 (git+https://github.com/elixir-lang/tree-sitter-elixir?rev=4ba9dab6e2602960d95b2b625f3386c27e08084e)",
"tree-sitter-embedded-template",
"tree-sitter-go",
"tree-sitter-heex",

View file

@ -55,3 +55,4 @@ tree-sitter-typescript = "*"
tree-sitter-rust = "*"
tree-sitter-toml = "*"
tree-sitter-cpp = "*"
tree-sitter-elixir = "*"

View file

@ -342,6 +342,143 @@ async fn test_code_context_retrieval_javascript() {
}
}
// Checks that `CodeContextRetriever::parse_file` turns an Elixir snippet into the
// expected `Document`s: one for the enclosing `defmodule` and one for the
// `@doc`-annotated `def` inside it.
#[gpui::test]
async fn test_code_context_retrieval_elixir() {
let language = elixir_lang();
let mut retriever = CodeContextRetriever::new();
// Fixture source. NOTE(review): the snippet opens four `do` blocks but contains only
// three `end`s, so the `defmodule` is never closed; both expected ranges below end at
// the same offset (1132), which is consistent with that — confirm this is intentional.
let text = r#"
defmodule File.Stream do
@moduledoc """
Defines a `File.Stream` struct returned by `File.stream!/3`.
The following fields are public:
* `path` - the file path
* `modes` - the file modes
* `raw` - a boolean indicating if bin functions should be used
* `line_or_bytes` - if reading should read lines or a given number of bytes
* `node` - the node the file belongs to
"""
defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
@type t :: %__MODULE__{}
@doc false
def __build__(path, modes, line_or_bytes) do
raw = :lists.keyfind(:encoding, 1, modes) == false
modes =
case raw do
true ->
case :lists.keyfind(:read_ahead, 1, modes) do
{:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
{:read_ahead, _} -> [:raw | modes]
false -> [:raw, :read_ahead | modes]
end
false ->
modes
end
%File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
end
"#
.unindent();
// The path is only passed through to the retriever; nothing is read from disk here.
let parsed_files = retriever
.parse_file(Path::new("foo.ex"), &text, language)
.unwrap();
// Expected output, in the order the retriever is expected to return it.
// `range` is presumably a byte range into `text` — TODO confirm against `Document`.
// `embedding` is left empty because this test does not compute embeddings.
let test_documents = &[
Document{
name: "defmodule File.Stream".into(),
range: 0..1132,
content: r#"
The below code snippet is from file 'foo.ex'
```elixir
defmodule File.Stream do
@moduledoc """
Defines a `File.Stream` struct returned by `File.stream!/3`.
The following fields are public:
* `path` - the file path
* `modes` - the file modes
* `raw` - a boolean indicating if bin functions should be used
* `line_or_bytes` - if reading should read lines or a given number of bytes
* `node` - the node the file belongs to
"""
defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
@type t :: %__MODULE__{}
@doc false
def __build__(path, modes, line_or_bytes) do
raw = :lists.keyfind(:encoding, 1, modes) == false
modes =
case raw do
true ->
case :lists.keyfind(:read_ahead, 1, modes) do
{:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
{:read_ahead, _} -> [:raw | modes]
false -> [:raw, :read_ahead | modes]
end
false ->
modes
end
%File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
end
```"#.unindent(),
embedding: vec![],
},
// The function document starts at the `@doc false` line, not at `def`.
Document {
name: "def __build__".into(),
range: 574..1132,
content: r#"
The below code snippet is from file 'foo.ex'
```elixir
@doc false
def __build__(path, modes, line_or_bytes) do
raw = :lists.keyfind(:encoding, 1, modes) == false
modes =
case raw do
true ->
case :lists.keyfind(:read_ahead, 1, modes) do
{:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
{:read_ahead, _} -> [:raw | modes]
false -> [:raw, :read_ahead | modes]
end
false ->
modes
end
%File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
end
```"#
.unindent(),
embedding: vec![],
}];
// NOTE(review): only the first `test_documents.len()` entries of `parsed_files` are
// compared. Additional parsed documents would go unnoticed, and fewer would fail on
// the index rather than with a descriptive assertion — consider asserting the lengths.
for idx in 0..test_documents.len() {
assert_eq!(test_documents[idx], parsed_files[idx]);
}
}
#[gpui::test]
async fn test_code_context_retrieval_cpp() {
let language = cpp_lang();
@ -850,3 +987,48 @@ fn cpp_lang() -> Arc<Language> {
.unwrap(),
)
}
/// Builds the Elixir [`Language`] fixture used by the code-context retrieval tests.
///
/// The language is backed by the `tree_sitter_elixir` grammar and carries an
/// embedding query with two patterns:
///
/// * an `@doc` attribute immediately followed by a `def`-style call
///   (`def`, `defp`, `defmacro`, ...), and
/// * a `defmodule` / `defprotocol` call.
///
/// # Panics
///
/// Panics if the embedding query is rejected by `with_embedding_query`.
fn elixir_lang() -> Arc<Language> {
    Arc::new(
        Language::new(
            LanguageConfig {
                name: "Elixir".into(),
                // Elixir sources use `.ex` (compiled) and `.exs` (script) suffixes.
                // The previous value, "rs", was carried over from the Rust fixture.
                path_suffixes: vec!["ex".into(), "exs".into()],
                ..Default::default()
            },
            Some(tree_sitter_elixir::language()),
        )
        .with_embedding_query(
            r#"
(
(unary_operator
operator: "@"
operand: (call
target: (identifier) @unary
(#match? @unary "^(doc)$"))
) @context
.
(call
target: (identifier) @name
(arguments
[
(identifier) @name
(call
target: (identifier) @name)
(binary_operator
left: (call
target: (identifier) @name)
operator: "when")
])
(#match? @name "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @item
)
(call
target: (identifier) @name
(arguments (alias) @name)
(#match? @name "^(defmodule|defprotocol)$")) @item
"#,
        )
        .unwrap(),
    )
}

View file

@ -0,0 +1,27 @@
; Embedding query for Elixir. It contains two top-level patterns.
;
; Capture roles (`@context`, `@name`, `@item`) are interpreted by the code that
; consumes this query, which is not part of this file — the descriptions below
; cover only what the patterns themselves match.

; Pattern 1: a `@doc ...` attribute immediately followed by a definition call.
; The outer parentheses group the two nodes as siblings, and the `.` anchor
; requires them to be adjacent.
(
; The `@` unary operator applied to a call whose target identifier is exactly `doc`.
(unary_operator
operator: "@"
operand: (call
target: (identifier) @unary
(#match? @unary "^(doc)$"))
) @context
.
; The definition itself: a call such as `def name(...)`.
(call
target: (identifier) @name
(arguments
; The first-argument shapes accepted for the defined name:
;   - a bare identifier,
;   - a call with an identifier target, e.g. `name(args)`,
;   - a `when` guard whose left side is such a call.
[
(identifier) @name
(call
target: (identifier) @name)
(binary_operator
left: (call
target: (identifier) @name)
operator: "when")
])
; NOTE(review): `@name` is captured several times in this pattern; how `#match?`
; applies across same-named captures depends on the query engine — confirm it
; constrains the call target (the `def`-style keyword) as intended.
(#match? @name "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @item
)
; Pattern 2: a `defmodule` / `defprotocol` call whose argument is an alias
; (e.g. `File.Stream`). No preceding `@doc` attribute is required here.
(call
target: (identifier) @name
(arguments (alias) @name)
(#match? @name "^(defmodule|defprotocol)$")) @item