From 0d14bbbf5b14ae4045cea65a68e3d6341f48f79c Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 12 Sep 2023 20:36:06 -0400 Subject: [PATCH] add eval values for tree-sitter --- crates/semantic_index/README.md | 39 +++----- crates/semantic_index/eval/tree-sitter.json | 98 ++++++++++++++++++++- 2 files changed, 110 insertions(+), 27 deletions(-) diff --git a/crates/semantic_index/README.md b/crates/semantic_index/README.md index 86e68dc414..85f83af121 100644 --- a/crates/semantic_index/README.md +++ b/crates/semantic_index/README.md @@ -1,31 +1,20 @@ -WIP: Sample SQL Queries -/* +# Semantic Index -create table "files" ( -"id" INTEGER PRIMARY KEY, -"path" VARCHAR, -"sha1" VARCHAR, -); +## Evaluation -create table symbols ( -"file_id" INTEGER REFERENCES("files", "id") ON CASCADE DELETE, -"offset" INTEGER, -"embedding" VECTOR, -); +### Metrics -insert into "files" ("path", "sha1") values ("src/main.rs", "sha1") return id; -insert into symbols ( -"file_id", -"start", -"end", -"embedding" -) values ( -(id,), -(id,), -(id,), -(id,), -) +nDCG@k: +- "The value of NDCG is determined by comparing the relevance of the items returned by the search engine to the relevance of the item that a hypothetical 'ideal' search engine would return." +- "The relevance of result is represented by a score (also known as a 'grade') that is assigned to the search query. The scores of these results are then discounted based on their position in the search results -- did they get recommended first or last?" +MRR@k: +- "Mean reciprocal rank quantifies the rank of the first relevant item found in the recommendation list." -*/ +MAP@k: +- "Mean average precision averages the precision@k metric at each relevant item position in the recommendation list."
+ +Resources: +- [Evaluating recommendation metrics](https://www.shaped.ai/blog/evaluating-recommendation-systems-map-mmr-ndcg) +- [Math Walkthrough](https://towardsdatascience.com/demystifying-ndcg-bee3be58cfe0) diff --git a/crates/semantic_index/eval/tree-sitter.json b/crates/semantic_index/eval/tree-sitter.json index a469543cf4..4f2edfb063 100644 --- a/crates/semantic_index/eval/tree-sitter.json +++ b/crates/semantic_index/eval/tree-sitter.json @@ -3,8 +3,102 @@ "commit": "46af27796a76c72d8466627d499f2bca4af958ee", "assertions": [ { - "query": "", - "matches": [] + "query": "What attributes are available for the tags configuration struct?", + "matches": [ + "tags/src/lib.rs:24" + ] + }, + { + "query": "create a new tag configuration", + "matches": [ + "tags/src/lib.rs:119" + ] + }, + { + "query": "generate tags based on config", + "matches": [ + "tags/src/lib.rs:261" + ] + }, + { + "query": "match on ts quantifier in rust", + "matches": [ + "lib/binding_rust/lib.rs:139" + ] + }, + { + "query": "cli command to generate tags", + "matches": [ + "cli/src/tags.rs:10" + ] + }, + { + "query": "what version of the tree-sitter-tags package is active?", + "matches": [ + "tags/Cargo.toml:4" + ] + }, + { + "query": "Insert a new parse state", + "matches": [ + "cli/src/generate/build_tables/build_parse_table.rs:153" + ] + }, + { + "query": "Handle conflict when numerous actions occur on the same symbol", + "matches": [ + "cli/src/generate/build_tables/build_parse_table.rs:363", + "cli/src/generate/build_tables/build_parse_table.rs:442" + ] + }, + { + "query": "Match based on associativity of actions", + "matches": [ + "cli/src/generate/build_tables/build_parse_table.rs:542" + ] + }, + { + "query": "Format token set display", + "matches": [ + "cli/src/generate/build_tables/item.rs:246" + ] + }, + { + "query": "extract choices from rule", + "matches": [ + "cli/src/generate/prepare_grammar/flatten_grammar.rs:124" + ] + }, + { + "query": "How do we identify if a symbol is 
being used?", + "matches": [ + "cli/src/generate/prepare_grammar/flatten_grammar.rs:175" + ] + }, + { + "query": "How do we launch the playground?", + "matches": [ + "cli/src/playground.rs:46" + ] + }, + { + "query": "How do we test treesitter query matches in rust?", + "matches": [ + "cli/src/query_testing.rs:152", + "cli/src/tests/query_test.rs:781", + "cli/src/tests/query_test.rs:2163", + "cli/src/tests/query_test.rs:3781", + "cli/src/tests/query_test.rs:887" + ] + }, + { + "query": "What does the CLI do?", + "matches": [ + "cli/README.md:10", + "cli/loader/README.md:3", + "docs/section-5-implementation.md:14", + "docs/section-5-implementation.md:18" + ] } ] }