add eval values for tree-sitter

KCaverly 2023-09-12 20:36:06 -04:00
parent 66c967da88
commit 0d14bbbf5b
2 changed files with 110 additions and 27 deletions


@@ -1,31 +1,20 @@
-WIP: Sample SQL Queries
-/*
-create table "files" (
-    "id" INTEGER PRIMARY KEY,
-    "path" VARCHAR,
-    "sha1" VARCHAR,
-);
-
-create table symbols (
-    "file_id" INTEGER REFERENCES("files", "id") ON CASCADE DELETE,
-    "offset" INTEGER,
-    "embedding" VECTOR,
-);
-
-insert into "files" ("path", "sha1") values ("src/main.rs", "sha1") return id;
-
-insert into symbols (
-    "file_id",
-    "start",
-    "end",
-    "embedding"
-) values (
-    (id,),
-    (id,),
-    (id,),
-    (id,),
-)
-*/
+# Semantic Index
+
+## Evaluation
+
+### Metrics
+
+nDCG@k:
+- "The value of NDCG is determined by comparing the relevance of the items returned by the search engine to the relevance of the items that a hypothetical 'ideal' search engine would return."
+- "The relevance of a result is represented by a score (also known as a 'grade') that is assigned to the search query. The scores of these results are then discounted based on their position in the search results -- did they get recommended first or last?"
+
+MRR@k:
+- "Mean reciprocal rank quantifies the rank of the first relevant item found in the recommendation list."
+
+MAP@k:
+- "Mean average precision averages the precision@k metric at each relevant item position in the recommendation list."
+
+Resources:
+- [Evaluating recommendation metrics](https://www.shaped.ai/blog/evaluating-recommendation-systems-map-mmr-ndcg)
+- [Math Walkthrough](https://towardsdatascience.com/demystifying-ndcg-bee3be58cfe0)
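
For concreteness, here is a minimal Rust sketch of the three metrics quoted in the added doc, assuming binary relevance labels (1.0 = relevant, 0.0 = not) given in ranked order for a single query. The function names and the example in `main` are illustrative only and are not taken from this commit or the codebase.

```rust
/// Discounted cumulative gain over the top-k results: each relevance
/// score is discounted by log2 of its 1-based position plus one.
fn dcg_at_k(relevance: &[f64], k: usize) -> f64 {
    relevance
        .iter()
        .take(k)
        .enumerate()
        .map(|(i, &rel)| rel / (i as f64 + 2.0).log2())
        .sum()
}

/// nDCG@k: DCG of the actual ranking divided by the DCG of the ideal
/// ranking (the same results sorted by descending relevance).
fn ndcg_at_k(relevance: &[f64], k: usize) -> f64 {
    let mut ideal = relevance.to_vec();
    ideal.sort_by(|a, b| b.partial_cmp(a).unwrap());
    let ideal_dcg = dcg_at_k(&ideal, k);
    if ideal_dcg == 0.0 {
        0.0
    } else {
        dcg_at_k(relevance, k) / ideal_dcg
    }
}

/// MRR@k: reciprocal rank of the first relevant item in the top k,
/// or 0.0 if no relevant item appears.
fn mrr_at_k(relevance: &[f64], k: usize) -> f64 {
    relevance
        .iter()
        .take(k)
        .position(|&rel| rel > 0.0)
        .map_or(0.0, |i| 1.0 / (i as f64 + 1.0))
}

/// MAP@k (single query): average of precision@i taken at each position
/// i <= k that holds a relevant item; the mean over queries gives MAP.
fn map_at_k(relevance: &[f64], k: usize) -> f64 {
    let mut hits = 0.0;
    let mut sum = 0.0;
    for (i, &rel) in relevance.iter().take(k).enumerate() {
        if rel > 0.0 {
            hits += 1.0;
            sum += hits / (i as f64 + 1.0); // precision at this position
        }
    }
    if hits == 0.0 { 0.0 } else { sum / hits }
}

fn main() {
    // Relevance of the top 5 retrieved results, in ranked order.
    let relevance = [1.0, 0.0, 1.0, 0.0, 1.0];
    println!("nDCG@5 = {:.3}", ndcg_at_k(&relevance, 5)); // ~0.885
    println!("MRR@5  = {:.3}", mrr_at_k(&relevance, 5));  // 1.000
    println!("MAP@5  = {:.3}", map_at_k(&relevance, 5));  // ~0.756
}
```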