From 66c967da8837b572b6f07c1965d020a0779f439f Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Tue, 12 Sep 2023 16:25:31 -0400
Subject: [PATCH 01/14] start work on eval script for semantic_index

---
 Cargo.lock                                  | 19 ++++
 crates/semantic_index/Cargo.toml            |  4 +
 crates/semantic_index/eval/tree-sitter.json | 10 +++
 crates/semantic_index/examples/eval.rs      | 97 +++++++++++++++++++++
 script/evaluate_semantic_index              |  3 +
 5 files changed, 133 insertions(+)
 create mode 100644 crates/semantic_index/eval/tree-sitter.json
 create mode 100644 crates/semantic_index/examples/eval.rs
 create mode 100755 script/evaluate_semantic_index
diff --git a/Cargo.lock b/Cargo.lock
index 775e1d2b8e..a66391ed07 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3061,6 +3061,8 @@ dependencies = [
  "libc",
  "libgit2-sys",
  "log",
+ "openssl-probe",
+ "openssl-sys",
  "url",
 ]
 
@@ -4015,7 +4017,9 @@ checksum = "7f3d95f6b51075fe9810a7ae22c7095f12b98005ab364d8544797a825ce946a4"
 dependencies = [
  "cc",
  "libc",
+ "libssh2-sys",
  "libz-sys",
+ "openssl-sys",
  "pkg-config",
 ]
 
@@ -4056,6 +4060,20 @@ dependencies = [
  "vcpkg",
 ]
 
+[[package]]
+name = "libssh2-sys"
+version = "0.2.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b094a36eb4b8b8c8a7b4b8ae43b2944502be3e59cd87687595cf6b0a71b3f4ca"
+dependencies = [
+ "cc",
+ "libc",
+ "libz-sys",
+ "openssl-sys",
+ "pkg-config",
+ "vcpkg",
+]
+
 [[package]]
 name = "libz-sys"
 version = "1.1.12"
@@ -6731,6 +6749,7 @@ dependencies = [
  "editor",
  "env_logger 0.9.3",
  "futures 0.3.28",
+ "git2",
  "globset",
  "gpui",
  "isahc",
diff --git a/crates/semantic_index/Cargo.toml b/crates/semantic_index/Cargo.toml
index 72a36efd50..b5537dd2fa 100644
--- a/crates/semantic_index/Cargo.toml
+++ b/crates/semantic_index/Cargo.toml
@@ -50,6 +50,7 @@ project = { path = "../project", features = ["test-support"] }
 rpc = { path = "../rpc", features = ["test-support"] }
 workspace = { path = "../workspace", features = ["test-support"] }
 settings = { path = "../settings", features = ["test-support"]}
+git2 = { version = "0.15"}
 
 pretty_assertions.workspace = true
 rand.workspace = true
@@ -67,3 +68,6 @@ tree-sitter-elixir.workspace = true
 tree-sitter-lua.workspace = true
 tree-sitter-ruby.workspace = true
 tree-sitter-php.workspace = true
+
+[[example]]
+name = "eval"
diff --git a/crates/semantic_index/eval/tree-sitter.json b/crates/semantic_index/eval/tree-sitter.json
new file mode 100644
index 0000000000..a469543cf4
--- /dev/null
+++ b/crates/semantic_index/eval/tree-sitter.json
@@ -0,0 +1,10 @@
+{
+  "repo": "https://github.com/tree-sitter/tree-sitter.git",
+  "commit": "46af27796a76c72d8466627d499f2bca4af958ee",
+  "assertions": [
+    {
+      "query": "",
+      "matches": []
+    }
+  ]
+}
diff --git a/crates/semantic_index/examples/eval.rs b/crates/semantic_index/examples/eval.rs
new file mode 100644
index 0000000000..c3950757ce
--- /dev/null
+++ b/crates/semantic_index/examples/eval.rs
@@ -0,0 +1,97 @@
+use git2::{Object, Oid, Repository};
+use serde::Deserialize;
+use std::path::{Path, PathBuf};
+use std::{env, fs};
+
+#[derive(Deserialize, Clone)]
+struct QueryMatches {
+    query: String,
+    matches: Vec<String>,
+}
+
+#[derive(Deserialize, Clone)]
+struct RepoEval {
+    repo: String,
+    commit: String,
+    assertions: Vec<QueryMatches>,
+}
+
+const TMP_REPO_PATH: &str = "./target/eval_repos";
+
+fn parse_eval() -> anyhow::Result<Vec<RepoEval>> {
+    let eval_folder = env::current_dir()?
+        .as_path()
+        .parent()
+        .unwrap()
+        .join("crates/semantic_index/eval");
+
+    let mut repo_evals: Vec<RepoEval> = Vec::new();
+    for entry in fs::read_dir(eval_folder)? {
+        let file_path = entry.unwrap().path();
+        if let Some(extension) = file_path.extension() {
+            if extension == "json" {
+                if let Ok(file) = fs::read_to_string(file_path) {
+                    let repo_eval = serde_json::from_str(file.as_str());
+
+                    match repo_eval {
+                        Ok(repo_eval) => {
+                            repo_evals.push(repo_eval);
+                        }
+                        Err(err) => {
+                            println!("Err: {:?}", err);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    Ok(repo_evals)
+}
+
+fn clone_repo(repo_eval: RepoEval) -> anyhow::Result<PathBuf> {
+    let repo_name = Path::new(repo_eval.repo.as_str())
+        .file_name()
+        .unwrap()
+        .to_str()
+        .unwrap()
+        .to_owned()
+        .replace(".git", "");
+    let clone_path = Path::new(TMP_REPO_PATH).join(&repo_name).to_path_buf();
+
+    // Delete Clone Path if already exists
+    let _ = fs::remove_dir_all(&clone_path);
+
+    // Clone in Repo
+    git2::build::RepoBuilder::new()
+        // .branch(repo_eval.sha.as_str())
+        .clone(repo_eval.repo.as_str(), clone_path.as_path())?;
+
+    let repo: Repository = Repository::open(clone_path.clone())?;
+    let obj: Object = repo
+        .find_commit(Oid::from_str(repo_eval.commit.as_str())?)?
+        .into_object();
+    repo.checkout_tree(&obj, None)?;
+    repo.set_head_detached(obj.id())?;
+
+    Ok(clone_path)
+}
+
+fn main() {
+    if let Ok(repo_evals) = parse_eval() {
+        for repo in repo_evals {
+            let cloned = clone_repo(repo.clone());
+            match cloned {
+                Ok(clone_path) => {
+                    println!(
+                        "Cloned {:?} @ {:?} into {:?}",
+                        repo.repo, repo.commit, clone_path
+                    );
+                }
+                Err(err) => {
+                    println!("Error Cloning: {:?}", err);
+                }
+            }
+        }
+    }
+}
diff --git a/script/evaluate_semantic_index b/script/evaluate_semantic_index
new file mode 100755
index 0000000000..e9a96a02b4
--- /dev/null
+++ b/script/evaluate_semantic_index
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+cargo run -p semantic_index --example eval

From 0d14bbbf5b14ae4045cea65a68e3d6341f48f79c Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Tue, 12 Sep 2023 20:36:06 -0400
Subject: [PATCH 02/14] add eval values for tree-sitter

---
 crates/semantic_index/README.md             | 39 +++-----
 crates/semantic_index/eval/tree-sitter.json | 98 ++++++++++++++++++++-
 2 files changed, 110 insertions(+), 27 deletions(-)

diff --git a/crates/semantic_index/README.md b/crates/semantic_index/README.md
index 86e68dc414..85f83af121 100644
--- a/crates/semantic_index/README.md
+++ b/crates/semantic_index/README.md
@@ -1,31 +1,20 @@
 
-WIP: Sample SQL Queries
-/*
+# Semantic Index
 
-create table "files" (
-"id" INTEGER PRIMARY KEY,
-"path" VARCHAR,
-"sha1" VARCHAR,
-);
+## Evaluation
 
-create table symbols (
-"file_id" INTEGER REFERENCES("files", "id") ON CASCADE DELETE,
-"offset" INTEGER,
-"embedding" VECTOR,
-);
+### Metrics
 
-insert into "files" ("path", "sha1") values ("src/main.rs", "sha1") return id;
-insert into symbols (
-"file_id",
-"start",
-"end",
-"embedding"
-) values (
-(id,),
-(id,),
-(id,),
-(id,),
-)
+nDCG@k:
+- "The value of NDCG is determined by comparing the relevance of the items returned by the search engine to the relevance of the items that a hypothetical 'ideal' search engine would return."
+- "The relevance of result is represented by a score (also known as a 'grade') that is assigned to the search query. The scores of these results are then discounted based on their position in the search results -- did they get recommended first or last?"
 
+MRR@k:
+- "Mean reciprocal rank quantifies the rank of the first relevant item found in the recommendation list."
 
-*/
+MAP@k:
+- "Mean average precision averages the precision@k metric at each relevant item position in the recommendation list."
+
+Resources:
+- [Evaluating recommendation metrics](https://www.shaped.ai/blog/evaluating-recommendation-systems-map-mmr-ndcg)
+- [Math Walkthrough](https://towardsdatascience.com/demystifying-ndcg-bee3be58cfe0)
diff --git a/crates/semantic_index/eval/tree-sitter.json b/crates/semantic_index/eval/tree-sitter.json
index a469543cf4..4f2edfb063 100644
--- a/crates/semantic_index/eval/tree-sitter.json
+++ b/crates/semantic_index/eval/tree-sitter.json
@@ -3,8 +3,102 @@
   "commit": "46af27796a76c72d8466627d499f2bca4af958ee",
   "assertions": [
     {
-      "query": "",
-      "matches": []
+      "query": "What attributes are available for the tags configuration struct?",
+      "matches": [
+        "tags/src/lib.rs:24"
+      ]
+    },
+    {
+      "query": "create a new tag configuration",
+      "matches": [
+        "tags/src/lib.rs:119"
+      ]
+    },
+    {
+      "query": "generate tags based on config",
+      "matches": [
+        "tags/src/lib.rs:261",
+      ]
+    },
+    {
+      "query": "match on ts quantifier in rust",
+      "matches": [
+        "lib/binding_rust/lib.rs:139"
+      ]
+    },
+    {
+      "query": "cli command to generate tags",
+      "matches": [
+        "cli/src/tags.rs:10"
+      ]
+    },
+    {
+      "query": "what version of the tree-sitter-tags package is active?",
+      "matches": [
+        "tags/Cargo.toml:4"
+      ]
+    },
+    {
+      "query": "Insert a new parse state",
+      "matches": [
+        "cli/src/generate/build_tables/build_parse_table.rs:153"
+      ]
+    },
+    {
+      "query": "Handle conflict when numerous actions occur on the same symbol",
+      "matches": [
+        "cli/src/generate/build_tables/build_parse_table.rs:363",
+        "cli/src/generate/build_tables/build_parse_table.rs:442",
+      ]
+    },
+    {
+      "query": "Match based on associativity of actions",
+      "matches": [
+        "cri/src/generate/build_tables/build_parse_table.rs:542",
+      ]
+    },
+    {
+      "query": "Format token set display",
+      "matches": [
+        "cli/src/generate/build_tables/item.rs:246",
+      ]
+    },
+    {
+      "query": "extract choices from rule",
+      "matches": [
+        "cli/src/generate/prepare_grammar/flatten_grammar.rs:124"
+      ]
+    },
+    {
+      "query": "How do we identify if a symbol is being used?",
+      "matches": [
+        "cli/src/generate/prepare_grammar/flatten_grammar.rs:175"
+      ]
+    },
+    {
+      "query": "How do we launch the playground?",
+      "matches": [
+        "cli/src/playground.rs:46"
+      ]
+    },
+    {
+      "query": "How do we test treesitter query matches in rust?",
+      "matches": [
+        "cli/src/query_testing.rs:152",
+        "cli/src/tests/query_test.rs:781",
+        "cli/src/tests/query_test.rs:2163",
+        "cli/src/tests/query_test.rs:3781",
+        "cli/src/tests/query_test.rs:887"
+      ]
+    },
+    {
+      "query": "What does the CLI do?",
+      "matches": [
+        "cli/README.md:10",
+        "cli/loader/README.md:3",
+        "docs/section-5-implementation.md:14",
+        "docs/section-5-implementation.md:18"
+      ]
     }
   ]
 }

From d4fbe990520fd079dc99e8120a0d08ff1076ef69 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Tue, 12 Sep 2023 21:27:35 -0400
Subject: [PATCH 03/14] add eval for gpt-engineer

---
 crates/semantic_index/eval/gpt-engineer.json | 114 +++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 crates/semantic_index/eval/gpt-engineer.json

diff --git a/crates/semantic_index/eval/gpt-engineer.json b/crates/semantic_index/eval/gpt-engineer.json
new file mode 100644
index 0000000000..d7c08cd505
--- /dev/null
+++ b/crates/semantic_index/eval/gpt-engineer.json
@@ -0,0 +1,114 @@
+{
+  "repo": "https://github.com/AntonOsika/gpt-engineer.git",
+  "commit": "7735a6445bae3611c62f521e6464c67c957f87c2",
+  "assertions": [
+    {
+      "query": "How do I contribute to this project?",
+      "matches": [
+        ".github/CONTRIBUTING.md:1",
+        "ROADMAP.md:48"
+      ]
+    },
+    {
+      "query": "What version of the openai package is active?",
+      "matches": [
+        "pyproject.toml:14"
+      ]
+    },
+    {
+      "query": "Ask user for clarification",
+      "matches": [
+        "gpt-engineer/steps.py:69"
+      ]
+    },
+    {
+      "query": "generate tests for python code",
+      "matches": [
+        "gpt-engineer/steps.py:153"
+      ]
+    },
+    {
+      "query": "get item from database based on key",
+      "matches": [
+        "gpt-engineer/db.py:42",
+        "gpt-engineer/db.py:68"
+      ]
+    },
+    {
+      "query": "prompt user to select files",
+      "matches": [
+        "gpt-engineer/file_selector.py:171",
+        "gpt-engineer/file_selector.py:306",
+        "gpt-engineer/file_selector.py:289",
+        "gpt-engineer/file_selector.py:234"
+      ]
+    },
+    {
+      "query": "send to rudderstack",
+      "matches": [
+        "gpt-engineer/collect.py:11",
+        "gpt-engineer/collect.py:38"
+      ]
+    },
+    {
+      "query": "parse code blocks from chat messages",
+      "matches": [
+        "gpt-engineer/chat_to_files.py:10",
+        "docs/intro/chat_parsing.md:1"
+      ]
+    },
+    {
+      "query": "how do I use the docker cli?",
+      "matches": [
+        "docker/README.md:1"
+      ]
+    },
+    {
+      "query": "ask the user if the code ran successfully?",
+      "matches": [
+        "gpt-engineer/learning.py:54"
+      ]
+    },
+    {
+      "query": "how is consent granted by the user?",
+      "matches": [
+        "gpt-engineer/learning.py:107",
+        "gpt-engineer/learning.py:130",
+        "gpt-engineer/learning.py:152"
+      ]
+    },
+    {
+      "query": "what are all the different steps the agent can take?",
+      "matches": [
+        "docs/intro/steps_module.md:1",
+        "gpt-engineer/steps.py:391"
+      ]
+    },
+    {
+      "query": "ask the user for clarification?",
+      "matches": [
+        "gpt-engineer/steps.py:69"
+      ]
+    },
+    {
+      "query": "what models are available?",
+      "matches": [
+        "gpt-engineer/ai.py:315",
+        "gpt-engineer/ai.py:341",
+        "docs/open-models.md:1"
+      ]
+    },
+    {
+      "query": "what is the current focus of the project?",
+      "matches": [
+        "ROADMAP.md:11"
+      ]
+    },
+    {
+      "query": "does the agent know how to fix code?",
+      "matches": [
+        "gpt-engineer/steps.py:367"
+      ]
+    }
+  ]
+}

From 6f29582fb064e709056236ceb732335d63bbbfe4 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Wed, 13 Sep 2023 10:32:36 -0400
Subject: [PATCH 04/14] progress on eval

---
 crates/semantic_index/eval/gpt-engineer.json |  2 +-
 crates/semantic_index/eval/tree-sitter.json  |  2 +-
 crates/semantic_index/examples/eval.rs       | 80 +++++++++++++++++++-
 3 files changed, 79 insertions(+), 5 deletions(-)

diff --git a/crates/semantic_index/eval/gpt-engineer.json b/crates/semantic_index/eval/gpt-engineer.json
index d7c08cd505..64322e8384 100644
--- a/crates/semantic_index/eval/gpt-engineer.json
+++ b/crates/semantic_index/eval/gpt-engineer.json
@@ -12,7 +12,7 @@
     {
       "query": "What version of the openai package is active?",
       "matches": [
-        "pyproject.toml:14"
+        "pyproject.toml:14"
       ]
     },
     {
diff --git a/crates/semantic_index/eval/tree-sitter.json b/crates/semantic_index/eval/tree-sitter.json
index 4f2edfb063..52d1e9df16 100644
--- a/crates/semantic_index/eval/tree-sitter.json
+++ b/crates/semantic_index/eval/tree-sitter.json
@@ -48,7 +48,7 @@
       "query": "Handle conflict when numerous actions occur on the same symbol",
       "matches": [
         "cli/src/generate/build_tables/build_parse_table.rs:363",
-        "cli/src/generate/build_tables/build_parse_table.rs:442",
+        "cli/src/generate/build_tables/build_parse_table.rs:442"
       ]
     },
     {
diff --git a/crates/semantic_index/examples/eval.rs b/crates/semantic_index/examples/eval.rs
index c3950757ce..f666f5c281 100644
--- a/crates/semantic_index/examples/eval.rs
+++ b/crates/semantic_index/examples/eval.rs
@@ -1,19 +1,36 @@
 use git2::{Object, Oid, Repository};
+use semantic_index::SearchResult;
 use serde::Deserialize;
 use std::path::{Path, PathBuf};
 use std::{env, fs};
 
 #[derive(Deserialize, Clone)]
-struct QueryMatches {
+struct EvaluationQuery {
     query: String,
     matches: Vec<String>,
 }
 
+impl EvaluationQuery {
+    fn match_pairs(&self) -> Vec<(PathBuf, usize)> {
+        let mut pairs = Vec::new();
+        for match_identifier in self.matches {
+            let match_parts = match_identifier.split(":");
+
+            if let Some(file_path) = match_parts.next() {
+                if let Some(row_number) = match_parts.next() {
+                    pairs.push((PathBuf::from(file_path), from_str::<usize>(row_number)));
+                }
+            }
+
+        pairs
+    }
+}
+
 #[derive(Deserialize, Clone)]
 struct RepoEval {
     repo: String,
     commit: String,
-    assertions: Vec<QueryMatches>,
+    assertions: Vec<EvaluationQuery>,
 }
 
 const TMP_REPO_PATH: &str = "./target/eval_repos";
@@ -77,7 +94,60 @@ fn clone_repo(repo_eval: RepoEval) -> anyhow::Result<PathBuf> {
     Ok(clone_path)
 }
 
+fn dcg(hits: Vec<usize>) -> f32 {
+    let mut result = 0.0;
+    for (idx, hit) in hits.iter().enumerate() {
+        result += *hit as f32 / (2.0 + idx as f32).log2();
+    }
+
+    println!("DCG: {:?}", result);
+    result
+}
+
+fn evaluate_ndcg(eval_query: EvaluationQuery, search_results: Vec<SearchResult>, k: usize) -> f32 {
+
+    // NDCG or Normalized Discounted Cumulative Gain, is determined by comparing the relevance of
+    // items returned by the search engine relative to the hypothetical ideal.
+    // Relevance is represented as a series of booleans, in which each search result returned
+    // is identified as being inside the test set of matches (1) or not (0).
+
+    // For example, if result 1, 3 and 5 match the 3 relevant results provided
+    // actual dcg is calculated against a vector of [1, 0, 1, 0, 1]
+    // whereas ideal dcg is calculated against a vector of [1, 1, 1, 0, 0]
+    // as this ideal vector assumes the 3 relevant results provided were returned first
+    // normalized dcg is then calculated as actual dcg / ideal dcg.
+
+    // NDCG ranges from 0 to 1, with higher values indicating better performance
+    // Commonly NDCG is expressed as NDCG@k, in which k represents the metric calculated
+    // including only the top k values returned.
+    // The @k metrics can help you identify the point at which the relevant results start to fall off.
+    // I.e. an NDCG@1 of 0.9 and an NDCG@3 of 0.5 may indicate that the first result returned is usually
+    // very high quality, whereas rank results quickly drop off after the first result.
+
+    let ideal = vec![1; cmp::min(eval_query.matches.len(), k)];
+
+    return dcg(hits) / dcg(ideal);
+}
+
+fn evaluate_map(eval_query: EvaluationQuery, search_results: Vec<SearchResult>, k: usize) -> f32 {
+
+}
+
+fn evaluate_repo(repo_eval: RepoEval, clone_path: PathBuf) {
+
+    // Launch new repo as a new Zed workspace/project
+    // Index the project
+    // Search each eval_query
+    // Calculate Statistics
+
+}
+
 fn main() {
+
+    // zed/main.rs
+    // creating an app and running it, gives you the context.
+    // create a project, find_or_create_local_worktree.
+
     if let Ok(repo_evals) = parse_eval() {
         for repo in repo_evals {
             let cloned = clone_repo(repo.clone());
@@ -85,8 +155,12 @@ fn main() {
                 Ok(clone_path) => {
                     println!(
                         "Cloned {:?} @ {:?} into {:?}",
-                        repo.repo, repo.commit, clone_path
+                        repo.repo, repo.commit, &clone_path
                     );
+
+                    // Evaluate Repo
+                    evaluate_repo(repo, clone_path);
+
                 }
                 Err(err) => {
                     println!("Error Cloning: {:?}", err);

From eff44f9aa4412399c1c642eb271c4e5ec8297cec Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Wed, 13 Sep 2023 20:02:15 -0400
Subject: [PATCH 05/14] semantic index eval, indexing appropriately

---
 Cargo.lock                                  |   4 +
 crates/semantic_index/Cargo.toml            |   4 +
 crates/semantic_index/eval/tree-sitter.json |   6 +-
 crates/semantic_index/examples/eval.rs      | 194 ++++++++++++++++----
 crates/semantic_index/src/semantic_index.rs |   6 +-
 5 files changed, 168 insertions(+), 46 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index a66391ed07..b0f46a90d4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6744,6 +6744,7 @@ dependencies = [
  "anyhow",
  "async-trait",
  "bincode",
+ "client",
  "collections",
  "ctor",
  "editor",
@@ -6757,6 +6758,7 @@ dependencies = [
  "lazy_static",
  "log",
  "matrixmultiply",
+ "node_runtime",
  "parking_lot 0.11.2",
  "parse_duration",
  "picker",
@@ -6766,6 +6768,7 @@ dependencies = [
  "rand 0.8.5",
  "rpc",
  "rusqlite",
+ "rust-embed",
  "schemars",
  "serde",
  "serde_json",
@@ -6788,6 +6791,7 @@ dependencies = [
  "unindent",
  "util",
  "workspace",
+ "zed",
 ]
 
 [[package]]
diff --git a/crates/semantic_index/Cargo.toml b/crates/semantic_index/Cargo.toml
index b5537dd2fa..a20f29fd68 100644
--- a/crates/semantic_index/Cargo.toml
+++ b/crates/semantic_index/Cargo.toml
@@ -51,6 +51,10 @@ rpc = { path = "../rpc", features = ["test-support"] }
 workspace = { path = "../workspace", features = ["test-support"] }
 settings = { path = "../settings", features = ["test-support"]}
 git2 = { version = "0.15"}
+rust-embed = { version = "8.0", features = ["include-exclude"] }
+client = { path = "../client" }
+zed = { path = "../zed"}
+node_runtime = { path = "../node_runtime"}
 
 pretty_assertions.workspace = true
 rand.workspace = true
diff --git a/crates/semantic_index/eval/tree-sitter.json b/crates/semantic_index/eval/tree-sitter.json
index 52d1e9df16..d3dcc86937 100644
--- a/crates/semantic_index/eval/tree-sitter.json
+++ b/crates/semantic_index/eval/tree-sitter.json
@@ -17,7 +17,7 @@
     {
       "query": "generate tags based on config",
       "matches": [
-        "tags/src/lib.rs:261",
+        "tags/src/lib.rs:261"
       ]
     },
     {
@@ -54,13 +54,13 @@
     {
       "query": "Match based on associativity of actions",
       "matches": [
-        "cri/src/generate/build_tables/build_parse_table.rs:542",
+        "cli/src/generate/build_tables/build_parse_table.rs:542"
       ]
     },
     {
       "query": "Format token set display",
       "matches": [
-        "cli/src/generate/build_tables/item.rs:246",
+        "cli/src/generate/build_tables/item.rs:246"
       ]
     },
     {
diff --git a/crates/semantic_index/examples/eval.rs b/crates/semantic_index/examples/eval.rs
index f666f5c281..67ee52e28c 100644
--- a/crates/semantic_index/examples/eval.rs
+++ b/crates/semantic_index/examples/eval.rs
@@ -1,8 +1,46 @@
+use anyhow::{anyhow, Result};
+use client::{self, UserStore};
 use git2::{Object, Oid, Repository};
-use semantic_index::SearchResult;
+use gpui::{AppContext, AssetSource, ModelHandle, Task};
+use language::LanguageRegistry;
+use node_runtime::RealNodeRuntime;
+use project::{Fs, Project, RealFs};
+use rust_embed::RustEmbed;
+use semantic_index::embedding::OpenAIEmbeddings;
+use semantic_index::semantic_index_settings::SemanticIndexSettings;
+use semantic_index::{SearchResult, SemanticIndex};
 use serde::Deserialize;
-use std::path::{Path, PathBuf};
-use std::{env, fs};
+use settings::{default_settings, handle_settings_file_changes, watch_config_file, SettingsStore};
+use std::path::{self, Path, PathBuf};
+use std::sync::Arc;
+use std::time::Duration;
+use std::{cmp, env, fs};
+use util::channel::{RELEASE_CHANNEL, RELEASE_CHANNEL_NAME};
+use util::http::{self, HttpClient};
+use util::paths::{self, EMBEDDINGS_DIR};
+use zed::languages;
+
+#[derive(RustEmbed)]
+#[folder = "../../assets"]
+#[include = "fonts/**/*"]
+#[include = "icons/**/*"]
+#[include = "themes/**/*"]
+#[include = "sounds/**/*"]
+#[include = "*.md"]
+#[exclude = "*.DS_Store"]
+pub struct Assets;
+
+impl AssetSource for Assets {
+    fn load(&self, path: &str) -> Result<std::borrow::Cow<[u8]>> {
+        Self::get(path)
+            .map(|f| f.data)
+            .ok_or_else(|| anyhow!("could not find asset at path \"{}\"", path))
+    }
+
+    fn list(&self, path: &str) -> Vec<std::borrow::Cow<'static, str>> {
+        Self::iter().filter(|p| p.starts_with(path)).collect()
+    }
+}
 
 #[derive(Deserialize, Clone)]
 struct EvaluationQuery {
@@ -13,15 +51,18 @@ struct EvaluationQuery {
 impl EvaluationQuery {
     fn match_pairs(&self) -> Vec<(PathBuf, usize)> {
         let mut pairs = Vec::new();
-        for match_identifier in self.matches {
-            let match_parts = match_identifier.split(":");
+        for match_identifier in self.matches.iter() {
+            let mut match_parts = match_identifier.split(":");
 
             if let Some(file_path) = match_parts.next() {
                 if let Some(row_number) = match_parts.next() {
-                    pairs.push((PathBuf::from(file_path), from_str::<usize>(row_number)));
+                    pairs.push((
+                        PathBuf::from(file_path),
+                        row_number.parse::<usize>().unwrap(),
+                    ));
                 }
             }
-
+        }
         pairs
     }
 }
@@ -33,7 +74,7 @@ struct RepoEval {
     assertions: Vec<EvaluationQuery>,
 }
 
-const TMP_REPO_PATH: &str = "./target/eval_repos";
+const TMP_REPO_PATH: &str = "eval_repos";
 
 fn parse_eval() -> anyhow::Result<Vec<RepoEval>> {
     let eval_folder = env::current_dir()?
@@ -74,7 +115,12 @@ fn clone_repo(repo_eval: RepoEval) -> anyhow::Result<PathBuf> {
         .unwrap()
         .to_owned()
         .replace(".git", "");
-    let clone_path = Path::new(TMP_REPO_PATH).join(&repo_name).to_path_buf();
+
+    let clone_path = fs::canonicalize(env::current_dir()?)?
+        .parent()
+        .ok_or(anyhow!("path canonicalization failed"))?
+        .join(TMP_REPO_PATH)
+        .join(&repo_name);
 
     // Delete Clone Path if already exists
     let _ = fs::remove_dir_all(&clone_path);
@@ -105,7 +151,6 @@ fn dcg(hits: Vec<usize>) -> f32 {
 }
 
 fn evaluate_ndcg(eval_query: EvaluationQuery, search_results: Vec<SearchResult>, k: usize) -> f32 {
-
     // NDCG or Normalized Discounted Cumulative Gain, is determined by comparing the relevance of
     // items returned by the search engine relative to the hypothetical ideal.
     // Relevance is represented as a series of booleans, in which each search result returned
@@ -125,47 +170,118 @@ fn evaluate_ndcg(eval_query: EvaluationQuery, search_results: Vec<SearchResult>,
     // very high quality, whereas rank results quickly drop off after the first result.
 
     let ideal = vec![1; cmp::min(eval_query.matches.len(), k)];
+    let hits = vec![1];
 
     return dcg(hits) / dcg(ideal);
 }
 
-fn evaluate_map(eval_query: EvaluationQuery, search_results: Vec<SearchResult>, k: usize) -> f32 {
-
-}
-
-fn evaluate_repo(repo_eval: RepoEval, clone_path: PathBuf) {
-
-    // Launch new repo as a new Zed workspace/project
-    // Index the project
-    // Search each eval_query
-    // Calculate Statistics
+// fn evaluate_map(eval_query: EvaluationQuery, search_results: Vec<SearchResult>, k: usize) -> f32 {}
 
+fn init_logger() {
+    env_logger::init();
 }
 
 fn main() {
+    // Launch new repo as a new Zed workspace/project
+    let app = gpui::App::new(Assets).unwrap();
+    let fs = Arc::new(RealFs);
+    let http = http::client();
+    let user_settings_file_rx =
+        watch_config_file(app.background(), fs.clone(), paths::SETTINGS.clone());
+    let http_client = http::client();
+    init_logger();
 
-    // zed/main.rs
-    // creating an app and running it, gives you the context.
-    // create a project, find_or_create_local_worktree.
+    app.run(move |cx| {
+        cx.set_global(*RELEASE_CHANNEL);
 
-    if let Ok(repo_evals) = parse_eval() {
-        for repo in repo_evals {
-            let cloned = clone_repo(repo.clone());
-            match cloned {
-                Ok(clone_path) => {
-                    println!(
-                        "Cloned {:?} @ {:?} into {:?}",
-                        repo.repo, repo.commit, &clone_path
-                    );
+        let client = client::Client::new(http.clone(), cx);
+        let user_store = cx.add_model(|cx| UserStore::new(client.clone(), http_client.clone(), cx));
 
-                    // Evaluate Repo
-                    evaluate_repo(repo, clone_path);
+        // Initialize Settings
+        let mut store = SettingsStore::default();
+        store
+            .set_default_settings(default_settings().as_ref(), cx)
+            .unwrap();
+        cx.set_global(store);
+        handle_settings_file_changes(user_settings_file_rx, cx);
 
-                }
-                Err(err) => {
-                    println!("Error Cloning: {:?}", err);
+        // Initialize Languages
+        let login_shell_env_loaded = Task::ready(());
+        let mut languages = LanguageRegistry::new(login_shell_env_loaded);
+        languages.set_executor(cx.background().clone());
+        let languages = Arc::new(languages);
+
+        let node_runtime = RealNodeRuntime::new(http.clone());
+        languages::init(languages.clone(), node_runtime.clone());
+
+        project::Project::init(&client, cx);
+        semantic_index::init(fs.clone(), http.clone(), languages.clone(), cx);
+
+        settings::register::<SemanticIndexSettings>(cx);
+
+        let db_file_path = EMBEDDINGS_DIR
+            .join(Path::new(RELEASE_CHANNEL_NAME.as_str()))
+            .join("embeddings_db");
+
+        let languages = languages.clone();
+        let fs = fs.clone();
+        cx.spawn(|mut cx| async move {
+            let semantic_index = SemanticIndex::new(
+                fs.clone(),
+                db_file_path,
+                Arc::new(OpenAIEmbeddings::new(http_client, cx.background())),
+                languages.clone(),
+                cx.clone(),
+            )
+            .await?;
+
+            if let Ok(repo_evals) = parse_eval() {
+                for repo in repo_evals {
+                    let cloned = clone_repo(repo.clone());
+                    match cloned {
+                        Ok(clone_path) => {
+                            log::trace!(
+                                "Cloned {:?} @ {:?} into {:?}",
+                                repo.repo,
+                                repo.commit,
+                                &clone_path
+                            );
+
+                            // Create Project
+                            let project = cx.update(|cx| {
+                                Project::local(
+                                    client.clone(),
+                                    user_store.clone(),
+                                    languages.clone(),
+                                    fs.clone(),
+                                    cx,
+                                )
+                            });
+
+                            // Register Worktree
+                            let _ = project
+                                .update(&mut cx, |project, cx| {
+                                    println!(
+                                        "Creating worktree in project: {:?}",
+                                        clone_path.clone()
+                                    );
+                                    project.find_or_create_local_worktree(clone_path, true, cx)
+                                })
+                                .await;
+
+                            let _ = semantic_index
+                                .update(&mut cx, |index, cx| index.index_project(project, cx))
+                                .await;
+                        }
+                        Err(err) => {
+                            log::trace!("Error cloning: {:?}", err);
+                        }
+                    }
                 }
             }
-        }
-    }
+
+            anyhow::Ok(())
+        })
+        .detach();
+    });
 }
diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs
index 115bf5d7a8..63bcc900f2 100644
--- a/crates/semantic_index/src/semantic_index.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -1,5 +1,5 @@
 mod db;
-mod embedding;
+pub mod embedding;
 mod embedding_queue;
 mod parsing;
 pub mod semantic_index_settings;
@@ -301,7 +301,7 @@ impl SemanticIndex {
         }
     }
 
-    async fn new(
+    pub async fn new(
         fs: Arc<dyn Fs>,
         database_path: PathBuf,
         embedding_provider: Arc<dyn EmbeddingProvider>,
@@ -837,8 +837,6 @@ impl SemanticIndex {
         cx: &mut ModelContext<Self>,
     ) -> Task<Result<()>> {
         if !self.projects.contains_key(&project.downgrade()) {
-            log::trace!("Registering Project for Semantic Index");
-
             let subscription = cx.subscribe(&project, |this, project, event, cx| match event {
                 project::Event::WorktreeAdded | project::Event::WorktreeRemoved(_) => {
                     this.project_worktrees_changed(project.clone(), cx);

From 0c1b2e5aa6a83b75d218a82676d2523147180a10 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Wed, 13 Sep 2023 20:04:53 -0400
Subject: [PATCH 06/14] cleaned up warnings

---
 crates/semantic_index/examples/eval.rs | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/crates/semantic_index/examples/eval.rs b/crates/semantic_index/examples/eval.rs
index 67ee52e28c..27ee93d093 100644
--- a/crates/semantic_index/examples/eval.rs
+++ b/crates/semantic_index/examples/eval.rs
@@ -1,22 +1,21 @@
 use anyhow::{anyhow, Result};
 use client::{self, UserStore};
 use git2::{Object, Oid, Repository};
-use gpui::{AppContext, AssetSource, ModelHandle, Task};
+use gpui::{AssetSource, Task};
 use language::LanguageRegistry;
 use node_runtime::RealNodeRuntime;
-use project::{Fs, Project, RealFs};
+use project::{Project, RealFs};
 use rust_embed::RustEmbed;
 use semantic_index::embedding::OpenAIEmbeddings;
 use semantic_index::semantic_index_settings::SemanticIndexSettings;
 use semantic_index::{SearchResult, SemanticIndex};
 use serde::Deserialize;
 use settings::{default_settings, handle_settings_file_changes, watch_config_file, SettingsStore};
-use std::path::{self, Path, PathBuf};
+use std::path::{Path, PathBuf};
 use std::sync::Arc;
-use std::time::Duration;
 use std::{cmp, env, fs};
 use util::channel::{RELEASE_CHANNEL, RELEASE_CHANNEL_NAME};
-use util::http::{self, HttpClient};
+use util::http::{self};
 use util::paths::{self, EMBEDDINGS_DIR};
 use zed::languages;
 

From 137dda3ee656036ccba6c3554f33e4135c2ba12e Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Thu, 14 Sep 2023 09:30:19 -0400
Subject: [PATCH 07/14] wip eval framework for semantic index

---
 crates/semantic_index/examples/eval.rs | 63 ++++++++++++++++++++++----
 1 file changed, 55 insertions(+), 8 deletions(-)

diff --git a/crates/semantic_index/examples/eval.rs b/crates/semantic_index/examples/eval.rs
index 27ee93d093..4935b160a4 100644
--- a/crates/semantic_index/examples/eval.rs
+++ b/crates/semantic_index/examples/eval.rs
@@ -1,7 +1,8 @@
 use anyhow::{anyhow, Result};
 use client::{self, UserStore};
+use collections::HashMap;
 use git2::{Object, Oid, Repository};
-use gpui::{AssetSource, Task};
+use gpui::{AssetSource, AsyncAppContext, ModelHandle, Task};
 use language::LanguageRegistry;
 use node_runtime::RealNodeRuntime;
 use project::{Project, RealFs};
@@ -13,6 +14,7 @@ use serde::Deserialize;
 use settings::{default_settings, handle_settings_file_changes, watch_config_file, SettingsStore};
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
+use std::time::{Duration, Instant};
 use std::{cmp, env, fs};
 use util::channel::{RELEASE_CHANNEL, RELEASE_CHANNEL_NAME};
 use util::http::{self};
@@ -73,6 +75,15 @@ struct RepoEval {
     assertions: Vec<EvaluationQuery>,
 }
 
+struct EvaluationResults {
+    token_count: usize,
+    span_count: usize,
+    time_to_index: Duration,
+    time_to_search: Vec<Duration>,
+    ndcg: HashMap<usize, f32>,
+    map: HashMap<usize, f32>,
+}
+
 const TMP_REPO_PATH: &str = "eval_repos";
 
 fn parse_eval() -> anyhow::Result<Vec<RepoEval>> {
@@ -180,6 +191,42 @@ fn init_logger() {
     env_logger::init();
 }
 
+async fn evaluate_repo(
+    index: ModelHandle<SemanticIndex>,
+    project: ModelHandle<Project>,
+    query_matches: Vec<EvaluationQuery>,
+    cx: &mut AsyncAppContext,
+) -> Result<()> {
+    // Index Project
+    let index_t0 = Instant::now();
+    index
+        .update(cx, |index, cx| index.index_project(project.clone(), cx))
+        .await?;
+    let index_time = index_t0.elapsed();
+    println!("Time to Index: {:?}", index_time.as_secs());
+
+    for query in query_matches {
+        // Query each match in order
+        let search_t0 = Instant::now();
+        let search_results = index
+            .update(cx, |index, mut cx| {
+                index.search_project(project.clone(), query.query, 10, vec![], vec![], cx)
+            })
+            .await?;
+        let search_time = search_t0.elapsed();
+        println!("Time to Search: {:?}", search_time.as_secs());
+
+        // Evaluate ndcg@k, for k = 1, 3, 5, 10
+        // Evaluate map@k, for k = 1, 3, 5, 10
+        // Evaluate span count
+        // Evaluate token count
+        // Evaluate time to index
+        // Evaluate time to search
+    }
+
+    anyhow::Ok(())
+}
+
 fn main() {
     // Launch new repo as a new Zed workspace/project
     let app = gpui::App::new(Assets).unwrap();
@@ -260,17 +307,17 @@ fn main() {
                             // Register Worktree
                             let _ = project
                                 .update(&mut cx, |project, cx| {
-                                    println!(
-                                        "Creating worktree in project: {:?}",
-                                        clone_path.clone()
-                                    );
                                     project.find_or_create_local_worktree(clone_path, true, cx)
                                 })
                                 .await;
 
-                            let _ = semantic_index
-                                .update(&mut cx, |index, cx| index.index_project(project, cx))
-                                .await;
+                            evaluate_repo(
+                                semantic_index.clone(),
+                                project,
+                                repo.assertions,
+                                &mut cx,
+                            )
+                            .await?;
                         }
                         Err(err) => {
                             log::trace!("Error cloning: {:?}", err);

From 04bd107ada2417599fe9b2de1974f75c38646832 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Fri, 15 Sep 2023 10:36:21 -0400
Subject: [PATCH 08/14] add ndcg@k to evaluate metrics

---
 crates/semantic_index/eval/gpt-engineer.json | 44 +++++------
 crates/semantic_index/examples/eval.rs       | 78 +++++++++++++++++---
 2 files changed, 88 insertions(+), 34 deletions(-)

diff --git a/crates/semantic_index/eval/gpt-engineer.json b/crates/semantic_index/eval/gpt-engineer.json
index 64322e8384..4a96e9f5ff 100644
--- a/crates/semantic_index/eval/gpt-engineer.json
+++ b/crates/semantic_index/eval/gpt-engineer.json
@@ -1,5 +1,5 @@
 {
-  "repo": "https://github.com/AntonOsika/gpt-engineer.git",
+  "repo": "https://github.com/AntonOsika/gpt_engineer.git",
   "commit": "7735a6445bae3611c62f521e6464c67c957f87c2",
   "assertions": [
     {
@@ -12,48 +12,48 @@
     {
       "query": "What version of the openai package is active?",
       "matches": [
-        "pyprojet.toml:14"
+        "pyproject.toml:14"
       ]
     },
     {
       "query": "Ask user for clarification",
       "matches": [
-        "gpt-engineer/steps.py:69"
+        "gpt_engineer/steps.py:69"
       ]
     },
     {
       "query": "generate tests for python code",
       "matches": [
-        "gpt-engineer/steps.py:153"
+        "gpt_engineer/steps.py:153"
       ]
     },
     {
       "query": "get item from database based on key",
       "matches": [
-        "gpt-engineer/db.py:42",
-        "gpt-engineer/db.py:68"
+        "gpt_engineer/db.py:42",
+        "gpt_engineer/db.py:68"
       ]
     },
     {
       "query": "prompt user to select files",
       "matches": [
-        "gpt-engineer/file_selector.py:171",
-        "gpt-engineer/file_selector.py:306",
-        "gpt-engineer/file_selector.py:289",
-        "gpt-engineer/file_selector.py:234"
+        "gpt_engineer/file_selector.py:171",
+        "gpt_engineer/file_selector.py:306",
+        "gpt_engineer/file_selector.py:289",
+        "gpt_engineer/file_selector.py:234"
       ]
     },
     {
       "query": "send to rudderstack",
       "matches": [
-        "gpt-engineer/collect.py:11",
-        "gpt-engineer/collect.py:38"
+        "gpt_engineer/collect.py:11",
+        "gpt_engineer/collect.py:38"
       ]
     },
     {
       "query": "parse code blocks from chat messages",
       "matches": [
-        "gpt-engineer/chat_to_files.py:10",
+        "gpt_engineer/chat_to_files.py:10",
         "docs/intro/chat_parsing.md:1"
       ]
     },
@@ -66,35 +66,35 @@
     {
       "query": "ask the user if the code ran successfully?",
       "matches": [
-        "gpt-engineer/learning.py:54"
+        "gpt_engineer/learning.py:54"
       ]
     },
     {
       "query": "how is consent granted by the user?",
       "matches": [
-        "gpt-engineer/learning.py:107",
-        "gpt-engineer/learning.py:130",
-        "gpt-engineer/learning.py:152"
+        "gpt_engineer/learning.py:107",
+        "gpt_engineer/learning.py:130",
+        "gpt_engineer/learning.py:152"
       ]
     },
     {
       "query": "what are all the different steps the agent can take?",
       "matches": [
         "docs/intro/steps_module.md:1",
-        "gpt-engineer/steps.py:391"
+        "gpt_engineer/steps.py:391"
       ]
     },
     {
       "query": "ask the user for clarification?",
       "matches": [
-        "gpt-engineer/steps.py:69"
+        "gpt_engineer/steps.py:69"
       ]
     },
     {
       "query": "what models are available?",
       "matches": [
-        "gpt-engineer/ai.py:315",
-        "gpt-engineer/ai.py:341",
+        "gpt_engineer/ai.py:315",
+        "gpt_engineer/ai.py:341",
         "docs/open-models.md:1"
       ]
     },
@@ -107,7 +107,7 @@
     {
       "query": "does the agent know how to fix code?",
       "matches": [
-        "gpt-engineer/steps.py:367"
+        "gpt_engineer/steps.py:367"
       ]
     }
   ]
diff --git a/crates/semantic_index/examples/eval.rs b/crates/semantic_index/examples/eval.rs
index 4935b160a4..0a13623b79 100644
--- a/crates/semantic_index/examples/eval.rs
+++ b/crates/semantic_index/examples/eval.rs
@@ -2,7 +2,7 @@ use anyhow::{anyhow, Result};
 use client::{self, UserStore};
 use collections::HashMap;
 use git2::{Object, Oid, Repository};
-use gpui::{AssetSource, AsyncAppContext, ModelHandle, Task};
+use gpui::{AppContext, AssetSource, AsyncAppContext, ModelHandle, Task};
 use language::LanguageRegistry;
 use node_runtime::RealNodeRuntime;
 use project::{Project, RealFs};
@@ -50,17 +50,14 @@ struct EvaluationQuery {
 }
 
 impl EvaluationQuery {
-    fn match_pairs(&self) -> Vec<(PathBuf, usize)> {
+    fn match_pairs(&self) -> Vec<(PathBuf, u32)> {
         let mut pairs = Vec::new();
         for match_identifier in self.matches.iter() {
             let mut match_parts = match_identifier.split(":");
 
             if let Some(file_path) = match_parts.next() {
                 if let Some(row_number) = match_parts.next() {
-                    pairs.push((
-                        PathBuf::from(file_path),
-                        row_number.parse::<usize>().unwrap(),
-                    ));
+                    pairs.push((PathBuf::from(file_path), row_number.parse::<u32>().unwrap()));
                 }
             }
         }
@@ -156,11 +153,15 @@ fn dcg(hits: Vec<usize>) -> f32 {
         result += *hit as f32 / (2.0 + idx as f32).log2();
     }
 
-    println!("DCG: {:?}", result);
     result
 }
 
-fn evaluate_ndcg(eval_query: EvaluationQuery, search_results: Vec<SearchResult>, k: usize) -> f32 {
+fn evaluate_ndcg(
+    eval_query: EvaluationQuery,
+    search_results: Vec<SearchResult>,
+    k: usize,
+    cx: &AsyncAppContext,
+) -> Vec<f32> {
     // NDCG or Normalized Discounted Cumulative Gain, is determined by comparing the relevance of
     // items returned by the search engine relative to the hypothetical ideal.
     // Relevance is represented as a series of booleans, in which each search result returned
@@ -180,9 +181,58 @@ fn evaluate_ndcg(eval_query: EvaluationQuery, search_results: Vec<SearchResult>,
     // very high quality, whereas rank results quickly drop off after the first result.
 
     let ideal = vec![1; cmp::min(eval_query.matches.len(), k)];
-    let hits = vec![1];
 
-    return dcg(hits) / dcg(ideal);
+    let mut hits = Vec::new();
+    for result in search_results {
+        let (path, start_row, end_row) = result.buffer.read_with(cx, |buffer, cx| {
+            let path = buffer.file().unwrap().path().to_path_buf();
+            let start_row = buffer.offset_to_point(result.range.start.offset).row;
+            let end_row = buffer.offset_to_point(result.range.end.offset).row;
+            (path, start_row, end_row)
+        });
+
+        let match_pairs = eval_query.match_pairs();
+        let mut found = 0;
+        for (match_path, match_row) in match_pairs {
+            if match_path == path {
+                if match_row >= start_row && match_row <= end_row {
+                    found = 1;
+                    break;
+                }
+            }
+        }
+
+        hits.push(found);
+    }
+
+    // For now, we are calculating ideal_hits a bit differently, as technically
+    // with overlapping ranges, one match can result in more than one result.
+    let mut ideal_hits = hits.clone();
+    ideal_hits.retain(|x| x == &1);
+
+    let ideal = if ideal.len() > ideal_hits.len() {
+        ideal
+    } else {
+        ideal_hits
+    };
+
+    // Fill ideal to 10 length
+    let mut filled_ideal = [0; 10];
+    for (idx, i) in ideal.to_vec().into_iter().enumerate() {
+        filled_ideal[idx] = i;
+    }
+
+    let mut ndcg = Vec::new();
+    for idx in 1..(hits.len() + 1) {
+        let hits_at_k = hits[0..idx].to_vec();
+        let ideal_at_k = filled_ideal[0..idx].to_vec();
+
+        let at_k = dcg(hits_at_k.clone()) / dcg(ideal_at_k.clone());
+
+        ndcg.push(at_k);
+    }
+
+    ndcg
 }
 
 // fn evaluate_map(eval_query: EvaluationQuery, search_results: Vec<SearchResult>, k: usize) -> f32 {}
@@ -209,14 +259,17 @@ async fn evaluate_repo(
         // Query each match in order
         let search_t0 = Instant::now();
         let search_results = index
-            .update(cx, |index, mut cx| {
-                index.search_project(project.clone(), query.query, 10, vec![], vec![], cx)
+            .update(cx, |index, cx| {
+                index.search_project(project.clone(), query.clone().query, 10, vec![], vec![], cx)
             })
             .await?;
         let search_time = search_t0.elapsed();
         println!("Time to Search: {:?}", search_time.as_secs());
 
         // Evaluate ndcg@k, for k = 1, 3, 5, 10
+        let ndcg = evaluate_ndcg(query, search_results, 10, cx);
+        println!("NDCG: {:?}", ndcg);
+
         // Evaluate map@k, for k = 1, 3, 5, 10
         // Evaluate span count
         // Evaluate token count
@@ -259,6 +312,7 @@ fn main() {
 
         let node_runtime = RealNodeRuntime::new(http.clone());
         languages::init(languages.clone(), node_runtime.clone());
+        language::init(cx);
 
         project::Project::init(&client, cx);
         semantic_index::init(fs.clone(), http.clone(), languages.clone(), cx);

From 566bb9f71b8beebd723f07b7c83536d80613d7a2 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Mon, 18 Sep 2023 09:57:52 -0400
Subject: [PATCH 09/14] add map to evaluation suite for semantic_index

---
 crates/semantic_index/examples/eval.rs | 82 +++++++++++++++++---------
 1 file changed, 55 insertions(+), 27 deletions(-)

diff --git a/crates/semantic_index/examples/eval.rs b/crates/semantic_index/examples/eval.rs
index 0a13623b79..f0243b8b12 100644
--- a/crates/semantic_index/examples/eval.rs
+++ b/crates/semantic_index/examples/eval.rs
@@ -126,6 +126,8 @@ fn clone_repo(repo_eval: RepoEval) -> anyhow::Result<PathBuf> {
     let clone_path = fs::canonicalize(env::current_dir()?)?
         .parent()
         .ok_or(anyhow!("path canonicalization failed"))?
+        .parent()
+        .unwrap()
         .join(TMP_REPO_PATH)
         .join(&repo_name);
 
@@ -156,30 +158,12 @@ fn dcg(hits: Vec<usize>) -> f32 {
     result
 }
 
-fn evaluate_ndcg(
+fn get_hits(
     eval_query: EvaluationQuery,
     search_results: Vec<SearchResult>,
     k: usize,
     cx: &AsyncAppContext,
-) -> Vec<f32> {
-    // NDCG or Normalized Discounted Cumulative Gain, is determined by comparing the relevance of
-    // items returned by the search engine relative to the hypothetical ideal.
-    // Relevance is represented as a series of booleans, in which each search result returned
-    // is identified as being inside the test set of matches (1) or not (0).
-
-    // For example, if result 1, 3 and 5 match the 3 relevant results provided
-    // actual dcg is calculated against a vector of [1, 0, 1, 0, 1]
-    // whereas ideal dcg is calculated against a vector of [1, 1, 1, 0, 0]
-    // as this ideal vector assumes the 3 relevant results provided were returned first
-    // normalized dcg is then calculated as actual dcg / ideal dcg.
-
-    // NDCG ranges from 0 to 1, which higher values indicating better performance
-    // Commonly NDCG is expressed as NDCG@k, in which k represents the metric calculated
-    // including only the top k values returned.
-    // The @k metrics can help you identify, at what point does the relevant results start to fall off.
-    // Ie. a NDCG@1 of 0.9 and a NDCG@3 of 0.5 may indicate that the first result returned in usually
-    // very high quality, whereas rank results quickly drop off after the first result.
-
+) -> (Vec<usize>, Vec<usize>) {
     let ideal = vec![1; cmp::min(eval_query.matches.len(), k)];
 
     let mut hits = Vec::new();
@@ -222,10 +206,32 @@ fn evaluate_ndcg(
         filled_ideal[idx] = i;
     }
 
+    (filled_ideal.to_vec(), hits)
+}
+
+fn evaluate_ndcg(hits: Vec<usize>, ideal: Vec<usize>) -> Vec<f32> {
+    // NDCG or Normalized Discounted Cumulative Gain, is determined by comparing the relevance of
+    // items returned by the search engine relative to the hypothetical ideal.
+    // Relevance is represented as a series of booleans, in which each search result returned
+    // is identified as being inside the test set of matches (1) or not (0).
+
+    // For example, if result 1, 3 and 5 match the 3 relevant results provided
+    // actual dcg is calculated against a vector of [1, 0, 1, 0, 1]
+    // whereas ideal dcg is calculated against a vector of [1, 1, 1, 0, 0]
+    // as this ideal vector assumes the 3 relevant results provided were returned first
+    // normalized dcg is then calculated as actual dcg / ideal dcg.
+
+    // NDCG ranges from 0 to 1, with higher values indicating better performance
+    // Commonly NDCG is expressed as NDCG@k, in which k represents the metric calculated
+    // including only the top k values returned.
+    // The @k metrics can help you identify at what point the relevant results start to fall off.
+    // I.e. an NDCG@1 of 0.9 and an NDCG@3 of 0.5 may indicate that the first result returned is usually
+    // very high quality, whereas rank results quickly drop off after the first result.
+
     let mut ndcg = Vec::new();
     for idx in 1..(hits.len() + 1) {
         let hits_at_k = hits[0..idx].to_vec();
-        let ideal_at_k = filled_ideal[0..idx].to_vec();
+        let ideal_at_k = ideal[0..idx].to_vec();
 
         let at_k = dcg(hits_at_k.clone()) / dcg(ideal_at_k.clone());
 
@@ -235,7 +241,24 @@ fn evaluate_ndcg(
     ndcg
 }
 
-// fn evaluate_map(eval_query: EvaluationQuery, search_results: Vec<SearchResult>, k: usize) -> f32 {}
+fn evaluate_map(hits: Vec<usize>) -> Vec<f32> {
+    let mut map_at_k = Vec::new();
+
+    let non_zero = hits.iter().sum::<usize>() as f32;
+    if non_zero == 0.0 {
+        return vec![0.0; hits.len()];
+    }
+
+    let mut rolling_non_zero = 0.0;
+    let mut rolling_map = 0.0;
+    for (idx, h) in hits.into_iter().enumerate() {
+        rolling_non_zero += h as f32;
+        rolling_map += rolling_non_zero / (idx + 1) as f32;
+        map_at_k.push(rolling_map / non_zero);
+    }
+
+    map_at_k
+}
 
 fn init_logger() {
     env_logger::init();
@@ -253,7 +276,7 @@ async fn evaluate_repo(
         .update(cx, |index, cx| index.index_project(project.clone(), cx))
         .await?;
     let index_time = index_t0.elapsed();
-    println!("Time to Index: {:?}", index_time.as_secs());
+    println!("Time to Index: {:?}", index_time.as_millis());
 
     for query in query_matches {
         // Query each match in order
@@ -264,17 +287,22 @@ async fn evaluate_repo(
             })
             .await?;
         let search_time = search_t0.elapsed();
-        println!("Time to Search: {:?}", search_time.as_secs());
+        println!("Time to Search: {:?}", search_time.as_millis());
+
+        // Get Hits/Ideal
+        let k = 10;
+        let (ideal, hits) = self::get_hits(query, search_results, k, cx);
 
         // Evaluate ndcg@k, for k = 1, 3, 5, 10
-        let ndcg = evaluate_ndcg(query, search_results, 10, cx);
+        let ndcg = evaluate_ndcg(hits.clone(), ideal);
         println!("NDCG: {:?}", ndcg);
 
         // Evaluate map@k, for k = 1, 3, 5, 10
+        let map = evaluate_map(hits);
+        println!("MAP: {:?}", map);
+
         // Evaluate span count
         // Evaluate token count
-        // Evaluate time to index
-        // Evaluate time to search
     }
 
     anyhow::Ok(())

From 25bd35742615ee627903b245a3d6c8e1b4ba3f4d Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Mon, 18 Sep 2023 18:25:02 -0400
Subject: [PATCH 10/14] add recall and precision to semantic index

---
 crates/semantic_index/eval/gpt-engineer.json |   2 +-
 crates/semantic_index/examples/eval.rs       | 212 ++++++++++++++++---
 2 files changed, 179 insertions(+), 35 deletions(-)

diff --git a/crates/semantic_index/eval/gpt-engineer.json b/crates/semantic_index/eval/gpt-engineer.json
index 4a96e9f5ff..d008cc65d1 100644
--- a/crates/semantic_index/eval/gpt-engineer.json
+++ b/crates/semantic_index/eval/gpt-engineer.json
@@ -1,5 +1,5 @@
 {
-  "repo": "https://github.com/AntonOsika/gpt_engineer.git",
+  "repo": "https://github.com/AntonOsika/gpt-engineer.git",
   "commit": "7735a6445bae3611c62f521e6464c67c957f87c2",
   "assertions": [
     {
diff --git a/crates/semantic_index/examples/eval.rs b/crates/semantic_index/examples/eval.rs
index f0243b8b12..546071c6f1 100644
--- a/crates/semantic_index/examples/eval.rs
+++ b/crates/semantic_index/examples/eval.rs
@@ -10,7 +10,7 @@ use rust_embed::RustEmbed;
 use semantic_index::embedding::OpenAIEmbeddings;
 use semantic_index::semantic_index_settings::SemanticIndexSettings;
 use semantic_index::{SearchResult, SemanticIndex};
-use serde::Deserialize;
+use serde::{Deserialize, Serialize};
 use settings::{default_settings, handle_settings_file_changes, watch_config_file, SettingsStore};
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
@@ -43,7 +43,7 @@ impl AssetSource for Assets {
     }
 }
 
-#[derive(Deserialize, Clone)]
+#[derive(Deserialize, Clone, Serialize)]
 struct EvaluationQuery {
     query: String,
     matches: Vec<String>,
@@ -72,15 +72,6 @@ struct RepoEval {
     assertions: Vec<EvaluationQuery>,
 }
 
-struct EvaluationResults {
-    token_count: usize,
-    span_count: usize,
-    time_to_index: Duration,
-    time_to_search: Vec<Duration>,
-    ndcg: HashMap<usize, f32>,
-    map: HashMap<usize, f32>,
-}
-
 const TMP_REPO_PATH: &str = "eval_repos";
 
 fn parse_eval() -> anyhow::Result<Vec<RepoEval>> {
@@ -114,7 +105,7 @@ fn parse_eval() -> anyhow::Result<Vec<RepoEval>> {
     Ok(repo_evals)
 }
 
-fn clone_repo(repo_eval: RepoEval) -> anyhow::Result<PathBuf> {
+fn clone_repo(repo_eval: RepoEval) -> anyhow::Result<(String, PathBuf)> {
     let repo_name = Path::new(repo_eval.repo.as_str())
         .file_name()
         .unwrap()
@@ -146,7 +137,7 @@ fn clone_repo(repo_eval: RepoEval) -> anyhow::Result<PathBuf> {
     repo.checkout_tree(&obj, None)?;
     repo.set_head_detached(obj.id())?;
 
-    Ok(clone_path)
+    Ok((repo_name, clone_path))
 }
 
 fn dcg(hits: Vec<usize>) -> f32 {
@@ -253,30 +244,165 @@ fn evaluate_map(hits: Vec<usize>) -> Vec<f32> {
     let mut rolling_map = 0.0;
     for (idx, h) in hits.into_iter().enumerate() {
         rolling_non_zero += h as f32;
-        rolling_map += rolling_non_zero / (idx + 1) as f32;
+        if h == 1 {
+            rolling_map += rolling_non_zero / (idx + 1) as f32;
+        }
         map_at_k.push(rolling_map / non_zero);
     }
 
     map_at_k
 }
 
+fn evaluate_mrr(hits: Vec<usize>) -> f32 {
+    for (idx, h) in hits.into_iter().enumerate() {
+        if h == 1 {
+            return 1.0 / (idx + 1) as f32;
+        }
+    }
+
+    return 0.0;
+}
+
 fn init_logger() {
     env_logger::init();
 }
 
+#[derive(Serialize)]
+struct QueryMetrics {
+    query: EvaluationQuery,
+    millis_to_search: Duration,
+    ndcg: Vec<f32>,
+    map: Vec<f32>,
+    mrr: f32,
+    hits: Vec<usize>,
+    precision: Vec<f32>,
+    recall: Vec<f32>,
+}
+
+#[derive(Serialize)]
+struct SummaryMetrics {
+    millis_to_search: f32,
+    ndcg: Vec<f32>,
+    map: Vec<f32>,
+    mrr: f32,
+    precision: Vec<f32>,
+    recall: Vec<f32>,
+}
+
+#[derive(Serialize)]
+struct RepoEvaluationMetrics {
+    millis_to_index: Duration,
+    query_metrics: Vec<QueryMetrics>,
+    repo_metrics: Option<SummaryMetrics>,
+}
+
+impl RepoEvaluationMetrics {
+    fn new(millis_to_index: Duration) -> Self {
+        RepoEvaluationMetrics {
+            millis_to_index,
+            query_metrics: Vec::new(),
+            repo_metrics: None,
+        }
+    }
+
+    fn save(&self, repo_name: String) -> Result<()> {
+        let results_string = serde_json::to_string(&self)?;
+        fs::write(format!("./{}_evaluation.json", repo_name), results_string)
+            .expect("Unable to write file");
+        Ok(())
+    }
+
+    fn summarize(&mut self) {
+        let l = self.query_metrics.len() as f32;
+        let millis_to_search: f32 = self
+            .query_metrics
+            .iter()
+            .map(|metrics| metrics.millis_to_search.as_millis())
+            .sum::<u128>() as f32
+            / l;
+
+        let mut ndcg_sum = vec![0.0; 10];
+        let mut map_sum = vec![0.0; 10];
+        let mut precision_sum = vec![0.0; 10];
+        let mut recall_sum = vec![0.0; 10];
+        let mut mmr_sum = 0.0;
+
+        for query_metric in self.query_metrics.iter() {
+            for (ndcg, query_ndcg) in ndcg_sum.iter_mut().zip(query_metric.ndcg.clone()) {
+                *ndcg += query_ndcg;
+            }
+
+            for (mapp, query_map) in map_sum.iter_mut().zip(query_metric.map.clone()) {
+                *mapp += query_map;
+            }
+
+            for (pre, query_pre) in precision_sum.iter_mut().zip(query_metric.precision.clone()) {
+                *pre += query_pre;
+            }
+
+            for (rec, query_rec) in recall_sum.iter_mut().zip(query_metric.recall.clone()) {
+                *rec += query_rec;
+            }
+
+            mmr_sum += query_metric.mrr;
+        }
+
+        let ndcg = ndcg_sum.iter().map(|val| val / l).collect::<Vec<f32>>();
+        let map = map_sum.iter().map(|val| val / l).collect::<Vec<f32>>();
+        let precision = precision_sum
+            .iter()
+            .map(|val| val / l)
+            .collect::<Vec<f32>>();
+        let recall = recall_sum.iter().map(|val| val / l).collect::<Vec<f32>>();
+        let mrr = mmr_sum / l;
+
+        self.repo_metrics = Some(SummaryMetrics {
+            millis_to_search,
+            ndcg,
+            map,
+            mrr,
+            precision,
+            recall,
+        })
+    }
+}
+
+fn evaluate_precision(hits: Vec<usize>) -> Vec<f32> {
+    let mut rolling_hit: f32 = 0.0;
+    let mut precision = Vec::new();
+    for (idx, hit) in hits.into_iter().enumerate() {
+        rolling_hit += hit as f32;
+        precision.push(rolling_hit / ((idx as f32) + 1.0));
+    }
+
+    precision
+}
+
+fn evaluate_recall(hits: Vec<usize>, ideal: Vec<usize>) -> Vec<f32> {
+    let total_relevant = ideal.iter().sum::<usize>() as f32;
+    let mut recall = Vec::new();
+    let mut rolling_hit: f32 = 0.0;
+    for hit in hits {
+        rolling_hit += hit as f32;
+        recall.push(rolling_hit / total_relevant);
+    }
+
+    recall
+}
+
 async fn evaluate_repo(
+    repo_name: String,
     index: ModelHandle<SemanticIndex>,
     project: ModelHandle<Project>,
     query_matches: Vec<EvaluationQuery>,
     cx: &mut AsyncAppContext,
-) -> Result<()> {
+) -> Result<RepoEvaluationMetrics> {
     // Index Project
     let index_t0 = Instant::now();
     index
         .update(cx, |index, cx| index.index_project(project.clone(), cx))
         .await?;
-    let index_time = index_t0.elapsed();
-    println!("Time to Index: {:?}", index_time.as_millis());
+    let mut repo_metrics = RepoEvaluationMetrics::new(index_t0.elapsed());
 
     for query in query_matches {
         // Query each match in order
@@ -286,26 +412,45 @@ async fn evaluate_repo(
                 index.search_project(project.clone(), query.clone().query, 10, vec![], vec![], cx)
             })
             .await?;
-        let search_time = search_t0.elapsed();
-        println!("Time to Search: {:?}", search_time.as_millis());
+        let millis_to_search = search_t0.elapsed();
 
         // Get Hits/Ideal
         let k = 10;
-        let (ideal, hits) = self::get_hits(query, search_results, k, cx);
+        let (ideal, hits) = self::get_hits(query.clone(), search_results, k, cx);
 
         // Evaluate ndcg@k, for k = 1, 3, 5, 10
-        let ndcg = evaluate_ndcg(hits.clone(), ideal);
-        println!("NDCG: {:?}", ndcg);
+        let ndcg = evaluate_ndcg(hits.clone(), ideal.clone());
 
         // Evaluate map@k, for k = 1, 3, 5, 10
-        let map = evaluate_map(hits);
-        println!("MAP: {:?}", map);
+        let map = evaluate_map(hits.clone());
 
-        // Evaluate span count
-        // Evaluate token count
+        // Evaluate mrr
+        let mrr = evaluate_mrr(hits.clone());
+
+        // Evaluate precision
+        let precision = evaluate_precision(hits.clone());
+
+        // Evaluate Recall
+        let recall = evaluate_recall(hits.clone(), ideal);
+
+        let query_metrics = QueryMetrics {
+            query,
+            millis_to_search,
+            ndcg,
+            map,
+            mrr,
+            hits,
+            precision,
+            recall,
+        };
+
+        repo_metrics.query_metrics.push(query_metrics);
     }
 
-    anyhow::Ok(())
+    repo_metrics.summarize();
+    repo_metrics.save(repo_name);
+
+    anyhow::Ok(repo_metrics)
 }
 
 fn main() {
@@ -367,12 +512,10 @@ fn main() {
                 for repo in repo_evals {
                     let cloned = clone_repo(repo.clone());
                     match cloned {
-                        Ok(clone_path) => {
-                            log::trace!(
+                        Ok((repo_name, clone_path)) => {
+                            println!(
                                 "Cloned {:?} @ {:?} into {:?}",
-                                repo.repo,
-                                repo.commit,
-                                &clone_path
+                                repo.repo, repo.commit, &clone_path
                             );
 
                             // Create Project
@@ -393,7 +536,8 @@ fn main() {
                                 })
                                 .await;
 
-                            evaluate_repo(
+                            let repo_metrics = evaluate_repo(
+                                repo_name,
                                 semantic_index.clone(),
                                 project,
                                 repo.assertions,
@@ -402,7 +546,7 @@ fn main() {
                             .await?;
                         }
                         Err(err) => {
-                            log::trace!("Error cloning: {:?}", err);
+                            println!("Error cloning: {:?}", err);
                         }
                     }
                 }

From d85acceeecc21046397972c10e69586a3fa6b5b7 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Tue, 19 Sep 2023 16:13:47 -0400
Subject: [PATCH 11/14] move git2 to workspace dependency globally

---
 Cargo.lock                             | 18 ---------------
 Cargo.toml                             |  1 +
 crates/fs/Cargo.toml                   |  2 +-
 crates/git/Cargo.toml                  |  2 +-
 crates/project/Cargo.toml              |  2 +-
 crates/semantic_index/Cargo.toml       |  2 +-
 crates/semantic_index/examples/eval.rs | 31 ++------------------------
 crates/util/Cargo.toml                 |  4 ++--
 8 files changed, 9 insertions(+), 53 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 6753629177..506b104fc3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3059,8 +3059,6 @@ dependencies = [
  "libc",
  "libgit2-sys",
  "log",
- "openssl-probe",
- "openssl-sys",
  "url",
 ]
 
@@ -4023,9 +4021,7 @@ checksum = "7f3d95f6b51075fe9810a7ae22c7095f12b98005ab364d8544797a825ce946a4"
 dependencies = [
  "cc",
  "libc",
- "libssh2-sys",
  "libz-sys",
- "openssl-sys",
  "pkg-config",
 ]
 
@@ -4066,20 +4062,6 @@ dependencies = [
  "vcpkg",
 ]
 
-[[package]]
-name = "libssh2-sys"
-version = "0.2.23"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b094a36eb4b8b8c8a7b4b8ae43b2944502be3e59cd87687595cf6b0a71b3f4ca"
-dependencies = [
- "cc",
- "libc",
- "libz-sys",
- "openssl-sys",
- "pkg-config",
- "vcpkg",
-]
-
 [[package]]
 name = "libz-sys"
 version = "1.1.12"
diff --git a/Cargo.toml b/Cargo.toml
index 96070658b9..b8fa79a4e3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -116,6 +116,7 @@ toml = { version = "0.5" }
 tree-sitter = "0.20"
 unindent = { version = "0.1.7" }
 pretty_assertions = "1.3.0"
+git2 = { version = "0.15", default-features = false }
 
 tree-sitter-bash = { git = "https://github.com/tree-sitter/tree-sitter-bash", rev = "1b0321ee85701d5036c334a6f04761cdc672e64c" }
 tree-sitter-c = "0.20.1"
diff --git a/crates/fs/Cargo.toml b/crates/fs/Cargo.toml
index 7584dec21a..78146c3a9d 100644
--- a/crates/fs/Cargo.toml
+++ b/crates/fs/Cargo.toml
@@ -26,7 +26,7 @@ lazy_static.workspace = true
 parking_lot.workspace = true
 smol.workspace = true
 regex.workspace = true
-git2 = { version = "0.15", default-features = false }
+git2.workspace = true
 serde.workspace = true
 serde_derive.workspace = true
 serde_json.workspace = true
diff --git a/crates/git/Cargo.toml b/crates/git/Cargo.toml
index 8b91ee5373..72668ba766 100644
--- a/crates/git/Cargo.toml
+++ b/crates/git/Cargo.toml
@@ -20,7 +20,7 @@ smol.workspace = true
 parking_lot.workspace = true
 async-trait.workspace = true
 futures.workspace = true
-git2 = { version = "0.15", default-features = false }
+git2.workspace = true
 
 [dev-dependencies]
 unindent.workspace = true
diff --git a/crates/project/Cargo.toml b/crates/project/Cargo.toml
index 0dc76ed54a..ffea6646e9 100644
--- a/crates/project/Cargo.toml
+++ b/crates/project/Cargo.toml
@@ -75,6 +75,6 @@ lsp = { path = "../lsp", features = ["test-support"] }
 settings = { path = "../settings", features = ["test-support"] }
 util = { path = "../util", features = ["test-support"] }
 rpc = { path = "../rpc", features = ["test-support"] }
-git2 = { version = "0.15", default-features = false }
+git2.workspace = true
 tempdir.workspace = true
 unindent.workspace = true
diff --git a/crates/semantic_index/Cargo.toml b/crates/semantic_index/Cargo.toml
index 0c31f89b62..2997f5aa0b 100644
--- a/crates/semantic_index/Cargo.toml
+++ b/crates/semantic_index/Cargo.toml
@@ -51,11 +51,11 @@ project = { path = "../project", features = ["test-support"] }
 rpc = { path = "../rpc", features = ["test-support"] }
 workspace = { path = "../workspace", features = ["test-support"] }
 settings = { path = "../settings", features = ["test-support"]}
-git2 = { version = "0.15"}
 rust-embed = { version = "8.0", features = ["include-exclude"] }
 client = { path = "../client" }
 zed = { path = "../zed"}
 node_runtime = { path = "../node_runtime"}
+git2.workspace = true
 
 pretty_assertions.workspace = true
 rand.workspace = true
diff --git a/crates/semantic_index/examples/eval.rs b/crates/semantic_index/examples/eval.rs
index 546071c6f1..37da380b89 100644
--- a/crates/semantic_index/examples/eval.rs
+++ b/crates/semantic_index/examples/eval.rs
@@ -1,12 +1,10 @@
 use anyhow::{anyhow, Result};
 use client::{self, UserStore};
-use collections::HashMap;
 use git2::{Object, Oid, Repository};
-use gpui::{AppContext, AssetSource, AsyncAppContext, ModelHandle, Task};
+use gpui::{AsyncAppContext, ModelHandle, Task};
 use language::LanguageRegistry;
 use node_runtime::RealNodeRuntime;
 use project::{Project, RealFs};
-use rust_embed::RustEmbed;
 use semantic_index::embedding::OpenAIEmbeddings;
 use semantic_index::semantic_index_settings::SemanticIndexSettings;
 use semantic_index::{SearchResult, SemanticIndex};
@@ -21,28 +19,6 @@ use util::http::{self};
 use util::paths::{self, EMBEDDINGS_DIR};
 use zed::languages;
 
-#[derive(RustEmbed)]
-#[folder = "../../assets"]
-#[include = "fonts/**/*"]
-#[include = "icons/**/*"]
-#[include = "themes/**/*"]
-#[include = "sounds/**/*"]
-#[include = "*.md"]
-#[exclude = "*.DS_Store"]
-pub struct Assets;
-
-impl AssetSource for Assets {
-    fn load(&self, path: &str) -> Result<std::borrow::Cow<[u8]>> {
-        Self::get(path)
-            .map(|f| f.data)
-            .ok_or_else(|| anyhow!("could not find asset at path \"{}\"", path))
-    }
-
-    fn list(&self, path: &str) -> Vec<std::borrow::Cow<'static, str>> {
-        Self::iter().filter(|p| p.starts_with(path)).collect()
-    }
-}
-
 #[derive(Deserialize, Clone, Serialize)]
 struct EvaluationQuery {
     query: String,
@@ -455,11 +431,9 @@ async fn evaluate_repo(
 
 fn main() {
     // Launch new repo as a new Zed workspace/project
-    let app = gpui::App::new(Assets).unwrap();
+    let app = gpui::App::new(()).unwrap();
     let fs = Arc::new(RealFs);
     let http = http::client();
-    let user_settings_file_rx =
-        watch_config_file(app.background(), fs.clone(), paths::SETTINGS.clone());
     let http_client = http::client();
     init_logger();
 
@@ -475,7 +449,6 @@ fn main() {
             .set_default_settings(default_settings().as_ref(), cx)
             .unwrap();
         cx.set_global(store);
-        handle_settings_file_changes(user_settings_file_rx, cx);
 
         // Initialize Languages
         let login_shell_env_loaded = Task::ready(());
diff --git a/crates/util/Cargo.toml b/crates/util/Cargo.toml
index 8d9594fbeb..6ab76b0850 100644
--- a/crates/util/Cargo.toml
+++ b/crates/util/Cargo.toml
@@ -25,10 +25,10 @@ rust-embed.workspace = true
 tempdir = { workspace = true, optional = true }
 serde.workspace = true
 serde_json.workspace = true
-git2 = { version = "0.15", default-features = false, optional = true }
+git2 = { workspace = true, optional = true }
 dirs = "3.0"
 take-until = "0.2.0"
 
 [dev-dependencies]
 tempdir.workspace = true
-git2 = { version = "0.15", default-features = false }
+git2.workspace = true

From b57b5c0b3374071342f48b9dea1ee0e3c47edec7 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Tue, 19 Sep 2023 16:36:51 -0400
Subject: [PATCH 12/14] updated git2 to use ssl

---
 Cargo.lock | 18 ++++++++++++++++++
 Cargo.toml |  2 +-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/Cargo.lock b/Cargo.lock
index 506b104fc3..6753629177 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3059,6 +3059,8 @@ dependencies = [
  "libc",
  "libgit2-sys",
  "log",
+ "openssl-probe",
+ "openssl-sys",
  "url",
 ]
 
@@ -4021,7 +4023,9 @@ checksum = "7f3d95f6b51075fe9810a7ae22c7095f12b98005ab364d8544797a825ce946a4"
 dependencies = [
  "cc",
  "libc",
+ "libssh2-sys",
  "libz-sys",
+ "openssl-sys",
  "pkg-config",
 ]
 
@@ -4062,6 +4066,20 @@ dependencies = [
  "vcpkg",
 ]
 
+[[package]]
+name = "libssh2-sys"
+version = "0.2.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b094a36eb4b8b8c8a7b4b8ae43b2944502be3e59cd87687595cf6b0a71b3f4ca"
+dependencies = [
+ "cc",
+ "libc",
+ "libz-sys",
+ "openssl-sys",
+ "pkg-config",
+ "vcpkg",
+]
+
 [[package]]
 name = "libz-sys"
 version = "1.1.12"
diff --git a/Cargo.toml b/Cargo.toml
index b8fa79a4e3..3299986d2c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -116,7 +116,7 @@ toml = { version = "0.5" }
 tree-sitter = "0.20"
 unindent = { version = "0.1.7" }
 pretty_assertions = "1.3.0"
-git2 = { version = "0.15", default-features = false }
+git2 = { version = "0.15" }
 
 tree-sitter-bash = { git = "https://github.com/tree-sitter/tree-sitter-bash", rev = "1b0321ee85701d5036c334a6f04761cdc672e64c" }
 tree-sitter-c = "0.20.1"

From 25cb79e475deb1bfe0a3d5a6d46c9c9d06b44898 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Tue, 19 Sep 2023 18:55:15 -0400
Subject: [PATCH 13/14] remove git2 dependency for repository cloning in
 semantic_index eval

---
 Cargo.lock                             | 19 ------------------
 Cargo.toml                             |  2 +-
 crates/semantic_index/Cargo.toml       |  1 -
 crates/semantic_index/examples/eval.rs | 27 +++++++++++++-------------
 4 files changed, 14 insertions(+), 35 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 6753629177..e6dbada745 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3059,8 +3059,6 @@ dependencies = [
  "libc",
  "libgit2-sys",
  "log",
- "openssl-probe",
- "openssl-sys",
  "url",
 ]
 
@@ -4023,9 +4021,7 @@ checksum = "7f3d95f6b51075fe9810a7ae22c7095f12b98005ab364d8544797a825ce946a4"
 dependencies = [
  "cc",
  "libc",
- "libssh2-sys",
  "libz-sys",
- "openssl-sys",
  "pkg-config",
 ]
 
@@ -4066,20 +4062,6 @@ dependencies = [
  "vcpkg",
 ]
 
-[[package]]
-name = "libssh2-sys"
-version = "0.2.23"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b094a36eb4b8b8c8a7b4b8ae43b2944502be3e59cd87687595cf6b0a71b3f4ca"
-dependencies = [
- "cc",
- "libc",
- "libz-sys",
- "openssl-sys",
- "pkg-config",
- "vcpkg",
-]
-
 [[package]]
 name = "libz-sys"
 version = "1.1.12"
@@ -6756,7 +6738,6 @@ dependencies = [
  "editor",
  "env_logger 0.9.3",
  "futures 0.3.28",
- "git2",
  "globset",
  "gpui",
  "isahc",
diff --git a/Cargo.toml b/Cargo.toml
index 3299986d2c..c1876434ad 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -116,7 +116,7 @@ toml = { version = "0.5" }
 tree-sitter = "0.20"
 unindent = { version = "0.1.7" }
 pretty_assertions = "1.3.0"
-git2 = { version = "0.15" }
+git2 = { version = "0.15", default-features = false}
 
 tree-sitter-bash = { git = "https://github.com/tree-sitter/tree-sitter-bash", rev = "1b0321ee85701d5036c334a6f04761cdc672e64c" }
 tree-sitter-c = "0.20.1"
diff --git a/crates/semantic_index/Cargo.toml b/crates/semantic_index/Cargo.toml
index 2997f5aa0b..44afecb0c2 100644
--- a/crates/semantic_index/Cargo.toml
+++ b/crates/semantic_index/Cargo.toml
@@ -55,7 +55,6 @@ rust-embed = { version = "8.0", features = ["include-exclude"] }
 client = { path = "../client" }
 zed = { path = "../zed"}
 node_runtime = { path = "../node_runtime"}
-git2.workspace = true
 
 pretty_assertions.workspace = true
 rand.workspace = true
diff --git a/crates/semantic_index/examples/eval.rs b/crates/semantic_index/examples/eval.rs
index 37da380b89..be2a1e8a52 100644
--- a/crates/semantic_index/examples/eval.rs
+++ b/crates/semantic_index/examples/eval.rs
@@ -1,6 +1,5 @@
 use anyhow::{anyhow, Result};
 use client::{self, UserStore};
-use git2::{Object, Oid, Repository};
 use gpui::{AsyncAppContext, ModelHandle, Task};
 use language::LanguageRegistry;
 use node_runtime::RealNodeRuntime;
@@ -11,6 +10,7 @@ use semantic_index::{SearchResult, SemanticIndex};
 use serde::{Deserialize, Serialize};
 use settings::{default_settings, handle_settings_file_changes, watch_config_file, SettingsStore};
 use std::path::{Path, PathBuf};
+use std::process::Command;
 use std::sync::Arc;
 use std::time::{Duration, Instant};
 use std::{cmp, env, fs};
@@ -95,23 +95,22 @@ fn clone_repo(repo_eval: RepoEval) -> anyhow::Result<(String, PathBuf)> {
         .ok_or(anyhow!("path canonicalization failed"))?
         .parent()
         .unwrap()
-        .join(TMP_REPO_PATH)
-        .join(&repo_name);
+        .join(TMP_REPO_PATH);
 
     // Delete Clone Path if already exists
     let _ = fs::remove_dir_all(&clone_path);
+    let _ = fs::create_dir(&clone_path);
 
-    // Clone in Repo
-    git2::build::RepoBuilder::new()
-        // .branch(repo_eval.sha.as_str())
-        .clone(repo_eval.repo.as_str(), clone_path.as_path())?;
-
-    let repo: Repository = Repository::open(clone_path.clone())?;
-    let obj: Object = repo
-        .find_commit(Oid::from_str(repo_eval.commit.as_str())?)?
-        .into_object();
-    repo.checkout_tree(&obj, None)?;
-    repo.set_head_detached(obj.id())?;
+    let _ = Command::new("git")
+        .args(["clone", repo_eval.repo.as_str()])
+        .current_dir(clone_path.clone())
+        .output()?;
+    // Update clone path to be new directory housing the repo.
+    let clone_path = clone_path.join(repo_name.clone());
+    let _ = Command::new("git")
+        .args(["checkout", repo_eval.commit.as_str()])
+        .current_dir(clone_path.clone())
+        .output()?;
 
     Ok((repo_name, clone_path))
 }

From 11b3bfdc99a9955b9fce62cbc44cec273f987bcc Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Tue, 19 Sep 2023 19:05:26 -0400
Subject: [PATCH 14/14] fix warnings

---
 crates/semantic_index/examples/eval.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/crates/semantic_index/examples/eval.rs b/crates/semantic_index/examples/eval.rs
index be2a1e8a52..15406cf63e 100644
--- a/crates/semantic_index/examples/eval.rs
+++ b/crates/semantic_index/examples/eval.rs
@@ -8,7 +8,7 @@ use semantic_index::embedding::OpenAIEmbeddings;
 use semantic_index::semantic_index_settings::SemanticIndexSettings;
 use semantic_index::{SearchResult, SemanticIndex};
 use serde::{Deserialize, Serialize};
-use settings::{default_settings, handle_settings_file_changes, watch_config_file, SettingsStore};
+use settings::{default_settings, SettingsStore};
 use std::path::{Path, PathBuf};
 use std::process::Command;
 use std::sync::Arc;
@@ -16,7 +16,7 @@ use std::time::{Duration, Instant};
 use std::{cmp, env, fs};
 use util::channel::{RELEASE_CHANNEL, RELEASE_CHANNEL_NAME};
 use util::http::{self};
-use util::paths::{self, EMBEDDINGS_DIR};
+use util::paths::EMBEDDINGS_DIR;
 use zed::languages;
 
 #[derive(Deserialize, Clone, Serialize)]
@@ -134,7 +134,7 @@ fn get_hits(
 
     let mut hits = Vec::new();
     for result in search_results {
-        let (path, start_row, end_row) = result.buffer.read_with(cx, |buffer, cx| {
+        let (path, start_row, end_row) = result.buffer.read_with(cx, |buffer, _cx| {
             let path = buffer.file().unwrap().path().to_path_buf();
             let start_row = buffer.offset_to_point(result.range.start.offset).row;
             let end_row = buffer.offset_to_point(result.range.end.offset).row;
@@ -423,7 +423,7 @@ async fn evaluate_repo(
     }
 
     repo_metrics.summarize();
-    repo_metrics.save(repo_name);
+    let _ = repo_metrics.save(repo_name);
 
     anyhow::Ok(repo_metrics)
 }
@@ -508,7 +508,7 @@ fn main() {
                                 })
                                 .await;
 
-                            let repo_metrics = evaluate_repo(
+                            let _ = evaluate_repo(
                                 repo_name,
                                 semantic_index.clone(),
                                 project,