progress on eval
This commit is contained in:
parent
d4fbe99052
commit
6f29582fb0
3 changed files with 79 additions and 5 deletions
|
@ -12,7 +12,7 @@
|
||||||
{
|
{
|
||||||
"query": "What version of the openai package is active?",
|
"query": "What version of the openai package is active?",
|
||||||
"matches": [
|
"matches": [
|
||||||
"pyproject.toml:14"
|
"pyproject.toml:14"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -48,7 +48,7 @@
|
||||||
"query": "Handle conflict when numerous actions occur on the same symbol",
|
"query": "Handle conflict when numerous actions occur on the same symbol",
|
||||||
"matches": [
|
"matches": [
|
||||||
"cli/src/generate/build_tables/build_parse_table.rs:363",
|
"cli/src/generate/build_tables/build_parse_table.rs:363",
|
||||||
"cli/src/generate/build_tables/build_parse_table.rs:442",
|
"cli/src/generate/build_tables/build_parse_table.rs:442"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,19 +1,36 @@
|
||||||
use git2::{Object, Oid, Repository};
|
use git2::{Object, Oid, Repository};
|
||||||
|
use semantic_index::SearchResult;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::{env, fs};
|
use std::{env, fs};
|
||||||
|
|
||||||
#[derive(Deserialize, Clone)]
|
#[derive(Deserialize, Clone)]
|
||||||
struct QueryMatches {
|
struct EvaluationQuery {
|
||||||
query: String,
|
query: String,
|
||||||
matches: Vec<String>,
|
matches: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl EvaluationQuery {
|
||||||
|
fn match_pairs(&self) -> Vec<(PathBuf, usize)> {
|
||||||
|
let mut pairs = Vec::new();
|
||||||
|
for match_identifier in self.matches {
|
||||||
|
let match_parts = match_identifier.split(":");
|
||||||
|
|
||||||
|
if let Some(file_path) = match_parts.next() {
|
||||||
|
if let Some(row_number) = match_parts.next() {
|
||||||
|
pairs.push((PathBuf::from(file_path), from_str::<usize>(row_number)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pairs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Deserialize, Clone)]
|
#[derive(Deserialize, Clone)]
|
||||||
struct RepoEval {
|
struct RepoEval {
|
||||||
repo: String,
|
repo: String,
|
||||||
commit: String,
|
commit: String,
|
||||||
assertions: Vec<QueryMatches>,
|
assertions: Vec<EvaluationQuery>,
|
||||||
}
|
}
|
||||||
|
|
||||||
const TMP_REPO_PATH: &str = "./target/eval_repos";
|
const TMP_REPO_PATH: &str = "./target/eval_repos";
|
||||||
|
@ -77,7 +94,60 @@ fn clone_repo(repo_eval: RepoEval) -> anyhow::Result<PathBuf> {
|
||||||
Ok(clone_path)
|
Ok(clone_path)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Computes the Discounted Cumulative Gain (DCG) of a ranked relevance vector.
///
/// The entry at zero-based rank `i` contributes `hit / log2(i + 2)`, so relevance found
/// further down the ranking is discounted more heavily. The total is printed to stdout
/// before it is returned.
fn dcg(hits: Vec<usize>) -> f32 {
    // Fold from an explicit 0.0 so an empty ranking yields exactly 0.0.
    let total = hits
        .iter()
        .enumerate()
        .fold(0.0_f32, |running, (rank, &relevance)| {
            running + relevance as f32 / (2.0 + rank as f32).log2()
        });

    println!("DCG: {:?}", total);
    total
}
|
||||||
|
|
||||||
|
fn evaluate_ndcg(eval_query: EvaluationQuery, search_results: Vec<SearchResult>, k: usize) -> f32 {
|
||||||
|
|
||||||
|
// NDCG or Normalized Discounted Cumulative Gain, is determined by comparing the relevance of
|
||||||
|
// items returned by the search engine relative to the hypothetical ideal.
|
||||||
|
// Relevance is represented as a series of booleans, in which each search result returned
|
||||||
|
// is identified as being inside the test set of matches (1) or not (0).
|
||||||
|
|
||||||
|
// For example, if result 1, 3 and 5 match the 3 relevant results provided
|
||||||
|
// actual dcg is calculated against a vector of [1, 0, 1, 0, 1]
|
||||||
|
// whereas ideal dcg is calculated against a vector of [1, 1, 1, 0, 0]
|
||||||
|
// as this ideal vector assumes the 3 relevant results provided were returned first
|
||||||
|
// normalized dcg is then calculated as actual dcg / ideal dcg.
|
||||||
|
|
||||||
|
// NDCG ranges from 0 to 1, which higher values indicating better performance
|
||||||
|
// Commonly NDCG is expressed as NDCG@k, in which k represents the metric calculated
|
||||||
|
// including only the top k values returned.
|
||||||
|
// The @k metrics can help you identify, at what point does the relevant results start to fall off.
|
||||||
|
// Ie. a NDCG@1 of 0.9 and a NDCG@3 of 0.5 may indicate that the first result returned in usually
|
||||||
|
// very high quality, whereas rank results quickly drop off after the first result.
|
||||||
|
|
||||||
|
let ideal = vec![1; cmp::min(eval_query.matches.len(), k)];
|
||||||
|
|
||||||
|
return dcg(hits) / dcg(ideal);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Scores one evaluation query against the returned search results, limited to the top `k`.
///
/// NOTE(review): judging by the name and its sibling `evaluate_ndcg`, this is presumably
/// meant to compute Mean Average Precision (MAP@k) — confirm before implementing.
///
/// # Panics
///
/// Always panics for now: the metric is not implemented yet. The explicit stub replaces
/// an empty body that could not satisfy the declared `f32` return type.
fn evaluate_map(eval_query: EvaluationQuery, search_results: Vec<SearchResult>, k: usize) -> f32 {
    todo!("evaluate_map is not implemented yet")
}
|
||||||
|
|
||||||
|
fn evaluate_repo(repo_eval: RepoEval, clone_path: PathBuf) {
|
||||||
|
|
||||||
|
// Launch new repo as a new Zed workspace/project
|
||||||
|
// Index the project
|
||||||
|
// Search each eval_query
|
||||||
|
// Calculate Statistics
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
|
|
||||||
|
// zed/main.rs
|
||||||
|
// creating an app and running it, gives you the context.
|
||||||
|
// create a project, find_or_create_local_worktree.
|
||||||
|
|
||||||
if let Ok(repo_evals) = parse_eval() {
|
if let Ok(repo_evals) = parse_eval() {
|
||||||
for repo in repo_evals {
|
for repo in repo_evals {
|
||||||
let cloned = clone_repo(repo.clone());
|
let cloned = clone_repo(repo.clone());
|
||||||
|
@ -85,8 +155,12 @@ fn main() {
|
||||||
Ok(clone_path) => {
|
Ok(clone_path) => {
|
||||||
println!(
|
println!(
|
||||||
"Cloned {:?} @ {:?} into {:?}",
|
"Cloned {:?} @ {:?} into {:?}",
|
||||||
repo.repo, repo.commit, clone_path
|
repo.repo, repo.commit, &clone_path
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Evaluate Repo
|
||||||
|
evaluate_repo(repo, clone_path);
|
||||||
|
|
||||||
}
|
}
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
println!("Error Cloning: {:?}", err);
|
println!("Error Cloning: {:?}", err);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue