Add an eval binary that evaluates our semantic index against CodeSearchNet (#17375)

This PR is the beginning of an evaluation framework for our AI features.
Right now, we're evaluating our semantic search feature against the
[CodeSearchNet](https://github.com/github/CodeSearchNet) code search
dataset. This dataset is very limited (for the most part, only 1 known
good search result per repo) but it has surfaced some problems with our
search already.

Release Notes:

- N/A

---------

Co-authored-by: Jason <jason@zed.dev>
Co-authored-by: Jason Mancuso <7891333+jvmncs@users.noreply.github.com>
Co-authored-by: Nathan <nathan@zed.dev>
Co-authored-by: Richard <richard@zed.dev>
This commit is contained in:
Max Brunsfeld 2024-09-17 12:44:33 -07:00 committed by GitHub
parent 06a13c2983
commit d3d3a093b4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 881 additions and 144 deletions

View file

@ -8,14 +8,12 @@ use assistant_slash_command::{ArgumentCompletion, SlashCommandOutputSection};
use feature_flags::FeatureFlag;
use gpui::{AppContext, Task, WeakView};
use language::{CodeLabel, LineEnding, LspAdapterDelegate};
use semantic_index::SemanticDb;
use semantic_index::{LoadedSearchResult, SemanticDb};
use std::{
fmt::Write,
path::PathBuf,
sync::{atomic::AtomicBool, Arc},
};
use ui::{prelude::*, IconName};
use util::ResultExt;
use workspace::Workspace;
pub(crate) struct SearchSlashCommandFeatureFlag;
@ -107,52 +105,28 @@ impl SlashCommand for SearchSlashCommand {
})?
.await?;
let mut loaded_results = Vec::new();
for result in results {
let (full_path, file_content) =
result.worktree.read_with(&cx, |worktree, _cx| {
let entry_abs_path = worktree.abs_path().join(&result.path);
let mut entry_full_path = PathBuf::from(worktree.root_name());
entry_full_path.push(&result.path);
let file_content = async {
let entry_abs_path = entry_abs_path;
fs.load(&entry_abs_path).await
};
(entry_full_path, file_content)
})?;
if let Some(file_content) = file_content.await.log_err() {
loaded_results.push((result, full_path, file_content));
}
}
let loaded_results = SemanticDb::load_results(results, &fs, &cx).await?;
let output = cx
.background_executor()
.spawn(async move {
let mut text = format!("Search results for {query}:\n");
let mut sections = Vec::new();
for (result, full_path, file_content) in loaded_results {
let range_start = result.range.start.min(file_content.len());
let range_end = result.range.end.min(file_content.len());
let start_row = file_content[0..range_start].matches('\n').count() as u32;
let end_row = file_content[0..range_end].matches('\n').count() as u32;
let start_line_byte_offset = file_content[0..range_start]
.rfind('\n')
.map(|pos| pos + 1)
.unwrap_or_default();
let end_line_byte_offset = file_content[range_end..]
.find('\n')
.map(|pos| range_end + pos)
.unwrap_or_else(|| file_content.len());
for LoadedSearchResult {
path,
range,
full_path,
file_content,
row_range,
} in loaded_results
{
let section_start_ix = text.len();
text.push_str(&codeblock_fence_for_path(
Some(&result.path),
Some(start_row..end_row),
Some(&path),
Some(row_range.clone()),
));
let mut excerpt =
file_content[start_line_byte_offset..end_line_byte_offset].to_string();
let mut excerpt = file_content[range].to_string();
LineEnding::normalize(&mut excerpt);
text.push_str(&excerpt);
writeln!(text, "\n```\n").unwrap();
@ -161,7 +135,7 @@ impl SlashCommand for SearchSlashCommand {
section_start_ix..section_end_ix,
Some(&full_path),
false,
Some(start_row + 1..end_row + 1),
Some(row_range.start() + 1..row_range.end() + 1),
));
}