add glob filtering functionality to semantic search
commit e02d6bc0d4 (parent efe973ebe2)
6 changed files with 149 additions and 22 deletions
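For context, the filtering added below is built on the globset crate: a pattern is compiled once via Glob::new(...).compile_matcher() into a reusable GlobMatcher, which is then cheap to match against many candidate paths. A minimal standalone sketch of that API (not part of this diff):

use globset::Glob;

fn main() {
    // Compile once, match many times; `compile_matcher` returns a GlobMatcher.
    let rust_files = Glob::new("*.rs").unwrap().compile_matcher();

    // By default globset's `*` also matches path separators, which is why
    // "*.rs" matches nested paths in the tests further down.
    assert!(rust_files.is_match("src/db.rs"));
    assert!(!rust_files.is_match("Cargo.toml"));
}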
Cargo.lock (generated)
@@ -6477,6 +6477,7 @@ dependencies = [
  "editor",
  "env_logger 0.9.3",
  "futures 0.3.28",
+ "globset",
  "gpui",
  "isahc",
  "language",
@@ -187,14 +187,26 @@ impl ProjectSearch {
         cx.notify();
     }

-    fn semantic_search(&mut self, query: String, cx: &mut ModelContext<Self>) {
+    fn semantic_search(
+        &mut self,
+        query: String,
+        include_files: Vec<GlobMatcher>,
+        exclude_files: Vec<GlobMatcher>,
+        cx: &mut ModelContext<Self>,
+    ) {
         let search = SemanticIndex::global(cx).map(|index| {
             index.update(cx, |semantic_index, cx| {
-                semantic_index.search_project(self.project.clone(), query.clone(), 10, cx)
+                semantic_index.search_project(
+                    self.project.clone(),
+                    query.clone(),
+                    10,
+                    include_files,
+                    exclude_files,
+                    cx,
+                )
             })
         });
         self.search_id += 1;
-        // self.active_query = Some(query);
         self.match_ranges.clear();
         self.pending_search = Some(cx.spawn(|this, mut cx| async move {
             let results = search?.await.log_err()?;
@@ -638,8 +650,13 @@ impl ProjectSearchView {
             }

             let query = self.query_editor.read(cx).text(cx);
-            self.model
-                .update(cx, |model, cx| model.semantic_search(query, cx));
+            if let Some((included_files, exclude_files)) =
+                self.get_included_and_excluded_globsets(cx)
+            {
+                self.model.update(cx, |model, cx| {
+                    model.semantic_search(query, included_files, exclude_files, cx)
+                });
+            }
             return;
         }
@@ -648,6 +665,39 @@ impl ProjectSearchView {
         }
     }

+    fn get_included_and_excluded_globsets(
+        &mut self,
+        cx: &mut ViewContext<Self>,
+    ) -> Option<(Vec<GlobMatcher>, Vec<GlobMatcher>)> {
+        let text = self.query_editor.read(cx).text(cx);
+        let included_files =
+            match Self::load_glob_set(&self.included_files_editor.read(cx).text(cx)) {
+                Ok(included_files) => {
+                    self.panels_with_errors.remove(&InputPanel::Include);
+                    included_files
+                }
+                Err(_e) => {
+                    self.panels_with_errors.insert(InputPanel::Include);
+                    cx.notify();
+                    return None;
+                }
+            };
+        let excluded_files =
+            match Self::load_glob_set(&self.excluded_files_editor.read(cx).text(cx)) {
+                Ok(excluded_files) => {
+                    self.panels_with_errors.remove(&InputPanel::Exclude);
+                    excluded_files
+                }
+                Err(_e) => {
+                    self.panels_with_errors.insert(InputPanel::Exclude);
+                    cx.notify();
+                    return None;
+                }
+            };
+
+        Some((included_files, excluded_files))
+    }
+
     fn build_search_query(&mut self, cx: &mut ViewContext<Self>) -> Option<SearchQuery> {
         let text = self.query_editor.read(cx).text(cx);
         let included_files =
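The helper `Self::load_glob_set` used above predates this commit and its body is not shown in the diff. Purely as a hypothetical sketch, assuming it splits the filter editor's text on commas and compiles each pattern, it might look something like:

use globset::{Glob, GlobMatcher};

// Hypothetical sketch only; the real load_glob_set is not part of this diff.
// A single malformed pattern yields Err, which the caller above turns into
// an error state on the corresponding input panel.
fn load_glob_set(text: &str) -> anyhow::Result<Vec<GlobMatcher>> {
    text.split(',')
        .map(str::trim)
        .filter(|pattern| !pattern.is_empty())
        .map(|pattern| Ok(Glob::new(pattern)?.compile_matcher()))
        .collect()
}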
@ -37,6 +37,7 @@ tiktoken-rs = "0.5.0"
|
||||||
parking_lot.workspace = true
|
parking_lot.workspace = true
|
||||||
rand.workspace = true
|
rand.workspace = true
|
||||||
schemars.workspace = true
|
schemars.workspace = true
|
||||||
|
globset.workspace = true
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
gpui = { path = "../gpui", features = ["test-support"] }
|
gpui = { path = "../gpui", features = ["test-support"] }
|
||||||
|
|
|
@@ -1,5 +1,6 @@
 use crate::{parsing::Document, SEMANTIC_INDEX_VERSION};
 use anyhow::{anyhow, Context, Result};
+use globset::{Glob, GlobMatcher};
 use project::Fs;
 use rpc::proto::Timestamp;
 use rusqlite::{
@@ -252,18 +253,30 @@ impl VectorDatabase {
         worktree_ids: &[i64],
         query_embedding: &Vec<f32>,
         limit: usize,
+        include_globs: Vec<GlobMatcher>,
+        exclude_globs: Vec<GlobMatcher>,
     ) -> Result<Vec<(i64, PathBuf, Range<usize>)>> {
         let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1);
-        self.for_each_document(&worktree_ids, |id, embedding| {
-            let similarity = dot(&embedding, &query_embedding);
-            let ix = match results
-                .binary_search_by(|(_, s)| similarity.partial_cmp(&s).unwrap_or(Ordering::Equal))
-            {
-                Ok(ix) => ix,
-                Err(ix) => ix,
-            };
-            results.insert(ix, (id, similarity));
-            results.truncate(limit);
+        self.for_each_document(&worktree_ids, |relative_path, id, embedding| {
+            if (include_globs.is_empty()
+                || include_globs
+                    .iter()
+                    .any(|include_glob| include_glob.is_match(relative_path.clone())))
+                && (exclude_globs.is_empty()
+                    || !exclude_globs
+                        .iter()
+                        .any(|exclude_glob| exclude_glob.is_match(relative_path.clone())))
+            {
+                let similarity = dot(&embedding, &query_embedding);
+                let ix = match results.binary_search_by(|(_, s)| {
+                    similarity.partial_cmp(&s).unwrap_or(Ordering::Equal)
+                }) {
+                    Ok(ix) => ix,
+                    Err(ix) => ix,
+                };
+                results.insert(ix, (id, similarity));
+                results.truncate(limit);
+            }
         })?;

         let ids = results.into_iter().map(|(id, _)| id).collect::<Vec<_>>();
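Restated, the gate added to `top_k_search` is a pure predicate over the file's relative path: an empty include list admits everything, otherwise at least one include glob must match, and any exclude match rejects the document, so a path matched by both lists is dropped. An equivalent standalone sketch (the function name is illustrative, not from the diff):

use globset::GlobMatcher;

// Equivalent of the condition in top_k_search: pass when included and not excluded.
// An empty include list means "include everything"; an empty exclude list excludes nothing.
fn passes_filters(path: &str, include: &[GlobMatcher], exclude: &[GlobMatcher]) -> bool {
    let included = include.is_empty() || include.iter().any(|g| g.is_match(path));
    let excluded = exclude.iter().any(|g| g.is_match(path));
    included && !excluded
}

Two side notes: the filter runs before a document enters the candidate list, so `limit` applies to the filtered set rather than being used up by rejected matches; and since GlobMatcher::is_match takes impl AsRef<Path>, matching against &relative_path would avoid the clone() calls in the diff.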
@@ -273,12 +286,12 @@ impl VectorDatabase {
     fn for_each_document(
         &self,
         worktree_ids: &[i64],
-        mut f: impl FnMut(i64, Vec<f32>),
+        mut f: impl FnMut(String, i64, Vec<f32>),
     ) -> Result<()> {
         let mut query_statement = self.db.prepare(
            "
            SELECT
-               documents.id, documents.embedding
+               files.relative_path, documents.id, documents.embedding
            FROM
                documents, files
            WHERE
@@ -289,10 +302,10 @@ impl VectorDatabase {

         query_statement
             .query_map(params![ids_to_sql(worktree_ids)], |row| {
-                Ok((row.get(0)?, row.get::<_, Embedding>(1)?))
+                Ok((row.get(0)?, row.get(1)?, row.get::<_, Embedding>(2)?))
             })?
             .filter_map(|row| row.ok())
-            .for_each(|(id, embedding)| f(id, embedding.0));
+            .for_each(|(relative_path, id, embedding)| f(relative_path, id, embedding.0));
         Ok(())
     }
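A note on the rusqlite change above: `row.get(i)` is positional and type-driven, so prepending `files.relative_path` to the SELECT shifts id and embedding to indices 1 and 2, and the tuple destructured in `for_each` gains a matching third field. A minimal sketch of the same pattern against a throwaway in-memory table (the schema here is illustrative, not Zed's):

use rusqlite::Connection;

fn main() -> rusqlite::Result<()> {
    // Illustrative schema; Zed's real query joins its `files` and `documents` tables.
    let db = Connection::open_in_memory()?;
    db.execute_batch(
        "CREATE TABLE docs (relative_path TEXT, id INTEGER);
         INSERT INTO docs VALUES ('src/main.rs', 1);",
    )?;

    let mut stmt = db.prepare("SELECT relative_path, id FROM docs")?;
    let rows = stmt.query_map([], |row| {
        // Positional extraction: column 0 is the path, column 1 the id.
        Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
    })?;

    for (relative_path, id) in rows.filter_map(|r| r.ok()) {
        println!("{relative_path}: {id}");
    }
    Ok(())
}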
@@ -11,6 +11,7 @@ use anyhow::{anyhow, Result};
 use db::VectorDatabase;
 use embedding::{EmbeddingProvider, OpenAIEmbeddings};
 use futures::{channel::oneshot, Future};
+use globset::{Glob, GlobMatcher};
 use gpui::{AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, WeakModelHandle};
 use language::{Anchor, Buffer, Language, LanguageRegistry};
 use parking_lot::Mutex;
@@ -624,6 +625,8 @@ impl SemanticIndex {
         project: ModelHandle<Project>,
         phrase: String,
         limit: usize,
+        include_globs: Vec<GlobMatcher>,
+        exclude_globs: Vec<GlobMatcher>,
         cx: &mut ModelContext<Self>,
     ) -> Task<Result<Vec<SearchResult>>> {
         let project_state = if let Some(state) = self.projects.get(&project.downgrade()) {
@@ -657,12 +660,16 @@ impl SemanticIndex {
                     .next()
                     .unwrap();

-                database.top_k_search(&worktree_db_ids, &phrase_embedding, limit)
+                database.top_k_search(
+                    &worktree_db_ids,
+                    &phrase_embedding,
+                    limit,
+                    include_globs,
+                    exclude_globs,
+                )
             })
             .await?;

-        dbg!(&documents);
-
         let mut tasks = Vec::new();
         let mut ranges = Vec::new();
         let weak_project = project.downgrade();
@@ -7,6 +7,7 @@ use crate::{
 };
 use anyhow::Result;
 use async_trait::async_trait;
+use globset::Glob;
 use gpui::{Task, TestAppContext};
 use language::{Language, LanguageConfig, LanguageRegistry, ToOffset};
 use pretty_assertions::assert_eq;
@@ -96,7 +97,7 @@ async fn test_semantic_index(cx: &mut TestAppContext) {

     let search_results = store
         .update(cx, |store, cx| {
-            store.search_project(project.clone(), "aaaa".to_string(), 5, cx)
+            store.search_project(project.clone(), "aaaa".to_string(), 5, vec![], vec![], cx)
         })
         .await
         .unwrap();
@@ -109,6 +110,60 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
         );
     });

+    // Test Include Files Functionality
+    let include_files = vec![Glob::new("*.rs").unwrap().compile_matcher()];
+    let exclude_files = vec![Glob::new("*.rs").unwrap().compile_matcher()];
+    let search_results = store
+        .update(cx, |store, cx| {
+            store.search_project(
+                project.clone(),
+                "aaaa".to_string(),
+                5,
+                include_files,
+                vec![],
+                cx,
+            )
+        })
+        .await
+        .unwrap();
+
+    for res in &search_results {
+        res.buffer.read_with(cx, |buffer, _cx| {
+            assert!(buffer
+                .file()
+                .unwrap()
+                .path()
+                .to_str()
+                .unwrap()
+                .ends_with("rs"));
+        });
+    }
+
+    let search_results = store
+        .update(cx, |store, cx| {
+            store.search_project(
+                project.clone(),
+                "aaaa".to_string(),
+                5,
+                vec![],
+                exclude_files,
+                cx,
+            )
+        })
+        .await
+        .unwrap();
+
+    for res in &search_results {
+        res.buffer.read_with(cx, |buffer, _cx| {
+            assert!(!buffer
+                .file()
+                .unwrap()
+                .path()
+                .to_str()
+                .unwrap()
+                .ends_with("rs"));
+        });
+    }
     fs.save(
         "/the-root/src/file2.rs".as_ref(),
         &"
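Worth noting: the test compiles "*.rs" into both an include and an exclude matcher but exercises them in separate calls. Under the condition in `top_k_search`, passing the same pattern on both sides would drop every matching document, because exclusion is applied after inclusion. A standalone illustration with plain globset:

use globset::Glob;

fn main() {
    let rs = Glob::new("*.rs").unwrap().compile_matcher();
    let path = "src/file2.rs";

    let included = rs.is_match(path); // include list ["*.rs"] admits the path...
    let excluded = rs.is_match(path); // ...but exclude list ["*.rs"] rejects it again.
    assert!(included);
    assert!(excluded);

    // Net effect in top_k_search: included && !excluded is false, so the
    // document is filtered out of the candidate set.
    assert!(!(included && !excluded));
}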