Updated vector_store to reindex on save after a timed delay

This commit is contained in:
KCaverly 2023-07-04 14:42:12 -04:00
parent e45d3a0a63
commit b6520a8f1d
2 changed files with 57 additions and 53 deletions

View file

@ -33,6 +33,8 @@ use util::{
}; };
use workspace::{Workspace, WorkspaceCreated}; use workspace::{Workspace, WorkspaceCreated};
const REINDEXING_DELAY: u64 = 30;
#[derive(Debug)] #[derive(Debug)]
pub struct Document { pub struct Document {
pub offset: usize, pub offset: usize,
@ -58,10 +60,10 @@ pub fn init(
let vector_store = VectorStore::new( let vector_store = VectorStore::new(
fs, fs,
db_file_path, db_file_path,
Arc::new(embedding::DummyEmbeddings {}), // Arc::new(embedding::DummyEmbeddings {}),
// Arc::new(OpenAIEmbeddings { Arc::new(OpenAIEmbeddings {
// client: http_client, client: http_client,
// }), }),
language_registry, language_registry,
cx.clone(), cx.clone(),
) )
@ -121,7 +123,9 @@ pub struct VectorStore {
embedding_provider: Arc<dyn EmbeddingProvider>, embedding_provider: Arc<dyn EmbeddingProvider>,
language_registry: Arc<LanguageRegistry>, language_registry: Arc<LanguageRegistry>,
db_update_tx: channel::Sender<DbWrite>, db_update_tx: channel::Sender<DbWrite>,
paths_tx: channel::Sender<(i64, PathBuf, Arc<Language>, SystemTime)>,
_db_update_task: Task<()>, _db_update_task: Task<()>,
_paths_update_task: Task<()>,
projects: HashMap<WeakModelHandle<Project>, ProjectState>, projects: HashMap<WeakModelHandle<Project>, ProjectState>,
} }
@ -203,14 +207,50 @@ impl VectorStore {
} }
}); });
let (paths_tx, paths_rx) =
channel::unbounded::<(i64, PathBuf, Arc<Language>, SystemTime)>();
let fs_clone = fs.clone();
let db_update_tx_clone = db_update_tx.clone();
let embedding_provider_clone = embedding_provider.clone();
let _paths_update_task = cx.background().spawn(async move {
let mut parser = Parser::new();
let mut cursor = QueryCursor::new();
while let Ok((worktree_id, file_path, language, mtime)) = paths_rx.recv().await {
log::info!("Parsing File: {:?}", &file_path);
if let Some(indexed_file) = Self::index_file(
&mut cursor,
&mut parser,
embedding_provider_clone.as_ref(),
&fs_clone,
language,
file_path,
mtime,
)
.await
.log_err()
{
db_update_tx_clone
.try_send(DbWrite::InsertFile {
worktree_id,
indexed_file,
})
.unwrap();
}
}
});
Self { Self {
fs, fs,
database_url, database_url,
db_update_tx, db_update_tx,
paths_tx,
embedding_provider, embedding_provider,
language_registry, language_registry,
projects: HashMap::new(), projects: HashMap::new(),
_db_update_task, _db_update_task,
_paths_update_task,
} }
})) }))
} }
@ -315,9 +355,9 @@ impl VectorStore {
let fs = self.fs.clone(); let fs = self.fs.clone();
let language_registry = self.language_registry.clone(); let language_registry = self.language_registry.clone();
let embedding_provider = self.embedding_provider.clone();
let database_url = self.database_url.clone(); let database_url = self.database_url.clone();
let db_update_tx = self.db_update_tx.clone(); let db_update_tx = self.db_update_tx.clone();
let paths_tx = self.paths_tx.clone();
cx.spawn(|this, mut cx| async move { cx.spawn(|this, mut cx| async move {
futures::future::join_all(worktree_scans_complete).await; futures::future::join_all(worktree_scans_complete).await;
@ -356,8 +396,6 @@ impl VectorStore {
}) })
.await?; .await?;
let (paths_tx, paths_rx) =
channel::unbounded::<(i64, PathBuf, Arc<Language>, SystemTime)>();
cx.background() cx.background()
.spawn({ .spawn({
let db_ids_by_worktree_id = db_ids_by_worktree_id.clone(); let db_ids_by_worktree_id = db_ids_by_worktree_id.clone();
@ -415,42 +453,8 @@ impl VectorStore {
}) })
.detach(); .detach();
cx.background()
.scoped(|scope| {
for _ in 0..cx.background().num_cpus() {
scope.spawn(async {
let mut parser = Parser::new();
let mut cursor = QueryCursor::new();
while let Ok((worktree_id, file_path, language, mtime)) =
paths_rx.recv().await
{
if let Some(indexed_file) = Self::index_file(
&mut cursor,
&mut parser,
embedding_provider.as_ref(),
&fs,
language,
file_path,
mtime,
)
.await
.log_err()
{
db_update_tx
.try_send(DbWrite::InsertFile {
worktree_id,
indexed_file,
})
.unwrap();
}
}
});
}
})
.await;
this.update(&mut cx, |this, cx| { this.update(&mut cx, |this, cx| {
let _subscription = cx.subscribe(&project, |this, project, event, cx| { let _subscription = cx.subscribe(&project, |this, project, event, _cx| {
if let Some(project_state) = this.projects.get(&project.downgrade()) { if let Some(project_state) = this.projects.get(&project.downgrade()) {
let worktree_db_ids = project_state.worktree_db_ids.clone(); let worktree_db_ids = project_state.worktree_db_ids.clone();
@ -488,6 +492,7 @@ impl VectorStore {
} }
let file_mtimes = file_mtimes.unwrap(); let file_mtimes = file_mtimes.unwrap();
let paths_tx = this.paths_tx.clone();
smol::block_on(async move { smol::block_on(async move {
for change in changes.into_iter() { for change in changes.into_iter() {
@ -504,7 +509,6 @@ impl VectorStore {
{ {
continue; continue;
} }
log::info!("Language found: {:?}: ", language.name());
// TODO: Make this a bit more defensive // TODO: Make this a bit more defensive
let modified_time = let modified_time =
@ -515,7 +519,7 @@ impl VectorStore {
existing_time.map_or(false, |existing_time| { existing_time.map_or(false, |existing_time| {
if &modified_time != existing_time if &modified_time != existing_time
&& existing_time.elapsed().unwrap().as_secs() && existing_time.elapsed().unwrap().as_secs()
> 30 > REINDEXING_DELAY
{ {
false false
} else { } else {
@ -525,14 +529,14 @@ impl VectorStore {
if !already_stored { if !already_stored {
log::info!("Need to reindex: {:?}", &change_path); log::info!("Need to reindex: {:?}", &change_path);
// paths_tx paths_tx
// .try_send(( .try_send((
// worktree_db_id, worktree_db_id,
// change_path.to_path_buf(), change_path.to_path_buf(),
// language, language,
// modified_time, modified_time,
// )) ))
// .unwrap(); .unwrap();
} }
} }
} }

View file

@ -5,7 +5,7 @@ use anyhow::Result;
use async_trait::async_trait; use async_trait::async_trait;
use gpui::{Task, TestAppContext}; use gpui::{Task, TestAppContext};
use language::{Language, LanguageConfig, LanguageRegistry}; use language::{Language, LanguageConfig, LanguageRegistry};
use project::{FakeFs, Fs, Project}; use project::{FakeFs, Project};
use rand::Rng; use rand::Rng;
use serde_json::json; use serde_json::json;
use unindent::Unindent; use unindent::Unindent;