Flush embeddings queue when no files are parsed for 250 milliseconds

Co-authored-by: Antonio <antonio@zed.dev>
This commit is contained in:
Author: KCaverly, 2023-09-01 11:24:08 -04:00
parent c4db914f0a
commit 524533cfb2
2 changed files with 33 additions and 29 deletions

View file

@ -12,6 +12,7 @@ use anyhow::{anyhow, Result};
use db::VectorDatabase; use db::VectorDatabase;
use embedding::{Embedding, EmbeddingProvider, OpenAIEmbeddings}; use embedding::{Embedding, EmbeddingProvider, OpenAIEmbeddings};
use embedding_queue::{EmbeddingQueue, FileToEmbed}; use embedding_queue::{EmbeddingQueue, FileToEmbed};
use futures::{FutureExt, StreamExt};
use gpui::{AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, WeakModelHandle}; use gpui::{AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, WeakModelHandle};
use language::{Anchor, Buffer, Language, LanguageRegistry}; use language::{Anchor, Buffer, Language, LanguageRegistry};
use parking_lot::Mutex; use parking_lot::Mutex;
@ -39,6 +40,7 @@ use workspace::WorkspaceCreated;
const SEMANTIC_INDEX_VERSION: usize = 8; const SEMANTIC_INDEX_VERSION: usize = 8;
const BACKGROUND_INDEXING_DELAY: Duration = Duration::from_secs(600); const BACKGROUND_INDEXING_DELAY: Duration = Duration::from_secs(600);
const EMBEDDING_QUEUE_FLUSH_TIMEOUT: Duration = Duration::from_millis(250);
pub fn init( pub fn init(
fs: Arc<dyn Fs>, fs: Arc<dyn Fs>,
@ -253,24 +255,34 @@ impl SemanticIndex {
let mut _parsing_files_tasks = Vec::new(); let mut _parsing_files_tasks = Vec::new();
for _ in 0..cx.background().num_cpus() { for _ in 0..cx.background().num_cpus() {
let fs = fs.clone(); let fs = fs.clone();
let parsing_files_rx = parsing_files_rx.clone(); let mut parsing_files_rx = parsing_files_rx.clone();
let embedding_provider = embedding_provider.clone(); let embedding_provider = embedding_provider.clone();
let embedding_queue = embedding_queue.clone(); let embedding_queue = embedding_queue.clone();
let db = db.clone(); let background = cx.background().clone();
_parsing_files_tasks.push(cx.background().spawn(async move { _parsing_files_tasks.push(cx.background().spawn(async move {
let mut retriever = CodeContextRetriever::new(embedding_provider.clone()); let mut retriever = CodeContextRetriever::new(embedding_provider.clone());
while let Ok((embeddings_for_digest, pending_file)) = loop {
parsing_files_rx.recv().await let mut timer = background.timer(EMBEDDING_QUEUE_FLUSH_TIMEOUT).fuse();
{ let mut next_file_to_parse = parsing_files_rx.next().fuse();
Self::parse_file( futures::select_biased! {
&fs, next_file_to_parse = next_file_to_parse => {
pending_file, if let Some((embeddings_for_digest, pending_file)) = next_file_to_parse {
&mut retriever, Self::parse_file(
&embedding_queue, &fs,
&parsing_files_rx, pending_file,
&embeddings_for_digest, &mut retriever,
) &embedding_queue,
.await; &embeddings_for_digest,
)
.await
} else {
break;
}
},
_ = timer => {
embedding_queue.lock().flush();
}
}
} }
})); }));
} }
@ -297,10 +309,6 @@ impl SemanticIndex {
pending_file: PendingFile, pending_file: PendingFile,
retriever: &mut CodeContextRetriever, retriever: &mut CodeContextRetriever,
embedding_queue: &Arc<Mutex<EmbeddingQueue>>, embedding_queue: &Arc<Mutex<EmbeddingQueue>>,
parsing_files_rx: &channel::Receiver<(
Arc<HashMap<DocumentDigest, Embedding>>,
PendingFile,
)>,
embeddings_for_digest: &HashMap<DocumentDigest, Embedding>, embeddings_for_digest: &HashMap<DocumentDigest, Embedding>,
) { ) {
let Some(language) = pending_file.language else { let Some(language) = pending_file.language else {
@ -333,10 +341,6 @@ impl SemanticIndex {
}); });
} }
} }
if parsing_files_rx.len() == 0 {
embedding_queue.lock().flush();
}
} }
pub fn project_previously_indexed( pub fn project_previously_indexed(
@ -581,7 +585,7 @@ impl SemanticIndex {
cx: &mut ModelContext<Self>, cx: &mut ModelContext<Self>,
) -> Task<Result<(usize, watch::Receiver<usize>)>> { ) -> Task<Result<(usize, watch::Receiver<usize>)>> {
cx.spawn(|this, mut cx| async move { cx.spawn(|this, mut cx| async move {
let embeddings_for_digest = this.read_with(&cx, |this, cx| { let embeddings_for_digest = this.read_with(&cx, |this, _| {
if let Some(state) = this.projects.get(&project.downgrade()) { if let Some(state) = this.projects.get(&project.downgrade()) {
let mut worktree_id_file_paths = HashMap::default(); let mut worktree_id_file_paths = HashMap::default();
for (path, _) in &state.changed_paths { for (path, _) in &state.changed_paths {

View file

@ -3,11 +3,11 @@ use crate::{
embedding_queue::EmbeddingQueue, embedding_queue::EmbeddingQueue,
parsing::{subtract_ranges, CodeContextRetriever, Document, DocumentDigest}, parsing::{subtract_ranges, CodeContextRetriever, Document, DocumentDigest},
semantic_index_settings::SemanticIndexSettings, semantic_index_settings::SemanticIndexSettings,
FileToEmbed, JobHandle, SearchResult, SemanticIndex, FileToEmbed, JobHandle, SearchResult, SemanticIndex, EMBEDDING_QUEUE_FLUSH_TIMEOUT,
}; };
use anyhow::Result; use anyhow::Result;
use async_trait::async_trait; use async_trait::async_trait;
use gpui::{Task, TestAppContext}; use gpui::{executor::Deterministic, Task, TestAppContext};
use language::{Language, LanguageConfig, LanguageRegistry, ToOffset}; use language::{Language, LanguageConfig, LanguageRegistry, ToOffset};
use parking_lot::Mutex; use parking_lot::Mutex;
use pretty_assertions::assert_eq; use pretty_assertions::assert_eq;
@ -34,7 +34,7 @@ fn init_logger() {
} }
#[gpui::test] #[gpui::test]
async fn test_semantic_index(cx: &mut TestAppContext) { async fn test_semantic_index(deterministic: Arc<Deterministic>, cx: &mut TestAppContext) {
init_test(cx); init_test(cx);
let fs = FakeFs::new(cx.background()); let fs = FakeFs::new(cx.background());
@ -98,7 +98,7 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
.await .await
.unwrap(); .unwrap();
assert_eq!(file_count, 3); assert_eq!(file_count, 3);
cx.foreground().run_until_parked(); deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT);
assert_eq!(*outstanding_file_count.borrow(), 0); assert_eq!(*outstanding_file_count.borrow(), 0);
let search_results = semantic_index let search_results = semantic_index
@ -188,7 +188,7 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
.await .await
.unwrap(); .unwrap();
cx.foreground().run_until_parked(); deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT);
let prev_embedding_count = embedding_provider.embedding_count(); let prev_embedding_count = embedding_provider.embedding_count();
let (file_count, outstanding_file_count) = semantic_index let (file_count, outstanding_file_count) = semantic_index
@ -197,7 +197,7 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
.unwrap(); .unwrap();
assert_eq!(file_count, 1); assert_eq!(file_count, 1);
cx.foreground().run_until_parked(); deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT);
assert_eq!(*outstanding_file_count.borrow(), 0); assert_eq!(*outstanding_file_count.borrow(), 0);
assert_eq!( assert_eq!(