flush embeddings queue when no files are parsed for 250 milliseconds
Co-authored-by: Antonio <antonio@zed.dev>
commit 524533cfb2
parent c4db914f0a
2 changed files with 33 additions and 29 deletions
Changed file 1 of 2:

@@ -12,6 +12,7 @@ use anyhow::{anyhow, Result};
 use db::VectorDatabase;
 use embedding::{Embedding, EmbeddingProvider, OpenAIEmbeddings};
 use embedding_queue::{EmbeddingQueue, FileToEmbed};
+use futures::{FutureExt, StreamExt};
 use gpui::{AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, WeakModelHandle};
 use language::{Anchor, Buffer, Language, LanguageRegistry};
 use parking_lot::Mutex;
@@ -39,6 +40,7 @@ use workspace::WorkspaceCreated;
 
 const SEMANTIC_INDEX_VERSION: usize = 8;
 const BACKGROUND_INDEXING_DELAY: Duration = Duration::from_secs(600);
+const EMBEDDING_QUEUE_FLUSH_TIMEOUT: Duration = Duration::from_millis(250);
 
 pub fn init(
     fs: Arc<dyn Fs>,
@@ -253,24 +255,34 @@ impl SemanticIndex {
         let mut _parsing_files_tasks = Vec::new();
         for _ in 0..cx.background().num_cpus() {
             let fs = fs.clone();
-            let parsing_files_rx = parsing_files_rx.clone();
+            let mut parsing_files_rx = parsing_files_rx.clone();
             let embedding_provider = embedding_provider.clone();
             let embedding_queue = embedding_queue.clone();
-            let db = db.clone();
+            let background = cx.background().clone();
             _parsing_files_tasks.push(cx.background().spawn(async move {
                 let mut retriever = CodeContextRetriever::new(embedding_provider.clone());
-                while let Ok((embeddings_for_digest, pending_file)) =
-                    parsing_files_rx.recv().await
-                {
-                    Self::parse_file(
-                        &fs,
-                        pending_file,
-                        &mut retriever,
-                        &embedding_queue,
-                        &parsing_files_rx,
-                        &embeddings_for_digest,
-                    )
-                    .await;
+                loop {
+                    let mut timer = background.timer(EMBEDDING_QUEUE_FLUSH_TIMEOUT).fuse();
+                    let mut next_file_to_parse = parsing_files_rx.next().fuse();
+                    futures::select_biased! {
+                        next_file_to_parse = next_file_to_parse => {
+                            if let Some((embeddings_for_digest, pending_file)) = next_file_to_parse {
+                                Self::parse_file(
+                                    &fs,
+                                    pending_file,
+                                    &mut retriever,
+                                    &embedding_queue,
+                                    &embeddings_for_digest,
+                                )
+                                .await
+                            } else {
+                                break;
+                            }
+                        },
+                        _ = timer => {
+                            embedding_queue.lock().flush();
+                        }
+                    }
                 }
             }));
         }
@@ -297,10 +309,6 @@ impl SemanticIndex {
         pending_file: PendingFile,
         retriever: &mut CodeContextRetriever,
         embedding_queue: &Arc<Mutex<EmbeddingQueue>>,
-        parsing_files_rx: &channel::Receiver<(
-            Arc<HashMap<DocumentDigest, Embedding>>,
-            PendingFile,
-        )>,
         embeddings_for_digest: &HashMap<DocumentDigest, Embedding>,
     ) {
         let Some(language) = pending_file.language else {
@@ -333,10 +341,6 @@ impl SemanticIndex {
                 });
             }
         }
-
-        if parsing_files_rx.len() == 0 {
-            embedding_queue.lock().flush();
-        }
     }
 
     pub fn project_previously_indexed(
@@ -581,7 +585,7 @@ impl SemanticIndex {
         cx: &mut ModelContext<Self>,
     ) -> Task<Result<(usize, watch::Receiver<usize>)>> {
         cx.spawn(|this, mut cx| async move {
-            let embeddings_for_digest = this.read_with(&cx, |this, cx| {
+            let embeddings_for_digest = this.read_with(&cx, |this, _| {
                 if let Some(state) = this.projects.get(&project.downgrade()) {
                     let mut worktree_id_file_paths = HashMap::default();
                     for (path, _) in &state.changed_paths {
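The core of the change is the loop above: instead of blocking on parsing_files_rx.recv() and flushing only when the channel happens to be empty, each worker now races the next file against a 250 ms timer with futures::select_biased! and flushes the embedding queue whenever the timer wins. Below is a minimal, self-contained sketch of that debounce pattern for experimenting outside the codebase. It is not Zed's code: the Queue type, the unbounded futures channel, futures-timer's Delay (standing in for gpui's background.timer), and the 400 ms producer pause are all assumptions of the sketch; only the select_biased! shape mirrors the diff.

// Sketch only: `Queue`, the channel, and futures-timer's `Delay` are
// stand-ins for `EmbeddingQueue`, the parsing channel, and gpui's timer.
use std::{sync::Arc, time::Duration};

use futures::{channel::mpsc, FutureExt, StreamExt};
use futures_timer::Delay;
use parking_lot::Mutex;

const FLUSH_TIMEOUT: Duration = Duration::from_millis(250);

#[derive(Default)]
struct Queue {
    pending: Vec<String>,
}

impl Queue {
    fn push(&mut self, item: String) {
        self.pending.push(item);
    }

    fn flush(&mut self) {
        println!("flushing {} item(s)", self.pending.len());
        self.pending.clear();
    }
}

async fn consume(mut rx: mpsc::UnboundedReceiver<String>, queue: Arc<Mutex<Queue>>) {
    loop {
        // A fresh timer every iteration: it fires only if no item arrives
        // for FLUSH_TIMEOUT after the previous event.
        let mut timer = Delay::new(FLUSH_TIMEOUT).fuse();
        let mut next_item = rx.next().fuse();

        futures::select_biased! {
            item = next_item => {
                if let Some(item) = item {
                    queue.lock().push(item);
                } else {
                    // All senders dropped: stop, mirroring the `break` in the diff.
                    break;
                }
            },
            _ = timer => {
                // Idle for FLUSH_TIMEOUT: flush whatever has accumulated.
                queue.lock().flush();
            }
        }
    }
}

fn main() {
    let (tx, rx) = mpsc::unbounded();
    let queue = Arc::new(Mutex::new(Queue::default()));

    let producer = async move {
        tx.unbounded_send("first.rs".to_string()).unwrap();
        tx.unbounded_send("second.rs".to_string()).unwrap();
        // Stay idle for longer than FLUSH_TIMEOUT so the timer arm fires,
        // then send one more item and drop the sender to end the loop.
        Delay::new(Duration::from_millis(400)).await;
        tx.unbounded_send("third.rs".to_string()).unwrap();
    };

    futures::executor::block_on(async {
        futures::join!(producer, consume(rx, queue.clone()));
    });

    // "first.rs" and "second.rs" were flushed by the timer; "third.rs" was
    // still pending when the channel closed.
    assert_eq!(queue.lock().pending.len(), 1);
}

Because select_biased! polls its arms in the order they are written, a pending file always takes priority over the timer, and because the timer is recreated on every iteration, the flush fires only after a full FLUSH_TIMEOUT with no new work.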
Changed file 2 of 2:

@@ -3,11 +3,11 @@ use crate::{
     embedding_queue::EmbeddingQueue,
     parsing::{subtract_ranges, CodeContextRetriever, Document, DocumentDigest},
     semantic_index_settings::SemanticIndexSettings,
-    FileToEmbed, JobHandle, SearchResult, SemanticIndex,
+    FileToEmbed, JobHandle, SearchResult, SemanticIndex, EMBEDDING_QUEUE_FLUSH_TIMEOUT,
 };
 use anyhow::Result;
 use async_trait::async_trait;
-use gpui::{Task, TestAppContext};
+use gpui::{executor::Deterministic, Task, TestAppContext};
 use language::{Language, LanguageConfig, LanguageRegistry, ToOffset};
 use parking_lot::Mutex;
 use pretty_assertions::assert_eq;
@@ -34,7 +34,7 @@ fn init_logger() {
 }
 
 #[gpui::test]
-async fn test_semantic_index(cx: &mut TestAppContext) {
+async fn test_semantic_index(deterministic: Arc<Deterministic>, cx: &mut TestAppContext) {
     init_test(cx);
 
     let fs = FakeFs::new(cx.background());
@@ -98,7 +98,7 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
         .await
         .unwrap();
     assert_eq!(file_count, 3);
-    cx.foreground().run_until_parked();
+    deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT);
     assert_eq!(*outstanding_file_count.borrow(), 0);
 
     let search_results = semantic_index
@@ -188,7 +188,7 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
         .await
         .unwrap();
 
-    cx.foreground().run_until_parked();
+    deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT);
 
     let prev_embedding_count = embedding_provider.embedding_count();
     let (file_count, outstanding_file_count) = semantic_index
@@ -197,7 +197,7 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
         .unwrap();
     assert_eq!(file_count, 1);
 
-    cx.foreground().run_until_parked();
+    deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT);
     assert_eq!(*outstanding_file_count.borrow(), 0);
 
     assert_eq!(
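Note how the tests adapt to the new behavior: because the queue is now flushed only after EMBEDDING_QUEUE_FLUSH_TIMEOUT of idleness rather than whenever the parsing channel is empty, parking the foreground executor is no longer enough to drain it. The test therefore takes gpui's deterministic executor and advances its fake clock past the timeout with deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT) before asserting that no files are outstanding.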