From 9701b3ddbf781919eb9d8648ea28408cb3e925d9 Mon Sep 17 00:00:00 2001
From: "Joseph T. Lyons"
Date: Wed, 6 Sep 2023 12:52:23 -0400
Subject: [PATCH 01/10] x

---
 script/bump-zed-minor-versions | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/script/bump-zed-minor-versions b/script/bump-zed-minor-versions
index 8dcf7e334e..2f1ad7345e 100755
--- a/script/bump-zed-minor-versions
+++ b/script/bump-zed-minor-versions
@@ -31,7 +31,7 @@ preview_tag_name="v${major}.${minor}.${patch}-pre"
 git fetch origin ${prev_minor_branch_name}:${prev_minor_branch_name}
 git fetch origin --tags
-cargo check -q
+# cargo check -q
 
 function cleanup
 {
   git checkout -q main
@@ -89,7 +89,7 @@ git checkout -q main
 git clean -q -dff
 old_main_sha=$(git rev-parse HEAD)
 cargo set-version --package zed --bump minor
-cargo check -q
+# cargo check -q
 git commit -q --all --message "${next_minor_branch_name} dev"
 cat <
Date: Wed, 6 Sep 2023 12:52:35 -0400
Subject: [PATCH 02/10] v0.103.x preview

---
 crates/zed/RELEASE_CHANNEL | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/zed/RELEASE_CHANNEL b/crates/zed/RELEASE_CHANNEL
index 90012116c0..4de2f126df 100644
--- a/crates/zed/RELEASE_CHANNEL
+++ b/crates/zed/RELEASE_CHANNEL
@@ -1 +1 @@
-dev
\ No newline at end of file
+preview
\ No newline at end of file

From 3b173b609070e9a3d83e323817b8edf0a8aba9ef Mon Sep 17 00:00:00 2001
From: "Joseph T. Lyons"
Date: Wed, 6 Sep 2023 23:08:36 -0400
Subject: [PATCH 03/10] Allow call events to be logged without a room id
 (#2937)

Prior to this PR, we assumed that all call events needed a room_id, but we
now have call-based actions that don't need one - for instance, you can
right-click a channel and view its notes while not in a call, in which case
there is no room_id. We want to be able to track these events too, so this
PR removes the requirement that a room_id always be present.
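For illustration, the call-site shape this enables looks roughly like the
following - a fragment only, assuming `channel_id`, `client`, and `cx` are in
scope as in the `collab_panel.rs` hunk below:

```rust
// `room_id` is now an `Option<u64>`, so a channel action can be reported
// even when the user is not in a call.
ActiveCall::report_call_event_for_room(
    "open channel notes",
    None,             // no active room
    Some(channel_id), // but the channel is still known
    &client,
    cx,
);
```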
Release Notes:

- N/A
---
 crates/call/src/call.rs              | 30 ++++++++++++++++------------
 crates/client/src/telemetry.rs       |  2 +-
 crates/collab_ui/src/collab_panel.rs | 18 +++++++++++++----
 crates/collab_ui/src/collab_ui.rs    |  8 ++++----
 4 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/crates/call/src/call.rs b/crates/call/src/call.rs
index 5886462ccf..4db298fe98 100644
--- a/crates/call/src/call.rs
+++ b/crates/call/src/call.rs
@@ -273,7 +273,13 @@ impl ActiveCall {
             .borrow_mut()
             .take()
             .ok_or_else(|| anyhow!("no incoming call"))?;
-        Self::report_call_event_for_room("decline incoming", call.room_id, None, &self.client, cx);
+        Self::report_call_event_for_room(
+            "decline incoming",
+            Some(call.room_id),
+            None,
+            &self.client,
+            cx,
+        );
         self.client.send(proto::DeclineCall {
             room_id: call.room_id,
         })?;
@@ -403,22 +409,20 @@ impl ActiveCall {
         &self.pending_invites
     }
 
-    pub fn report_call_event(&self, operation: &'static str, cx: &AppContext) {
-        if let Some(room) = self.room() {
-            let room = room.read(cx);
-            Self::report_call_event_for_room(
-                operation,
-                room.id(),
-                room.channel_id(),
-                &self.client,
-                cx,
-            )
-        }
+    fn report_call_event(&self, operation: &'static str, cx: &AppContext) {
+        let (room_id, channel_id) = match self.room() {
+            Some(room) => {
+                let room = room.read(cx);
+                (Some(room.id()), room.channel_id())
+            }
+            None => (None, None),
+        };
+        Self::report_call_event_for_room(operation, room_id, channel_id, &self.client, cx)
     }
 
     pub fn report_call_event_for_room(
         operation: &'static str,
-        room_id: u64,
+        room_id: Option<u64>,
         channel_id: Option<u64>,
        client: &Arc<Client>,
         cx: &AppContext,
diff --git a/crates/client/src/telemetry.rs b/crates/client/src/telemetry.rs
index 9cc5d13af0..f8642dd7fa 100644
--- a/crates/client/src/telemetry.rs
+++ b/crates/client/src/telemetry.rs
@@ -73,7 +73,7 @@ pub enum ClickhouseEvent {
     },
     Call {
         operation: &'static str,
-        room_id: u64,
+        room_id: Option<u64>,
         channel_id: Option<u64>,
     },
 }
diff --git a/crates/collab_ui/src/collab_panel.rs b/crates/collab_ui/src/collab_panel.rs
index d27cdc8acf..fba10c61ba 100644
--- a/crates/collab_ui/src/collab_panel.rs
+++ b/crates/collab_ui/src/collab_panel.rs
@@ -2240,7 +2240,8 @@ impl CollabPanel {
     fn open_channel_buffer(&mut self, action: &OpenChannelBuffer, cx: &mut ViewContext<Self>) {
         if let Some(workspace) = self.workspace.upgrade(cx) {
             let pane = workspace.read(cx).active_pane().clone();
-            let channel_view = ChannelView::open(action.channel_id, pane.clone(), workspace, cx);
+            let channel_id = action.channel_id;
+            let channel_view = ChannelView::open(channel_id, pane.clone(), workspace, cx);
             cx.spawn(|_, mut cx| async move {
                 let channel_view = channel_view.await?;
                 pane.update(&mut cx, |pane, cx| {
@@ -2249,9 +2250,18 @@
                 anyhow::Ok(())
             })
             .detach();
 
-            ActiveCall::global(cx).update(cx, |call, cx| {
-                call.report_call_event("open channel notes", cx)
-            });
+            let room_id = ActiveCall::global(cx)
+                .read(cx)
+                .room()
+                .map(|room| room.read(cx).id());
+
+            ActiveCall::report_call_event_for_room(
+                "open channel notes",
+                room_id,
+                Some(channel_id),
+                &self.client,
+                cx,
+            );
         }
     }
diff --git a/crates/collab_ui/src/collab_ui.rs b/crates/collab_ui/src/collab_ui.rs
index 04644b62d9..ee34f600fa 100644
--- a/crates/collab_ui/src/collab_ui.rs
+++ b/crates/collab_ui/src/collab_ui.rs
@@ -49,7 +49,7 @@ pub fn toggle_screen_sharing(_: &ToggleScreenSharing, cx: &mut AppContext) {
         if room.is_screen_sharing() {
             ActiveCall::report_call_event_for_room(
                 "disable screen share",
-                room.id(),
+                Some(room.id()),
                 room.channel_id(),
                 &client,
                cx,
@@ -58,7 +58,7 @@ pub fn toggle_screen_sharing(_: &ToggleScreenSharing, cx: &mut AppContext) {
         } else {
             ActiveCall::report_call_event_for_room(
                 "enable screen share",
-                room.id(),
+                Some(room.id()),
                 room.channel_id(),
                 &client,
                 cx,
@@ -78,7 +78,7 @@ pub fn toggle_mute(_: &ToggleMute, cx: &mut AppContext) {
         if room.is_muted(cx) {
             ActiveCall::report_call_event_for_room(
                 "enable microphone",
-                room.id(),
+                Some(room.id()),
                 room.channel_id(),
                 &client,
                 cx,
@@ -86,7 +86,7 @@
         } else {
             ActiveCall::report_call_event_for_room(
                 "disable microphone",
-                room.id(),
+                Some(room.id()),
                 room.channel_id(),
                 &client,
                 cx,

From f685265cadf262b0f2ebd10e5be5b05b027f89b6 Mon Sep 17 00:00:00 2001
From: Kyle Caverly
Date: Wed, 6 Sep 2023 15:15:02 -0400
Subject: [PATCH 04/10] Token count fix (#2935)

Fix the token count for OpenAIEmbeddings.

Release Notes (Preview Only):

- Updated the token count calculation for truncated OpenAIEmbeddings.
- Increased the request timeout for OpenAI.
---
 crates/semantic_index/src/embedding.rs | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/crates/semantic_index/src/embedding.rs b/crates/semantic_index/src/embedding.rs
index 97c25ca170..7228738525 100644
--- a/crates/semantic_index/src/embedding.rs
+++ b/crates/semantic_index/src/embedding.rs
@@ -181,18 +181,17 @@ impl EmbeddingProvider for OpenAIEmbeddings {
 
     fn truncate(&self, span: &str) -> (String, usize) {
         let mut tokens = OPENAI_BPE_TOKENIZER.encode_with_special_tokens(span);
-        let token_count = tokens.len();
-        let output = if token_count > OPENAI_INPUT_LIMIT {
+        let output = if tokens.len() > OPENAI_INPUT_LIMIT {
             tokens.truncate(OPENAI_INPUT_LIMIT);
             OPENAI_BPE_TOKENIZER
-                .decode(tokens)
+                .decode(tokens.clone())
                 .ok()
                 .unwrap_or_else(|| span.to_string())
         } else {
             span.to_string()
         };
 
-        (output, token_count)
+        (output, tokens.len())
     }
 
     async fn embed_batch(&self, spans: Vec<String>) -> Result<Vec<Embedding>> {
@@ -204,7 +203,7 @@
             .ok_or_else(|| anyhow!("no api key"))?;
 
         let mut request_number = 0;
-        let mut request_timeout: u64 = 10;
+        let mut request_timeout: u64 = 15;
         let mut response: Response;
         while request_number < MAX_RETRIES {
             response = self

From fedec2d21ce517266d0ed65421e6e0e994b621f3 Mon Sep 17 00:00:00 2001
From: Antonio Scandurra
Date: Thu, 7 Sep 2023 15:30:19 +0200
Subject: [PATCH 05/10] Rework how we track projects and worktrees in semantic
 index (#2938)

This pull request introduces several improvements to the semantic search
experience. We're still missing collaboration and searching modified buffers,
which we'll tackle after we take a detour into reducing the number of tokens
used to generate embeddings.

Release Notes:

- Fixed a bug that could prevent semantic search from working when deploying
  it right after opening a project.
- Fixed a panic that could sometimes occur when using semantic search while
  simultaneously changing a file.
- Fixed a bug that prevented semantic search from including new worktrees
  when adding them to a project.
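In rough terms, the new indexing flow looks like this - a sketch assuming
`semantic_index: ModelHandle<SemanticIndex>` and `project: ModelHandle<Project>`
are in scope, mirroring the `project_search.rs` changes below:

```rust
// `index_project` now drives worktree registration itself, and progress is
// observed through a watch channel instead of a returned (count, receiver).
let mut pending_file_count_rx = semantic_index.update(cx, |index, cx| {
    index
        .index_project(project.clone(), cx)
        .detach_and_log_err(cx);
    index.pending_file_count(&project).unwrap()
});
while let Some(count) = pending_file_count_rx.recv().await {
    // Re-render progress; indexing is complete when `count` reaches 0.
}
```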
--- Cargo.lock | 1 + crates/search/src/project_search.rs | 42 +- crates/semantic_index/Cargo.toml | 2 + crates/semantic_index/src/db.rs | 69 +- crates/semantic_index/src/embedding_queue.rs | 134 ++- crates/semantic_index/src/parsing.rs | 66 +- crates/semantic_index/src/semantic_index.rs | 851 ++++++++++-------- .../src/semantic_index_tests.rs | 68 +- 8 files changed, 635 insertions(+), 598 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 05cd0ec21c..7a35645899 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6721,6 +6721,7 @@ dependencies = [ "anyhow", "async-trait", "bincode", + "collections", "ctor", "editor", "env_logger 0.9.3", diff --git a/crates/search/src/project_search.rs b/crates/search/src/project_search.rs index c2ecde4ce5..7088f394bc 100644 --- a/crates/search/src/project_search.rs +++ b/crates/search/src/project_search.rs @@ -12,15 +12,13 @@ use editor::{ SelectAll, MAX_TAB_TITLE_LEN, }; use futures::StreamExt; - -use gpui::platform::PromptLevel; - use gpui::{ - actions, elements::*, platform::MouseButton, Action, AnyElement, AnyViewHandle, AppContext, - Entity, ModelContext, ModelHandle, Subscription, Task, View, ViewContext, ViewHandle, - WeakModelHandle, WeakViewHandle, + actions, + elements::*, + platform::{MouseButton, PromptLevel}, + Action, AnyElement, AnyViewHandle, AppContext, Entity, ModelContext, ModelHandle, Subscription, + Task, View, ViewContext, ViewHandle, WeakModelHandle, WeakViewHandle, }; - use menu::Confirm; use postage::stream::Stream; use project::{ @@ -132,8 +130,7 @@ pub struct ProjectSearchView { } struct SemanticSearchState { - file_count: usize, - outstanding_file_count: usize, + pending_file_count: usize, _progress_task: Task<()>, } @@ -319,12 +316,8 @@ impl View for ProjectSearchView { }; let semantic_status = if let Some(semantic) = &self.semantic_state { - if semantic.outstanding_file_count > 0 { - format!( - "Indexing: {} of {}...", - semantic.file_count - semantic.outstanding_file_count, - semantic.file_count - ) + if semantic.pending_file_count > 0 { + format!("Remaining files to index: {}", semantic.pending_file_count) } else { "Indexing complete".to_string() } @@ -641,26 +634,27 @@ impl ProjectSearchView { let project = self.model.read(cx).project.clone(); - let index_task = semantic_index.update(cx, |semantic_index, cx| { - semantic_index.index_project(project, cx) + let mut pending_file_count_rx = semantic_index.update(cx, |semantic_index, cx| { + semantic_index + .index_project(project.clone(), cx) + .detach_and_log_err(cx); + semantic_index.pending_file_count(&project).unwrap() }); cx.spawn(|search_view, mut cx| async move { - let (files_to_index, mut files_remaining_rx) = index_task.await?; - search_view.update(&mut cx, |search_view, cx| { cx.notify(); + let pending_file_count = *pending_file_count_rx.borrow(); search_view.semantic_state = Some(SemanticSearchState { - file_count: files_to_index, - outstanding_file_count: files_to_index, + pending_file_count, _progress_task: cx.spawn(|search_view, mut cx| async move { - while let Some(count) = files_remaining_rx.recv().await { + while let Some(count) = pending_file_count_rx.recv().await { search_view .update(&mut cx, |search_view, cx| { if let Some(semantic_search_state) = &mut search_view.semantic_state { - semantic_search_state.outstanding_file_count = count; + semantic_search_state.pending_file_count = count; cx.notify(); if count == 0 { return; @@ -959,7 +953,7 @@ impl ProjectSearchView { match mode { SearchMode::Semantic => { if let Some(semantic) = &mut self.semantic_state { - 
if semantic.outstanding_file_count > 0 { + if semantic.pending_file_count > 0 { return; } diff --git a/crates/semantic_index/Cargo.toml b/crates/semantic_index/Cargo.toml index d46346e0ab..72a36efd50 100644 --- a/crates/semantic_index/Cargo.toml +++ b/crates/semantic_index/Cargo.toml @@ -9,6 +9,7 @@ path = "src/semantic_index.rs" doctest = false [dependencies] +collections = { path = "../collections" } gpui = { path = "../gpui" } language = { path = "../language" } project = { path = "../project" } @@ -42,6 +43,7 @@ sha1 = "0.10.5" parse_duration = "2.1.1" [dev-dependencies] +collections = { path = "../collections", features = ["test-support"] } gpui = { path = "../gpui", features = ["test-support"] } language = { path = "../language", features = ["test-support"] } project = { path = "../project", features = ["test-support"] } diff --git a/crates/semantic_index/src/db.rs b/crates/semantic_index/src/db.rs index 2ececc1eb6..c35057594a 100644 --- a/crates/semantic_index/src/db.rs +++ b/crates/semantic_index/src/db.rs @@ -1,9 +1,10 @@ use crate::{ embedding::Embedding, - parsing::{Document, DocumentDigest}, + parsing::{Span, SpanDigest}, SEMANTIC_INDEX_VERSION, }; use anyhow::{anyhow, Context, Result}; +use collections::HashMap; use futures::channel::oneshot; use gpui::executor; use project::{search::PathMatcher, Fs}; @@ -12,7 +13,6 @@ use rusqlite::params; use rusqlite::types::Value; use std::{ cmp::Ordering, - collections::HashMap, future::Future, ops::Range, path::{Path, PathBuf}, @@ -124,8 +124,12 @@ impl VectorDatabase { } log::trace!("vector database schema out of date. updating..."); + // We renamed the `documents` table to `spans`, so we want to drop + // `documents` without recreating it if it exists. db.execute("DROP TABLE IF EXISTS documents", []) .context("failed to drop 'documents' table")?; + db.execute("DROP TABLE IF EXISTS spans", []) + .context("failed to drop 'spans' table")?; db.execute("DROP TABLE IF EXISTS files", []) .context("failed to drop 'files' table")?; db.execute("DROP TABLE IF EXISTS worktrees", []) @@ -174,7 +178,7 @@ impl VectorDatabase { )?; db.execute( - "CREATE TABLE documents ( + "CREATE TABLE spans ( id INTEGER PRIMARY KEY AUTOINCREMENT, file_id INTEGER NOT NULL, start_byte INTEGER NOT NULL, @@ -195,7 +199,7 @@ impl VectorDatabase { pub fn delete_file( &self, worktree_id: i64, - delete_path: PathBuf, + delete_path: Arc, ) -> impl Future> { self.transact(move |db| { db.execute( @@ -209,9 +213,9 @@ impl VectorDatabase { pub fn insert_file( &self, worktree_id: i64, - path: PathBuf, + path: Arc, mtime: SystemTime, - documents: Vec, + spans: Vec, ) -> impl Future> { self.transact(move |db| { // Return the existing ID, if both the file and mtime match @@ -231,7 +235,7 @@ impl VectorDatabase { let t0 = Instant::now(); let mut query = db.prepare( " - INSERT INTO documents + INSERT INTO spans (file_id, start_byte, end_byte, name, embedding, digest) VALUES (?1, ?2, ?3, ?4, ?5, ?6) ", @@ -241,14 +245,14 @@ impl VectorDatabase { t0.elapsed().as_millis() ); - for document in documents { + for span in spans { query.execute(params![ file_id, - document.range.start.to_string(), - document.range.end.to_string(), - document.name, - document.embedding, - document.digest + span.range.start.to_string(), + span.range.end.to_string(), + span.name, + span.embedding, + span.digest ])?; } @@ -278,17 +282,17 @@ impl VectorDatabase { pub fn embeddings_for_files( &self, worktree_id_file_paths: HashMap>>, - ) -> impl Future>> { + ) -> impl Future>> { self.transact(move |db| { let mut 
query = db.prepare( " SELECT digest, embedding - FROM documents - LEFT JOIN files ON files.id = documents.file_id + FROM spans + LEFT JOIN files ON files.id = spans.file_id WHERE files.worktree_id = ? AND files.relative_path IN rarray(?) ", )?; - let mut embeddings_by_digest = HashMap::new(); + let mut embeddings_by_digest = HashMap::default(); for (worktree_id, file_paths) in worktree_id_file_paths { let file_paths = Rc::new( file_paths @@ -297,10 +301,7 @@ impl VectorDatabase { .collect::>(), ); let rows = query.query_map(params![worktree_id, file_paths], |row| { - Ok(( - row.get::<_, DocumentDigest>(0)?, - row.get::<_, Embedding>(1)?, - )) + Ok((row.get::<_, SpanDigest>(0)?, row.get::<_, Embedding>(1)?)) })?; for row in rows { @@ -316,7 +317,7 @@ impl VectorDatabase { pub fn find_or_create_worktree( &self, - worktree_root_path: PathBuf, + worktree_root_path: Arc, ) -> impl Future> { self.transact(move |db| { let mut worktree_query = @@ -351,7 +352,7 @@ impl VectorDatabase { WHERE worktree_id = ?1 ORDER BY relative_path", )?; - let mut result: HashMap = HashMap::new(); + let mut result: HashMap = HashMap::default(); for row in statement.query_map(params![worktree_id], |row| { Ok(( row.get::<_, String>(0)?.into(), @@ -379,7 +380,7 @@ impl VectorDatabase { let file_ids = file_ids.to_vec(); self.transact(move |db| { let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1); - Self::for_each_document(db, &file_ids, |id, embedding| { + Self::for_each_span(db, &file_ids, |id, embedding| { let similarity = embedding.similarity(&query_embedding); let ix = match results.binary_search_by(|(_, s)| { similarity.partial_cmp(&s).unwrap_or(Ordering::Equal) @@ -434,7 +435,7 @@ impl VectorDatabase { }) } - fn for_each_document( + fn for_each_span( db: &rusqlite::Connection, file_ids: &[i64], mut f: impl FnMut(i64, Embedding), @@ -444,7 +445,7 @@ impl VectorDatabase { SELECT id, embedding FROM - documents + spans WHERE file_id IN rarray(?) ", @@ -459,7 +460,7 @@ impl VectorDatabase { Ok(()) } - pub fn get_documents_by_ids( + pub fn spans_for_ids( &self, ids: &[i64], ) -> impl Future)>>> { @@ -468,16 +469,16 @@ impl VectorDatabase { let mut statement = db.prepare( " SELECT - documents.id, + spans.id, files.worktree_id, files.relative_path, - documents.start_byte, - documents.end_byte + spans.start_byte, + spans.end_byte FROM - documents, files + spans, files WHERE - documents.file_id = files.id AND - documents.id in rarray(?) + spans.file_id = files.id AND + spans.id in rarray(?) 
", )?; @@ -500,7 +501,7 @@ impl VectorDatabase { for id in &ids { let value = values_by_id .remove(id) - .ok_or(anyhow!("missing document id {}", id))?; + .ok_or(anyhow!("missing span id {}", id))?; results.push(value); } diff --git a/crates/semantic_index/src/embedding_queue.rs b/crates/semantic_index/src/embedding_queue.rs index 96493fc4d3..3026eef9ae 100644 --- a/crates/semantic_index/src/embedding_queue.rs +++ b/crates/semantic_index/src/embedding_queue.rs @@ -1,15 +1,15 @@ -use crate::{embedding::EmbeddingProvider, parsing::Document, JobHandle}; +use crate::{embedding::EmbeddingProvider, parsing::Span, JobHandle}; use gpui::executor::Background; use parking_lot::Mutex; use smol::channel; -use std::{mem, ops::Range, path::PathBuf, sync::Arc, time::SystemTime}; +use std::{mem, ops::Range, path::Path, sync::Arc, time::SystemTime}; #[derive(Clone)] pub struct FileToEmbed { pub worktree_id: i64, - pub path: PathBuf, + pub path: Arc, pub mtime: SystemTime, - pub documents: Vec, + pub spans: Vec, pub job_handle: JobHandle, } @@ -19,7 +19,7 @@ impl std::fmt::Debug for FileToEmbed { .field("worktree_id", &self.worktree_id) .field("path", &self.path) .field("mtime", &self.mtime) - .field("document", &self.documents) + .field("spans", &self.spans) .finish_non_exhaustive() } } @@ -29,13 +29,13 @@ impl PartialEq for FileToEmbed { self.worktree_id == other.worktree_id && self.path == other.path && self.mtime == other.mtime - && self.documents == other.documents + && self.spans == other.spans } } pub struct EmbeddingQueue { embedding_provider: Arc, - pending_batch: Vec, + pending_batch: Vec, executor: Arc, pending_batch_token_count: usize, finished_files_tx: channel::Sender, @@ -43,9 +43,9 @@ pub struct EmbeddingQueue { } #[derive(Clone)] -pub struct FileToEmbedFragment { +pub struct FileFragmentToEmbed { file: Arc>, - document_range: Range, + span_range: Range, } impl EmbeddingQueue { @@ -62,43 +62,40 @@ impl EmbeddingQueue { } pub fn push(&mut self, file: FileToEmbed) { - if file.documents.is_empty() { + if file.spans.is_empty() { self.finished_files_tx.try_send(file).unwrap(); return; } let file = Arc::new(Mutex::new(file)); - self.pending_batch.push(FileToEmbedFragment { + self.pending_batch.push(FileFragmentToEmbed { file: file.clone(), - document_range: 0..0, + span_range: 0..0, }); - let mut fragment_range = &mut self.pending_batch.last_mut().unwrap().document_range; - let mut saved_tokens = 0; - for (ix, document) in file.lock().documents.iter().enumerate() { - let document_token_count = if document.embedding.is_none() { - document.token_count + let mut fragment_range = &mut self.pending_batch.last_mut().unwrap().span_range; + for (ix, span) in file.lock().spans.iter().enumerate() { + let span_token_count = if span.embedding.is_none() { + span.token_count } else { - saved_tokens += document.token_count; 0 }; - let next_token_count = self.pending_batch_token_count + document_token_count; + let next_token_count = self.pending_batch_token_count + span_token_count; if next_token_count > self.embedding_provider.max_tokens_per_batch() { let range_end = fragment_range.end; self.flush(); - self.pending_batch.push(FileToEmbedFragment { + self.pending_batch.push(FileFragmentToEmbed { file: file.clone(), - document_range: range_end..range_end, + span_range: range_end..range_end, }); - fragment_range = &mut self.pending_batch.last_mut().unwrap().document_range; + fragment_range = &mut self.pending_batch.last_mut().unwrap().span_range; } fragment_range.end = ix + 1; - self.pending_batch_token_count += 
document_token_count; + self.pending_batch_token_count += span_token_count; } - log::trace!("Saved Tokens: {:?}", saved_tokens); } pub fn flush(&mut self) { @@ -111,60 +108,55 @@ impl EmbeddingQueue { let finished_files_tx = self.finished_files_tx.clone(); let embedding_provider = self.embedding_provider.clone(); - self.executor.spawn(async move { - let mut spans = Vec::new(); - let mut document_count = 0; - for fragment in &batch { - let file = fragment.file.lock(); - document_count += file.documents[fragment.document_range.clone()].len(); - spans.extend( - { - file.documents[fragment.document_range.clone()] - .iter().filter(|d| d.embedding.is_none()) - .map(|d| d.content.clone()) - } - ); - } - - log::trace!("Documents Length: {:?}", document_count); - log::trace!("Span Length: {:?}", spans.clone().len()); - - // If spans is 0, just send the fragment to the finished files if its the last one. - if spans.len() == 0 { - for fragment in batch.clone() { - if let Some(file) = Arc::into_inner(fragment.file) { - finished_files_tx.try_send(file.into_inner()).unwrap(); - } + self.executor + .spawn(async move { + let mut spans = Vec::new(); + for fragment in &batch { + let file = fragment.file.lock(); + spans.extend( + file.spans[fragment.span_range.clone()] + .iter() + .filter(|d| d.embedding.is_none()) + .map(|d| d.content.clone()), + ); } - return; - }; - - match embedding_provider.embed_batch(spans).await { - Ok(embeddings) => { - let mut embeddings = embeddings.into_iter(); - for fragment in batch { - for document in - &mut fragment.file.lock().documents[fragment.document_range.clone()].iter_mut().filter(|d| d.embedding.is_none()) - { - if let Some(embedding) = embeddings.next() { - document.embedding = Some(embedding); - } else { - // - log::error!("number of embeddings returned different from number of documents"); - } - } + // If spans is 0, just send the fragment to the finished files if its the last one. 
+ if spans.is_empty() { + for fragment in batch.clone() { if let Some(file) = Arc::into_inner(fragment.file) { finished_files_tx.try_send(file.into_inner()).unwrap(); } } + return; + }; + + match embedding_provider.embed_batch(spans).await { + Ok(embeddings) => { + let mut embeddings = embeddings.into_iter(); + for fragment in batch { + for span in &mut fragment.file.lock().spans[fragment.span_range.clone()] + .iter_mut() + .filter(|d| d.embedding.is_none()) + { + if let Some(embedding) = embeddings.next() { + span.embedding = Some(embedding); + } else { + log::error!("number of embeddings != number of documents"); + } + } + + if let Some(file) = Arc::into_inner(fragment.file) { + finished_files_tx.try_send(file.into_inner()).unwrap(); + } + } + } + Err(error) => { + log::error!("{:?}", error); + } } - Err(error) => { - log::error!("{:?}", error); - } - } - }) - .detach(); + }) + .detach(); } pub fn finished_files(&self) -> channel::Receiver { diff --git a/crates/semantic_index/src/parsing.rs b/crates/semantic_index/src/parsing.rs index c0a94c6b73..b6fc000e1d 100644 --- a/crates/semantic_index/src/parsing.rs +++ b/crates/semantic_index/src/parsing.rs @@ -16,9 +16,9 @@ use std::{ use tree_sitter::{Parser, QueryCursor}; #[derive(Debug, PartialEq, Eq, Clone, Hash)] -pub struct DocumentDigest([u8; 20]); +pub struct SpanDigest([u8; 20]); -impl FromSql for DocumentDigest { +impl FromSql for SpanDigest { fn column_result(value: ValueRef) -> FromSqlResult { let blob = value.as_blob()?; let bytes = @@ -27,17 +27,17 @@ impl FromSql for DocumentDigest { expected_size: 20, blob_size: blob.len(), })?; - return Ok(DocumentDigest(bytes)); + return Ok(SpanDigest(bytes)); } } -impl ToSql for DocumentDigest { +impl ToSql for SpanDigest { fn to_sql(&self) -> rusqlite::Result { self.0.to_sql() } } -impl From<&'_ str> for DocumentDigest { +impl From<&'_ str> for SpanDigest { fn from(value: &'_ str) -> Self { let mut sha1 = Sha1::new(); sha1.update(value); @@ -46,12 +46,12 @@ impl From<&'_ str> for DocumentDigest { } #[derive(Debug, PartialEq, Clone)] -pub struct Document { +pub struct Span { pub name: String, pub range: Range, pub content: String, pub embedding: Option, - pub digest: DocumentDigest, + pub digest: SpanDigest, pub token_count: usize, } @@ -97,14 +97,14 @@ impl CodeContextRetriever { relative_path: &Path, language_name: Arc, content: &str, - ) -> Result> { + ) -> Result> { let document_span = ENTIRE_FILE_TEMPLATE .replace("", relative_path.to_string_lossy().as_ref()) .replace("", language_name.as_ref()) .replace("", &content); - let digest = DocumentDigest::from(document_span.as_str()); + let digest = SpanDigest::from(document_span.as_str()); let (document_span, token_count) = self.embedding_provider.truncate(&document_span); - Ok(vec![Document { + Ok(vec![Span { range: 0..content.len(), content: document_span, embedding: Default::default(), @@ -114,13 +114,13 @@ impl CodeContextRetriever { }]) } - fn parse_markdown_file(&self, relative_path: &Path, content: &str) -> Result> { + fn parse_markdown_file(&self, relative_path: &Path, content: &str) -> Result> { let document_span = MARKDOWN_CONTEXT_TEMPLATE .replace("", relative_path.to_string_lossy().as_ref()) .replace("", &content); - let digest = DocumentDigest::from(document_span.as_str()); + let digest = SpanDigest::from(document_span.as_str()); let (document_span, token_count) = self.embedding_provider.truncate(&document_span); - Ok(vec![Document { + Ok(vec![Span { range: 0..content.len(), content: document_span, embedding: None, @@ -191,32 
+191,32 @@ impl CodeContextRetriever { relative_path: &Path, content: &str, language: Arc, - ) -> Result> { + ) -> Result> { let language_name = language.name(); if PARSEABLE_ENTIRE_FILE_TYPES.contains(&language_name.as_ref()) { return self.parse_entire_file(relative_path, language_name, &content); - } else if &language_name.to_string() == &"Markdown".to_string() { + } else if language_name.as_ref() == "Markdown" { return self.parse_markdown_file(relative_path, &content); } - let mut documents = self.parse_file(content, language)?; - for document in &mut documents { + let mut spans = self.parse_file(content, language)?; + for span in &mut spans { let document_content = CODE_CONTEXT_TEMPLATE .replace("", relative_path.to_string_lossy().as_ref()) .replace("", language_name.as_ref()) - .replace("item", &document.content); + .replace("item", &span.content); let (document_content, token_count) = self.embedding_provider.truncate(&document_content); - document.content = document_content; - document.token_count = token_count; + span.content = document_content; + span.token_count = token_count; } - Ok(documents) + Ok(spans) } - pub fn parse_file(&mut self, content: &str, language: Arc) -> Result> { + pub fn parse_file(&mut self, content: &str, language: Arc) -> Result> { let grammar = language .grammar() .ok_or_else(|| anyhow!("no grammar for language"))?; @@ -227,7 +227,7 @@ impl CodeContextRetriever { let language_scope = language.default_scope(); let placeholder = language_scope.collapsed_placeholder(); - let mut documents = Vec::new(); + let mut spans = Vec::new(); let mut collapsed_ranges_within = Vec::new(); let mut parsed_name_ranges = HashSet::new(); for (i, context_match) in matches.iter().enumerate() { @@ -267,22 +267,22 @@ impl CodeContextRetriever { collapsed_ranges_within.sort_by_key(|r| (r.start, Reverse(r.end))); - let mut document_content = String::new(); + let mut span_content = String::new(); for context_range in &context_match.context_ranges { add_content_from_range( - &mut document_content, + &mut span_content, content, context_range.clone(), context_match.start_col, ); - document_content.push_str("\n"); + span_content.push_str("\n"); } let mut offset = item_range.start; for collapsed_range in &collapsed_ranges_within { if collapsed_range.start > offset { add_content_from_range( - &mut document_content, + &mut span_content, content, offset..collapsed_range.start, context_match.start_col, @@ -291,24 +291,24 @@ impl CodeContextRetriever { } if collapsed_range.end > offset { - document_content.push_str(placeholder); + span_content.push_str(placeholder); offset = collapsed_range.end; } } if offset < item_range.end { add_content_from_range( - &mut document_content, + &mut span_content, content, offset..item_range.end, context_match.start_col, ); } - let sha1 = DocumentDigest::from(document_content.as_str()); - documents.push(Document { + let sha1 = SpanDigest::from(span_content.as_str()); + spans.push(Span { name, - content: document_content, + content: span_content, range: item_range.clone(), embedding: None, digest: sha1, @@ -316,7 +316,7 @@ impl CodeContextRetriever { }) } - return Ok(documents); + return Ok(spans); } } diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs index a917eabfc8..8bba2f1d0e 100644 --- a/crates/semantic_index/src/semantic_index.rs +++ b/crates/semantic_index/src/semantic_index.rs @@ -9,22 +9,21 @@ mod semantic_index_tests; use crate::semantic_index_settings::SemanticIndexSettings; use anyhow::{anyhow, 
Result}; +use collections::{BTreeMap, HashMap, HashSet}; use db::VectorDatabase; use embedding::{Embedding, EmbeddingProvider, OpenAIEmbeddings}; use embedding_queue::{EmbeddingQueue, FileToEmbed}; -use futures::{FutureExt, StreamExt}; +use futures::{future, FutureExt, StreamExt}; use gpui::{AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, WeakModelHandle}; -use language::{Anchor, Buffer, Language, LanguageRegistry}; +use language::{Anchor, Bias, Buffer, Language, LanguageRegistry}; use parking_lot::Mutex; -use parsing::{CodeContextRetriever, DocumentDigest, PARSEABLE_ENTIRE_FILE_TYPES}; +use parsing::{CodeContextRetriever, SpanDigest, PARSEABLE_ENTIRE_FILE_TYPES}; use postage::watch; -use project::{ - search::PathMatcher, Fs, PathChange, Project, ProjectEntryId, ProjectPath, Worktree, WorktreeId, -}; +use project::{search::PathMatcher, Fs, PathChange, Project, ProjectEntryId, Worktree, WorktreeId}; use smol::channel; use std::{ cmp::Ordering, - collections::{BTreeMap, HashMap}, + future::Future, ops::Range, path::{Path, PathBuf}, sync::{Arc, Weak}, @@ -36,9 +35,8 @@ use util::{ paths::EMBEDDINGS_DIR, ResultExt, }; -use workspace::WorkspaceCreated; -const SEMANTIC_INDEX_VERSION: usize = 9; +const SEMANTIC_INDEX_VERSION: usize = 10; const BACKGROUND_INDEXING_DELAY: Duration = Duration::from_secs(5 * 60); const EMBEDDING_QUEUE_FLUSH_TIMEOUT: Duration = Duration::from_millis(250); @@ -59,24 +57,6 @@ pub fn init( return; } - cx.subscribe_global::({ - move |event, cx| { - let Some(semantic_index) = SemanticIndex::global(cx) else { - return; - }; - let workspace = &event.0; - if let Some(workspace) = workspace.upgrade(cx) { - let project = workspace.read(cx).project().clone(); - if project.read(cx).is_local() { - semantic_index.update(cx, |index, cx| { - index.initialize_project(project, cx).detach_and_log_err(cx) - }); - } - } - } - }) - .detach(); - cx.spawn(move |mut cx| async move { let semantic_index = SemanticIndex::new( fs, @@ -104,22 +84,78 @@ pub struct SemanticIndex { db: VectorDatabase, embedding_provider: Arc, language_registry: Arc, - parsing_files_tx: channel::Sender<(Arc>, PendingFile)>, + parsing_files_tx: channel::Sender<(Arc>, PendingFile)>, _embedding_task: Task<()>, _parsing_files_tasks: Vec>, projects: HashMap, ProjectState>, } struct ProjectState { - worktree_db_ids: Vec<(WorktreeId, i64)>, + worktrees: HashMap, + pending_file_count_rx: watch::Receiver, + pending_file_count_tx: Arc>>, _subscription: gpui::Subscription, - outstanding_job_count_rx: watch::Receiver, - outstanding_job_count_tx: Arc>>, - changed_paths: BTreeMap, +} + +enum WorktreeState { + Registering(RegisteringWorktreeState), + Registered(RegisteredWorktreeState), +} + +impl WorktreeState { + fn paths_changed( + &mut self, + changes: Arc<[(Arc, ProjectEntryId, PathChange)]>, + worktree: &Worktree, + ) { + let changed_paths = match self { + Self::Registering(state) => &mut state.changed_paths, + Self::Registered(state) => &mut state.changed_paths, + }; + + for (path, entry_id, change) in changes.iter() { + let Some(entry) = worktree.entry_for_id(*entry_id) else { + continue; + }; + if entry.is_ignored || entry.is_symlink || entry.is_external || entry.is_dir() { + continue; + } + changed_paths.insert( + path.clone(), + ChangedPathInfo { + mtime: entry.mtime, + is_deleted: *change == PathChange::Removed, + }, + ); + } + } +} + +struct RegisteringWorktreeState { + changed_paths: BTreeMap, ChangedPathInfo>, + done_rx: watch::Receiver>, + _registration: Task<()>, +} + +impl 
RegisteringWorktreeState { + fn done(&self) -> impl Future { + let mut done_rx = self.done_rx.clone(); + async move { + while let Some(result) = done_rx.next().await { + if result.is_some() { + break; + } + } + } + } +} + +struct RegisteredWorktreeState { + db_id: i64, + changed_paths: BTreeMap, ChangedPathInfo>, } struct ChangedPathInfo { - changed_at: Instant, mtime: SystemTime, is_deleted: bool, } @@ -141,47 +177,23 @@ impl JobHandle { } impl ProjectState { - fn new( - subscription: gpui::Subscription, - worktree_db_ids: Vec<(WorktreeId, i64)>, - changed_paths: BTreeMap, - ) -> Self { - let (outstanding_job_count_tx, outstanding_job_count_rx) = watch::channel_with(0); - let outstanding_job_count_tx = Arc::new(Mutex::new(outstanding_job_count_tx)); + fn new(subscription: gpui::Subscription) -> Self { + let (pending_file_count_tx, pending_file_count_rx) = watch::channel_with(0); + let pending_file_count_tx = Arc::new(Mutex::new(pending_file_count_tx)); Self { - worktree_db_ids, - outstanding_job_count_rx, - outstanding_job_count_tx, - changed_paths, + worktrees: Default::default(), + pending_file_count_rx, + pending_file_count_tx, _subscription: subscription, } } - pub fn get_outstanding_count(&self) -> usize { - self.outstanding_job_count_rx.borrow().clone() - } - - fn db_id_for_worktree_id(&self, id: WorktreeId) -> Option { - self.worktree_db_ids - .iter() - .find_map(|(worktree_id, db_id)| { - if *worktree_id == id { - Some(*db_id) - } else { - None - } - }) - } - fn worktree_id_for_db_id(&self, id: i64) -> Option { - self.worktree_db_ids + self.worktrees .iter() - .find_map(|(worktree_id, db_id)| { - if *db_id == id { - Some(*worktree_id) - } else { - None - } + .find_map(|(worktree_id, worktree_state)| match worktree_state { + WorktreeState::Registered(state) if state.db_id == id => Some(*worktree_id), + _ => None, }) } } @@ -189,7 +201,7 @@ impl ProjectState { #[derive(Clone)] pub struct PendingFile { worktree_db_id: i64, - relative_path: PathBuf, + relative_path: Arc, absolute_path: PathBuf, language: Option>, modified_time: SystemTime, @@ -240,16 +252,16 @@ impl SemanticIndex { let db = db.clone(); async move { while let Ok(file) = embedded_files.recv().await { - db.insert_file(file.worktree_id, file.path, file.mtime, file.documents) + db.insert_file(file.worktree_id, file.path, file.mtime, file.spans) .await .log_err(); } } }); - // Parse files into embeddable documents. + // Parse files into embeddable spans. 
let (parsing_files_tx, parsing_files_rx) = - channel::unbounded::<(Arc>, PendingFile)>(); + channel::unbounded::<(Arc>, PendingFile)>(); let embedding_queue = Arc::new(Mutex::new(embedding_queue)); let mut _parsing_files_tasks = Vec::new(); for _ in 0..cx.background().num_cpus() { @@ -298,7 +310,7 @@ impl SemanticIndex { parsing_files_tx, _embedding_task, _parsing_files_tasks, - projects: HashMap::new(), + projects: Default::default(), } })) } @@ -308,26 +320,26 @@ impl SemanticIndex { pending_file: PendingFile, retriever: &mut CodeContextRetriever, embedding_queue: &Arc>, - embeddings_for_digest: &HashMap, + embeddings_for_digest: &HashMap, ) { let Some(language) = pending_file.language else { return; }; if let Some(content) = fs.load(&pending_file.absolute_path).await.log_err() { - if let Some(mut documents) = retriever + if let Some(mut spans) = retriever .parse_file_with_template(&pending_file.relative_path, &content, language) .log_err() { log::trace!( - "parsed path {:?}: {} documents", + "parsed path {:?}: {} spans", pending_file.relative_path, - documents.len() + spans.len() ); - for document in documents.iter_mut() { - if let Some(embedding) = embeddings_for_digest.get(&document.digest) { - document.embedding = Some(embedding.to_owned()); + for span in &mut spans { + if let Some(embedding) = embeddings_for_digest.get(&span.digest) { + span.embedding = Some(embedding.to_owned()); } } @@ -336,7 +348,7 @@ impl SemanticIndex { path: pending_file.relative_path, mtime: pending_file.modified_time, job_handle: pending_file.job_handle, - documents, + spans: spans, }); } } @@ -369,9 +381,9 @@ impl SemanticIndex { fn project_entries_changed( &mut self, project: ModelHandle, + worktree_id: WorktreeId, changes: Arc<[(Arc, ProjectEntryId, PathChange)]>, - cx: &mut ModelContext<'_, SemanticIndex>, - worktree_id: &WorktreeId, + cx: &mut ModelContext, ) { let Some(worktree) = project.read(cx).worktree_for_id(worktree_id.clone(), cx) else { return; @@ -381,258 +393,219 @@ impl SemanticIndex { return; }; - let embeddings_for_digest = { - let mut worktree_id_file_paths = HashMap::new(); - for (path, _) in &project_state.changed_paths { - if let Some(worktree_db_id) = project_state.db_id_for_worktree_id(path.worktree_id) - { - worktree_id_file_paths - .entry(worktree_db_id) - .or_insert(Vec::new()) - .push(path.path.clone()); - } - } - self.db.embeddings_for_files(worktree_id_file_paths) - }; - let worktree = worktree.read(cx); - let change_time = Instant::now(); - for (path, entry_id, change) in changes.iter() { - let Some(entry) = worktree.entry_for_id(*entry_id) else { - continue; + let worktree_state = + if let Some(worktree_state) = project_state.worktrees.get_mut(&worktree_id) { + worktree_state + } else { + return; }; - if entry.is_ignored || entry.is_symlink || entry.is_external { - continue; - } - let project_path = ProjectPath { - worktree_id: *worktree_id, - path: path.clone(), - }; - project_state.changed_paths.insert( - project_path, - ChangedPathInfo { - changed_at: change_time, - mtime: entry.mtime, - is_deleted: *change == PathChange::Removed, - }, - ); + worktree_state.paths_changed(changes, worktree); + if let WorktreeState::Registered(_) = worktree_state { + cx.spawn_weak(|this, mut cx| async move { + cx.background().timer(BACKGROUND_INDEXING_DELAY).await; + if let Some((this, project)) = this.upgrade(&cx).zip(project.upgrade(&cx)) { + this.update(&mut cx, |this, cx| { + this.index_project(project, cx).detach_and_log_err(cx) + }); + } + }) + .detach(); } - - cx.spawn_weak(|this, mut 
cx| async move { - let embeddings_for_digest = embeddings_for_digest.await.log_err().unwrap_or_default(); - - cx.background().timer(BACKGROUND_INDEXING_DELAY).await; - if let Some((this, project)) = this.upgrade(&cx).zip(project.upgrade(&cx)) { - Self::reindex_changed_paths( - this, - project, - Some(change_time), - &mut cx, - Arc::new(embeddings_for_digest), - ) - .await; - } - }) - .detach(); } - pub fn initialize_project( + fn register_worktree( &mut self, project: ModelHandle, + worktree: ModelHandle, cx: &mut ModelContext, - ) -> Task> { - log::trace!("Initializing Project for Semantic Index"); - let worktree_scans_complete = project - .read(cx) - .worktrees(cx) - .map(|worktree| { - let scan_complete = worktree.read(cx).as_local().unwrap().scan_complete(); - async move { - scan_complete.await; - } - }) - .collect::>(); - - let worktree_db_ids = project - .read(cx) - .worktrees(cx) - .map(|worktree| { - self.db - .find_or_create_worktree(worktree.read(cx).abs_path().to_path_buf()) - }) - .collect::>(); - - let _subscription = cx.subscribe(&project, |this, project, event, cx| { - if let project::Event::WorktreeUpdatedEntries(worktree_id, changes) = event { - this.project_entries_changed(project.clone(), changes.clone(), cx, worktree_id); - }; - }); - + ) { + let project = project.downgrade(); + let project_state = if let Some(project_state) = self.projects.get_mut(&project) { + project_state + } else { + return; + }; + let worktree = if let Some(worktree) = worktree.read(cx).as_local() { + worktree + } else { + return; + }; + let worktree_abs_path = worktree.abs_path().clone(); + let scan_complete = worktree.scan_complete(); + let worktree_id = worktree.id(); + let db = self.db.clone(); let language_registry = self.language_registry.clone(); + let (mut done_tx, done_rx) = watch::channel(); + let registration = cx.spawn(|this, mut cx| { + async move { + let register = async { + scan_complete.await; + let db_id = db.find_or_create_worktree(worktree_abs_path).await?; + let mut file_mtimes = db.get_file_mtimes(db_id).await?; + let worktree = if let Some(project) = project.upgrade(&cx) { + project + .read_with(&cx, |project, cx| project.worktree_for_id(worktree_id, cx)) + .ok_or_else(|| anyhow!("worktree not found"))? 
+ } else { + return anyhow::Ok(()); + }; + let worktree = worktree.read_with(&cx, |worktree, _| worktree.snapshot()); + let mut changed_paths = cx + .background() + .spawn(async move { + let mut changed_paths = BTreeMap::new(); + for file in worktree.files(false, 0) { + let absolute_path = worktree.absolutize(&file.path); - cx.spawn(|this, mut cx| async move { - futures::future::join_all(worktree_scans_complete).await; - - let worktree_db_ids = futures::future::join_all(worktree_db_ids).await; - let worktrees = project.read_with(&cx, |project, cx| { - project - .worktrees(cx) - .map(|worktree| worktree.read(cx).snapshot()) - .collect::>() - }); - - let mut worktree_file_mtimes = HashMap::new(); - let mut db_ids_by_worktree_id = HashMap::new(); - - for (worktree, db_id) in worktrees.iter().zip(worktree_db_ids) { - let db_id = db_id?; - db_ids_by_worktree_id.insert(worktree.id(), db_id); - worktree_file_mtimes.insert( - worktree.id(), - this.read_with(&cx, |this, _| this.db.get_file_mtimes(db_id)) - .await?, - ); - } - - let worktree_db_ids = db_ids_by_worktree_id - .iter() - .map(|(a, b)| (*a, *b)) - .collect(); - - let changed_paths = cx - .background() - .spawn(async move { - let mut changed_paths = BTreeMap::new(); - let now = Instant::now(); - for worktree in worktrees.into_iter() { - let mut file_mtimes = worktree_file_mtimes.remove(&worktree.id()).unwrap(); - for file in worktree.files(false, 0) { - let absolute_path = worktree.absolutize(&file.path); - - if file.is_external || file.is_ignored || file.is_symlink { - continue; - } - - if let Ok(language) = language_registry - .language_for_file(&absolute_path, None) - .await - { - // Test if file is valid parseable file - if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref()) - && &language.name().as_ref() != &"Markdown" - && language - .grammar() - .and_then(|grammar| grammar.embedding_config.as_ref()) - .is_none() - { + if file.is_external || file.is_ignored || file.is_symlink { continue; } - let stored_mtime = file_mtimes.remove(&file.path.to_path_buf()); - let already_stored = stored_mtime - .map_or(false, |existing_mtime| existing_mtime == file.mtime); + if let Ok(language) = language_registry + .language_for_file(&absolute_path, None) + .await + { + // Test if file is valid parseable file + if !PARSEABLE_ENTIRE_FILE_TYPES + .contains(&language.name().as_ref()) + && &language.name().as_ref() != &"Markdown" + && language + .grammar() + .and_then(|grammar| grammar.embedding_config.as_ref()) + .is_none() + { + continue; + } - if !already_stored { - changed_paths.insert( - ProjectPath { - worktree_id: worktree.id(), - path: file.path.clone(), - }, - ChangedPathInfo { - changed_at: now, - mtime: file.mtime, - is_deleted: false, - }, - ); + let stored_mtime = file_mtimes.remove(&file.path.to_path_buf()); + let already_stored = stored_mtime + .map_or(false, |existing_mtime| { + existing_mtime == file.mtime + }); + + if !already_stored { + changed_paths.insert( + file.path.clone(), + ChangedPathInfo { + mtime: file.mtime, + is_deleted: false, + }, + ); + } } } + + // Clean up entries from database that are no longer in the worktree. 
+ for (path, mtime) in file_mtimes { + changed_paths.insert( + path.into(), + ChangedPathInfo { + mtime, + is_deleted: true, + }, + ); + } + + anyhow::Ok(changed_paths) + }) + .await?; + this.update(&mut cx, |this, cx| { + let project_state = this + .projects + .get_mut(&project) + .ok_or_else(|| anyhow!("project not registered"))?; + let project = project + .upgrade(cx) + .ok_or_else(|| anyhow!("project was dropped"))?; + + if let Some(WorktreeState::Registering(state)) = + project_state.worktrees.remove(&worktree_id) + { + changed_paths.extend(state.changed_paths); } + project_state.worktrees.insert( + worktree_id, + WorktreeState::Registered(RegisteredWorktreeState { + db_id, + changed_paths, + }), + ); + this.index_project(project, cx).detach_and_log_err(cx); - // Clean up entries from database that are no longer in the worktree. - for (path, mtime) in file_mtimes { - changed_paths.insert( - ProjectPath { - worktree_id: worktree.id(), - path: path.into(), - }, - ChangedPathInfo { - changed_at: now, - mtime, - is_deleted: true, - }, - ); - } - } + anyhow::Ok(()) + })?; - anyhow::Ok(changed_paths) - }) - .await?; + anyhow::Ok(()) + }; - this.update(&mut cx, |this, _| { - this.projects.insert( - project.downgrade(), - ProjectState::new(_subscription, worktree_db_ids, changed_paths), - ); - }); - Result::<(), _>::Ok(()) - }) + if register.await.log_err().is_none() { + // Stop tracking this worktree if the registration failed. + this.update(&mut cx, |this, _| { + this.projects.get_mut(&project).map(|project_state| { + project_state.worktrees.remove(&worktree_id); + }); + }) + } + + *done_tx.borrow_mut() = Some(()); + } + }); + project_state.worktrees.insert( + worktree_id, + WorktreeState::Registering(RegisteringWorktreeState { + changed_paths: Default::default(), + done_rx, + _registration: registration, + }), + ); } - pub fn index_project( + fn project_worktrees_changed( &mut self, project: ModelHandle, cx: &mut ModelContext, - ) -> Task)>> { - cx.spawn(|this, mut cx| async move { - let embeddings_for_digest = this.read_with(&cx, |this, _| { - if let Some(state) = this.projects.get(&project.downgrade()) { - let mut worktree_id_file_paths = HashMap::default(); - for (path, _) in &state.changed_paths { - if let Some(worktree_db_id) = state.db_id_for_worktree_id(path.worktree_id) - { - worktree_id_file_paths - .entry(worktree_db_id) - .or_insert(Vec::new()) - .push(path.path.clone()); - } - } + ) { + let project_state = if let Some(project_state) = self.projects.get_mut(&project.downgrade()) + { + project_state + } else { + return; + }; - Ok(this.db.embeddings_for_files(worktree_id_file_paths)) - } else { - Err(anyhow!("Project not yet initialized")) - } - })?; + let mut worktrees = project + .read(cx) + .worktrees(cx) + .filter(|worktree| worktree.read(cx).is_local()) + .collect::>(); + let worktree_ids = worktrees + .iter() + .map(|worktree| worktree.read(cx).id()) + .collect::>(); - let embeddings_for_digest = Arc::new(embeddings_for_digest.await?); + // Remove worktrees that are no longer present + project_state + .worktrees + .retain(|worktree_id, _| worktree_ids.contains(worktree_id)); - Self::reindex_changed_paths( - this.clone(), - project.clone(), - None, - &mut cx, - embeddings_for_digest, - ) - .await; - - this.update(&mut cx, |this, _cx| { - let Some(state) = this.projects.get(&project.downgrade()) else { - return Err(anyhow!("Project not yet initialized")); - }; - let job_count_rx = state.outstanding_job_count_rx.clone(); - let count = state.get_outstanding_count(); - Ok((count, 
job_count_rx)) - }) - }) + // Register new worktrees + worktrees.retain(|worktree| { + let worktree_id = worktree.read(cx).id(); + !project_state.worktrees.contains_key(&worktree_id) + }); + for worktree in worktrees { + self.register_worktree(project.clone(), worktree, cx); + } } - pub fn outstanding_job_count_rx( + pub fn pending_file_count( &self, project: &ModelHandle, ) -> Option> { Some( self.projects .get(&project.downgrade())? - .outstanding_job_count_rx + .pending_file_count_rx .clone(), ) } @@ -646,25 +619,13 @@ impl SemanticIndex { excludes: Vec, cx: &mut ModelContext, ) -> Task>> { - let project_state = if let Some(state) = self.projects.get(&project.downgrade()) { - state - } else { - return Task::ready(Err(anyhow!("project not added"))); - }; - - let worktree_db_ids = project - .read(cx) - .worktrees(cx) - .filter_map(|worktree| { - let worktree_id = worktree.read(cx).id(); - project_state.db_id_for_worktree_id(worktree_id) - }) - .collect::>(); - + let index = self.index_project(project.clone(), cx); let embedding_provider = self.embedding_provider.clone(); let db_path = self.db.path().clone(); let fs = self.fs.clone(); cx.spawn(|this, mut cx| async move { + index.await?; + let t0 = Instant::now(); let database = VectorDatabase::new(fs.clone(), db_path.clone(), cx.background()).await?; @@ -681,6 +642,24 @@ impl SemanticIndex { t0.elapsed().as_millis() ); + let worktree_db_ids = this.read_with(&cx, |this, _| { + let project_state = this + .projects + .get(&project.downgrade()) + .ok_or_else(|| anyhow!("project was not indexed"))?; + let worktree_db_ids = project_state + .worktrees + .values() + .filter_map(|worktree| { + if let WorktreeState::Registered(worktree) = worktree { + Some(worktree.db_id) + } else { + None + } + }) + .collect::>(); + anyhow::Ok(worktree_db_ids) + })?; let file_ids = database .retrieve_included_file_ids(&worktree_db_ids, &includes, &excludes) .await?; @@ -729,13 +708,13 @@ impl SemanticIndex { } let ids = results.into_iter().map(|(id, _)| id).collect::>(); - let documents = database.get_documents_by_ids(ids.as_slice()).await?; + let spans = database.spans_for_ids(ids.as_slice()).await?; let mut tasks = Vec::new(); let mut ranges = Vec::new(); let weak_project = project.downgrade(); project.update(&mut cx, |project, cx| { - for (worktree_db_id, file_path, byte_range) in documents { + for (worktree_db_id, file_path, byte_range) in spans { let project_state = if let Some(state) = this.read(cx).projects.get(&weak_project) { state @@ -764,7 +743,9 @@ impl SemanticIndex { .filter_map(|(buffer, range)| { let buffer = buffer.log_err()?; let range = buffer.read_with(&cx, |buffer, _| { - buffer.anchor_before(range.start)..buffer.anchor_after(range.end) + let start = buffer.clip_offset(range.start, Bias::Left); + let end = buffer.clip_offset(range.end, Bias::Right); + buffer.anchor_before(start)..buffer.anchor_after(end) }); Some(SearchResult { buffer, range }) }) @@ -772,95 +753,173 @@ impl SemanticIndex { }) } - async fn reindex_changed_paths( - this: ModelHandle, + pub fn index_project( + &mut self, project: ModelHandle, - last_changed_before: Option, - cx: &mut AsyncAppContext, - embeddings_for_digest: Arc>, - ) { - let mut pending_files = Vec::new(); - let mut files_to_delete = Vec::new(); - let (db, language_registry, parsing_files_tx) = this.update(cx, |this, cx| { - if let Some(project_state) = this.projects.get_mut(&project.downgrade()) { - let outstanding_job_count_tx = &project_state.outstanding_job_count_tx; - let db_ids = 
&project_state.worktree_db_ids; - let mut worktree: Option> = None; + cx: &mut ModelContext, + ) -> Task> { + if !self.projects.contains_key(&project.downgrade()) { + log::trace!("Registering Project for Semantic Index"); - project_state.changed_paths.retain(|path, info| { - if let Some(last_changed_before) = last_changed_before { - if info.changed_at > last_changed_before { - return true; + let subscription = cx.subscribe(&project, |this, project, event, cx| match event { + project::Event::WorktreeAdded | project::Event::WorktreeRemoved(_) => { + this.project_worktrees_changed(project.clone(), cx); + } + project::Event::WorktreeUpdatedEntries(worktree_id, changes) => { + this.project_entries_changed(project, *worktree_id, changes.clone(), cx); + } + _ => {} + }); + self.projects + .insert(project.downgrade(), ProjectState::new(subscription)); + self.project_worktrees_changed(project.clone(), cx); + } + let project_state = &self.projects[&project.downgrade()]; + let mut pending_file_count_rx = project_state.pending_file_count_rx.clone(); + + let db = self.db.clone(); + let language_registry = self.language_registry.clone(); + let parsing_files_tx = self.parsing_files_tx.clone(); + let worktree_registration = self.wait_for_worktree_registration(&project, cx); + + cx.spawn(|this, mut cx| async move { + worktree_registration.await?; + + let mut pending_files = Vec::new(); + let mut files_to_delete = Vec::new(); + this.update(&mut cx, |this, cx| { + let project_state = this + .projects + .get_mut(&project.downgrade()) + .ok_or_else(|| anyhow!("project was dropped"))?; + let pending_file_count_tx = &project_state.pending_file_count_tx; + + project_state + .worktrees + .retain(|worktree_id, worktree_state| { + let worktree = if let Some(worktree) = + project.read(cx).worktree_for_id(*worktree_id, cx) + { + worktree + } else { + return false; + }; + let worktree_state = + if let WorktreeState::Registered(worktree_state) = worktree_state { + worktree_state + } else { + return true; + }; + + worktree_state.changed_paths.retain(|path, info| { + if info.is_deleted { + files_to_delete.push((worktree_state.db_id, path.clone())); + } else { + let absolute_path = worktree.read(cx).absolutize(path); + let job_handle = JobHandle::new(pending_file_count_tx); + pending_files.push(PendingFile { + absolute_path, + relative_path: path.clone(), + language: None, + job_handle, + modified_time: info.mtime, + worktree_db_id: worktree_state.db_id, + }); + } + + false + }); + true + }); + + anyhow::Ok(()) + })?; + + cx.background() + .spawn(async move { + for (worktree_db_id, path) in files_to_delete { + db.delete_file(worktree_db_id, path).await.log_err(); + } + + let embeddings_for_digest = { + let mut files = HashMap::default(); + for pending_file in &pending_files { + files + .entry(pending_file.worktree_db_id) + .or_insert(Vec::new()) + .push(pending_file.relative_path.clone()); + } + Arc::new( + db.embeddings_for_files(files) + .await + .log_err() + .unwrap_or_default(), + ) + }; + + for mut pending_file in pending_files { + if let Ok(language) = language_registry + .language_for_file(&pending_file.relative_path, None) + .await + { + if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref()) + && &language.name().as_ref() != &"Markdown" + && language + .grammar() + .and_then(|grammar| grammar.embedding_config.as_ref()) + .is_none() + { + continue; + } + pending_file.language = Some(language); + } + parsing_files_tx + .try_send((embeddings_for_digest.clone(), pending_file)) + .ok(); + } + + // Wait 
until we're done indexing. + while let Some(count) = pending_file_count_rx.next().await { + if count == 0 { + break; } } + }) + .await; - if worktree - .as_ref() - .map_or(true, |tree| tree.read(cx).id() != path.worktree_id) - { - worktree = project.read(cx).worktree_for_id(path.worktree_id, cx); - } - let Some(worktree) = &worktree else { - return false; - }; + Ok(()) + }) + } - let Some(worktree_db_id) = db_ids - .iter() - .find_map(|entry| (entry.0 == path.worktree_id).then_some(entry.1)) - else { - return false; - }; + fn wait_for_worktree_registration( + &self, + project: &ModelHandle, + cx: &mut ModelContext, + ) -> Task> { + let project = project.downgrade(); + cx.spawn_weak(|this, cx| async move { + loop { + let mut pending_worktrees = Vec::new(); + this.upgrade(&cx) + .ok_or_else(|| anyhow!("semantic index dropped"))? + .read_with(&cx, |this, _| { + if let Some(project) = this.projects.get(&project) { + for worktree in project.worktrees.values() { + if let WorktreeState::Registering(worktree) = worktree { + pending_worktrees.push(worktree.done()); + } + } + } + }); - if info.is_deleted { - files_to_delete.push((worktree_db_id, path.path.to_path_buf())); - } else { - let absolute_path = worktree.read(cx).absolutize(&path.path); - let job_handle = JobHandle::new(&outstanding_job_count_tx); - pending_files.push(PendingFile { - absolute_path, - relative_path: path.path.to_path_buf(), - language: None, - job_handle, - modified_time: info.mtime, - worktree_db_id, - }); - } - - false - }); - } - - ( - this.db.clone(), - this.language_registry.clone(), - this.parsing_files_tx.clone(), - ) - }); - - for (worktree_db_id, path) in files_to_delete { - db.delete_file(worktree_db_id, path).await.log_err(); - } - - for mut pending_file in pending_files { - if let Ok(language) = language_registry - .language_for_file(&pending_file.relative_path, None) - .await - { - if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref()) - && &language.name().as_ref() != &"Markdown" - && language - .grammar() - .and_then(|grammar| grammar.embedding_config.as_ref()) - .is_none() - { - continue; + if pending_worktrees.is_empty() { + break; + } else { + future::join_all(pending_worktrees).await; } - pending_file.language = Some(language); } - parsing_files_tx - .try_send((embeddings_for_digest.clone(), pending_file)) - .ok(); - } + Ok(()) + }) } } diff --git a/crates/semantic_index/src/semantic_index_tests.rs b/crates/semantic_index/src/semantic_index_tests.rs index f549e68e04..ffd8db8781 100644 --- a/crates/semantic_index/src/semantic_index_tests.rs +++ b/crates/semantic_index/src/semantic_index_tests.rs @@ -1,7 +1,7 @@ use crate::{ embedding::{DummyEmbeddings, Embedding, EmbeddingProvider}, embedding_queue::EmbeddingQueue, - parsing::{subtract_ranges, CodeContextRetriever, Document, DocumentDigest}, + parsing::{subtract_ranges, CodeContextRetriever, Span, SpanDigest}, semantic_index_settings::SemanticIndexSettings, FileToEmbed, JobHandle, SearchResult, SemanticIndex, EMBEDDING_QUEUE_FLUSH_TIMEOUT, }; @@ -87,34 +87,24 @@ async fn test_semantic_index(deterministic: Arc, cx: &mut TestApp let project = Project::test(fs.clone(), ["/the-root".as_ref()], cx).await; - let _ = semantic_index - .update(cx, |store, cx| { - store.initialize_project(project.clone(), cx) - }) - .await; - - let (file_count, outstanding_file_count) = semantic_index - .update(cx, |store, cx| store.index_project(project.clone(), cx)) - .await - .unwrap(); - assert_eq!(file_count, 3); + let search_results = semantic_index.update(cx, 
|store, cx| { + store.search_project( + project.clone(), + "aaaaaabbbbzz".to_string(), + 5, + vec![], + vec![], + cx, + ) + }); + let pending_file_count = + semantic_index.read_with(cx, |index, _| index.pending_file_count(&project).unwrap()); + deterministic.run_until_parked(); + assert_eq!(*pending_file_count.borrow(), 3); deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT); - assert_eq!(*outstanding_file_count.borrow(), 0); - - let search_results = semantic_index - .update(cx, |store, cx| { - store.search_project( - project.clone(), - "aaaaaabbbbzz".to_string(), - 5, - vec![], - vec![], - cx, - ) - }) - .await - .unwrap(); + assert_eq!(*pending_file_count.borrow(), 0); + let search_results = search_results.await.unwrap(); assert_search_results( &search_results, &[ @@ -191,14 +181,12 @@ async fn test_semantic_index(deterministic: Arc, cx: &mut TestApp deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT); let prev_embedding_count = embedding_provider.embedding_count(); - let (file_count, outstanding_file_count) = semantic_index - .update(cx, |store, cx| store.index_project(project.clone(), cx)) - .await - .unwrap(); - assert_eq!(file_count, 1); - + let index = semantic_index.update(cx, |store, cx| store.index_project(project.clone(), cx)); + deterministic.run_until_parked(); + assert_eq!(*pending_file_count.borrow(), 1); deterministic.advance_clock(EMBEDDING_QUEUE_FLUSH_TIMEOUT); - assert_eq!(*outstanding_file_count.borrow(), 0); + assert_eq!(*pending_file_count.borrow(), 0); + index.await.unwrap(); assert_eq!( embedding_provider.embedding_count() - prev_embedding_count, @@ -214,17 +202,17 @@ async fn test_embedding_batching(cx: &mut TestAppContext, mut rng: StdRng) { let files = (1..=3) .map(|file_ix| FileToEmbed { worktree_id: 5, - path: format!("path-{file_ix}").into(), + path: Path::new(&format!("path-{file_ix}")).into(), mtime: SystemTime::now(), - documents: (0..rng.gen_range(4..22)) + spans: (0..rng.gen_range(4..22)) .map(|document_ix| { let content_len = rng.gen_range(10..100); let content = RandomCharIter::new(&mut rng) .with_simple_text() .take(content_len) .collect::(); - let digest = DocumentDigest::from(content.as_str()); - Document { + let digest = SpanDigest::from(content.as_str()); + Span { range: 0..10, embedding: None, name: format!("document {document_ix}"), @@ -257,7 +245,7 @@ async fn test_embedding_batching(cx: &mut TestAppContext, mut rng: StdRng) { .iter() .map(|file| { let mut file = file.clone(); - for doc in &mut file.documents { + for doc in &mut file.spans { doc.embedding = Some(embedding_provider.embed_sync(doc.content.as_ref())); } file @@ -449,7 +437,7 @@ async fn test_code_context_retrieval_json() { } fn assert_documents_eq( - documents: &[Document], + documents: &[Span], expected_contents_and_start_offsets: &[(String, usize)], ) { assert_eq!( From eb8cf7c36e7a6cc0d77b467b49e884173712dffd Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Thu, 7 Sep 2023 19:47:26 +0200 Subject: [PATCH 06/10] Start indexing right away when project was already indexed before (#2941) Release notes: - Improved semantic search indexing to start in the background if the project was already indexed before. 
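In outline, the hook this patch adds to `init` works like this (a condensed sketch of the
subscription code in the diff below, not a drop-in; the `::<WorkspaceCreated, _>` type
parameter is inferred from the `use workspace::WorkspaceCreated` import the patch adds):

    cx.subscribe_global::<WorkspaceCreated, _>(move |event, cx| {
        let Some(semantic_index) = SemanticIndex::global(cx) else {
            return;
        };
        if let Some(workspace) = event.0.upgrade(cx) {
            let project = workspace.read(cx).project().clone();
            if project.read(cx).is_local() {
                cx.spawn(|mut cx| async move {
                    // Only re-index eagerly if this project has been indexed
                    // before; a fresh project still waits for the user to run
                    // an explicit semantic search.
                    let previously_indexed = semantic_index
                        .update(&mut cx, |index, cx| {
                            index.project_previously_indexed(&project, cx)
                        })
                        .await?;
                    if previously_indexed {
                        semantic_index
                            .update(&mut cx, |index, cx| index.index_project(project, cx))
                            .await?;
                    }
                    anyhow::Ok(())
                })
                .detach_and_log_err(cx);
            }
        }
    })
    .detach();

Gating on `project_previously_indexed` keeps eager background indexing limited to projects
the user has already opted into indexing.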
--- crates/search/src/project_search.rs | 109 +++++++++----------- crates/semantic_index/src/db.rs | 7 +- crates/semantic_index/src/semantic_index.rs | 99 ++++++++++++++++-- 3 files changed, 144 insertions(+), 71 deletions(-) diff --git a/crates/search/src/project_search.rs b/crates/search/src/project_search.rs index 7088f394bc..c52be64141 100644 --- a/crates/search/src/project_search.rs +++ b/crates/search/src/project_search.rs @@ -20,12 +20,11 @@ use gpui::{ Task, View, ViewContext, ViewHandle, WeakModelHandle, WeakViewHandle, }; use menu::Confirm; -use postage::stream::Stream; use project::{ search::{PathMatcher, SearchInputs, SearchQuery}, Entry, Project, }; -use semantic_index::SemanticIndex; +use semantic_index::{SemanticIndex, SemanticIndexStatus}; use smallvec::SmallVec; use std::{ any::{Any, TypeId}, @@ -116,7 +115,7 @@ pub struct ProjectSearchView { model: ModelHandle, query_editor: ViewHandle, results_editor: ViewHandle, - semantic_state: Option, + semantic_state: Option, semantic_permissioned: Option, search_options: SearchOptions, panels_with_errors: HashSet, @@ -129,9 +128,9 @@ pub struct ProjectSearchView { current_mode: SearchMode, } -struct SemanticSearchState { - pending_file_count: usize, - _progress_task: Task<()>, +struct SemanticState { + index_status: SemanticIndexStatus, + _subscription: Subscription, } pub struct ProjectSearchBar { @@ -230,7 +229,7 @@ impl ProjectSearch { self.search_id += 1; self.match_ranges.clear(); self.search_history.add(inputs.as_str().to_string()); - self.no_results = Some(true); + self.no_results = None; self.pending_search = Some(cx.spawn(|this, mut cx| async move { let results = search?.await.log_err()?; let matches = results @@ -238,9 +237,10 @@ impl ProjectSearch { .map(|result| (result.buffer, vec![result.range.start..result.range.start])); this.update(&mut cx, |this, cx| { + this.no_results = Some(true); this.excerpts.update(cx, |excerpts, cx| { excerpts.clear(cx); - }) + }); }); for (buffer, ranges) in matches { let mut match_ranges = this.update(&mut cx, |this, cx| { @@ -315,15 +315,20 @@ impl View for ProjectSearchView { } }; - let semantic_status = if let Some(semantic) = &self.semantic_state { - if semantic.pending_file_count > 0 { - format!("Remaining files to index: {}", semantic.pending_file_count) - } else { - "Indexing complete".to_string() + let semantic_status = self.semantic_state.as_ref().and_then(|semantic| { + let status = semantic.index_status; + match status { + SemanticIndexStatus::Indexed => Some("Indexing complete".to_string()), + SemanticIndexStatus::Indexing { remaining_files } => { + if remaining_files == 0 { + Some(format!("Indexing...")) + } else { + Some(format!("Remaining files to index: {}", remaining_files)) + } + } + SemanticIndexStatus::NotIndexed => None, } - } else { - "Indexing: ...".to_string() - }; + }); let minor_text = if let Some(no_results) = model.no_results { if model.pending_search.is_none() && no_results { @@ -333,12 +338,16 @@ impl View for ProjectSearchView { } } else { match current_mode { - SearchMode::Semantic => vec![ - "".to_owned(), - semantic_status, - "Simply explain the code you are looking to find.".to_owned(), - "ex. 'prompt user for permissions to index their project'".to_owned(), - ], + SearchMode::Semantic => { + let mut minor_text = Vec::new(); + minor_text.push("".into()); + minor_text.extend(semantic_status); + minor_text.push("Simply explain the code you are looking to find.".into()); + minor_text.push( + "ex. 
'prompt user for permissions to index their project'".into(), + ); + minor_text + } _ => vec![ "".to_owned(), "Include/exclude specific paths with the filter option.".to_owned(), @@ -634,41 +643,29 @@ impl ProjectSearchView { let project = self.model.read(cx).project.clone(); - let mut pending_file_count_rx = semantic_index.update(cx, |semantic_index, cx| { + semantic_index.update(cx, |semantic_index, cx| { semantic_index .index_project(project.clone(), cx) .detach_and_log_err(cx); - semantic_index.pending_file_count(&project).unwrap() }); - cx.spawn(|search_view, mut cx| async move { - search_view.update(&mut cx, |search_view, cx| { - cx.notify(); - let pending_file_count = *pending_file_count_rx.borrow(); - search_view.semantic_state = Some(SemanticSearchState { - pending_file_count, - _progress_task: cx.spawn(|search_view, mut cx| async move { - while let Some(count) = pending_file_count_rx.recv().await { - search_view - .update(&mut cx, |search_view, cx| { - if let Some(semantic_search_state) = - &mut search_view.semantic_state - { - semantic_search_state.pending_file_count = count; - cx.notify(); - if count == 0 { - return; - } - } - }) - .ok(); - } - }), - }); - })?; - anyhow::Ok(()) - }) - .detach_and_log_err(cx); + self.semantic_state = Some(SemanticState { + index_status: semantic_index.read(cx).status(&project), + _subscription: cx.observe(&semantic_index, Self::semantic_index_changed), + }); + cx.notify(); + } + } + + fn semantic_index_changed( + &mut self, + semantic_index: ModelHandle, + cx: &mut ViewContext, + ) { + let project = self.model.read(cx).project.clone(); + if let Some(semantic_state) = self.semantic_state.as_mut() { + semantic_state.index_status = semantic_index.read(cx).status(&project); + cx.notify(); } } @@ -867,7 +864,7 @@ impl ProjectSearchView { SemanticIndex::global(cx) .map(|semantic| { let project = self.model.read(cx).project.clone(); - semantic.update(cx, |this, cx| this.project_previously_indexed(project, cx)) + semantic.update(cx, |this, cx| this.project_previously_indexed(&project, cx)) }) .unwrap_or(Task::ready(Ok(false))) } @@ -952,11 +949,7 @@ impl ProjectSearchView { let mode = self.current_mode; match mode { SearchMode::Semantic => { - if let Some(semantic) = &mut self.semantic_state { - if semantic.pending_file_count > 0 { - return; - } - + if self.semantic_state.is_some() { if let Some(query) = self.build_search_query(cx) { self.model .update(cx, |model, cx| model.semantic_search(query.as_inner(), cx)); diff --git a/crates/semantic_index/src/db.rs b/crates/semantic_index/src/db.rs index c35057594a..c53a3e1ba9 100644 --- a/crates/semantic_index/src/db.rs +++ b/crates/semantic_index/src/db.rs @@ -18,7 +18,7 @@ use std::{ path::{Path, PathBuf}, rc::Rc, sync::Arc, - time::{Instant, SystemTime}, + time::SystemTime, }; use util::TryFutureExt; @@ -232,7 +232,6 @@ impl VectorDatabase { let file_id = db.last_insert_rowid(); - let t0 = Instant::now(); let mut query = db.prepare( " INSERT INTO spans @@ -240,10 +239,6 @@ impl VectorDatabase { VALUES (?1, ?2, ?3, ?4, ?5, ?6) ", )?; - log::trace!( - "Preparing Query Took: {:?} milliseconds", - t0.elapsed().as_millis() - ); for span in spans { query.execute(params![ diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs index 8bba2f1d0e..2ef409eb92 100644 --- a/crates/semantic_index/src/semantic_index.rs +++ b/crates/semantic_index/src/semantic_index.rs @@ -35,6 +35,7 @@ use util::{ paths::EMBEDDINGS_DIR, ResultExt, }; +use workspace::WorkspaceCreated; const 
SEMANTIC_INDEX_VERSION: usize = 10; const BACKGROUND_INDEXING_DELAY: Duration = Duration::from_secs(5 * 60); @@ -57,6 +58,35 @@ pub fn init( return; } + cx.subscribe_global::({ + move |event, cx| { + let Some(semantic_index) = SemanticIndex::global(cx) else { + return; + }; + let workspace = &event.0; + if let Some(workspace) = workspace.upgrade(cx) { + let project = workspace.read(cx).project().clone(); + if project.read(cx).is_local() { + cx.spawn(|mut cx| async move { + let previously_indexed = semantic_index + .update(&mut cx, |index, cx| { + index.project_previously_indexed(&project, cx) + }) + .await?; + if previously_indexed { + semantic_index + .update(&mut cx, |index, cx| index.index_project(project, cx)) + .await?; + } + anyhow::Ok(()) + }) + .detach_and_log_err(cx); + } + } + } + }) + .detach(); + cx.spawn(move |mut cx| async move { let semantic_index = SemanticIndex::new( fs, @@ -79,6 +109,13 @@ pub fn init( .detach(); } +#[derive(Copy, Clone, Debug)] +pub enum SemanticIndexStatus { + NotIndexed, + Indexed, + Indexing { remaining_files: usize }, +} + pub struct SemanticIndex { fs: Arc, db: VectorDatabase, @@ -94,7 +131,9 @@ struct ProjectState { worktrees: HashMap, pending_file_count_rx: watch::Receiver, pending_file_count_tx: Arc>>, + pending_index: usize, _subscription: gpui::Subscription, + _observe_pending_file_count: Task<()>, } enum WorktreeState { @@ -103,6 +142,10 @@ enum WorktreeState { } impl WorktreeState { + fn is_registered(&self) -> bool { + matches!(self, Self::Registered(_)) + } + fn paths_changed( &mut self, changes: Arc<[(Arc, ProjectEntryId, PathChange)]>, @@ -177,14 +220,25 @@ impl JobHandle { } impl ProjectState { - fn new(subscription: gpui::Subscription) -> Self { + fn new(subscription: gpui::Subscription, cx: &mut ModelContext) -> Self { let (pending_file_count_tx, pending_file_count_rx) = watch::channel_with(0); let pending_file_count_tx = Arc::new(Mutex::new(pending_file_count_tx)); Self { worktrees: Default::default(), - pending_file_count_rx, + pending_file_count_rx: pending_file_count_rx.clone(), pending_file_count_tx, + pending_index: 0, _subscription: subscription, + _observe_pending_file_count: cx.spawn_weak({ + let mut pending_file_count_rx = pending_file_count_rx.clone(); + |this, mut cx| async move { + while let Some(_) = pending_file_count_rx.next().await { + if let Some(this) = this.upgrade(&cx) { + this.update(&mut cx, |_, cx| cx.notify()); + } + } + } + }), } } @@ -227,6 +281,25 @@ impl SemanticIndex { && *RELEASE_CHANNEL != ReleaseChannel::Stable } + pub fn status(&self, project: &ModelHandle) -> SemanticIndexStatus { + if let Some(project_state) = self.projects.get(&project.downgrade()) { + if project_state + .worktrees + .values() + .all(|worktree| worktree.is_registered()) + && project_state.pending_index == 0 + { + SemanticIndexStatus::Indexed + } else { + SemanticIndexStatus::Indexing { + remaining_files: project_state.pending_file_count_rx.borrow().clone(), + } + } + } else { + SemanticIndexStatus::NotIndexed + } + } + async fn new( fs: Arc, database_path: PathBuf, @@ -356,7 +429,7 @@ impl SemanticIndex { pub fn project_previously_indexed( &mut self, - project: ModelHandle, + project: &ModelHandle, cx: &mut ModelContext, ) -> Task> { let worktrees_indexed_previously = project @@ -770,13 +843,15 @@ impl SemanticIndex { } _ => {} }); - self.projects - .insert(project.downgrade(), ProjectState::new(subscription)); + let project_state = ProjectState::new(subscription, cx); + self.projects.insert(project.downgrade(), project_state); 
self.project_worktrees_changed(project.clone(), cx); } - let project_state = &self.projects[&project.downgrade()]; - let mut pending_file_count_rx = project_state.pending_file_count_rx.clone(); + let project_state = self.projects.get_mut(&project.downgrade()).unwrap(); + project_state.pending_index += 1; + cx.notify(); + let mut pending_file_count_rx = project_state.pending_file_count_rx.clone(); let db = self.db.clone(); let language_registry = self.language_registry.clone(); let parsing_files_tx = self.parsing_files_tx.clone(); @@ -887,6 +962,16 @@ impl SemanticIndex { }) .await; + this.update(&mut cx, |this, cx| { + let project_state = this + .projects + .get_mut(&project.downgrade()) + .ok_or_else(|| anyhow!("project was dropped"))?; + project_state.pending_index -= 1; + cx.notify(); + anyhow::Ok(()) + })?; + Ok(()) }) } From a61d639b67e3ca27c03d95fcd8895fb156c70848 Mon Sep 17 00:00:00 2001 From: "Joseph T. Lyons" Date: Thu, 7 Sep 2023 13:54:48 -0400 Subject: [PATCH 07/10] zed 0.103.1 --- Cargo.lock | 2 +- crates/live_kit_client/LiveKitBridge/Package.resolved | 4 ++-- crates/zed/Cargo.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7a35645899..cb5686ed66 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9762,7 +9762,7 @@ dependencies = [ [[package]] name = "zed" -version = "0.103.0" +version = "0.103.1" dependencies = [ "activity_indicator", "ai", diff --git a/crates/live_kit_client/LiveKitBridge/Package.resolved b/crates/live_kit_client/LiveKitBridge/Package.resolved index b925bc8f0d..85ae088565 100644 --- a/crates/live_kit_client/LiveKitBridge/Package.resolved +++ b/crates/live_kit_client/LiveKitBridge/Package.resolved @@ -42,8 +42,8 @@ "repositoryURL": "https://github.com/apple/swift-protobuf.git", "state": { "branch": null, - "revision": "ce20dc083ee485524b802669890291c0d8090170", - "version": "1.22.1" + "revision": "0af9125c4eae12a4973fb66574c53a54962a9e1e", + "version": "1.21.0" } } ] diff --git a/crates/zed/Cargo.toml b/crates/zed/Cargo.toml index 66d55b38f0..089f500549 100644 --- a/crates/zed/Cargo.toml +++ b/crates/zed/Cargo.toml @@ -3,7 +3,7 @@ authors = ["Nathan Sobo "] description = "The fast, collaborative code editor." edition = "2021" name = "zed" -version = "0.103.0" +version = "0.103.1" publish = false [lib] From fd373517c7e96b60719a8edb4081e02e68f5548d Mon Sep 17 00:00:00 2001 From: Kyle Caverly Date: Thu, 7 Sep 2023 14:58:00 -0400 Subject: [PATCH 08/10] update semantic search to show no results if search query is blank (#2942) Update semantic search to show no search results if search query is blank --- crates/semantic_index/src/semantic_index.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs index 2ef409eb92..0e18c42049 100644 --- a/crates/semantic_index/src/semantic_index.rs +++ b/crates/semantic_index/src/semantic_index.rs @@ -703,6 +703,10 @@ impl SemanticIndex { let database = VectorDatabase::new(fs.clone(), db_path.clone(), cx.background()).await?; + if phrase.len() == 0 { + return Ok(Vec::new()); + } + let phrase_embedding = embedding_provider .embed_batch(vec![phrase]) .await? From 69ab1c014aec489333bbdf8da4915a4141d6c2de Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 7 Sep 2023 13:12:26 -0700 Subject: [PATCH 09/10] Make toolbar horizontal padding more consistent (#2944) * increase horizontal padding of toolbar itself, remove padding that was added to individual toolbar items like feedback button. 
* make feedback info text and breadcrumbs have the same additional padding as quick action buttons. Release Notes: - Fixed some inconsistencies in the layout of toolbars. --- crates/feedback/src/feedback_info_text.rs | 8 +++++--- styles/src/style_tree/feedback.ts | 15 +++++++++------ styles/src/style_tree/toolbar.ts | 10 +++++----- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/crates/feedback/src/feedback_info_text.rs b/crates/feedback/src/feedback_info_text.rs index 91ff22e904..bc0ee9ea36 100644 --- a/crates/feedback/src/feedback_info_text.rs +++ b/crates/feedback/src/feedback_info_text.rs @@ -42,14 +42,14 @@ impl View for FeedbackInfoText { ) .with_child( MouseEventHandler::new::(0, cx, |state, _| { - let contained_text = if state.hovered() { + let style = if state.hovered() { &theme.feedback.link_text_hover } else { &theme.feedback.link_text_default }; - - Label::new("community repo", contained_text.text.clone()) + Label::new("community repo", style.text.clone()) .contained() + .with_style(style.container) .aligned() .left() .clipped() @@ -64,6 +64,8 @@ impl View for FeedbackInfoText { .with_soft_wrap(false) .aligned(), ) + .contained() + .with_style(theme.feedback.info_text_default.container) .aligned() .left() .clipped() diff --git a/styles/src/style_tree/feedback.ts b/styles/src/style_tree/feedback.ts index 0349359533..4226db9753 100644 --- a/styles/src/style_tree/feedback.ts +++ b/styles/src/style_tree/feedback.ts @@ -12,9 +12,6 @@ export default function feedback(): any { background: background(theme.highest, "on"), corner_radius: 6, border: border(theme.highest, "on"), - margin: { - right: 4, - }, padding: { bottom: 2, left: 10, @@ -41,9 +38,15 @@ export default function feedback(): any { }, }), button_margin: 8, - info_text_default: text(theme.highest, "sans", "default", { - size: "xs", - }), + info_text_default: { + padding: { + left: 4, + right: 4, + }, + ...text(theme.highest, "sans", "default", { + size: "xs", + }) + }, link_text_default: text(theme.highest, "sans", "default", { size: "xs", underline: true, diff --git a/styles/src/style_tree/toolbar.ts b/styles/src/style_tree/toolbar.ts index adf8fb866f..8ec46d9f2a 100644 --- a/styles/src/style_tree/toolbar.ts +++ b/styles/src/style_tree/toolbar.ts @@ -2,14 +2,14 @@ import { useTheme } from "../common" import { toggleable_icon_button } from "../component/icon_button" import { interactive, toggleable } from "../element" import { background, border, foreground, text } from "./components" -import { text_button } from "../component"; +import { text_button } from "../component" export const toolbar = () => { const theme = useTheme() return { height: 42, - padding: { left: 4, right: 4 }, + padding: { left: 8, right: 8 }, background: background(theme.highest), border: border(theme.highest, { bottom: true }), item_spacing: 4, @@ -24,9 +24,9 @@ export const toolbar = () => { ...text(theme.highest, "sans", "variant"), corner_radius: 6, padding: { - left: 6, - right: 6, - }, + left: 4, + right: 4, + } }, state: { hovered: { From bcd474d92013d24f9e7f27d06f15cab9d71c152e Mon Sep 17 00:00:00 2001 From: "Joseph T. 
Lyons" Date: Wed, 13 Sep 2023 12:21:55 -0400 Subject: [PATCH 10/10] v0.103.x stable --- crates/zed/RELEASE_CHANNEL | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/zed/RELEASE_CHANNEL b/crates/zed/RELEASE_CHANNEL index 4de2f126df..870bbe4e50 100644 --- a/crates/zed/RELEASE_CHANNEL +++ b/crates/zed/RELEASE_CHANNEL @@ -1 +1 @@ -preview \ No newline at end of file +stable \ No newline at end of file