Start indexing right away when project was already indexed before (#2941)

Release notes:
- Semantic search indexing now starts automatically in the background when a
workspace opens, if the project was already indexed before.
This commit is contained in:
Antonio Scandurra 2023-09-07 19:47:26 +02:00
parent fedec2d21c
commit eb8cf7c36e
3 changed files with 144 additions and 71 deletions

View file

@ -20,12 +20,11 @@ use gpui::{
Task, View, ViewContext, ViewHandle, WeakModelHandle, WeakViewHandle, Task, View, ViewContext, ViewHandle, WeakModelHandle, WeakViewHandle,
}; };
use menu::Confirm; use menu::Confirm;
use postage::stream::Stream;
use project::{ use project::{
search::{PathMatcher, SearchInputs, SearchQuery}, search::{PathMatcher, SearchInputs, SearchQuery},
Entry, Project, Entry, Project,
}; };
use semantic_index::SemanticIndex; use semantic_index::{SemanticIndex, SemanticIndexStatus};
use smallvec::SmallVec; use smallvec::SmallVec;
use std::{ use std::{
any::{Any, TypeId}, any::{Any, TypeId},
@ -116,7 +115,7 @@ pub struct ProjectSearchView {
model: ModelHandle<ProjectSearch>, model: ModelHandle<ProjectSearch>,
query_editor: ViewHandle<Editor>, query_editor: ViewHandle<Editor>,
results_editor: ViewHandle<Editor>, results_editor: ViewHandle<Editor>,
semantic_state: Option<SemanticSearchState>, semantic_state: Option<SemanticState>,
semantic_permissioned: Option<bool>, semantic_permissioned: Option<bool>,
search_options: SearchOptions, search_options: SearchOptions,
panels_with_errors: HashSet<InputPanel>, panels_with_errors: HashSet<InputPanel>,
@ -129,9 +128,9 @@ pub struct ProjectSearchView {
current_mode: SearchMode, current_mode: SearchMode,
} }
struct SemanticSearchState { struct SemanticState {
pending_file_count: usize, index_status: SemanticIndexStatus,
_progress_task: Task<()>, _subscription: Subscription,
} }
pub struct ProjectSearchBar { pub struct ProjectSearchBar {
@ -230,7 +229,7 @@ impl ProjectSearch {
self.search_id += 1; self.search_id += 1;
self.match_ranges.clear(); self.match_ranges.clear();
self.search_history.add(inputs.as_str().to_string()); self.search_history.add(inputs.as_str().to_string());
self.no_results = Some(true); self.no_results = None;
self.pending_search = Some(cx.spawn(|this, mut cx| async move { self.pending_search = Some(cx.spawn(|this, mut cx| async move {
let results = search?.await.log_err()?; let results = search?.await.log_err()?;
let matches = results let matches = results
@ -238,9 +237,10 @@ impl ProjectSearch {
.map(|result| (result.buffer, vec![result.range.start..result.range.start])); .map(|result| (result.buffer, vec![result.range.start..result.range.start]));
this.update(&mut cx, |this, cx| { this.update(&mut cx, |this, cx| {
this.no_results = Some(true);
this.excerpts.update(cx, |excerpts, cx| { this.excerpts.update(cx, |excerpts, cx| {
excerpts.clear(cx); excerpts.clear(cx);
}) });
}); });
for (buffer, ranges) in matches { for (buffer, ranges) in matches {
let mut match_ranges = this.update(&mut cx, |this, cx| { let mut match_ranges = this.update(&mut cx, |this, cx| {
@ -315,15 +315,20 @@ impl View for ProjectSearchView {
} }
}; };
let semantic_status = if let Some(semantic) = &self.semantic_state { let semantic_status = self.semantic_state.as_ref().and_then(|semantic| {
if semantic.pending_file_count > 0 { let status = semantic.index_status;
format!("Remaining files to index: {}", semantic.pending_file_count) match status {
} else { SemanticIndexStatus::Indexed => Some("Indexing complete".to_string()),
"Indexing complete".to_string() SemanticIndexStatus::Indexing { remaining_files } => {
if remaining_files == 0 {
Some(format!("Indexing..."))
} else {
Some(format!("Remaining files to index: {}", remaining_files))
}
}
SemanticIndexStatus::NotIndexed => None,
} }
} else { });
"Indexing: ...".to_string()
};
let minor_text = if let Some(no_results) = model.no_results { let minor_text = if let Some(no_results) = model.no_results {
if model.pending_search.is_none() && no_results { if model.pending_search.is_none() && no_results {
@ -333,12 +338,16 @@ impl View for ProjectSearchView {
} }
} else { } else {
match current_mode { match current_mode {
SearchMode::Semantic => vec![ SearchMode::Semantic => {
"".to_owned(), let mut minor_text = Vec::new();
semantic_status, minor_text.push("".into());
"Simply explain the code you are looking to find.".to_owned(), minor_text.extend(semantic_status);
"ex. 'prompt user for permissions to index their project'".to_owned(), minor_text.push("Simply explain the code you are looking to find.".into());
], minor_text.push(
"ex. 'prompt user for permissions to index their project'".into(),
);
minor_text
}
_ => vec![ _ => vec![
"".to_owned(), "".to_owned(),
"Include/exclude specific paths with the filter option.".to_owned(), "Include/exclude specific paths with the filter option.".to_owned(),
@ -634,41 +643,29 @@ impl ProjectSearchView {
let project = self.model.read(cx).project.clone(); let project = self.model.read(cx).project.clone();
let mut pending_file_count_rx = semantic_index.update(cx, |semantic_index, cx| { semantic_index.update(cx, |semantic_index, cx| {
semantic_index semantic_index
.index_project(project.clone(), cx) .index_project(project.clone(), cx)
.detach_and_log_err(cx); .detach_and_log_err(cx);
semantic_index.pending_file_count(&project).unwrap()
}); });
cx.spawn(|search_view, mut cx| async move { self.semantic_state = Some(SemanticState {
search_view.update(&mut cx, |search_view, cx| { index_status: semantic_index.read(cx).status(&project),
cx.notify(); _subscription: cx.observe(&semantic_index, Self::semantic_index_changed),
let pending_file_count = *pending_file_count_rx.borrow(); });
search_view.semantic_state = Some(SemanticSearchState { cx.notify();
pending_file_count, }
_progress_task: cx.spawn(|search_view, mut cx| async move { }
while let Some(count) = pending_file_count_rx.recv().await {
search_view fn semantic_index_changed(
.update(&mut cx, |search_view, cx| { &mut self,
if let Some(semantic_search_state) = semantic_index: ModelHandle<SemanticIndex>,
&mut search_view.semantic_state cx: &mut ViewContext<Self>,
{ ) {
semantic_search_state.pending_file_count = count; let project = self.model.read(cx).project.clone();
cx.notify(); if let Some(semantic_state) = self.semantic_state.as_mut() {
if count == 0 { semantic_state.index_status = semantic_index.read(cx).status(&project);
return; cx.notify();
}
}
})
.ok();
}
}),
});
})?;
anyhow::Ok(())
})
.detach_and_log_err(cx);
} }
} }
@ -867,7 +864,7 @@ impl ProjectSearchView {
SemanticIndex::global(cx) SemanticIndex::global(cx)
.map(|semantic| { .map(|semantic| {
let project = self.model.read(cx).project.clone(); let project = self.model.read(cx).project.clone();
semantic.update(cx, |this, cx| this.project_previously_indexed(project, cx)) semantic.update(cx, |this, cx| this.project_previously_indexed(&project, cx))
}) })
.unwrap_or(Task::ready(Ok(false))) .unwrap_or(Task::ready(Ok(false)))
} }
@ -952,11 +949,7 @@ impl ProjectSearchView {
let mode = self.current_mode; let mode = self.current_mode;
match mode { match mode {
SearchMode::Semantic => { SearchMode::Semantic => {
if let Some(semantic) = &mut self.semantic_state { if self.semantic_state.is_some() {
if semantic.pending_file_count > 0 {
return;
}
if let Some(query) = self.build_search_query(cx) { if let Some(query) = self.build_search_query(cx) {
self.model self.model
.update(cx, |model, cx| model.semantic_search(query.as_inner(), cx)); .update(cx, |model, cx| model.semantic_search(query.as_inner(), cx));

View file

@ -18,7 +18,7 @@ use std::{
path::{Path, PathBuf}, path::{Path, PathBuf},
rc::Rc, rc::Rc,
sync::Arc, sync::Arc,
time::{Instant, SystemTime}, time::SystemTime,
}; };
use util::TryFutureExt; use util::TryFutureExt;
@ -232,7 +232,6 @@ impl VectorDatabase {
let file_id = db.last_insert_rowid(); let file_id = db.last_insert_rowid();
let t0 = Instant::now();
let mut query = db.prepare( let mut query = db.prepare(
" "
INSERT INTO spans INSERT INTO spans
@ -240,10 +239,6 @@ impl VectorDatabase {
VALUES (?1, ?2, ?3, ?4, ?5, ?6) VALUES (?1, ?2, ?3, ?4, ?5, ?6)
", ",
)?; )?;
log::trace!(
"Preparing Query Took: {:?} milliseconds",
t0.elapsed().as_millis()
);
for span in spans { for span in spans {
query.execute(params![ query.execute(params![

View file

@ -35,6 +35,7 @@ use util::{
paths::EMBEDDINGS_DIR, paths::EMBEDDINGS_DIR,
ResultExt, ResultExt,
}; };
use workspace::WorkspaceCreated;
const SEMANTIC_INDEX_VERSION: usize = 10; const SEMANTIC_INDEX_VERSION: usize = 10;
const BACKGROUND_INDEXING_DELAY: Duration = Duration::from_secs(5 * 60); const BACKGROUND_INDEXING_DELAY: Duration = Duration::from_secs(5 * 60);
@ -57,6 +58,35 @@ pub fn init(
return; return;
} }
cx.subscribe_global::<WorkspaceCreated, _>({
move |event, cx| {
let Some(semantic_index) = SemanticIndex::global(cx) else {
return;
};
let workspace = &event.0;
if let Some(workspace) = workspace.upgrade(cx) {
let project = workspace.read(cx).project().clone();
if project.read(cx).is_local() {
cx.spawn(|mut cx| async move {
let previously_indexed = semantic_index
.update(&mut cx, |index, cx| {
index.project_previously_indexed(&project, cx)
})
.await?;
if previously_indexed {
semantic_index
.update(&mut cx, |index, cx| index.index_project(project, cx))
.await?;
}
anyhow::Ok(())
})
.detach_and_log_err(cx);
}
}
}
})
.detach();
cx.spawn(move |mut cx| async move { cx.spawn(move |mut cx| async move {
let semantic_index = SemanticIndex::new( let semantic_index = SemanticIndex::new(
fs, fs,
@ -79,6 +109,13 @@ pub fn init(
.detach(); .detach();
} }
/// Progress of semantic indexing for a single project, as reported by
/// `SemanticIndex::status`.
///
/// The type is `Copy` and comparable so that callers can cache the last
/// status they saw and cheaply detect changes.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SemanticIndexStatus {
    /// The index holds no state for the project.
    NotIndexed,
    /// Every worktree of the project is registered and no indexing pass is
    /// currently pending.
    Indexed,
    /// An indexing pass is pending or some worktree is not registered yet.
    ///
    /// `remaining_files` is the most recently published pending-file count;
    /// it can be `0` while indexing is still starting up.
    Indexing { remaining_files: usize },
}
pub struct SemanticIndex { pub struct SemanticIndex {
fs: Arc<dyn Fs>, fs: Arc<dyn Fs>,
db: VectorDatabase, db: VectorDatabase,
@ -94,7 +131,9 @@ struct ProjectState {
worktrees: HashMap<WorktreeId, WorktreeState>, worktrees: HashMap<WorktreeId, WorktreeState>,
pending_file_count_rx: watch::Receiver<usize>, pending_file_count_rx: watch::Receiver<usize>,
pending_file_count_tx: Arc<Mutex<watch::Sender<usize>>>, pending_file_count_tx: Arc<Mutex<watch::Sender<usize>>>,
pending_index: usize,
_subscription: gpui::Subscription, _subscription: gpui::Subscription,
_observe_pending_file_count: Task<()>,
} }
enum WorktreeState { enum WorktreeState {
@ -103,6 +142,10 @@ enum WorktreeState {
} }
impl WorktreeState { impl WorktreeState {
/// Returns `true` when this worktree state is the `Registered` variant,
/// and `false` for every other variant.
fn is_registered(&self) -> bool {
matches!(self, Self::Registered(_))
}
fn paths_changed( fn paths_changed(
&mut self, &mut self,
changes: Arc<[(Arc<Path>, ProjectEntryId, PathChange)]>, changes: Arc<[(Arc<Path>, ProjectEntryId, PathChange)]>,
@ -177,14 +220,25 @@ impl JobHandle {
} }
impl ProjectState { impl ProjectState {
fn new(subscription: gpui::Subscription) -> Self { fn new(subscription: gpui::Subscription, cx: &mut ModelContext<SemanticIndex>) -> Self {
let (pending_file_count_tx, pending_file_count_rx) = watch::channel_with(0); let (pending_file_count_tx, pending_file_count_rx) = watch::channel_with(0);
let pending_file_count_tx = Arc::new(Mutex::new(pending_file_count_tx)); let pending_file_count_tx = Arc::new(Mutex::new(pending_file_count_tx));
Self { Self {
worktrees: Default::default(), worktrees: Default::default(),
pending_file_count_rx, pending_file_count_rx: pending_file_count_rx.clone(),
pending_file_count_tx, pending_file_count_tx,
pending_index: 0,
_subscription: subscription, _subscription: subscription,
_observe_pending_file_count: cx.spawn_weak({
let mut pending_file_count_rx = pending_file_count_rx.clone();
|this, mut cx| async move {
while let Some(_) = pending_file_count_rx.next().await {
if let Some(this) = this.upgrade(&cx) {
this.update(&mut cx, |_, cx| cx.notify());
}
}
}
}),
} }
} }
@ -227,6 +281,25 @@ impl SemanticIndex {
&& *RELEASE_CHANNEL != ReleaseChannel::Stable && *RELEASE_CHANNEL != ReleaseChannel::Stable
} }
/// Reports the semantic indexing status of `project`.
///
/// * `NotIndexed` — this index holds no state for the project.
/// * `Indexed` — every worktree of the project is registered and no
///   indexing pass is pending (`pending_index == 0`).
/// * `Indexing` — otherwise; `remaining_files` is the latest value published
///   on the project's pending-file-count channel.
pub fn status(&self, project: &ModelHandle<Project>) -> SemanticIndexStatus {
    let Some(project_state) = self.projects.get(&project.downgrade()) else {
        return SemanticIndexStatus::NotIndexed;
    };

    let all_worktrees_registered = project_state
        .worktrees
        .values()
        .all(|worktree| worktree.is_registered());

    if all_worktrees_registered && project_state.pending_index == 0 {
        SemanticIndexStatus::Indexed
    } else {
        SemanticIndexStatus::Indexing {
            // The count is a plain `usize`, so copy it out of the borrow
            // guard instead of cloning.
            remaining_files: *project_state.pending_file_count_rx.borrow(),
        }
    }
}
async fn new( async fn new(
fs: Arc<dyn Fs>, fs: Arc<dyn Fs>,
database_path: PathBuf, database_path: PathBuf,
@ -356,7 +429,7 @@ impl SemanticIndex {
pub fn project_previously_indexed( pub fn project_previously_indexed(
&mut self, &mut self,
project: ModelHandle<Project>, project: &ModelHandle<Project>,
cx: &mut ModelContext<Self>, cx: &mut ModelContext<Self>,
) -> Task<Result<bool>> { ) -> Task<Result<bool>> {
let worktrees_indexed_previously = project let worktrees_indexed_previously = project
@ -770,13 +843,15 @@ impl SemanticIndex {
} }
_ => {} _ => {}
}); });
self.projects let project_state = ProjectState::new(subscription, cx);
.insert(project.downgrade(), ProjectState::new(subscription)); self.projects.insert(project.downgrade(), project_state);
self.project_worktrees_changed(project.clone(), cx); self.project_worktrees_changed(project.clone(), cx);
} }
let project_state = &self.projects[&project.downgrade()]; let project_state = self.projects.get_mut(&project.downgrade()).unwrap();
let mut pending_file_count_rx = project_state.pending_file_count_rx.clone(); project_state.pending_index += 1;
cx.notify();
let mut pending_file_count_rx = project_state.pending_file_count_rx.clone();
let db = self.db.clone(); let db = self.db.clone();
let language_registry = self.language_registry.clone(); let language_registry = self.language_registry.clone();
let parsing_files_tx = self.parsing_files_tx.clone(); let parsing_files_tx = self.parsing_files_tx.clone();
@ -887,6 +962,16 @@ impl SemanticIndex {
}) })
.await; .await;
this.update(&mut cx, |this, cx| {
let project_state = this
.projects
.get_mut(&project.downgrade())
.ok_or_else(|| anyhow!("project was dropped"))?;
project_state.pending_index -= 1;
cx.notify();
anyhow::Ok(())
})?;
Ok(()) Ok(())
}) })
} }