🎨
commit d4cff68475 (parent 8dbc0fe033)
6 changed files with 308 additions and 259 deletions

Cargo.lock (generated):
@@ -6713,6 +6713,7 @@ dependencies = [
  "anyhow",
  "async-trait",
  "bincode",
+ "collections",
  "ctor",
  "editor",
  "env_logger 0.9.3",

@@ -9,6 +9,7 @@ path = "src/semantic_index.rs"
 doctest = false
 
 [dependencies]
+collections = { path = "../collections" }
 gpui = { path = "../gpui" }
 language = { path = "../language" }
 project = { path = "../project" }
@@ -42,6 +43,7 @@ sha1 = "0.10.5"
 parse_duration = "2.1.1"
 
 [dev-dependencies]
+collections = { path = "../collections", features = ["test-support"] }
 gpui = { path = "../gpui", features = ["test-support"] }
 language = { path = "../language", features = ["test-support"] }
 project = { path = "../project", features = ["test-support"] }
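
The new `collections` dependency above, together with the `HashMap::new()` → `HashMap::default()` substitutions in the hunks below, suggests the workspace's `collections` crate aliases `HashMap` to a map with a non-standard hasher; `HashMap::new` is only defined for the default `RandomState` hasher, while `default()` is available for any hasher that implements `Default`. That reasoning is an assumption on my part (the alias itself is not part of this diff); a minimal standalone sketch of the constraint:

use std::collections::hash_map::DefaultHasher;
use std::collections::HashMap;
use std::hash::BuildHasherDefault;

// Stand-in for an alias like the one the `collections` crate presumably exports.
type AliasedMap<K, V> = HashMap<K, V, BuildHasherDefault<DefaultHasher>>;

fn main() {
    // `AliasedMap::new()` would not compile: `HashMap::new` exists only when
    // the hasher is `RandomState`. `default()` works for any `S: Default`.
    let mut map: AliasedMap<&str, i64> = AliasedMap::default();
    map.insert("worktree-1", 42);
    assert_eq!(map.get("worktree-1"), Some(&42));
}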

@@ -4,6 +4,7 @@ use crate::{
     SEMANTIC_INDEX_VERSION,
 };
 use anyhow::{anyhow, Context, Result};
+use collections::HashMap;
 use futures::channel::oneshot;
 use gpui::executor;
 use project::{search::PathMatcher, Fs};
@@ -12,7 +13,6 @@ use rusqlite::params;
 use rusqlite::types::Value;
 use std::{
     cmp::Ordering,
-    collections::HashMap,
     future::Future,
     ops::Range,
     path::{Path, PathBuf},
@@ -195,7 +195,7 @@ impl VectorDatabase {
     pub fn delete_file(
         &self,
         worktree_id: i64,
-        delete_path: PathBuf,
+        delete_path: Arc<Path>,
     ) -> impl Future<Output = Result<()>> {
         self.transact(move |db| {
             db.execute(
@@ -209,7 +209,7 @@ impl VectorDatabase {
     pub fn insert_file(
         &self,
         worktree_id: i64,
-        path: PathBuf,
+        path: Arc<Path>,
         mtime: SystemTime,
         documents: Vec<Document>,
     ) -> impl Future<Output = Result<()>> {
@@ -288,7 +288,7 @@ impl VectorDatabase {
             WHERE files.worktree_id = ? AND files.relative_path IN rarray(?)
             ",
         )?;
-        let mut embeddings_by_digest = HashMap::new();
+        let mut embeddings_by_digest = HashMap::default();
         for (worktree_id, file_paths) in worktree_id_file_paths {
             let file_paths = Rc::new(
                 file_paths
@@ -316,7 +316,7 @@ impl VectorDatabase {
 
     pub fn find_or_create_worktree(
         &self,
-        worktree_root_path: PathBuf,
+        worktree_root_path: Arc<Path>,
     ) -> impl Future<Output = Result<i64>> {
         self.transact(move |db| {
             let mut worktree_query =
@@ -351,7 +351,7 @@ impl VectorDatabase {
             WHERE worktree_id = ?1
             ORDER BY relative_path",
         )?;
-        let mut result: HashMap<PathBuf, SystemTime> = HashMap::new();
+        let mut result: HashMap<PathBuf, SystemTime> = HashMap::default();
         for row in statement.query_map(params![worktree_id], |row| {
             Ok((
                 row.get::<_, String>(0)?.into(),
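
The common thread in the hunks above is that `VectorDatabase` now takes `Arc<Path>` instead of `PathBuf` for the paths it moves into its `transact` futures. A minimal sketch of why that is a reasonable trade (illustrative values only, not taken from the commit):

use std::path::{Path, PathBuf};
use std::sync::Arc;

fn main() {
    let owned = PathBuf::from("crates/semantic_index/src/db.rs");

    // Cloning a PathBuf duplicates its heap-allocated string data.
    let _copy: PathBuf = owned.clone();

    // An Arc<Path> is allocated once; every further clone only bumps a
    // reference count, which suits a path that is cloned into database
    // futures, channels, and per-worktree change maps.
    let shared: Arc<Path> = Arc::from(owned.as_path());
    let cheap = Arc::clone(&shared);
    assert_eq!(&*shared, &*cheap);
}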

@@ -2,12 +2,12 @@ use crate::{embedding::EmbeddingProvider, parsing::Document, JobHandle};
 use gpui::executor::Background;
 use parking_lot::Mutex;
 use smol::channel;
-use std::{mem, ops::Range, path::PathBuf, sync::Arc, time::SystemTime};
+use std::{mem, ops::Range, path::Path, sync::Arc, time::SystemTime};
 
 #[derive(Clone)]
 pub struct FileToEmbed {
     pub worktree_id: i64,
-    pub path: PathBuf,
+    pub path: Arc<Path>,
     pub mtime: SystemTime,
     pub documents: Vec<Document>,
     pub job_handle: JobHandle,

@@ -9,6 +9,7 @@ mod semantic_index_tests;
 
 use crate::semantic_index_settings::SemanticIndexSettings;
 use anyhow::{anyhow, Result};
+use collections::{BTreeMap, HashMap, HashSet};
 use db::VectorDatabase;
 use embedding::{Embedding, EmbeddingProvider, OpenAIEmbeddings};
 use embedding_queue::{EmbeddingQueue, FileToEmbed};
@@ -18,13 +19,10 @@ use language::{Anchor, Buffer, Language, LanguageRegistry};
 use parking_lot::Mutex;
 use parsing::{CodeContextRetriever, DocumentDigest, PARSEABLE_ENTIRE_FILE_TYPES};
 use postage::watch;
-use project::{
-    search::PathMatcher, Fs, PathChange, Project, ProjectEntryId, ProjectPath, Worktree, WorktreeId,
-};
+use project::{search::PathMatcher, Fs, PathChange, Project, ProjectEntryId, Worktree, WorktreeId};
 use smol::channel;
 use std::{
     cmp::Ordering,
-    collections::{BTreeMap, HashMap},
     ops::Range,
     path::{Path, PathBuf},
     sync::{Arc, Weak},
@@ -34,7 +32,7 @@ use util::{
     channel::{ReleaseChannel, RELEASE_CHANNEL, RELEASE_CHANNEL_NAME},
     http::HttpClient,
     paths::EMBEDDINGS_DIR,
-    ResultExt,
+    ResultExt, TryFutureExt,
 };
 use workspace::WorkspaceCreated;
 
@@ -68,9 +66,7 @@ pub fn init(
         if let Some(workspace) = workspace.upgrade(cx) {
             let project = workspace.read(cx).project().clone();
             if project.read(cx).is_local() {
-                semantic_index.update(cx, |index, cx| {
-                    index.initialize_project(project, cx).detach_and_log_err(cx)
-                });
+                semantic_index.update(cx, |index, cx| index.register_project(project, cx));
             }
         }
     }
@@ -111,11 +107,56 @@ pub struct SemanticIndex {
 }
 
 struct ProjectState {
-    worktree_db_ids: Vec<(WorktreeId, i64)>,
-    _subscription: gpui::Subscription,
+    worktrees: HashMap<WorktreeId, WorktreeState>,
     outstanding_job_count_rx: watch::Receiver<usize>,
     outstanding_job_count_tx: Arc<Mutex<watch::Sender<usize>>>,
-    changed_paths: BTreeMap<ProjectPath, ChangedPathInfo>,
+    _subscription: gpui::Subscription,
+}
+
+enum WorktreeState {
+    Registering(RegisteringWorktreeState),
+    Registered(RegisteredWorktreeState),
+}
+
+impl WorktreeState {
+    fn paths_changed(
+        &mut self,
+        changes: Arc<[(Arc<Path>, ProjectEntryId, PathChange)]>,
+        changed_at: Instant,
+        worktree: &Worktree,
+    ) {
+        let changed_paths = match self {
+            Self::Registering(state) => &mut state.changed_paths,
+            Self::Registered(state) => &mut state.changed_paths,
+        };
+
+        for (path, entry_id, change) in changes.iter() {
+            let Some(entry) = worktree.entry_for_id(*entry_id) else {
+                continue;
+            };
+            if entry.is_ignored || entry.is_symlink || entry.is_external || entry.is_dir() {
+                continue;
+            }
+            changed_paths.insert(
+                path.clone(),
+                ChangedPathInfo {
+                    changed_at,
+                    mtime: entry.mtime,
+                    is_deleted: *change == PathChange::Removed,
+                },
+            );
+        }
+    }
+}
+
+struct RegisteringWorktreeState {
+    changed_paths: BTreeMap<Arc<Path>, ChangedPathInfo>,
+    _registration: Task<Option<()>>,
+}
+
+struct RegisteredWorktreeState {
+    db_id: i64,
+    changed_paths: BTreeMap<Arc<Path>, ChangedPathInfo>,
 }
 
 struct ChangedPathInfo {
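
The new `WorktreeState` enum above splits a worktree's lifecycle into a registering phase (database id not yet known, change notifications buffered) and a registered phase (db_id available). A simplified, self-contained sketch of that pattern follows; the field types are pared down and `finish_registration` is a hypothetical stand-in for the commit's async registration task:

use std::collections::BTreeMap;

enum WorktreeState {
    Registering { changed_paths: BTreeMap<String, u64> },
    Registered { db_id: i64, changed_paths: BTreeMap<String, u64> },
}

impl WorktreeState {
    // Changes are recorded in either phase, so none are lost while the
    // database registration is still in flight.
    fn record_change(&mut self, path: String, mtime: u64) {
        let changed_paths = match self {
            Self::Registering { changed_paths }
            | Self::Registered { changed_paths, .. } => changed_paths,
        };
        changed_paths.insert(path, mtime);
    }

    // Once the database hands back an id, buffered changes carry over.
    fn finish_registration(self, db_id: i64) -> Self {
        match self {
            Self::Registering { changed_paths } => Self::Registered { db_id, changed_paths },
            registered => registered,
        }
    }
}

fn main() {
    let mut state = WorktreeState::Registering { changed_paths: BTreeMap::new() };
    state.record_change("src/lib.rs".into(), 1);
    let state = state.finish_registration(42);
    if let WorktreeState::Registered { db_id, changed_paths } = state {
        assert_eq!(db_id, 42);
        assert_eq!(changed_paths.len(), 1);
    }
}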

@@ -141,55 +182,42 @@ impl JobHandle {
 }
 
 impl ProjectState {
-    fn new(
-        subscription: gpui::Subscription,
-        worktree_db_ids: Vec<(WorktreeId, i64)>,
-        changed_paths: BTreeMap<ProjectPath, ChangedPathInfo>,
-    ) -> Self {
+    fn new(subscription: gpui::Subscription) -> Self {
         let (outstanding_job_count_tx, outstanding_job_count_rx) = watch::channel_with(0);
         let outstanding_job_count_tx = Arc::new(Mutex::new(outstanding_job_count_tx));
         Self {
-            worktree_db_ids,
+            worktrees: Default::default(),
             outstanding_job_count_rx,
             outstanding_job_count_tx,
-            changed_paths,
             _subscription: subscription,
         }
     }
 
-    pub fn get_outstanding_count(&self) -> usize {
-        self.outstanding_job_count_rx.borrow().clone()
-    }
-
     fn db_id_for_worktree_id(&self, id: WorktreeId) -> Option<i64> {
-        self.worktree_db_ids
-            .iter()
-            .find_map(|(worktree_id, db_id)| {
-                if *worktree_id == id {
-                    Some(*db_id)
-                } else {
-                    None
-                }
-            })
+        match self.worktrees.get(&id)? {
+            WorktreeState::Registering(_) => None,
+            WorktreeState::Registered(state) => Some(state.db_id),
+        }
     }
 
     fn worktree_id_for_db_id(&self, id: i64) -> Option<WorktreeId> {
-        self.worktree_db_ids
+        self.worktrees
             .iter()
-            .find_map(|(worktree_id, db_id)| {
-                if *db_id == id {
-                    Some(*worktree_id)
-                } else {
-                    None
-                }
-            })
+            .find_map(|(worktree_id, worktree_state)| match worktree_state {
+                WorktreeState::Registered(state) if state.db_id == id => Some(*worktree_id),
+                _ => None,
+            })
+    }
+
+    fn worktree(&mut self, id: WorktreeId) -> Option<&mut WorktreeState> {
+        self.worktrees.get_mut(&id)
     }
 }
 
 #[derive(Clone)]
 pub struct PendingFile {
     worktree_db_id: i64,
-    relative_path: PathBuf,
+    relative_path: Arc<Path>,
     absolute_path: PathBuf,
     language: Option<Arc<Language>>,
     modified_time: SystemTime,
@@ -298,7 +326,7 @@ impl SemanticIndex {
             parsing_files_tx,
             _embedding_task,
             _parsing_files_tasks,
-            projects: HashMap::new(),
+            projects: Default::default(),
         }
     }))
 }
@@ -369,9 +397,9 @@ impl SemanticIndex {
     fn project_entries_changed(
         &mut self,
         project: ModelHandle<Project>,
+        worktree_id: WorktreeId,
         changes: Arc<[(Arc<Path>, ProjectEntryId, PathChange)]>,
-        cx: &mut ModelContext<'_, SemanticIndex>,
-        worktree_id: &WorktreeId,
+        cx: &mut ModelContext<Self>,
     ) {
         let Some(worktree) = project.read(cx).worktree_for_id(worktree_id.clone(), cx) else {
             return;
@@ -381,45 +409,29 @@ impl SemanticIndex {
             return;
         };
 
+        let change_time = Instant::now();
+        let worktree = worktree.read(cx);
+        let worktree_state = if let Some(worktree_state) = project_state.worktree(worktree_id) {
+            worktree_state
+        } else {
+            return;
+        };
+        worktree_state.paths_changed(changes, Instant::now(), worktree);
+        if let WorktreeState::Registered(worktree_state) = worktree_state {
             let embeddings_for_digest = {
-            let mut worktree_id_file_paths = HashMap::new();
-            for (path, _) in &project_state.changed_paths {
-                if let Some(worktree_db_id) = project_state.db_id_for_worktree_id(path.worktree_id)
-                {
-                    worktree_id_file_paths
-                        .entry(worktree_db_id)
-                        .or_insert(Vec::new())
-                        .push(path.path.clone());
-                }
-            }
+                let worktree_paths = worktree_state
+                    .changed_paths
+                    .iter()
+                    .map(|(path, _)| path.clone())
+                    .collect::<Vec<_>>();
+                let mut worktree_id_file_paths = HashMap::default();
+                worktree_id_file_paths.insert(worktree_state.db_id, worktree_paths);
                 self.db.embeddings_for_files(worktree_id_file_paths)
             };
 
-        let worktree = worktree.read(cx);
-        let change_time = Instant::now();
-        for (path, entry_id, change) in changes.iter() {
-            let Some(entry) = worktree.entry_for_id(*entry_id) else {
-                continue;
-            };
-            if entry.is_ignored || entry.is_symlink || entry.is_external {
-                continue;
-            }
-            let project_path = ProjectPath {
-                worktree_id: *worktree_id,
-                path: path.clone(),
-            };
-            project_state.changed_paths.insert(
-                project_path,
-                ChangedPathInfo {
-                    changed_at: change_time,
-                    mtime: entry.mtime,
-                    is_deleted: *change == PathChange::Removed,
-                },
-            );
-        }
-
             cx.spawn_weak(|this, mut cx| async move {
-            let embeddings_for_digest = embeddings_for_digest.await.log_err().unwrap_or_default();
+                let embeddings_for_digest =
+                    embeddings_for_digest.await.log_err().unwrap_or_default();
 
                 cx.background().timer(BACKGROUND_INDEXING_DELAY).await;
                 if let Some((this, project)) = this.upgrade(&cx).zip(project.upgrade(&cx)) {
@@ -435,77 +447,65 @@ impl SemanticIndex {
             })
             .detach();
         }
+    }
 
-    pub fn initialize_project(
+    pub fn register_project(&mut self, project: ModelHandle<Project>, cx: &mut ModelContext<Self>) {
+        log::trace!("Registering Project for Semantic Index");
+
+        let subscription = cx.subscribe(&project, |this, project, event, cx| match event {
+            project::Event::WorktreeAdded | project::Event::WorktreeRemoved(_) => {
+                this.project_worktrees_changed(project.clone(), cx);
+            }
+            project::Event::WorktreeUpdatedEntries(worktree_id, changes) => {
+                this.project_entries_changed(project, *worktree_id, changes.clone(), cx);
+            }
+            _ => {}
+        });
+        self.projects
+            .insert(project.downgrade(), ProjectState::new(subscription));
+        self.project_worktrees_changed(project, cx);
+    }
+
+    fn register_worktree(
         &mut self,
         project: ModelHandle<Project>,
+        worktree: ModelHandle<Worktree>,
         cx: &mut ModelContext<Self>,
-    ) -> Task<Result<()>> {
-        log::trace!("Initializing Project for Semantic Index");
-        let worktree_scans_complete = project
-            .read(cx)
-            .worktrees(cx)
-            .map(|worktree| {
-                let scan_complete = worktree.read(cx).as_local().unwrap().scan_complete();
+    ) {
+        let project = project.downgrade();
+        let project_state = if let Some(project_state) = self.projects.get_mut(&project) {
+            project_state
+        } else {
+            return;
+        };
+        let worktree = if let Some(worktree) = worktree.read(cx).as_local() {
+            worktree
+        } else {
+            return;
+        };
+        let worktree_abs_path = worktree.abs_path().clone();
+        let scan_complete = worktree.scan_complete();
+        let worktree_id = worktree.id();
+        let db = self.db.clone();
+        let language_registry = self.language_registry.clone();
+        let registration = cx.spawn(|this, mut cx| {
             async move {
                 scan_complete.await;
-                }
-            })
-            .collect::<Vec<_>>();
-
-        let worktree_db_ids = project
-            .read(cx)
-            .worktrees(cx)
-            .map(|worktree| {
-                self.db
-                    .find_or_create_worktree(worktree.read(cx).abs_path().to_path_buf())
-            })
-            .collect::<Vec<_>>();
-
-        let _subscription = cx.subscribe(&project, |this, project, event, cx| {
-            if let project::Event::WorktreeUpdatedEntries(worktree_id, changes) = event {
-                this.project_entries_changed(project.clone(), changes.clone(), cx, worktree_id);
-            };
-        });
-
-        let language_registry = self.language_registry.clone();
-
-        cx.spawn(|this, mut cx| async move {
-            futures::future::join_all(worktree_scans_complete).await;
-            let worktree_db_ids = futures::future::join_all(worktree_db_ids).await;
-            let worktrees = project.read_with(&cx, |project, cx| {
+                let db_id = db.find_or_create_worktree(worktree_abs_path).await?;
+                let mut file_mtimes = db.get_file_mtimes(db_id).await?;
+                let worktree = if let Some(project) = project.upgrade(&cx) {
                     project
-                    .worktrees(cx)
-                    .map(|worktree| worktree.read(cx).snapshot())
-                    .collect::<Vec<_>>()
-            });
-
-            let mut worktree_file_mtimes = HashMap::new();
-            let mut db_ids_by_worktree_id = HashMap::new();
-
-            for (worktree, db_id) in worktrees.iter().zip(worktree_db_ids) {
-                let db_id = db_id?;
-                db_ids_by_worktree_id.insert(worktree.id(), db_id);
-                worktree_file_mtimes.insert(
-                    worktree.id(),
-                    this.read_with(&cx, |this, _| this.db.get_file_mtimes(db_id))
-                        .await?,
-                );
-            }
-
-            let worktree_db_ids = db_ids_by_worktree_id
-                .iter()
-                .map(|(a, b)| (*a, *b))
-                .collect();
-
-            let changed_paths = cx
+                        .read_with(&cx, |project, cx| project.worktree_for_id(worktree_id, cx))
+                        .ok_or_else(|| anyhow!("worktree not found"))?
+                } else {
+                    return anyhow::Ok(());
+                };
+                let worktree = worktree.read_with(&cx, |worktree, _| worktree.snapshot());
+                let mut changed_paths = cx
                     .background()
                     .spawn(async move {
                         let mut changed_paths = BTreeMap::new();
                         let now = Instant::now();
-                for worktree in worktrees.into_iter() {
-                    let mut file_mtimes = worktree_file_mtimes.remove(&worktree.id()).unwrap();
                         for file in worktree.files(false, 0) {
                             let absolute_path = worktree.absolutize(&file.path);
@@ -534,10 +534,7 @@ impl SemanticIndex {
 
                         if !already_stored {
                             changed_paths.insert(
-                                ProjectPath {
-                                    worktree_id: worktree.id(),
-                                    path: file.path.clone(),
-                                },
+                                file.path.clone(),
                                 ChangedPathInfo {
                                     changed_at: now,
                                     mtime: file.mtime,
@@ -551,10 +548,7 @@ impl SemanticIndex {
                         // Clean up entries from database that are no longer in the worktree.
                         for (path, mtime) in file_mtimes {
                             changed_paths.insert(
-                                ProjectPath {
-                                    worktree_id: worktree.id(),
-                                    path: path.into(),
-                                },
+                                path.into(),
                                 ChangedPathInfo {
                                     changed_at: now,
                                     mtime,
@@ -562,20 +556,80 @@ impl SemanticIndex {
                                 },
                             );
                         }
-                    }
 
                         anyhow::Ok(changed_paths)
                     })
                     .await?;
 
                 this.update(&mut cx, |this, _| {
-                this.projects.insert(
-                    project.downgrade(),
-                    ProjectState::new(_subscription, worktree_db_ids, changed_paths),
+                    let project_state = this
+                        .projects
+                        .get_mut(&project)
+                        .ok_or_else(|| anyhow!("project not registered"))?;
+
+                    if let Some(WorktreeState::Registering(state)) =
+                        project_state.worktrees.remove(&worktree_id)
+                    {
+                        changed_paths.extend(state.changed_paths);
+                    }
+                    project_state.worktrees.insert(
+                        worktree_id,
+                        WorktreeState::Registered(RegisteredWorktreeState {
+                            db_id,
+                            changed_paths,
+                        }),
                     );
+
+                    anyhow::Ok(())
+                })?;
+
+                anyhow::Ok(())
+            }
+            .log_err()
             });
-            Result::<(), _>::Ok(())
-        })
+        project_state.worktrees.insert(
+            worktree_id,
+            WorktreeState::Registering(RegisteringWorktreeState {
+                changed_paths: Default::default(),
+                _registration: registration,
+            }),
+        );
+    }
+
+    fn project_worktrees_changed(
+        &mut self,
+        project: ModelHandle<Project>,
+        cx: &mut ModelContext<Self>,
+    ) {
+        let project_state = if let Some(project_state) = self.projects.get_mut(&project.downgrade())
+        {
+            project_state
+        } else {
+            return;
+        };
+
+        let mut worktrees = project
+            .read(cx)
+            .worktrees(cx)
+            .filter(|worktree| worktree.read(cx).is_local())
+            .collect::<Vec<_>>();
+        let worktree_ids = worktrees
+            .iter()
+            .map(|worktree| worktree.read(cx).id())
+            .collect::<HashSet<_>>();
+
+        // Remove worktrees that are no longer present
+        project_state
+            .worktrees
+            .retain(|worktree_id, _| worktree_ids.contains(worktree_id));
+
+        // Register new worktrees
+        worktrees.retain(|worktree| {
+            let worktree_id = worktree.read(cx).id();
+            project_state.worktree(worktree_id).is_none()
+        });
+        for worktree in worktrees {
+            self.register_worktree(project.clone(), worktree, cx);
+        }
     }
 
     pub fn index_project(
@@ -583,28 +637,31 @@ impl SemanticIndex {
         project: ModelHandle<Project>,
         cx: &mut ModelContext<Self>,
     ) -> Task<Result<(usize, watch::Receiver<usize>)>> {
+        let project_state = if let Some(project_state) = self.projects.get_mut(&project.downgrade())
+        {
+            project_state
+        } else {
+            return Task::ready(Err(anyhow!("project was not registered")));
+        };
+        let outstanding_job_count_rx = project_state.outstanding_job_count_rx.clone();
+
+        let mut worktree_id_file_paths = HashMap::default();
+        for worktree in project_state.worktrees.values() {
+            if let WorktreeState::Registered(worktree_state) = worktree {
+                for (path, _) in &worktree_state.changed_paths {
+                    worktree_id_file_paths
+                        .entry(worktree_state.db_id)
+                        .or_insert(Vec::new())
+                        .push(path.clone());
+                }
+            }
+        }
+
         cx.spawn(|this, mut cx| async move {
             let embeddings_for_digest = this.read_with(&cx, |this, _| {
-                if let Some(state) = this.projects.get(&project.downgrade()) {
-                    let mut worktree_id_file_paths = HashMap::default();
-                    for (path, _) in &state.changed_paths {
-                        if let Some(worktree_db_id) = state.db_id_for_worktree_id(path.worktree_id)
-                        {
-                            worktree_id_file_paths
-                                .entry(worktree_db_id)
-                                .or_insert(Vec::new())
-                                .push(path.path.clone());
-                        }
-                    }
-
-                    Ok(this.db.embeddings_for_files(worktree_id_file_paths))
-                } else {
-                    Err(anyhow!("Project not yet initialized"))
-                }
-            })?;
+                this.db.embeddings_for_files(worktree_id_file_paths)
+            });
 
             let embeddings_for_digest = Arc::new(embeddings_for_digest.await?);
 
             Self::reindex_changed_paths(
                 this.clone(),
                 project.clone(),
@@ -613,15 +670,8 @@ impl SemanticIndex {
                 embeddings_for_digest,
             )
             .await;
-            this.update(&mut cx, |this, _cx| {
-                let Some(state) = this.projects.get(&project.downgrade()) else {
-                    return Err(anyhow!("Project not yet initialized"));
-                };
-                let job_count_rx = state.outstanding_job_count_rx.clone();
-                let count = state.get_outstanding_count();
-                Ok((count, job_count_rx))
-            })
+            let count = *outstanding_job_count_rx.borrow();
+            Ok((count, outstanding_job_count_rx))
         })
     }
@@ -784,50 +834,49 @@ impl SemanticIndex {
         let (db, language_registry, parsing_files_tx) = this.update(cx, |this, cx| {
             if let Some(project_state) = this.projects.get_mut(&project.downgrade()) {
                 let outstanding_job_count_tx = &project_state.outstanding_job_count_tx;
-                let db_ids = &project_state.worktree_db_ids;
-                let mut worktree: Option<ModelHandle<Worktree>> = None;
+                project_state
+                    .worktrees
+                    .retain(|worktree_id, worktree_state| {
+                        let worktree = if let Some(worktree) =
+                            project.read(cx).worktree_for_id(*worktree_id, cx)
+                        {
+                            worktree
+                        } else {
+                            return false;
+                        };
+                        let worktree_state =
+                            if let WorktreeState::Registered(worktree_state) = worktree_state {
+                                worktree_state
+                            } else {
+                                return true;
+                            };
 
-                project_state.changed_paths.retain(|path, info| {
+                        worktree_state.changed_paths.retain(|path, info| {
                             if let Some(last_changed_before) = last_changed_before {
                                 if info.changed_at > last_changed_before {
                                     return true;
                                 }
                             }
 
-                    if worktree
-                        .as_ref()
-                        .map_or(true, |tree| tree.read(cx).id() != path.worktree_id)
-                    {
-                        worktree = project.read(cx).worktree_for_id(path.worktree_id, cx);
-                    }
-                    let Some(worktree) = &worktree else {
-                        return false;
-                    };
-
-                    let Some(worktree_db_id) = db_ids
-                        .iter()
-                        .find_map(|entry| (entry.0 == path.worktree_id).then_some(entry.1))
-                    else {
-                        return false;
-                    };
-
                             if info.is_deleted {
-                        files_to_delete.push((worktree_db_id, path.path.to_path_buf()));
+                                files_to_delete.push((worktree_state.db_id, path.clone()));
                             } else {
-                        let absolute_path = worktree.read(cx).absolutize(&path.path);
+                                let absolute_path = worktree.read(cx).absolutize(path);
                                 let job_handle = JobHandle::new(&outstanding_job_count_tx);
                                 pending_files.push(PendingFile {
                                     absolute_path,
-                            relative_path: path.path.to_path_buf(),
+                                    relative_path: path.clone(),
                                     language: None,
                                     job_handle,
                                     modified_time: info.mtime,
-                            worktree_db_id,
+                                    worktree_db_id: worktree_state.db_id,
                                 });
                             }
 
                             false
                         });
+                        true
+                    });
             }
 
             (

@@ -87,11 +87,8 @@ async fn test_semantic_index(deterministic: Arc<Deterministic>, cx: &mut TestApp
 
     let project = Project::test(fs.clone(), ["/the-root".as_ref()], cx).await;
 
-    let _ = semantic_index
-        .update(cx, |store, cx| {
-            store.initialize_project(project.clone(), cx)
-        })
-        .await;
+    semantic_index.update(cx, |store, cx| store.register_project(project.clone(), cx));
+    deterministic.run_until_parked();
 
     let (file_count, outstanding_file_count) = semantic_index
         .update(cx, |store, cx| store.index_project(project.clone(), cx))
@@ -214,7 +211,7 @@ async fn test_embedding_batching(cx: &mut TestAppContext, mut rng: StdRng) {
     let files = (1..=3)
        .map(|file_ix| FileToEmbed {
            worktree_id: 5,
-           path: format!("path-{file_ix}").into(),
+           path: Path::new(&format!("path-{file_ix}")).into(),
            mtime: SystemTime::now(),
            documents: (0..rng.gen_range(4..22))
                .map(|document_ix| {