added opt-in default settings for vector store
This commit is contained in:
parent
f5fec55930
commit
298c2213a0
7 changed files with 70 additions and 19 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -8503,8 +8503,10 @@ dependencies = [
|
|||
"rand 0.8.5",
|
||||
"rpc",
|
||||
"rusqlite",
|
||||
"schemars",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"settings",
|
||||
"smol",
|
||||
"tempdir",
|
||||
"theme",
|
||||
|
|
|
@ -291,6 +291,12 @@
|
|||
// the terminal will default to matching the buffer's font family.
|
||||
// "font_family": "Zed Mono"
|
||||
},
|
||||
// Different settings for vector_store
|
||||
"vector_store": {
|
||||
"enable": false,
|
||||
"reindexing_delay_seconds": 600,
|
||||
"embedding_batch_size": 150
|
||||
},
|
||||
// Different settings for specific languages.
|
||||
"languages": {
|
||||
"Plain Text": {
|
||||
|
|
|
@ -18,6 +18,7 @@ picker = { path = "../picker" }
|
|||
theme = { path = "../theme" }
|
||||
editor = { path = "../editor" }
|
||||
rpc = { path = "../rpc" }
|
||||
settings = { path = "../settings" }
|
||||
anyhow.workspace = true
|
||||
futures.workspace = true
|
||||
smol.workspace = true
|
||||
|
@ -33,6 +34,7 @@ bincode = "1.3.3"
|
|||
matrixmultiply = "0.3.7"
|
||||
tiktoken-rs = "0.5.0"
|
||||
rand.workspace = true
|
||||
schemars.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
gpui = { path = "../gpui", features = ["test-support"] }
|
||||
|
@ -40,6 +42,7 @@ language = { path = "../language", features = ["test-support"] }
|
|||
project = { path = "../project", features = ["test-support"] }
|
||||
rpc = { path = "../rpc", features = ["test-support"] }
|
||||
workspace = { path = "../workspace", features = ["test-support"] }
|
||||
settings = { path = "../settings", features = ["test-support"]}
|
||||
tree-sitter-rust = "*"
|
||||
rand.workspace = true
|
||||
unindent.workspace = true
|
||||
|
|
|
@ -204,8 +204,6 @@ impl VectorDatabase {
|
|||
) -> Result<Vec<(i64, PathBuf, usize, String)>> {
|
||||
let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1);
|
||||
self.for_each_document(&worktree_ids, |id, embedding| {
|
||||
eprintln!("document {id} {embedding:?}");
|
||||
|
||||
let similarity = dot(&embedding, &query_embedding);
|
||||
let ix = match results
|
||||
.binary_search_by(|(_, s)| similarity.partial_cmp(&s).unwrap_or(Ordering::Equal))
|
||||
|
@ -243,10 +241,7 @@ impl VectorDatabase {
|
|||
Ok((row.get(0)?, row.get::<_, Embedding>(1)?))
|
||||
})?
|
||||
.filter_map(|row| row.ok())
|
||||
.for_each(|(id, embedding)| {
|
||||
dbg!("id");
|
||||
f(id, embedding.0)
|
||||
});
|
||||
.for_each(|(id, embedding)| f(id, embedding.0));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
@ -2,22 +2,25 @@ mod db;
|
|||
mod embedding;
|
||||
mod modal;
|
||||
mod parsing;
|
||||
mod vector_store_settings;
|
||||
|
||||
#[cfg(test)]
|
||||
mod vector_store_tests;
|
||||
|
||||
use crate::vector_store_settings::VectorStoreSettings;
|
||||
use anyhow::{anyhow, Result};
|
||||
use db::VectorDatabase;
|
||||
use embedding::{EmbeddingProvider, OpenAIEmbeddings};
|
||||
use futures::{channel::oneshot, Future};
|
||||
use gpui::{
|
||||
AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, ViewContext,
|
||||
WeakModelHandle,
|
||||
AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Subscription, Task,
|
||||
ViewContext, WeakModelHandle,
|
||||
};
|
||||
use language::{Language, LanguageRegistry};
|
||||
use modal::{SemanticSearch, SemanticSearchDelegate, Toggle};
|
||||
use parsing::{CodeContextRetriever, ParsedFile};
|
||||
use project::{Fs, PathChange, Project, ProjectEntryId, WorktreeId};
|
||||
use settings::SettingsStore;
|
||||
use smol::channel;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
|
@ -34,9 +37,6 @@ use util::{
|
|||
};
|
||||
use workspace::{Workspace, WorkspaceCreated};
|
||||
|
||||
const REINDEXING_DELAY_SECONDS: u64 = 3;
|
||||
const EMBEDDINGS_BATCH_SIZE: usize = 150;
|
||||
|
||||
pub fn init(
|
||||
fs: Arc<dyn Fs>,
|
||||
http_client: Arc<dyn HttpClient>,
|
||||
|
@ -47,6 +47,12 @@ pub fn init(
|
|||
return;
|
||||
}
|
||||
|
||||
settings::register::<VectorStoreSettings>(cx);
|
||||
|
||||
if !settings::get::<VectorStoreSettings>(cx).enable {
|
||||
return;
|
||||
}
|
||||
|
||||
let db_file_path = EMBEDDINGS_DIR
|
||||
.join(Path::new(RELEASE_CHANNEL_NAME.as_str()))
|
||||
.join("embeddings_db");
|
||||
|
@ -83,6 +89,7 @@ pub fn init(
|
|||
.detach();
|
||||
|
||||
cx.add_action({
|
||||
// "semantic search: Toggle"
|
||||
move |workspace: &mut Workspace, _: &Toggle, cx: &mut ViewContext<Workspace>| {
|
||||
let vector_store = vector_store.clone();
|
||||
workspace.toggle_modal(cx, |workspace, cx| {
|
||||
|
@ -274,7 +281,6 @@ impl VectorStore {
|
|||
worktree_id,
|
||||
indexed_file,
|
||||
} => {
|
||||
log::info!("Inserting Data for {:?}", &indexed_file.path);
|
||||
db.insert_file(worktree_id, indexed_file).log_err();
|
||||
}
|
||||
DbOperation::Delete { worktree_id, path } => {
|
||||
|
@ -347,6 +353,7 @@ impl VectorStore {
|
|||
});
|
||||
|
||||
// batch_tx/rx: Batch Files to Send for Embeddings
|
||||
let batch_size = settings::get::<VectorStoreSettings>(cx).embedding_batch_size;
|
||||
let (batch_files_tx, batch_files_rx) = channel::unbounded::<EmbeddingJob>();
|
||||
let _batch_files_task = cx.background().spawn(async move {
|
||||
let mut queue_len = 0;
|
||||
|
@ -361,7 +368,7 @@ impl VectorStore {
|
|||
} => {
|
||||
queue_len += &document_spans.len();
|
||||
embeddings_queue.push((worktree_id, parsed_file, document_spans));
|
||||
queue_len >= EMBEDDINGS_BATCH_SIZE
|
||||
queue_len >= batch_size
|
||||
}
|
||||
EmbeddingJob::Flush => true,
|
||||
};
|
||||
|
@ -387,8 +394,6 @@ impl VectorStore {
|
|||
let cursor = QueryCursor::new();
|
||||
let mut retriever = CodeContextRetriever { parser, cursor, fs };
|
||||
while let Ok(pending_file) = parsing_files_rx.recv().await {
|
||||
log::info!("Parsing File: {:?}", &pending_file.relative_path);
|
||||
|
||||
if let Some((indexed_file, document_spans)) =
|
||||
retriever.parse_file(pending_file.clone()).await.log_err()
|
||||
{
|
||||
|
@ -476,11 +481,9 @@ impl VectorStore {
|
|||
let parsing_files_tx = self.parsing_files_tx.clone();
|
||||
|
||||
cx.spawn(|this, mut cx| async move {
|
||||
let t0 = Instant::now();
|
||||
futures::future::join_all(worktree_scans_complete).await;
|
||||
|
||||
let worktree_db_ids = futures::future::join_all(worktree_db_ids).await;
|
||||
log::info!("Worktree Scanning Done in {:?}", t0.elapsed().as_millis());
|
||||
|
||||
if let Some(db_directory) = database_url.parent() {
|
||||
fs.create_dir(db_directory).await.log_err();
|
||||
|
@ -665,6 +668,8 @@ impl VectorStore {
|
|||
cx: &mut ModelContext<'_, VectorStore>,
|
||||
worktree_id: &WorktreeId,
|
||||
) -> Option<()> {
|
||||
let reindexing_delay = settings::get::<VectorStoreSettings>(cx).reindexing_delay_seconds;
|
||||
|
||||
let worktree = project
|
||||
.read(cx)
|
||||
.worktree_for_id(worktree_id.clone(), cx)?
|
||||
|
@ -725,7 +730,7 @@ impl VectorStore {
|
|||
if !already_stored {
|
||||
this.update(&mut cx, |this, _| {
|
||||
let reindex_time = modified_time
|
||||
+ Duration::from_secs(REINDEXING_DELAY_SECONDS);
|
||||
+ Duration::from_secs(reindexing_delay as u64);
|
||||
|
||||
let project_state =
|
||||
this.projects.get_mut(&project.downgrade())?;
|
||||
|
|
32
crates/vector_store/src/vector_store_settings.rs
Normal file
32
crates/vector_store/src/vector_store_settings.rs
Normal file
|
@ -0,0 +1,32 @@
|
|||
use anyhow;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use settings::Setting;
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct VectorStoreSettings {
|
||||
pub enable: bool,
|
||||
pub reindexing_delay_seconds: usize,
|
||||
pub embedding_batch_size: usize,
|
||||
}
|
||||
|
||||
#[derive(Clone, Default, Serialize, Deserialize, JsonSchema, Debug)]
|
||||
pub struct VectorStoreSettingsContent {
|
||||
pub enable: Option<bool>,
|
||||
pub reindexing_delay_seconds: Option<usize>,
|
||||
pub embedding_batch_size: Option<usize>,
|
||||
}
|
||||
|
||||
impl Setting for VectorStoreSettings {
|
||||
const KEY: Option<&'static str> = Some("vector_store");
|
||||
|
||||
type FileContent = VectorStoreSettingsContent;
|
||||
|
||||
fn load(
|
||||
default_value: &Self::FileContent,
|
||||
user_values: &[&Self::FileContent],
|
||||
_: &gpui::AppContext,
|
||||
) -> anyhow::Result<Self> {
|
||||
Self::load_via_json_merge(default_value, user_values)
|
||||
}
|
||||
}
|
|
@ -1,4 +1,6 @@
|
|||
use crate::{db::dot, embedding::EmbeddingProvider, VectorStore};
|
||||
use crate::{
|
||||
db::dot, embedding::EmbeddingProvider, vector_store_settings::VectorStoreSettings, VectorStore,
|
||||
};
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use gpui::{Task, TestAppContext};
|
||||
|
@ -6,11 +8,17 @@ use language::{Language, LanguageConfig, LanguageRegistry};
|
|||
use project::{FakeFs, Project};
|
||||
use rand::Rng;
|
||||
use serde_json::json;
|
||||
use settings::SettingsStore;
|
||||
use std::sync::Arc;
|
||||
use unindent::Unindent;
|
||||
|
||||
#[gpui::test]
|
||||
async fn test_vector_store(cx: &mut TestAppContext) {
|
||||
cx.update(|cx| {
|
||||
cx.set_global(SettingsStore::test(cx));
|
||||
settings::register::<VectorStoreSettings>(cx);
|
||||
});
|
||||
|
||||
let fs = FakeFs::new(cx.background());
|
||||
fs.insert_tree(
|
||||
"/the-root",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue