From 80a894b82999d4e562a18800568c8f712a705e6e Mon Sep 17 00:00:00 2001 From: KCaverly Date: Wed, 21 Jun 2023 14:53:08 -0400 Subject: [PATCH 001/115] WIP: started work on vector store db, by walking project worktrees.\n\nCo-Authored-By: Max --- Cargo.lock | 15 +++ Cargo.toml | 1 + crates/vector_store/Cargo.toml | 25 +++++ crates/vector_store/README.md | 31 ++++++ crates/vector_store/src/vector_store.rs | 134 ++++++++++++++++++++++++ crates/zed/Cargo.toml | 1 + crates/zed/src/main.rs | 1 + 7 files changed, 208 insertions(+) create mode 100644 crates/vector_store/Cargo.toml create mode 100644 crates/vector_store/README.md create mode 100644 crates/vector_store/src/vector_store.rs diff --git a/Cargo.lock b/Cargo.lock index a4b12223e5..3bf0a568a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7877,6 +7877,20 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "vector_store" +version = "0.1.0" +dependencies = [ + "anyhow", + "futures 0.3.28", + "gpui", + "language", + "project", + "smol", + "util", + "workspace", +] + [[package]] name = "version_check" version = "0.9.4" @@ -8917,6 +8931,7 @@ dependencies = [ "urlencoding", "util", "uuid 1.3.2", + "vector_store", "vim", "welcome", "workspace", diff --git a/Cargo.toml b/Cargo.toml index fca7355964..b1faf158df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -63,6 +63,7 @@ members = [ "crates/theme_selector", "crates/theme_testbench", "crates/util", + "crates/vector_store", "crates/vim", "crates/workspace", "crates/welcome", diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml new file mode 100644 index 0000000000..c33a35bcad --- /dev/null +++ b/crates/vector_store/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "vector_store" +version = "0.1.0" +edition = "2021" +publish = false + +[lib] +path = "src/vector_store.rs" +doctest = false + +[dependencies] +gpui = { path = 
"../gpui" } +language = { path = "../language" } +project = { path = "../project" } +workspace = { path = "../workspace" } +util = { path = "../util" } +anyhow.workspace = true +futures.workspace = true +smol.workspace = true + +[dev-dependencies] +gpui = { path = "../gpui", features = ["test-support"] } +language = { path = "../language", features = ["test-support"] } +project = { path = "../project", features = ["test-support"] } +workspace = { path = "../workspace", features = ["test-support"] } diff --git a/crates/vector_store/README.md b/crates/vector_store/README.md new file mode 100644 index 0000000000..86e68dc414 --- /dev/null +++ b/crates/vector_store/README.md @@ -0,0 +1,31 @@ + +WIP: Sample SQL Queries +/* + +create table "files" ( +"id" INTEGER PRIMARY KEY, +"path" VARCHAR, +"sha1" VARCHAR, +); + +create table symbols ( +"file_id" INTEGER REFERENCES("files", "id") ON CASCADE DELETE, +"offset" INTEGER, +"embedding" VECTOR, +); + +insert into "files" ("path", "sha1") values ("src/main.rs", "sha1") return id; +insert into symbols ( +"file_id", +"start", +"end", +"embedding" +) values ( +(id,), +(id,), +(id,), +(id,), +) + + +*/ diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs new file mode 100644 index 0000000000..1556df7ebe --- /dev/null +++ b/crates/vector_store/src/vector_store.rs @@ -0,0 +1,134 @@ +use anyhow::{anyhow, Result}; +use gpui::{AppContext, Entity, ModelContext, ModelHandle}; +use language::LanguageRegistry; +use project::{Fs, Project}; +use smol::channel; +use std::{path::PathBuf, sync::Arc}; +use util::ResultExt; +use workspace::WorkspaceCreated; + +pub fn init(fs: Arc, language_registry: Arc, cx: &mut AppContext) { + let vector_store = cx.add_model(|cx| VectorStore::new(fs, language_registry)); + + cx.subscribe_global::({ + let vector_store = vector_store.clone(); + move |event, cx| { + let workspace = &event.0; + if let Some(workspace) = workspace.upgrade(cx) { + let project = 
workspace.read(cx).project().clone(); + if project.read(cx).is_local() { + vector_store.update(cx, |store, cx| { + store.add_project(project, cx); + }); + } + } + } + }) + .detach(); +} + +struct Document { + offset: usize, + name: String, + embedding: Vec, +} + +struct IndexedFile { + path: PathBuf, + sha1: String, + documents: Vec, +} + +struct SearchResult { + path: PathBuf, + offset: usize, + name: String, + distance: f32, +} + +struct VectorStore { + fs: Arc, + language_registry: Arc, +} + +impl VectorStore { + fn new(fs: Arc, language_registry: Arc) -> Self { + Self { + fs, + language_registry, + } + } + + async fn index_file( + fs: &Arc, + language_registry: &Arc, + file_path: PathBuf, + ) -> Result { + eprintln!("indexing file {file_path:?}"); + Err(anyhow!("not implemented")) + // todo!(); + } + + fn add_project(&mut self, project: ModelHandle, cx: &mut ModelContext) { + let worktree_scans_complete = project + .read(cx) + .worktrees(cx) + .map(|worktree| worktree.read(cx).as_local().unwrap().scan_complete()) + .collect::>(); + + let fs = self.fs.clone(); + let language_registry = self.language_registry.clone(); + + cx.spawn(|this, cx| async move { + futures::future::join_all(worktree_scans_complete).await; + + let worktrees = project.read_with(&cx, |project, cx| { + project + .worktrees(cx) + .map(|worktree| worktree.read(cx).snapshot()) + .collect::>() + }); + + let (paths_tx, paths_rx) = channel::unbounded::(); + let (indexed_files_tx, indexed_files_rx) = channel::unbounded::(); + cx.background() + .spawn(async move { + for worktree in worktrees { + for file in worktree.files(false, 0) { + paths_tx.try_send(worktree.absolutize(&file.path)).unwrap(); + } + } + }) + .detach(); + cx.background() + .spawn(async move { + while let Ok(indexed_file) = indexed_files_rx.recv().await { + // write document to database + } + }) + .detach(); + cx.background() + .scoped(|scope| { + for _ in 0..cx.background().num_cpus() { + scope.spawn(async { + while let 
Ok(file_path) = paths_rx.recv().await { + if let Some(indexed_file) = + Self::index_file(&fs, &language_registry, file_path) + .await + .log_err() + { + indexed_files_tx.try_send(indexed_file).unwrap(); + } + } + }); + } + }) + .await; + }) + .detach(); + } +} + +impl Entity for VectorStore { + type Event = (); +} diff --git a/crates/zed/Cargo.toml b/crates/zed/Cargo.toml index d8e47d1c3e..26e27a9193 100644 --- a/crates/zed/Cargo.toml +++ b/crates/zed/Cargo.toml @@ -64,6 +64,7 @@ theme = { path = "../theme" } theme_selector = { path = "../theme_selector" } theme_testbench = { path = "../theme_testbench" } util = { path = "../util" } +vector_store = { path = "../vector_store" } vim = { path = "../vim" } workspace = { path = "../workspace" } welcome = { path = "../welcome" } diff --git a/crates/zed/src/main.rs b/crates/zed/src/main.rs index dcdf5c1ea5..76d02307f6 100644 --- a/crates/zed/src/main.rs +++ b/crates/zed/src/main.rs @@ -152,6 +152,7 @@ fn main() { project_panel::init(cx); diagnostics::init(cx); search::init(cx); + vector_store::init(fs.clone(), languages.clone(), cx); vim::init(cx); terminal_view::init(cx); theme_testbench::init(cx); From d4a4db42aa4d96c2576713bd86260d38e8febc8f Mon Sep 17 00:00:00 2001 From: KCaverly Date: Thu, 22 Jun 2023 13:25:33 -0400 Subject: [PATCH 002/115] WIP: started DB creating and naive inserts --- Cargo.lock | 19 +++++ crates/vector_store/Cargo.toml | 4 + crates/vector_store/src/db.rs | 107 ++++++++++++++++++++++++ crates/vector_store/src/vector_store.rs | 38 +++++++-- 4 files changed, 161 insertions(+), 7 deletions(-) create mode 100644 crates/vector_store/src/db.rs diff --git a/Cargo.lock b/Cargo.lock index 3bf0a568a2..beb84e04bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1389,6 +1389,15 @@ dependencies = [ "theme", ] +[[package]] +name = "conv" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ff10625fd0ac447827aa30ea8b861fead473bb60aeb73af6c1c58caf0d1299" +dependencies = [ 
+ "custom_derive", +] + [[package]] name = "copilot" version = "0.1.0" @@ -1766,6 +1775,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "custom_derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" + [[package]] name = "cxx" version = "1.0.94" @@ -7882,11 +7897,15 @@ name = "vector_store" version = "0.1.0" dependencies = [ "anyhow", + "async-compat", + "conv", "futures 0.3.28", "gpui", "language", "project", + "rand 0.8.5", "smol", + "sqlx", "util", "workspace", ] diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml index c33a35bcad..74ad23740e 100644 --- a/crates/vector_store/Cargo.toml +++ b/crates/vector_store/Cargo.toml @@ -17,6 +17,10 @@ util = { path = "../util" } anyhow.workspace = true futures.workspace = true smol.workspace = true +sqlx = { version = "0.6", features = ["sqlite","runtime-tokio-rustls"] } +async-compat = "0.2.1" +conv = "0.3.3" +rand.workspace = true [dev-dependencies] gpui = { path = "../gpui", features = ["test-support"] } diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs new file mode 100644 index 0000000000..dfa85044d6 --- /dev/null +++ b/crates/vector_store/src/db.rs @@ -0,0 +1,107 @@ +use anyhow::Result; +use async_compat::{Compat, CompatExt}; +use conv::ValueFrom; +use sqlx::{migrate::MigrateDatabase, Pool, Sqlite, SqlitePool}; +use std::time::{Duration, Instant}; + +use crate::IndexedFile; + +// This is saving to a local database store within the users dev zed path +// Where do we want this to sit? +// Assuming near where the workspace DB sits. 
+const VECTOR_DB_URL: &str = "embeddings_db"; + +pub struct VectorDatabase {} + +impl VectorDatabase { + pub async fn initialize_database() -> Result<()> { + // If database doesnt exist create database + if !Sqlite::database_exists(VECTOR_DB_URL) + .compat() + .await + .unwrap_or(false) + { + Sqlite::create_database(VECTOR_DB_URL).compat().await?; + } + + let db = SqlitePool::connect(VECTOR_DB_URL).compat().await?; + + // Initialize Vector Databasing Tables + // We may be able to skip this assuming the database is never created + // without creating the tables at the same time. + sqlx::query( + "CREATE TABLE IF NOT EXISTS files ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + path NVARCHAR(100) NOT NULL, + sha1 NVARCHAR(40) NOT NULL + )", + ) + .execute(&db) + .compat() + .await?; + + sqlx::query( + "CREATE TABLE IF NOT EXISTS documents ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file_id INTEGER NOT NULL, + offset INTEGER NOT NULL, + name NVARCHAR(100) NOT NULL, + embedding BLOB NOT NULL, + FOREIGN KEY(file_id) REFERENCES files(id) ON DELETE CASCADE + )", + ) + .execute(&db) + .compat() + .await?; + + Ok(()) + } + + pub async fn insert_file(indexed_file: IndexedFile) -> Result<()> { + // Write to files table, and return generated id. 
+ let db = SqlitePool::connect(VECTOR_DB_URL).compat().await?; + + let files_insert = sqlx::query("INSERT INTO files (path, sha1) VALUES ($1, $2)") + .bind(indexed_file.path.to_str()) + .bind(indexed_file.sha1) + .execute(&db) + .compat() + .await?; + + let inserted_id = files_insert.last_insert_rowid(); + + // I stole this from https://stackoverflow.com/questions/71829931/how-do-i-convert-a-negative-f32-value-to-binary-string-and-back-again + // I imagine there is a better way to serialize to/from blob + fn get_binary_from_values(values: Vec) -> String { + let bits: Vec<_> = values.iter().map(|v| v.to_bits().to_string()).collect(); + bits.join(";") + } + + fn get_values_from_binary(bin: &str) -> Vec { + (0..bin.len() / 32) + .map(|i| { + let start = i * 32; + let end = start + 32; + f32::from_bits(u32::from_str_radix(&bin[start..end], 2).unwrap()) + }) + .collect() + } + + // Currently inserting at approximately 3400 documents a second + // I imagine we can speed this up with a bulk insert of some kind. 
+ for document in indexed_file.documents { + sqlx::query( + "INSERT INTO documents (file_id, offset, name, embedding) VALUES ($1, $2, $3, $4)", + ) + .bind(inserted_id) + .bind(document.offset.to_string()) + .bind(document.name) + .bind(get_binary_from_values(document.embedding)) + .execute(&db) + .compat() + .await?; + } + + Ok(()) + } +} diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 1556df7ebe..93f9fbe06d 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -1,9 +1,12 @@ -use anyhow::{anyhow, Result}; +mod db; +use anyhow::Result; +use db::VectorDatabase; use gpui::{AppContext, Entity, ModelContext, ModelHandle}; use language::LanguageRegistry; use project::{Fs, Project}; +use rand::Rng; use smol::channel; -use std::{path::PathBuf, sync::Arc}; +use std::{path::PathBuf, sync::Arc, time::Instant}; use util::ResultExt; use workspace::WorkspaceCreated; @@ -27,13 +30,15 @@ pub fn init(fs: Arc, language_registry: Arc, cx: &mut .detach(); } +#[derive(Debug, sqlx::FromRow)] struct Document { offset: usize, name: String, embedding: Vec, } -struct IndexedFile { +#[derive(Debug, sqlx::FromRow)] +pub struct IndexedFile { path: PathBuf, sha1: String, documents: Vec, @@ -64,9 +69,24 @@ impl VectorStore { language_registry: &Arc, file_path: PathBuf, ) -> Result { - eprintln!("indexing file {file_path:?}"); - Err(anyhow!("not implemented")) - // todo!(); + // This is creating dummy documents to test the database writes. 
+ let mut documents = vec![]; + let mut rng = rand::thread_rng(); + let rand_num_of_documents: u8 = rng.gen_range(0..200); + for _ in 0..rand_num_of_documents { + let doc = Document { + offset: 0, + name: "test symbol".to_string(), + embedding: vec![0.32 as f32; 768], + }; + documents.push(doc); + } + + return Ok(IndexedFile { + path: file_path, + sha1: "asdfasdfasdf".to_string(), + documents, + }); } fn add_project(&mut self, project: ModelHandle, cx: &mut ModelContext) { @@ -100,13 +120,17 @@ impl VectorStore { } }) .detach(); + cx.background() .spawn(async move { + // Initialize Database, creates database and tables if not exists + VectorDatabase::initialize_database().await.log_err(); while let Ok(indexed_file) = indexed_files_rx.recv().await { - // write document to database + VectorDatabase::insert_file(indexed_file).await.log_err(); } }) .detach(); + cx.background() .scoped(|scope| { for _ in 0..cx.background().num_cpus() { From dd309070eb03dd51041d412ecce553ab43450342 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Thu, 22 Jun 2023 16:50:07 -0400 Subject: [PATCH 003/115] open ai indexing on open for rust files --- Cargo.lock | 57 ++++++++---- crates/language/src/language.rs | 16 ++-- crates/vector_store/Cargo.toml | 10 +- crates/vector_store/src/db.rs | 4 +- crates/vector_store/src/embedding.rs | 100 ++++++++++++++++++++ crates/vector_store/src/vector_store.rs | 118 +++++++++++++++++++----- crates/zed/src/main.rs | 2 +- 7 files changed, 252 insertions(+), 55 deletions(-) create mode 100644 crates/vector_store/src/embedding.rs diff --git a/Cargo.lock b/Cargo.lock index beb84e04bd..5a93ce77af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1389,15 +1389,6 @@ dependencies = [ "theme", ] -[[package]] -name = "conv" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ff10625fd0ac447827aa30ea8b861fead473bb60aeb73af6c1c58caf0d1299" -dependencies = [ - "custom_derive", -] - [[package]] name = "copilot" version = "0.1.0" @@ 
-1775,12 +1766,6 @@ dependencies = [ "winapi 0.3.9", ] -[[package]] -name = "custom_derive" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" - [[package]] name = "cxx" version = "1.0.94" @@ -2219,6 +2204,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fancy-regex" version = "0.11.0" @@ -2909,6 +2900,15 @@ dependencies = [ "ahash 0.8.3", ] +[[package]] +name = "hashlink" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf" +dependencies = [ + "hashbrown 0.11.2", +] + [[package]] name = "hashlink" version = "0.8.1" @@ -5600,6 +5600,21 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rusqlite" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85127183a999f7db96d1a976a309eebbfb6ea3b0b400ddd8340190129de6eb7a" +dependencies = [ + "bitflags", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink 0.7.0", + "libsqlite3-sys", + "memchr", + "smallvec", +] + [[package]] name = "rust-embed" version = "6.6.1" @@ -6531,7 +6546,7 @@ dependencies = [ "futures-executor", "futures-intrusive", "futures-util", - "hashlink", + "hashlink 0.8.1", "hex", "hkdf", "hmac 0.12.1", @@ -7898,14 +7913,20 @@ version = "0.1.0" dependencies = [ "anyhow", "async-compat", - "conv", + "async-trait", "futures 0.3.28", "gpui", + "isahc", "language", + "lazy_static", + "log", "project", - "rand 0.8.5", + "rusqlite", + "serde", + "serde_json", "smol", "sqlx", + 
"tree-sitter", "util", "workspace", ] diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 5a4d604ce3..4c6f709f38 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -476,12 +476,12 @@ pub struct Language { pub struct Grammar { id: usize, - pub(crate) ts_language: tree_sitter::Language, + pub ts_language: tree_sitter::Language, pub(crate) error_query: Query, pub(crate) highlights_query: Option, pub(crate) brackets_config: Option, pub(crate) indents_config: Option, - pub(crate) outline_config: Option, + pub outline_config: Option, pub(crate) injection_config: Option, pub(crate) override_config: Option, pub(crate) highlight_map: Mutex, @@ -495,12 +495,12 @@ struct IndentConfig { outdent_capture_ix: Option, } -struct OutlineConfig { - query: Query, - item_capture_ix: u32, - name_capture_ix: u32, - context_capture_ix: Option, - extra_context_capture_ix: Option, +pub struct OutlineConfig { + pub query: Query, + pub item_capture_ix: u32, + pub name_capture_ix: u32, + pub context_capture_ix: Option, + pub extra_context_capture_ix: Option, } struct InjectionConfig { diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml index 74ad23740e..2db672ed25 100644 --- a/crates/vector_store/Cargo.toml +++ b/crates/vector_store/Cargo.toml @@ -19,8 +19,14 @@ futures.workspace = true smol.workspace = true sqlx = { version = "0.6", features = ["sqlite","runtime-tokio-rustls"] } async-compat = "0.2.1" -conv = "0.3.3" -rand.workspace = true +rusqlite = "0.27.0" +isahc.workspace = true +log.workspace = true +tree-sitter.workspace = true +lazy_static.workspace = true +serde.workspace = true +serde_json.workspace = true +async-trait.workspace = true [dev-dependencies] gpui = { path = "../gpui", features = ["test-support"] } diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index dfa85044d6..d335d327b8 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs 
@@ -1,8 +1,6 @@ use anyhow::Result; use async_compat::{Compat, CompatExt}; -use conv::ValueFrom; -use sqlx::{migrate::MigrateDatabase, Pool, Sqlite, SqlitePool}; -use std::time::{Duration, Instant}; +use sqlx::{migrate::MigrateDatabase, Sqlite, SqlitePool}; use crate::IndexedFile; diff --git a/crates/vector_store/src/embedding.rs b/crates/vector_store/src/embedding.rs new file mode 100644 index 0000000000..f1ae5479ee --- /dev/null +++ b/crates/vector_store/src/embedding.rs @@ -0,0 +1,100 @@ +use anyhow::{anyhow, Result}; +use async_trait::async_trait; +use futures::AsyncReadExt; +use gpui::serde_json; +use isahc::prelude::Configurable; +use lazy_static::lazy_static; +use serde::{Deserialize, Serialize}; +use std::env; +use std::sync::Arc; +use util::http::{HttpClient, Request}; + +lazy_static! { + static ref OPENAI_API_KEY: Option = env::var("OPENAI_API_KEY").ok(); +} + +pub struct OpenAIEmbeddings { + pub client: Arc, +} + +#[derive(Serialize)] +struct OpenAIEmbeddingRequest<'a> { + model: &'static str, + input: Vec<&'a str>, +} + +#[derive(Deserialize)] +struct OpenAIEmbeddingResponse { + data: Vec, + usage: OpenAIEmbeddingUsage, +} + +#[derive(Debug, Deserialize)] +struct OpenAIEmbedding { + embedding: Vec, + index: usize, + object: String, +} + +#[derive(Deserialize)] +struct OpenAIEmbeddingUsage { + prompt_tokens: usize, + total_tokens: usize, +} + +#[async_trait] +pub trait EmbeddingProvider: Sync { + async fn embed_batch(&self, spans: Vec<&str>) -> Result>>; +} + +#[async_trait] +impl EmbeddingProvider for OpenAIEmbeddings { + async fn embed_batch(&self, spans: Vec<&str>) -> Result>> { + let api_key = OPENAI_API_KEY + .as_ref() + .ok_or_else(|| anyhow!("no api key"))?; + + let request = Request::post("https://api.openai.com/v1/embeddings") + .redirect_policy(isahc::config::RedirectPolicy::Follow) + .header("Content-Type", "application/json") + .header("Authorization", format!("Bearer {}", api_key)) + .body( + serde_json::to_string(&OpenAIEmbeddingRequest { + 
input: spans, + model: "text-embedding-ada-002", + }) + .unwrap() + .into(), + )?; + + let mut response = self.client.send(request).await?; + if !response.status().is_success() { + return Err(anyhow!("openai embedding failed {}", response.status())); + } + + let mut body = String::new(); + response.body_mut().read_to_string(&mut body).await?; + let response: OpenAIEmbeddingResponse = serde_json::from_str(&body)?; + + log::info!( + "openai embedding completed. tokens: {:?}", + response.usage.total_tokens + ); + + // do we need to re-order these based on the `index` field? + eprintln!( + "indices: {:?}", + response + .data + .iter() + .map(|embedding| embedding.index) + .collect::>() + ); + + Ok(response + .data + .into_iter() + .map(|embedding| embedding.embedding) + .collect()) + } +} diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 93f9fbe06d..f4d5baca80 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -1,17 +1,25 @@ mod db; -use anyhow::Result; +mod embedding; + +use anyhow::{anyhow, Result}; use db::VectorDatabase; +use embedding::{EmbeddingProvider, OpenAIEmbeddings}; use gpui::{AppContext, Entity, ModelContext, ModelHandle}; use language::LanguageRegistry; use project::{Fs, Project}; -use rand::Rng; use smol::channel; use std::{path::PathBuf, sync::Arc, time::Instant}; -use util::ResultExt; +use tree_sitter::{Parser, QueryCursor}; +use util::{http::HttpClient, ResultExt}; use workspace::WorkspaceCreated; -pub fn init(fs: Arc, language_registry: Arc, cx: &mut AppContext) { - let vector_store = cx.add_model(|cx| VectorStore::new(fs, language_registry)); +pub fn init( + fs: Arc, + http_client: Arc, + language_registry: Arc, + cx: &mut AppContext, +) { + let vector_store = cx.add_model(|cx| VectorStore::new(fs, http_client, language_registry)); cx.subscribe_global::({ let vector_store = vector_store.clone(); @@ -53,38 +61,86 @@ struct SearchResult { struct 
VectorStore { fs: Arc, + http_client: Arc, language_registry: Arc, } impl VectorStore { - fn new(fs: Arc, language_registry: Arc) -> Self { + fn new( + fs: Arc, + http_client: Arc, + language_registry: Arc, + ) -> Self { Self { fs, + http_client, language_registry, } } async fn index_file( + cursor: &mut QueryCursor, + parser: &mut Parser, + embedding_provider: &dyn EmbeddingProvider, fs: &Arc, language_registry: &Arc, file_path: PathBuf, ) -> Result { - // This is creating dummy documents to test the database writes. - let mut documents = vec![]; - let mut rng = rand::thread_rng(); - let rand_num_of_documents: u8 = rng.gen_range(0..200); - for _ in 0..rand_num_of_documents { - let doc = Document { - offset: 0, - name: "test symbol".to_string(), - embedding: vec![0.32 as f32; 768], - }; - documents.push(doc); + let language = language_registry + .language_for_file(&file_path, None) + .await?; + + if language.name().as_ref() != "Rust" { + Err(anyhow!("unsupported language"))?; + } + + let grammar = language.grammar().ok_or_else(|| anyhow!("no grammar"))?; + let outline_config = grammar + .outline_config + .as_ref() + .ok_or_else(|| anyhow!("no outline query"))?; + + let content = fs.load(&file_path).await?; + parser.set_language(grammar.ts_language).unwrap(); + let tree = parser + .parse(&content, None) + .ok_or_else(|| anyhow!("parsing failed"))?; + + let mut documents = Vec::new(); + let mut context_spans = Vec::new(); + for mat in cursor.matches(&outline_config.query, tree.root_node(), content.as_bytes()) { + let mut item_range = None; + let mut name_range = None; + for capture in mat.captures { + if capture.index == outline_config.item_capture_ix { + item_range = Some(capture.node.byte_range()); + } else if capture.index == outline_config.name_capture_ix { + name_range = Some(capture.node.byte_range()); + } + } + + if let Some((item_range, name_range)) = item_range.zip(name_range) { + if let Some((item, name)) = + 
content.get(item_range.clone()).zip(content.get(name_range)) + { + context_spans.push(item); + documents.push(Document { + name: name.to_string(), + offset: item_range.start, + embedding: Vec::new(), + }); + } + } + } + + let embeddings = embedding_provider.embed_batch(context_spans).await?; + for (document, embedding) in documents.iter_mut().zip(embeddings) { + document.embedding = embedding; } return Ok(IndexedFile { path: file_path, - sha1: "asdfasdfasdf".to_string(), + sha1: String::new(), documents, }); } @@ -98,8 +154,9 @@ impl VectorStore { let fs = self.fs.clone(); let language_registry = self.language_registry.clone(); + let client = self.http_client.clone(); - cx.spawn(|this, cx| async move { + cx.spawn(|_, cx| async move { futures::future::join_all(worktree_scans_complete).await; let worktrees = project.read_with(&cx, |project, cx| { @@ -131,15 +188,27 @@ impl VectorStore { }) .detach(); + let provider = OpenAIEmbeddings { client }; + + let t0 = Instant::now(); + cx.background() .scoped(|scope| { for _ in 0..cx.background().num_cpus() { scope.spawn(async { + let mut parser = Parser::new(); + let mut cursor = QueryCursor::new(); while let Ok(file_path) = paths_rx.recv().await { - if let Some(indexed_file) = - Self::index_file(&fs, &language_registry, file_path) - .await - .log_err() + if let Some(indexed_file) = Self::index_file( + &mut cursor, + &mut parser, + &provider, + &fs, + &language_registry, + file_path, + ) + .await + .log_err() { indexed_files_tx.try_send(indexed_file).unwrap(); } @@ -148,6 +217,9 @@ impl VectorStore { } }) .await; + + let duration = t0.elapsed(); + log::info!("indexed project in {duration:?}"); }) .detach(); } diff --git a/crates/zed/src/main.rs b/crates/zed/src/main.rs index 76d02307f6..8a59bbde41 100644 --- a/crates/zed/src/main.rs +++ b/crates/zed/src/main.rs @@ -152,7 +152,7 @@ fn main() { project_panel::init(cx); diagnostics::init(cx); search::init(cx); - vector_store::init(fs.clone(), languages.clone(), cx); + 
vector_store::init(fs.clone(), http.clone(), languages.clone(), cx); vim::init(cx); terminal_view::init(cx); theme_testbench::init(cx); From c071b271be195b0e8af9335469c969e6f1624d6d Mon Sep 17 00:00:00 2001 From: KCaverly Date: Fri, 23 Jun 2023 10:25:12 -0400 Subject: [PATCH 004/115] removed tokio and sqlx dependency, added dummy embeddings provider to save on open ai costs when testing --- Cargo.lock | 2 - crates/vector_store/Cargo.toml | 2 - crates/vector_store/src/db.rs | 74 ++++++++++--------------- crates/vector_store/src/embedding.rs | 12 ++++ crates/vector_store/src/vector_store.rs | 9 +-- 5 files changed, 45 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5a93ce77af..3f13c75dda 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7912,7 +7912,6 @@ name = "vector_store" version = "0.1.0" dependencies = [ "anyhow", - "async-compat", "async-trait", "futures 0.3.28", "gpui", @@ -7925,7 +7924,6 @@ dependencies = [ "serde", "serde_json", "smol", - "sqlx", "tree-sitter", "util", "workspace", diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml index 2db672ed25..434f341147 100644 --- a/crates/vector_store/Cargo.toml +++ b/crates/vector_store/Cargo.toml @@ -17,8 +17,6 @@ util = { path = "../util" } anyhow.workspace = true futures.workspace = true smol.workspace = true -sqlx = { version = "0.6", features = ["sqlite","runtime-tokio-rustls"] } -async-compat = "0.2.1" rusqlite = "0.27.0" isahc.workspace = true log.workspace = true diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index d335d327b8..e2b23f7548 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -1,6 +1,5 @@ use anyhow::Result; -use async_compat::{Compat, CompatExt}; -use sqlx::{migrate::MigrateDatabase, Sqlite, SqlitePool}; +use rusqlite::params; use crate::IndexedFile; @@ -13,32 +12,20 @@ pub struct VectorDatabase {} impl VectorDatabase { pub async fn initialize_database() -> Result<()> { - // If database 
doesnt exist create database - if !Sqlite::database_exists(VECTOR_DB_URL) - .compat() - .await - .unwrap_or(false) - { - Sqlite::create_database(VECTOR_DB_URL).compat().await?; - } - - let db = SqlitePool::connect(VECTOR_DB_URL).compat().await?; + // This will create the database if it doesnt exist + let db = rusqlite::Connection::open(VECTOR_DB_URL)?; // Initialize Vector Databasing Tables - // We may be able to skip this assuming the database is never created - // without creating the tables at the same time. - sqlx::query( + db.execute( "CREATE TABLE IF NOT EXISTS files ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - path NVARCHAR(100) NOT NULL, - sha1 NVARCHAR(40) NOT NULL - )", - ) - .execute(&db) - .compat() - .await?; + id INTEGER PRIMARY KEY AUTOINCREMENT, + path NVARCHAR(100) NOT NULL, + sha1 NVARCHAR(40) NOT NULL + )", + [], + )?; - sqlx::query( + db.execute( "CREATE TABLE IF NOT EXISTS documents ( id INTEGER PRIMARY KEY AUTOINCREMENT, file_id INTEGER NOT NULL, @@ -47,26 +34,22 @@ impl VectorDatabase { embedding BLOB NOT NULL, FOREIGN KEY(file_id) REFERENCES files(id) ON DELETE CASCADE )", - ) - .execute(&db) - .compat() - .await?; + [], + )?; Ok(()) } pub async fn insert_file(indexed_file: IndexedFile) -> Result<()> { // Write to files table, and return generated id. 
- let db = SqlitePool::connect(VECTOR_DB_URL).compat().await?; + let db = rusqlite::Connection::open(VECTOR_DB_URL)?; - let files_insert = sqlx::query("INSERT INTO files (path, sha1) VALUES ($1, $2)") - .bind(indexed_file.path.to_str()) - .bind(indexed_file.sha1) - .execute(&db) - .compat() - .await?; + let files_insert = db.execute( + "INSERT INTO files (path, sha1) VALUES (?1, ?2)", + params![indexed_file.path.to_str(), indexed_file.sha1], + )?; - let inserted_id = files_insert.last_insert_rowid(); + let inserted_id = db.last_insert_rowid(); // I stole this from https://stackoverflow.com/questions/71829931/how-do-i-convert-a-negative-f32-value-to-binary-string-and-back-again // I imagine there is a better way to serialize to/from blob @@ -88,16 +71,15 @@ impl VectorDatabase { // Currently inserting at approximately 3400 documents a second // I imagine we can speed this up with a bulk insert of some kind. for document in indexed_file.documents { - sqlx::query( - "INSERT INTO documents (file_id, offset, name, embedding) VALUES ($1, $2, $3, $4)", - ) - .bind(inserted_id) - .bind(document.offset.to_string()) - .bind(document.name) - .bind(get_binary_from_values(document.embedding)) - .execute(&db) - .compat() - .await?; + db.execute( + "INSERT INTO documents (file_id, offset, name, embedding) VALUES (?1, ?2, ?3, ?4)", + params![ + inserted_id, + document.offset.to_string(), + document.name, + get_binary_from_values(document.embedding) + ], + )?; } Ok(()) diff --git a/crates/vector_store/src/embedding.rs b/crates/vector_store/src/embedding.rs index f1ae5479ee..4883917d5a 100644 --- a/crates/vector_store/src/embedding.rs +++ b/crates/vector_store/src/embedding.rs @@ -47,6 +47,18 @@ pub trait EmbeddingProvider: Sync { async fn embed_batch(&self, spans: Vec<&str>) -> Result>>; } +pub struct DummyEmbeddings {} + +#[async_trait] +impl EmbeddingProvider for DummyEmbeddings { + async fn embed_batch(&self, spans: Vec<&str>) -> Result>> { + // 1024 is the OpenAI Embeddings 
size for ada models. + // the model we will likely be starting with. + let dummy_vec = vec![0.32 as f32; 1024]; + return Ok(vec![dummy_vec; spans.len()]); + } +} + #[async_trait] impl EmbeddingProvider for OpenAIEmbeddings { async fn embed_batch(&self, spans: Vec<&str>) -> Result>> { diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index f4d5baca80..f424346d56 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -3,7 +3,7 @@ mod embedding; use anyhow::{anyhow, Result}; use db::VectorDatabase; -use embedding::{EmbeddingProvider, OpenAIEmbeddings}; +use embedding::{DummyEmbeddings, EmbeddingProvider, OpenAIEmbeddings}; use gpui::{AppContext, Entity, ModelContext, ModelHandle}; use language::LanguageRegistry; use project::{Fs, Project}; @@ -38,14 +38,14 @@ pub fn init( .detach(); } -#[derive(Debug, sqlx::FromRow)] +#[derive(Debug)] struct Document { offset: usize, name: String, embedding: Vec, } -#[derive(Debug, sqlx::FromRow)] +#[derive(Debug)] pub struct IndexedFile { path: PathBuf, sha1: String, @@ -188,7 +188,8 @@ impl VectorStore { }) .detach(); - let provider = OpenAIEmbeddings { client }; + // let provider = OpenAIEmbeddings { client }; + let provider = DummyEmbeddings {}; let t0 = Instant::now(); From 65bbb7c57bad891dbe9303ddc413c674974d5234 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Sun, 25 Jun 2023 20:02:56 -0400 Subject: [PATCH 005/115] added proper blob serialization for embeddings and vector search trait --- Cargo.lock | 13 +-- crates/vector_store/Cargo.toml | 3 +- crates/vector_store/src/db.rs | 102 +++++++++++++++++++----- crates/vector_store/src/embedding.rs | 3 +- crates/vector_store/src/search.rs | 5 ++ crates/vector_store/src/vector_store.rs | 17 ++-- 6 files changed, 104 insertions(+), 39 deletions(-) create mode 100644 crates/vector_store/src/search.rs diff --git a/Cargo.lock b/Cargo.lock index 3f13c75dda..309bcfa378 100644 --- a/Cargo.lock +++ 
b/Cargo.lock @@ -1768,9 +1768,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.94" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f61f1b6389c3fe1c316bf8a4dccc90a38208354b330925bce1f74a6c4756eb93" +checksum = "e88abab2f5abbe4c56e8f1fb431b784d710b709888f35755a160e62e33fe38e8" dependencies = [ "cc", "cxxbridge-flags", @@ -1795,15 +1795,15 @@ dependencies = [ [[package]] name = "cxxbridge-flags" -version = "1.0.94" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7944172ae7e4068c533afbb984114a56c46e9ccddda550499caa222902c7f7bb" +checksum = "8d3816ed957c008ccd4728485511e3d9aaf7db419aa321e3d2c5a2f3411e36c8" [[package]] name = "cxxbridge-macro" -version = "1.0.94" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" +checksum = "a26acccf6f445af85ea056362561a24ef56cdc15fcc685f03aec50b9c702cb6d" dependencies = [ "proc-macro2", "quote", @@ -7913,6 +7913,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", + "bincode", "futures 0.3.28", "gpui", "isahc", diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml index 434f341147..6446651d5d 100644 --- a/crates/vector_store/Cargo.toml +++ b/crates/vector_store/Cargo.toml @@ -17,7 +17,7 @@ util = { path = "../util" } anyhow.workspace = true futures.workspace = true smol.workspace = true -rusqlite = "0.27.0" +rusqlite = { version = "0.27.0", features=["blob"] } isahc.workspace = true log.workspace = true tree-sitter.workspace = true @@ -25,6 +25,7 @@ lazy_static.workspace = true serde.workspace = true serde_json.workspace = true async-trait.workspace = true +bincode = "1.3.3" [dev-dependencies] gpui = { path = "../gpui", features = ["test-support"] } diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index e2b23f7548..54f0292d1f 100644 --- 
a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -1,13 +1,44 @@ -use anyhow::Result; -use rusqlite::params; +use std::collections::HashMap; -use crate::IndexedFile; +use anyhow::{anyhow, Result}; + +use rusqlite::{ + params, + types::{FromSql, FromSqlResult, ValueRef}, + Connection, +}; +use util::ResultExt; + +use crate::{Document, IndexedFile}; // This is saving to a local database store within the users dev zed path // Where do we want this to sit? // Assuming near where the workspace DB sits. const VECTOR_DB_URL: &str = "embeddings_db"; +// Note this is not an appropriate document +#[derive(Debug)] +pub struct DocumentRecord { + id: usize, + offset: usize, + name: String, + embedding: Embedding, +} + +#[derive(Debug)] +struct Embedding(Vec); + +impl FromSql for Embedding { + fn column_result(value: ValueRef) -> FromSqlResult { + let bytes = value.as_blob()?; + let embedding: Result, Box> = bincode::deserialize(bytes); + if embedding.is_err() { + return Err(rusqlite::types::FromSqlError::Other(embedding.unwrap_err())); + } + return Ok(Embedding(embedding.unwrap())); + } +} + pub struct VectorDatabase {} impl VectorDatabase { @@ -51,37 +82,66 @@ impl VectorDatabase { let inserted_id = db.last_insert_rowid(); - // I stole this from https://stackoverflow.com/questions/71829931/how-do-i-convert-a-negative-f32-value-to-binary-string-and-back-again - // I imagine there is a better way to serialize to/from blob - fn get_binary_from_values(values: Vec) -> String { - let bits: Vec<_> = values.iter().map(|v| v.to_bits().to_string()).collect(); - bits.join(";") - } - - fn get_values_from_binary(bin: &str) -> Vec { - (0..bin.len() / 32) - .map(|i| { - let start = i * 32; - let end = start + 32; - f32::from_bits(u32::from_str_radix(&bin[start..end], 2).unwrap()) - }) - .collect() - } - // Currently inserting at approximately 3400 documents a second // I imagine we can speed this up with a bulk insert of some kind. 
for document in indexed_file.documents { + let embedding_blob = bincode::serialize(&document.embedding)?; + db.execute( "INSERT INTO documents (file_id, offset, name, embedding) VALUES (?1, ?2, ?3, ?4)", params![ inserted_id, document.offset.to_string(), document.name, - get_binary_from_values(document.embedding) + embedding_blob ], )?; } Ok(()) } + + pub fn get_documents(&self) -> Result> { + // Should return a HashMap in which the key is the id, and the value is the finished document + + // Get Data from Database + let db = rusqlite::Connection::open(VECTOR_DB_URL)?; + + fn query(db: Connection) -> rusqlite::Result> { + let mut query_statement = + db.prepare("SELECT id, offset, name, embedding FROM documents LIMIT 10")?; + let result_iter = query_statement.query_map([], |row| { + Ok(DocumentRecord { + id: row.get(0)?, + offset: row.get(1)?, + name: row.get(2)?, + embedding: row.get(3)?, + }) + })?; + + let mut results = vec![]; + for result in result_iter { + results.push(result?); + } + + return Ok(results); + } + + let mut documents: HashMap = HashMap::new(); + let result_iter = query(db); + if result_iter.is_ok() { + for result in result_iter.unwrap() { + documents.insert( + result.id, + Document { + offset: result.offset, + name: result.name, + embedding: result.embedding.0, + }, + ); + } + } + + return Ok(documents); + } } diff --git a/crates/vector_store/src/embedding.rs b/crates/vector_store/src/embedding.rs index 4883917d5a..903c2451b3 100644 --- a/crates/vector_store/src/embedding.rs +++ b/crates/vector_store/src/embedding.rs @@ -13,6 +13,7 @@ lazy_static! { static ref OPENAI_API_KEY: Option = env::var("OPENAI_API_KEY").ok(); } +#[derive(Clone)] pub struct OpenAIEmbeddings { pub client: Arc, } @@ -54,7 +55,7 @@ impl EmbeddingProvider for DummyEmbeddings { async fn embed_batch(&self, spans: Vec<&str>) -> Result>> { // 1024 is the OpenAI Embeddings size for ada models. // the model we will likely be starting with. 
- let dummy_vec = vec![0.32 as f32; 1024]; + let dummy_vec = vec![0.32 as f32; 1536]; return Ok(vec![dummy_vec; spans.len()]); } } diff --git a/crates/vector_store/src/search.rs b/crates/vector_store/src/search.rs new file mode 100644 index 0000000000..3dc72edbce --- /dev/null +++ b/crates/vector_store/src/search.rs @@ -0,0 +1,5 @@ +trait VectorSearch { + // Given a query vector, and a limit to return + // Return a vector of id, distance tuples. + fn top_k_search(&self, vec: &Vec) -> Vec<(usize, f32)>; +} diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index f424346d56..0b6d2928cc 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -1,5 +1,6 @@ mod db; mod embedding; +mod search; use anyhow::{anyhow, Result}; use db::VectorDatabase; @@ -39,10 +40,10 @@ pub fn init( } #[derive(Debug)] -struct Document { - offset: usize, - name: String, - embedding: Vec, +pub struct Document { + pub offset: usize, + pub name: String, + pub embedding: Vec, } #[derive(Debug)] @@ -185,14 +186,13 @@ impl VectorStore { while let Ok(indexed_file) = indexed_files_rx.recv().await { VectorDatabase::insert_file(indexed_file).await.log_err(); } + + anyhow::Ok(()) }) .detach(); - // let provider = OpenAIEmbeddings { client }; let provider = DummyEmbeddings {}; - let t0 = Instant::now(); - cx.background() .scoped(|scope| { for _ in 0..cx.background().num_cpus() { @@ -218,9 +218,6 @@ impl VectorStore { } }) .await; - - let duration = t0.elapsed(); - log::info!("indexed project in {duration:?}"); }) .detach(); } From 7937a16002f7fa4abb752f20bce1bf0d810a823e Mon Sep 17 00:00:00 2001 From: KCaverly Date: Mon, 26 Jun 2023 10:34:12 -0400 Subject: [PATCH 006/115] added brute force search and VectorSearch trait --- Cargo.lock | 39 ++++++++++++++ crates/vector_store/Cargo.toml | 1 + crates/vector_store/src/search.rs | 84 ++++++++++++++++++++++++++++++- 3 files changed, 122 insertions(+), 2 deletions(-) diff 
--git a/Cargo.lock b/Cargo.lock index 309bcfa378..48952d6c25 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3837,6 +3837,16 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73cbba799671b762df5a175adf59ce145165747bb891505c43d09aefbbf38beb" +[[package]] +name = "matrixmultiply" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "090126dc04f95dc0d1c1c91f61bdd474b3930ca064c1edc8a849da2c6cbe1e77" +dependencies = [ + "autocfg 1.1.0", + "rawpointer", +] + [[package]] name = "maybe-owned" version = "0.3.4" @@ -4121,6 +4131,19 @@ dependencies = [ "tempfile", ] +[[package]] +name = "ndarray" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "rawpointer", +] + [[package]] name = "net2" version = "0.2.38" @@ -4228,6 +4251,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "num-complex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" +dependencies = [ + "num-traits", +] + [[package]] name = "num-integer" version = "0.1.45" @@ -5245,6 +5277,12 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + [[package]] name = "rayon" version = "1.7.0" @@ -7920,6 +7958,7 @@ dependencies = [ "language", "lazy_static", "log", + "ndarray", "project", "rusqlite", "serde", diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml index 6446651d5d..8de93c0401 100644 --- a/crates/vector_store/Cargo.toml +++ b/crates/vector_store/Cargo.toml @@ -26,6 +26,7 @@ serde.workspace = true 
serde_json.workspace = true async-trait.workspace = true bincode = "1.3.3" +ndarray = "0.15.6" [dev-dependencies] gpui = { path = "../gpui", features = ["test-support"] } diff --git a/crates/vector_store/src/search.rs b/crates/vector_store/src/search.rs index 3dc72edbce..6b508b401b 100644 --- a/crates/vector_store/src/search.rs +++ b/crates/vector_store/src/search.rs @@ -1,5 +1,85 @@ -trait VectorSearch { +use std::cmp::Ordering; + +use async_trait::async_trait; +use ndarray::{Array1, Array2}; + +use crate::db::{DocumentRecord, VectorDatabase}; +use anyhow::Result; + +#[async_trait] +pub trait VectorSearch { // Given a query vector, and a limit to return // Return a vector of id, distance tuples. - fn top_k_search(&self, vec: &Vec) -> Vec<(usize, f32)>; + async fn top_k_search(&mut self, vec: &Vec, limit: usize) -> Vec<(usize, f32)>; +} + +pub struct BruteForceSearch { + document_ids: Vec, + candidate_array: ndarray::Array2, +} + +impl BruteForceSearch { + pub fn load() -> Result { + let db = VectorDatabase {}; + let documents = db.get_documents()?; + let embeddings: Vec<&DocumentRecord> = documents.values().into_iter().collect(); + let mut document_ids = vec![]; + for i in documents.keys() { + document_ids.push(i.to_owned()); + } + + let mut candidate_array = Array2::::default((documents.len(), 1536)); + for (i, mut row) in candidate_array.axis_iter_mut(ndarray::Axis(0)).enumerate() { + for (j, col) in row.iter_mut().enumerate() { + *col = embeddings[i].embedding.0[j]; + } + } + + return Ok(BruteForceSearch { + document_ids, + candidate_array, + }); + } +} + +#[async_trait] +impl VectorSearch for BruteForceSearch { + async fn top_k_search(&mut self, vec: &Vec, limit: usize) -> Vec<(usize, f32)> { + let target = Array1::from_vec(vec.to_owned()); + + let distances = self.candidate_array.dot(&target); + + let distances = distances.to_vec(); + + // construct a tuple vector from the floats, the tuple being (index,float) + let mut with_indices = distances + .clone() + 
.into_iter() + .enumerate() + .map(|(index, value)| (index, value)) + .collect::>(); + + // sort the tuple vector by float + with_indices.sort_by(|&a, &b| match (a.1.is_nan(), b.1.is_nan()) { + (true, true) => Ordering::Equal, + (true, false) => Ordering::Greater, + (false, true) => Ordering::Less, + (false, false) => a.1.partial_cmp(&b.1).unwrap(), + }); + + // extract the sorted indices from the sorted tuple vector + let stored_indices = with_indices + .into_iter() + .map(|(index, value)| index) + .collect::>(); + + let sorted_indices: Vec = stored_indices.into_iter().rev().collect(); + + let mut results = vec![]; + for idx in sorted_indices[0..limit].to_vec() { + results.push((self.document_ids[idx], 1.0 - distances[idx])); + } + + return results; + } } From 0f232e0ce2c7e50ef91b0daf9b8618c81f0ec33d Mon Sep 17 00:00:00 2001 From: KCaverly Date: Mon, 26 Jun 2023 10:35:56 -0400 Subject: [PATCH 007/115] added file metadata retrieval from db --- crates/vector_store/src/db.rs | 87 ++++++++++++++++++++++++----------- 1 file changed, 60 insertions(+), 27 deletions(-) diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index 54f0292d1f..bc5a7fd497 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -7,9 +7,8 @@ use rusqlite::{ types::{FromSql, FromSqlResult, ValueRef}, Connection, }; -use util::ResultExt; -use crate::{Document, IndexedFile}; +use crate::IndexedFile; // This is saving to a local database store within the users dev zed path // Where do we want this to sit? 
@@ -19,14 +18,22 @@ const VECTOR_DB_URL: &str = "embeddings_db"; // Note this is not an appropriate document #[derive(Debug)] pub struct DocumentRecord { - id: usize, - offset: usize, - name: String, - embedding: Embedding, + pub id: usize, + pub file_id: usize, + pub offset: usize, + pub name: String, + pub embedding: Embedding, } #[derive(Debug)] -struct Embedding(Vec); +pub struct FileRecord { + pub id: usize, + pub path: String, + pub sha1: String, +} + +#[derive(Debug)] +pub struct Embedding(pub Vec); impl FromSql for Embedding { fn column_result(value: ValueRef) -> FromSqlResult { @@ -101,21 +108,16 @@ impl VectorDatabase { Ok(()) } - pub fn get_documents(&self) -> Result> { - // Should return a HashMap in which the key is the id, and the value is the finished document - - // Get Data from Database + pub fn get_files(&self) -> Result> { let db = rusqlite::Connection::open(VECTOR_DB_URL)?; - fn query(db: Connection) -> rusqlite::Result> { - let mut query_statement = - db.prepare("SELECT id, offset, name, embedding FROM documents LIMIT 10")?; + fn query(db: Connection) -> rusqlite::Result> { + let mut query_statement = db.prepare("SELECT id, path, sha1 FROM files")?; let result_iter = query_statement.query_map([], |row| { - Ok(DocumentRecord { + Ok(FileRecord { id: row.get(0)?, - offset: row.get(1)?, - name: row.get(2)?, - embedding: row.get(3)?, + path: row.get(1)?, + sha1: row.get(2)?, }) })?; @@ -127,18 +129,49 @@ impl VectorDatabase { return Ok(results); } - let mut documents: HashMap = HashMap::new(); + let mut pages: HashMap = HashMap::new(); let result_iter = query(db); if result_iter.is_ok() { for result in result_iter.unwrap() { - documents.insert( - result.id, - Document { - offset: result.offset, - name: result.name, - embedding: result.embedding.0, - }, - ); + pages.insert(result.id, result); + } + } + + return Ok(pages); + } + + pub fn get_documents(&self) -> Result> { + // Should return a HashMap in which the key is the id, and the value is the 
finished document + + // Get Data from Database + let db = rusqlite::Connection::open(VECTOR_DB_URL)?; + + fn query(db: Connection) -> rusqlite::Result> { + let mut query_statement = + db.prepare("SELECT id, file_id, offset, name, embedding FROM documents")?; + let result_iter = query_statement.query_map([], |row| { + Ok(DocumentRecord { + id: row.get(0)?, + file_id: row.get(1)?, + offset: row.get(2)?, + name: row.get(3)?, + embedding: row.get(4)?, + }) + })?; + + let mut results = vec![]; + for result in result_iter { + results.push(result?); + } + + return Ok(results); + } + + let mut documents: HashMap = HashMap::new(); + let result_iter = query(db); + if result_iter.is_ok() { + for result in result_iter.unwrap() { + documents.insert(result.id, result); } } From 74b693d6b915f587956a85e3decadfab2d5238fc Mon Sep 17 00:00:00 2001 From: KCaverly Date: Mon, 26 Jun 2023 14:57:57 -0400 Subject: [PATCH 008/115] Updated database calls to share single connection, and simplified top_k_search sorting. 
Co-authored-by: maxbrunsfeld --- crates/vector_store/src/db.rs | 159 ++++++++++++------------ crates/vector_store/src/embedding.rs | 10 -- crates/vector_store/src/search.rs | 47 ++++--- crates/vector_store/src/vector_store.rs | 56 +++++++-- 4 files changed, 148 insertions(+), 124 deletions(-) diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index bc5a7fd497..4f6da14cab 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::{collections::HashMap, path::PathBuf}; use anyhow::{anyhow, Result}; @@ -46,31 +46,50 @@ impl FromSql for Embedding { } } -pub struct VectorDatabase {} +pub struct VectorDatabase { + db: rusqlite::Connection, +} impl VectorDatabase { - pub async fn initialize_database() -> Result<()> { + pub fn new() -> Result { + let this = Self { + db: rusqlite::Connection::open(VECTOR_DB_URL)?, + }; + this.initialize_database()?; + Ok(this) + } + + fn initialize_database(&self) -> Result<()> { // This will create the database if it doesnt exist - let db = rusqlite::Connection::open(VECTOR_DB_URL)?; // Initialize Vector Databasing Tables - db.execute( + // self.db.execute( + // " + // CREATE TABLE IF NOT EXISTS projects ( + // id INTEGER PRIMARY KEY AUTOINCREMENT, + // path NVARCHAR(100) NOT NULL + // ) + // ", + // [], + // )?; + + self.db.execute( "CREATE TABLE IF NOT EXISTS files ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - path NVARCHAR(100) NOT NULL, - sha1 NVARCHAR(40) NOT NULL - )", + id INTEGER PRIMARY KEY AUTOINCREMENT, + path NVARCHAR(100) NOT NULL, + sha1 NVARCHAR(40) NOT NULL + )", [], )?; - db.execute( + self.db.execute( "CREATE TABLE IF NOT EXISTS documents ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file_id INTEGER NOT NULL, - offset INTEGER NOT NULL, - name NVARCHAR(100) NOT NULL, - embedding BLOB NOT NULL, - FOREIGN KEY(file_id) REFERENCES files(id) ON DELETE CASCADE + id INTEGER PRIMARY KEY AUTOINCREMENT, + file_id INTEGER NOT NULL, + 
offset INTEGER NOT NULL, + name NVARCHAR(100) NOT NULL, + embedding BLOB NOT NULL, + FOREIGN KEY(file_id) REFERENCES files(id) ON DELETE CASCADE )", [], )?; @@ -78,23 +97,37 @@ impl VectorDatabase { Ok(()) } - pub async fn insert_file(indexed_file: IndexedFile) -> Result<()> { - // Write to files table, and return generated id. - let db = rusqlite::Connection::open(VECTOR_DB_URL)?; + // pub async fn get_or_create_project(project_path: PathBuf) -> Result { + // // Check if we have the project, if we do, return the ID + // // If we do not have the project, insert the project and return the ID - let files_insert = db.execute( + // let db = rusqlite::Connection::open(VECTOR_DB_URL)?; + + // let projects_query = db.prepare(&format!( + // "SELECT id FROM projects WHERE path = {}", + // project_path.to_str().unwrap() // This is unsafe + // ))?; + + // let project_id = db.last_insert_rowid(); + + // return Ok(project_id as usize); + // } + + pub fn insert_file(&self, indexed_file: IndexedFile) -> Result<()> { + // Write to files table, and return generated id. + let files_insert = self.db.execute( "INSERT INTO files (path, sha1) VALUES (?1, ?2)", params![indexed_file.path.to_str(), indexed_file.sha1], )?; - let inserted_id = db.last_insert_rowid(); + let inserted_id = self.db.last_insert_rowid(); // Currently inserting at approximately 3400 documents a second // I imagine we can speed this up with a bulk insert of some kind. 
for document in indexed_file.documents { let embedding_blob = bincode::serialize(&document.embedding)?; - db.execute( + self.db.execute( "INSERT INTO documents (file_id, offset, name, embedding) VALUES (?1, ?2, ?3, ?4)", params![ inserted_id, @@ -109,70 +142,42 @@ impl VectorDatabase { } pub fn get_files(&self) -> Result> { - let db = rusqlite::Connection::open(VECTOR_DB_URL)?; - - fn query(db: Connection) -> rusqlite::Result> { - let mut query_statement = db.prepare("SELECT id, path, sha1 FROM files")?; - let result_iter = query_statement.query_map([], |row| { - Ok(FileRecord { - id: row.get(0)?, - path: row.get(1)?, - sha1: row.get(2)?, - }) - })?; - - let mut results = vec![]; - for result in result_iter { - results.push(result?); - } - - return Ok(results); - } + let mut query_statement = self.db.prepare("SELECT id, path, sha1 FROM files")?; + let result_iter = query_statement.query_map([], |row| { + Ok(FileRecord { + id: row.get(0)?, + path: row.get(1)?, + sha1: row.get(2)?, + }) + })?; let mut pages: HashMap = HashMap::new(); - let result_iter = query(db); - if result_iter.is_ok() { - for result in result_iter.unwrap() { - pages.insert(result.id, result); - } + for result in result_iter { + let result = result?; + pages.insert(result.id, result); } - return Ok(pages); + Ok(pages) } pub fn get_documents(&self) -> Result> { - // Should return a HashMap in which the key is the id, and the value is the finished document - - // Get Data from Database - let db = rusqlite::Connection::open(VECTOR_DB_URL)?; - - fn query(db: Connection) -> rusqlite::Result> { - let mut query_statement = - db.prepare("SELECT id, file_id, offset, name, embedding FROM documents")?; - let result_iter = query_statement.query_map([], |row| { - Ok(DocumentRecord { - id: row.get(0)?, - file_id: row.get(1)?, - offset: row.get(2)?, - name: row.get(3)?, - embedding: row.get(4)?, - }) - })?; - - let mut results = vec![]; - for result in result_iter { - results.push(result?); - } - - return 
Ok(results); - } + let mut query_statement = self + .db + .prepare("SELECT id, file_id, offset, name, embedding FROM documents")?; + let result_iter = query_statement.query_map([], |row| { + Ok(DocumentRecord { + id: row.get(0)?, + file_id: row.get(1)?, + offset: row.get(2)?, + name: row.get(3)?, + embedding: row.get(4)?, + }) + })?; let mut documents: HashMap = HashMap::new(); - let result_iter = query(db); - if result_iter.is_ok() { - for result in result_iter.unwrap() { - documents.insert(result.id, result); - } + for result in result_iter { + let result = result?; + documents.insert(result.id, result); } return Ok(documents); diff --git a/crates/vector_store/src/embedding.rs b/crates/vector_store/src/embedding.rs index 903c2451b3..f995639e64 100644 --- a/crates/vector_store/src/embedding.rs +++ b/crates/vector_store/src/embedding.rs @@ -94,16 +94,6 @@ impl EmbeddingProvider for OpenAIEmbeddings { response.usage.total_tokens ); - // do we need to re-order these based on the `index` field? 
- eprintln!( - "indices: {:?}", - response - .data - .iter() - .map(|embedding| embedding.index) - .collect::>() - ); - Ok(response .data .into_iter() diff --git a/crates/vector_store/src/search.rs b/crates/vector_store/src/search.rs index 6b508b401b..ce8bdd1af4 100644 --- a/crates/vector_store/src/search.rs +++ b/crates/vector_store/src/search.rs @@ -19,8 +19,8 @@ pub struct BruteForceSearch { } impl BruteForceSearch { - pub fn load() -> Result { - let db = VectorDatabase {}; + pub fn load(db: &VectorDatabase) -> Result { + // let db = VectorDatabase {}; let documents = db.get_documents()?; let embeddings: Vec<&DocumentRecord> = documents.values().into_iter().collect(); let mut document_ids = vec![]; @@ -47,39 +47,36 @@ impl VectorSearch for BruteForceSearch { async fn top_k_search(&mut self, vec: &Vec, limit: usize) -> Vec<(usize, f32)> { let target = Array1::from_vec(vec.to_owned()); - let distances = self.candidate_array.dot(&target); + let similarities = self.candidate_array.dot(&target); - let distances = distances.to_vec(); + let similarities = similarities.to_vec(); // construct a tuple vector from the floats, the tuple being (index,float) - let mut with_indices = distances - .clone() - .into_iter() + let mut with_indices = similarities + .iter() + .copied() .enumerate() - .map(|(index, value)| (index, value)) + .map(|(index, value)| (self.document_ids[index], value)) .collect::>(); // sort the tuple vector by float - with_indices.sort_by(|&a, &b| match (a.1.is_nan(), b.1.is_nan()) { - (true, true) => Ordering::Equal, - (true, false) => Ordering::Greater, - (false, true) => Ordering::Less, - (false, false) => a.1.partial_cmp(&b.1).unwrap(), - }); + with_indices.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); + with_indices.truncate(limit); + with_indices - // extract the sorted indices from the sorted tuple vector - let stored_indices = with_indices - .into_iter() - .map(|(index, value)| index) - .collect::>(); + // // extract the sorted 
indices from the sorted tuple vector + // let stored_indices = with_indices + // .into_iter() + // .map(|(index, value)| index) + // .collect::>(); - let sorted_indices: Vec = stored_indices.into_iter().rev().collect(); + // let sorted_indices: Vec = stored_indices.into_iter().rev().collect(); - let mut results = vec![]; - for idx in sorted_indices[0..limit].to_vec() { - results.push((self.document_ids[idx], 1.0 - distances[idx])); - } + // let mut results = vec![]; + // for idx in sorted_indices[0..limit].to_vec() { + // results.push((self.document_ids[idx], 1.0 - similarities[idx])); + // } - return results; + // return results; } } diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 0b6d2928cc..6e6bedc33a 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -1,5 +1,6 @@ mod db; mod embedding; +mod parsing; mod search; use anyhow::{anyhow, Result}; @@ -7,11 +8,13 @@ use db::VectorDatabase; use embedding::{DummyEmbeddings, EmbeddingProvider, OpenAIEmbeddings}; use gpui::{AppContext, Entity, ModelContext, ModelHandle}; use language::LanguageRegistry; +use parsing::Document; use project::{Fs, Project}; +use search::{BruteForceSearch, VectorSearch}; use smol::channel; use std::{path::PathBuf, sync::Arc, time::Instant}; use tree_sitter::{Parser, QueryCursor}; -use util::{http::HttpClient, ResultExt}; +use util::{http::HttpClient, ResultExt, TryFutureExt}; use workspace::WorkspaceCreated; pub fn init( @@ -39,13 +42,6 @@ pub fn init( .detach(); } -#[derive(Debug)] -pub struct Document { - pub offset: usize, - pub name: String, - pub embedding: Vec, -} - #[derive(Debug)] pub struct IndexedFile { path: PathBuf, @@ -180,18 +176,54 @@ impl VectorStore { .detach(); cx.background() - .spawn(async move { + .spawn({ + let client = client.clone(); + async move { // Initialize Database, creates database and tables if not exists - VectorDatabase::initialize_database().await.log_err(); 
+ let db = VectorDatabase::new()?; while let Ok(indexed_file) = indexed_files_rx.recv().await { - VectorDatabase::insert_file(indexed_file).await.log_err(); + db.insert_file(indexed_file).log_err(); + } + + // ALL OF THE BELOW IS FOR TESTING, + // This should be removed as we find and appropriate place for evaluate our search. + + let embedding_provider = OpenAIEmbeddings{ client }; + let queries = vec![ + "compute embeddings for all of the symbols in the codebase, and write them to a database", + "compute an outline view of all of the symbols in a buffer", + "scan a directory on the file system and load all of its children into an in-memory snapshot", + ]; + let embeddings = embedding_provider.embed_batch(queries.clone()).await?; + + let t2 = Instant::now(); + let documents = db.get_documents().unwrap(); + let files = db.get_files().unwrap(); + println!("Retrieving all documents from Database: {}", t2.elapsed().as_millis()); + + let t1 = Instant::now(); + let mut bfs = BruteForceSearch::load(&db).unwrap(); + println!("Loading BFS to Memory: {:?}", t1.elapsed().as_millis()); + for (idx, embed) in embeddings.into_iter().enumerate() { + let t0 = Instant::now(); + println!("\nQuery: {:?}", queries[idx]); + let results = bfs.top_k_search(&embed, 5).await; + println!("Search Elapsed: {}", t0.elapsed().as_millis()); + for (id, distance) in results { + println!(""); + println!(" distance: {:?}", distance); + println!(" document: {:?}", documents[&id].name); + println!(" path: {:?}", files[&documents[&id].file_id].path); + } + } anyhow::Ok(()) - }) + }}.log_err()) .detach(); let provider = DummyEmbeddings {}; + // let provider = OpenAIEmbeddings { client }; cx.background() .scoped(|scope| { From 953e928bdb3aa80744a13ff53a197fd798fec0fe Mon Sep 17 00:00:00 2001 From: KCaverly Date: Mon, 26 Jun 2023 19:01:19 -0400 Subject: [PATCH 009/115] WIP: Got the streaming matrix multiplication working, and started work on file hashing. 
Co-authored-by: maxbrunsfeld --- Cargo.lock | 5 + crates/vector_store/Cargo.toml | 5 + crates/vector_store/src/db.rs | 84 ++++-- crates/vector_store/src/embedding.rs | 2 +- crates/vector_store/src/search.rs | 18 +- crates/vector_store/src/vector_store.rs | 243 +++++++++++++----- crates/vector_store/src/vector_store_tests.rs | 136 ++++++++++ 7 files changed, 396 insertions(+), 97 deletions(-) create mode 100644 crates/vector_store/src/vector_store_tests.rs diff --git a/Cargo.lock b/Cargo.lock index 48952d6c25..ff4caaa5a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7958,13 +7958,18 @@ dependencies = [ "language", "lazy_static", "log", + "matrixmultiply", "ndarray", "project", + "rand 0.8.5", "rusqlite", "serde", "serde_json", + "sha-1 0.10.1", "smol", "tree-sitter", + "tree-sitter-rust", + "unindent", "util", "workspace", ] diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml index 8de93c0401..dbe0a2e69c 100644 --- a/crates/vector_store/Cargo.toml +++ b/crates/vector_store/Cargo.toml @@ -27,9 +27,14 @@ serde_json.workspace = true async-trait.workspace = true bincode = "1.3.3" ndarray = "0.15.6" +sha-1 = "0.10.1" +matrixmultiply = "0.3.7" [dev-dependencies] gpui = { path = "../gpui", features = ["test-support"] } language = { path = "../language", features = ["test-support"] } project = { path = "../project", features = ["test-support"] } workspace = { path = "../workspace", features = ["test-support"] } +tree-sitter-rust = "*" +rand.workspace = true +unindent.workspace = true diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index 4f6da14cab..bcb1090a8d 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -1,4 +1,7 @@ -use std::{collections::HashMap, path::PathBuf}; +use std::{ + collections::HashMap, + path::{Path, PathBuf}, +}; use anyhow::{anyhow, Result}; @@ -13,7 +16,7 @@ use crate::IndexedFile; // This is saving to a local database store within the users dev zed path // Where do we 
want this to sit? // Assuming near where the workspace DB sits. -const VECTOR_DB_URL: &str = "embeddings_db"; +pub const VECTOR_DB_URL: &str = "embeddings_db"; // Note this is not an appropriate document #[derive(Debug)] @@ -28,7 +31,7 @@ pub struct DocumentRecord { #[derive(Debug)] pub struct FileRecord { pub id: usize, - pub path: String, + pub relative_path: String, pub sha1: String, } @@ -51,9 +54,9 @@ pub struct VectorDatabase { } impl VectorDatabase { - pub fn new() -> Result { + pub fn new(path: &str) -> Result { let this = Self { - db: rusqlite::Connection::open(VECTOR_DB_URL)?, + db: rusqlite::Connection::open(path)?, }; this.initialize_database()?; Ok(this) @@ -63,21 +66,23 @@ impl VectorDatabase { // This will create the database if it doesnt exist // Initialize Vector Databasing Tables - // self.db.execute( - // " - // CREATE TABLE IF NOT EXISTS projects ( - // id INTEGER PRIMARY KEY AUTOINCREMENT, - // path NVARCHAR(100) NOT NULL - // ) - // ", - // [], - // )?; + self.db.execute( + "CREATE TABLE IF NOT EXISTS worktrees ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + absolute_path VARCHAR NOT NULL + ); + CREATE UNIQUE INDEX IF NOT EXISTS worktrees_absolute_path ON worktrees (absolute_path); + ", + [], + )?; self.db.execute( "CREATE TABLE IF NOT EXISTS files ( id INTEGER PRIMARY KEY AUTOINCREMENT, - path NVARCHAR(100) NOT NULL, - sha1 NVARCHAR(40) NOT NULL + worktree_id INTEGER NOT NULL, + relative_path VARCHAR NOT NULL, + sha1 NVARCHAR(40) NOT NULL, + FOREIGN KEY(worktree_id) REFERENCES worktrees(id) ON DELETE CASCADE )", [], )?; @@ -87,7 +92,7 @@ impl VectorDatabase { id INTEGER PRIMARY KEY AUTOINCREMENT, file_id INTEGER NOT NULL, offset INTEGER NOT NULL, - name NVARCHAR(100) NOT NULL, + name VARCHAR NOT NULL, embedding BLOB NOT NULL, FOREIGN KEY(file_id) REFERENCES files(id) ON DELETE CASCADE )", @@ -116,7 +121,7 @@ impl VectorDatabase { pub fn insert_file(&self, indexed_file: IndexedFile) -> Result<()> { // Write to files table, and return generated id. 
let files_insert = self.db.execute( - "INSERT INTO files (path, sha1) VALUES (?1, ?2)", + "INSERT INTO files (relative_path, sha1) VALUES (?1, ?2)", params![indexed_file.path.to_str(), indexed_file.sha1], )?; @@ -141,12 +146,38 @@ impl VectorDatabase { Ok(()) } + pub fn find_or_create_worktree(&self, worktree_root_path: &Path) -> Result { + self.db.execute( + " + INSERT into worktrees (absolute_path) VALUES (?1) + ON CONFLICT DO NOTHING + ", + params![worktree_root_path.to_string_lossy()], + )?; + Ok(self.db.last_insert_rowid()) + } + + pub fn get_file_hashes(&self, worktree_id: i64) -> Result> { + let mut statement = self + .db + .prepare("SELECT relative_path, sha1 FROM files ORDER BY relative_path")?; + let mut result = Vec::new(); + for row in + statement.query_map([], |row| Ok((row.get::<_, String>(0)?.into(), row.get(1)?)))? + { + result.push(row?); + } + Ok(result) + } + pub fn get_files(&self) -> Result> { - let mut query_statement = self.db.prepare("SELECT id, path, sha1 FROM files")?; + let mut query_statement = self + .db + .prepare("SELECT id, relative_path, sha1 FROM files")?; let result_iter = query_statement.query_map([], |row| { Ok(FileRecord { id: row.get(0)?, - path: row.get(1)?, + relative_path: row.get(1)?, sha1: row.get(2)?, }) })?; @@ -160,6 +191,19 @@ impl VectorDatabase { Ok(pages) } + pub fn for_each_document( + &self, + worktree_id: i64, + mut f: impl FnMut(i64, Embedding), + ) -> Result<()> { + let mut query_statement = self.db.prepare("SELECT id, embedding FROM documents")?; + query_statement + .query_map(params![], |row| Ok((row.get(0)?, row.get(1)?)))? 
+ .filter_map(|row| row.ok()) + .for_each(|row| f(row.0, row.1)); + Ok(()) + } + pub fn get_documents(&self) -> Result> { let mut query_statement = self .db diff --git a/crates/vector_store/src/embedding.rs b/crates/vector_store/src/embedding.rs index f995639e64..86d8494ab4 100644 --- a/crates/vector_store/src/embedding.rs +++ b/crates/vector_store/src/embedding.rs @@ -44,7 +44,7 @@ struct OpenAIEmbeddingUsage { } #[async_trait] -pub trait EmbeddingProvider: Sync { +pub trait EmbeddingProvider: Sync + Send { async fn embed_batch(&self, spans: Vec<&str>) -> Result>>; } diff --git a/crates/vector_store/src/search.rs b/crates/vector_store/src/search.rs index ce8bdd1af4..90a8d874da 100644 --- a/crates/vector_store/src/search.rs +++ b/crates/vector_store/src/search.rs @@ -1,4 +1,4 @@ -use std::cmp::Ordering; +use std::{cmp::Ordering, path::PathBuf}; use async_trait::async_trait; use ndarray::{Array1, Array2}; @@ -20,7 +20,6 @@ pub struct BruteForceSearch { impl BruteForceSearch { pub fn load(db: &VectorDatabase) -> Result { - // let db = VectorDatabase {}; let documents = db.get_documents()?; let embeddings: Vec<&DocumentRecord> = documents.values().into_iter().collect(); let mut document_ids = vec![]; @@ -63,20 +62,5 @@ impl VectorSearch for BruteForceSearch { with_indices.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); with_indices.truncate(limit); with_indices - - // // extract the sorted indices from the sorted tuple vector - // let stored_indices = with_indices - // .into_iter() - // .map(|(index, value)| index) - // .collect::>(); - - // let sorted_indices: Vec = stored_indices.into_iter().rev().collect(); - - // let mut results = vec![]; - // for idx in sorted_indices[0..limit].to_vec() { - // results.push((self.document_ids[idx], 1.0 - similarities[idx])); - // } - - // return results; } } diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 6e6bedc33a..f34316e950 100644 --- 
a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -3,16 +3,19 @@ mod embedding; mod parsing; mod search; +#[cfg(test)] +mod vector_store_tests; + use anyhow::{anyhow, Result}; -use db::VectorDatabase; +use db::{VectorDatabase, VECTOR_DB_URL}; use embedding::{DummyEmbeddings, EmbeddingProvider, OpenAIEmbeddings}; -use gpui::{AppContext, Entity, ModelContext, ModelHandle}; +use gpui::{AppContext, Entity, ModelContext, ModelHandle, Task}; use language::LanguageRegistry; use parsing::Document; use project::{Fs, Project}; use search::{BruteForceSearch, VectorSearch}; use smol::channel; -use std::{path::PathBuf, sync::Arc, time::Instant}; +use std::{cmp::Ordering, path::PathBuf, sync::Arc, time::Instant}; use tree_sitter::{Parser, QueryCursor}; use util::{http::HttpClient, ResultExt, TryFutureExt}; use workspace::WorkspaceCreated; @@ -23,7 +26,16 @@ pub fn init( language_registry: Arc, cx: &mut AppContext, ) { - let vector_store = cx.add_model(|cx| VectorStore::new(fs, http_client, language_registry)); + let vector_store = cx.add_model(|cx| { + VectorStore::new( + fs, + VECTOR_DB_URL.to_string(), + Arc::new(OpenAIEmbeddings { + client: http_client, + }), + language_registry, + ) + }); cx.subscribe_global::({ let vector_store = vector_store.clone(); @@ -49,28 +61,36 @@ pub struct IndexedFile { documents: Vec, } -struct SearchResult { - path: PathBuf, - offset: usize, - name: String, - distance: f32, -} - +// struct SearchResult { +// path: PathBuf, +// offset: usize, +// name: String, +// distance: f32, +// } struct VectorStore { fs: Arc, - http_client: Arc, + database_url: Arc, + embedding_provider: Arc, language_registry: Arc, } +pub struct SearchResult { + pub name: String, + pub offset: usize, + pub file_path: PathBuf, +} + impl VectorStore { fn new( fs: Arc, - http_client: Arc, + database_url: String, + embedding_provider: Arc, language_registry: Arc, ) -> Self { Self { fs, - http_client, + database_url: database_url.into(), + 
embedding_provider, language_registry, } } @@ -79,10 +99,12 @@ impl VectorStore { cursor: &mut QueryCursor, parser: &mut Parser, embedding_provider: &dyn EmbeddingProvider, - fs: &Arc, language_registry: &Arc, file_path: PathBuf, + content: String, ) -> Result { + dbg!(&file_path, &content); + let language = language_registry .language_for_file(&file_path, None) .await?; @@ -97,7 +119,6 @@ impl VectorStore { .as_ref() .ok_or_else(|| anyhow!("no outline query"))?; - let content = fs.load(&file_path).await?; parser.set_language(grammar.ts_language).unwrap(); let tree = parser .parse(&content, None) @@ -142,7 +163,11 @@ impl VectorStore { }); } - fn add_project(&mut self, project: ModelHandle, cx: &mut ModelContext) { + fn add_project( + &mut self, + project: ModelHandle, + cx: &mut ModelContext, + ) -> Task> { let worktree_scans_complete = project .read(cx) .worktrees(cx) @@ -151,7 +176,8 @@ impl VectorStore { let fs = self.fs.clone(); let language_registry = self.language_registry.clone(); - let client = self.http_client.clone(); + let embedding_provider = self.embedding_provider.clone(); + let database_url = self.database_url.clone(); cx.spawn(|_, cx| async move { futures::future::join_all(worktree_scans_complete).await; @@ -163,24 +189,47 @@ impl VectorStore { .collect::>() }); - let (paths_tx, paths_rx) = channel::unbounded::(); + let db = VectorDatabase::new(&database_url)?; + let worktree_root_paths = worktrees + .iter() + .map(|worktree| worktree.abs_path().clone()) + .collect::>(); + let (db, file_hashes) = cx + .background() + .spawn(async move { + let mut hashes = Vec::new(); + for worktree_root_path in worktree_root_paths { + let worktree_id = + db.find_or_create_worktree(worktree_root_path.as_ref())?; + hashes.push((worktree_id, db.get_file_hashes(worktree_id)?)); + } + anyhow::Ok((db, hashes)) + }) + .await?; + + let (paths_tx, paths_rx) = channel::unbounded::<(i64, PathBuf, String)>(); let (indexed_files_tx, indexed_files_rx) = channel::unbounded::(); 
cx.background() - .spawn(async move { - for worktree in worktrees { - for file in worktree.files(false, 0) { - paths_tx.try_send(worktree.absolutize(&file.path)).unwrap(); + .spawn({ + let fs = fs.clone(); + async move { + for worktree in worktrees.into_iter() { + for file in worktree.files(false, 0) { + let absolute_path = worktree.absolutize(&file.path); + dbg!(&absolute_path); + if let Some(content) = fs.load(&absolute_path).await.log_err() { + dbg!(&content); + paths_tx.try_send((0, absolute_path, content)).unwrap(); + } + } } } }) .detach(); - cx.background() - .spawn({ - let client = client.clone(); - async move { + let db_write_task = cx.background().spawn( + async move { // Initialize Database, creates database and tables if not exists - let db = VectorDatabase::new()?; while let Ok(indexed_file) = indexed_files_rx.recv().await { db.insert_file(indexed_file).log_err(); } @@ -188,39 +237,39 @@ impl VectorStore { // ALL OF THE BELOW IS FOR TESTING, // This should be removed as we find and appropriate place for evaluate our search. 
- let embedding_provider = OpenAIEmbeddings{ client }; - let queries = vec![ - "compute embeddings for all of the symbols in the codebase, and write them to a database", - "compute an outline view of all of the symbols in a buffer", - "scan a directory on the file system and load all of its children into an in-memory snapshot", - ]; - let embeddings = embedding_provider.embed_batch(queries.clone()).await?; + // let queries = vec![ + // "compute embeddings for all of the symbols in the codebase, and write them to a database", + // "compute an outline view of all of the symbols in a buffer", + // "scan a directory on the file system and load all of its children into an in-memory snapshot", + // ]; + // let embeddings = embedding_provider.embed_batch(queries.clone()).await?; - let t2 = Instant::now(); - let documents = db.get_documents().unwrap(); - let files = db.get_files().unwrap(); - println!("Retrieving all documents from Database: {}", t2.elapsed().as_millis()); + // let t2 = Instant::now(); + // let documents = db.get_documents().unwrap(); + // let files = db.get_files().unwrap(); + // println!("Retrieving all documents from Database: {}", t2.elapsed().as_millis()); - let t1 = Instant::now(); - let mut bfs = BruteForceSearch::load(&db).unwrap(); - println!("Loading BFS to Memory: {:?}", t1.elapsed().as_millis()); - for (idx, embed) in embeddings.into_iter().enumerate() { - let t0 = Instant::now(); - println!("\nQuery: {:?}", queries[idx]); - let results = bfs.top_k_search(&embed, 5).await; - println!("Search Elapsed: {}", t0.elapsed().as_millis()); - for (id, distance) in results { - println!(""); - println!(" distance: {:?}", distance); - println!(" document: {:?}", documents[&id].name); - println!(" path: {:?}", files[&documents[&id].file_id].path); - } + // let t1 = Instant::now(); + // let mut bfs = BruteForceSearch::load(&db).unwrap(); + // println!("Loading BFS to Memory: {:?}", t1.elapsed().as_millis()); + // for (idx, embed) in 
embeddings.into_iter().enumerate() { + // let t0 = Instant::now(); + // println!("\nQuery: {:?}", queries[idx]); + // let results = bfs.top_k_search(&embed, 5).await; + // println!("Search Elapsed: {}", t0.elapsed().as_millis()); + // for (id, distance) in results { + // println!(""); + // println!(" distance: {:?}", distance); + // println!(" document: {:?}", documents[&id].name); + // println!(" path: {:?}", files[&documents[&id].file_id].relative_path); + // } - } + // } anyhow::Ok(()) - }}.log_err()) - .detach(); + } + .log_err(), + ); let provider = DummyEmbeddings {}; // let provider = OpenAIEmbeddings { client }; @@ -231,14 +280,15 @@ impl VectorStore { scope.spawn(async { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); - while let Ok(file_path) = paths_rx.recv().await { + while let Ok((worktree_id, file_path, content)) = paths_rx.recv().await + { if let Some(indexed_file) = Self::index_file( &mut cursor, &mut parser, &provider, - &fs, &language_registry, file_path, + content, ) .await .log_err() @@ -250,11 +300,86 @@ impl VectorStore { } }) .await; + drop(indexed_files_tx); + + db_write_task.await; + anyhow::Ok(()) + }) + } + + pub fn search( + &mut self, + phrase: String, + limit: usize, + cx: &mut ModelContext, + ) -> Task>> { + let embedding_provider = self.embedding_provider.clone(); + let database_url = self.database_url.clone(); + cx.spawn(|this, cx| async move { + let database = VectorDatabase::new(database_url.as_ref())?; + + // let embedding = embedding_provider.embed_batch(vec![&phrase]).await?; + // + let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1); + + database.for_each_document(0, |id, embedding| { + dbg!(id, &embedding); + + let similarity = dot(&embedding.0, &embedding.0); + let ix = match results.binary_search_by(|(_, s)| { + s.partial_cmp(&similarity).unwrap_or(Ordering::Equal) + }) { + Ok(ix) => ix, + Err(ix) => ix, + }; + + results.insert(ix, (id, similarity)); + results.truncate(limit); + })?; + + 
dbg!(&results); + + let ids = results.into_iter().map(|(id, _)| id).collect::>(); + // let documents = database.get_documents_by_ids(ids)?; + + // let search_provider = cx + // .background() + // .spawn(async move { BruteForceSearch::load(&database) }) + // .await?; + + // let results = search_provider.top_k_search(&embedding, limit)) + + anyhow::Ok(vec![]) }) - .detach(); } } impl Entity for VectorStore { type Event = (); } + +fn dot(vec_a: &[f32], vec_b: &[f32]) -> f32 { + let len = vec_a.len(); + assert_eq!(len, vec_b.len()); + + let mut result = 0.0; + unsafe { + matrixmultiply::sgemm( + 1, + len, + 1, + 1.0, + vec_a.as_ptr(), + len as isize, + 1, + vec_b.as_ptr(), + 1, + len as isize, + 0.0, + &mut result as *mut f32, + 1, + 1, + ); + } + result +} diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs new file mode 100644 index 0000000000..f3d01835e9 --- /dev/null +++ b/crates/vector_store/src/vector_store_tests.rs @@ -0,0 +1,136 @@ +use std::sync::Arc; + +use crate::{dot, embedding::EmbeddingProvider, VectorStore}; +use anyhow::Result; +use async_trait::async_trait; +use gpui::{Task, TestAppContext}; +use language::{Language, LanguageConfig, LanguageRegistry}; +use project::{FakeFs, Project}; +use rand::Rng; +use serde_json::json; +use unindent::Unindent; + +#[gpui::test] +async fn test_vector_store(cx: &mut TestAppContext) { + let fs = FakeFs::new(cx.background()); + fs.insert_tree( + "/the-root", + json!({ + "src": { + "file1.rs": " + fn aaa() { + println!(\"aaaa!\"); + } + + fn zzzzzzzzz() { + println!(\"SLEEPING\"); + } + ".unindent(), + "file2.rs": " + fn bbb() { + println!(\"bbbb!\"); + } + ".unindent(), + } + }), + ) + .await; + + let languages = Arc::new(LanguageRegistry::new(Task::ready(()))); + let rust_language = Arc::new( + Language::new( + LanguageConfig { + name: "Rust".into(), + path_suffixes: vec!["rs".into()], + ..Default::default() + }, + Some(tree_sitter_rust::language()), + ) + 
.with_outline_query( + r#" + (function_item + name: (identifier) @name + body: (block)) @item + "#, + ) + .unwrap(), + ); + languages.add(rust_language); + + let store = cx.add_model(|_| { + VectorStore::new( + fs.clone(), + "foo".to_string(), + Arc::new(FakeEmbeddingProvider), + languages, + ) + }); + + let project = Project::test(fs, ["/the-root".as_ref()], cx).await; + store + .update(cx, |store, cx| store.add_project(project, cx)) + .await + .unwrap(); + + let search_results = store + .update(cx, |store, cx| store.search("aaaa".to_string(), 5, cx)) + .await + .unwrap(); + + assert_eq!(search_results[0].offset, 0); + assert_eq!(search_results[1].name, "aaa"); +} + +#[test] +fn test_dot_product() { + assert_eq!(dot(&[1., 0., 0., 0., 0.], &[0., 1., 0., 0., 0.]), 0.); + assert_eq!(dot(&[2., 0., 0., 0., 0.], &[3., 1., 0., 0., 0.]), 6.); + + for _ in 0..100 { + let mut rng = rand::thread_rng(); + let a: [f32; 32] = rng.gen(); + let b: [f32; 32] = rng.gen(); + assert_eq!( + round_to_decimals(dot(&a, &b), 3), + round_to_decimals(reference_dot(&a, &b), 3) + ); + } + + fn round_to_decimals(n: f32, decimal_places: i32) -> f32 { + let factor = (10.0 as f32).powi(decimal_places); + (n * factor).round() / factor + } + + fn reference_dot(a: &[f32], b: &[f32]) -> f32 { + a.iter().zip(b.iter()).map(|(a, b)| a * b).sum() + } +} + +struct FakeEmbeddingProvider; + +#[async_trait] +impl EmbeddingProvider for FakeEmbeddingProvider { + async fn embed_batch(&self, spans: Vec<&str>) -> Result>> { + Ok(spans + .iter() + .map(|span| { + let mut result = vec![0.0; 26]; + for letter in span.chars() { + if letter as u32 > 'a' as u32 { + let ix = (letter as u32) - ('a' as u32); + if ix < 26 { + result[ix as usize] += 1.0; + } + } + } + + let norm = result.iter().map(|x| x * x).sum::().sqrt(); + for x in &mut result { + *x /= norm; + } + + result + }) + .collect()) + } +} From 4bfe3de1f2e012bc3fb7ee9a928d7ae223e3c97d Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 27 Jun 2023 15:31:21 
-0400 Subject: [PATCH 010/115] Working incremental index engine, with streaming similarity search! Co-authored-by: maxbrunsfeld --- Cargo.lock | 1 + crates/vector_store/Cargo.toml | 3 +- crates/vector_store/src/db.rs | 184 ++++++++++++++---- crates/vector_store/src/vector_store.rs | 168 +++++++++------- crates/vector_store/src/vector_store_tests.rs | 23 ++- 5 files changed, 268 insertions(+), 111 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ff4caaa5a6..1ea1d1a1b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7967,6 +7967,7 @@ dependencies = [ "serde_json", "sha-1 0.10.1", "smol", + "tempdir", "tree-sitter", "tree-sitter-rust", "unindent", diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml index dbe0a2e69c..edc06bb295 100644 --- a/crates/vector_store/Cargo.toml +++ b/crates/vector_store/Cargo.toml @@ -17,7 +17,7 @@ util = { path = "../util" } anyhow.workspace = true futures.workspace = true smol.workspace = true -rusqlite = { version = "0.27.0", features=["blob"] } +rusqlite = { version = "0.27.0", features = ["blob", "array", "modern_sqlite"] } isahc.workspace = true log.workspace = true tree-sitter.workspace = true @@ -38,3 +38,4 @@ workspace = { path = "../workspace", features = ["test-support"] } tree-sitter-rust = "*" rand.workspace = true unindent.workspace = true +tempdir.workspace = true diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index bcb1090a8d..f074a7066b 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -7,9 +7,10 @@ use anyhow::{anyhow, Result}; use rusqlite::{ params, - types::{FromSql, FromSqlResult, ValueRef}, - Connection, + types::{FromSql, FromSqlResult, ToSqlOutput, ValueRef}, + ToSql, }; +use sha1::{Digest, Sha1}; use crate::IndexedFile; @@ -32,7 +33,60 @@ pub struct DocumentRecord { pub struct FileRecord { pub id: usize, pub relative_path: String, - pub sha1: String, + pub sha1: FileSha1, +} + +#[derive(Debug)] +pub struct FileSha1(pub Vec); + 
+impl FileSha1 { + pub fn from_str(content: String) -> Self { + let mut hasher = Sha1::new(); + hasher.update(content); + let sha1 = hasher.finalize()[..] + .into_iter() + .map(|val| val.to_owned()) + .collect::>(); + return FileSha1(sha1); + } + + pub fn equals(&self, content: &String) -> bool { + let mut hasher = Sha1::new(); + hasher.update(content); + let sha1 = hasher.finalize()[..] + .into_iter() + .map(|val| val.to_owned()) + .collect::>(); + + let equal = self + .0 + .clone() + .into_iter() + .zip(sha1) + .filter(|&(a, b)| a == b) + .count() + == self.0.len(); + + equal + } +} + +impl ToSql for FileSha1 { + fn to_sql(&self) -> rusqlite::Result> { + return self.0.to_sql(); + } +} + +impl FromSql for FileSha1 { + fn column_result(value: ValueRef) -> FromSqlResult { + let bytes = value.as_blob()?; + Ok(FileSha1( + bytes + .into_iter() + .map(|val| val.to_owned()) + .collect::>(), + )) + } } #[derive(Debug)] @@ -63,6 +117,8 @@ impl VectorDatabase { } fn initialize_database(&self) -> Result<()> { + rusqlite::vtab::array::load_module(&self.db)?; + // This will create the database if it doesnt exist // Initialize Vector Databasing Tables @@ -81,7 +137,7 @@ impl VectorDatabase { id INTEGER PRIMARY KEY AUTOINCREMENT, worktree_id INTEGER NOT NULL, relative_path VARCHAR NOT NULL, - sha1 NVARCHAR(40) NOT NULL, + sha1 BLOB NOT NULL, FOREIGN KEY(worktree_id) REFERENCES worktrees(id) ON DELETE CASCADE )", [], @@ -102,30 +158,23 @@ impl VectorDatabase { Ok(()) } - // pub async fn get_or_create_project(project_path: PathBuf) -> Result { - // // Check if we have the project, if we do, return the ID - // // If we do not have the project, insert the project and return the ID - - // let db = rusqlite::Connection::open(VECTOR_DB_URL)?; - - // let projects_query = db.prepare(&format!( - // "SELECT id FROM projects WHERE path = {}", - // project_path.to_str().unwrap() // This is unsafe - // ))?; - - // let project_id = db.last_insert_rowid(); - - // return Ok(project_id as usize); 
- // } - - pub fn insert_file(&self, indexed_file: IndexedFile) -> Result<()> { + pub fn insert_file(&self, worktree_id: i64, indexed_file: IndexedFile) -> Result<()> { // Write to files table, and return generated id. - let files_insert = self.db.execute( - "INSERT INTO files (relative_path, sha1) VALUES (?1, ?2)", - params![indexed_file.path.to_str(), indexed_file.sha1], + log::info!("Inserting File!"); + self.db.execute( + " + DELETE FROM files WHERE worktree_id = ?1 AND relative_path = ?2; + ", + params![worktree_id, indexed_file.path.to_str()], + )?; + self.db.execute( + " + INSERT INTO files (worktree_id, relative_path, sha1) VALUES (?1, ?2, $3); + ", + params![worktree_id, indexed_file.path.to_str(), indexed_file.sha1], )?; - let inserted_id = self.db.last_insert_rowid(); + let file_id = self.db.last_insert_rowid(); // Currently inserting at approximately 3400 documents a second // I imagine we can speed this up with a bulk insert of some kind. @@ -135,7 +184,7 @@ impl VectorDatabase { self.db.execute( "INSERT INTO documents (file_id, offset, name, embedding) VALUES (?1, ?2, ?3, ?4)", params![ - inserted_id, + file_id, document.offset.to_string(), document.name, embedding_blob @@ -147,25 +196,41 @@ impl VectorDatabase { } pub fn find_or_create_worktree(&self, worktree_root_path: &Path) -> Result { + // Check that the absolute path doesnt exist + let mut worktree_query = self + .db + .prepare("SELECT id FROM worktrees WHERE absolute_path = ?1")?; + + let worktree_id = worktree_query + .query_row(params![worktree_root_path.to_string_lossy()], |row| { + Ok(row.get::<_, i64>(0)?) 
+ }) + .map_err(|err| anyhow!(err)); + + if worktree_id.is_ok() { + return worktree_id; + } + + // If worktree_id is Err, insert new worktree self.db.execute( " INSERT into worktrees (absolute_path) VALUES (?1) - ON CONFLICT DO NOTHING ", params![worktree_root_path.to_string_lossy()], )?; Ok(self.db.last_insert_rowid()) } - pub fn get_file_hashes(&self, worktree_id: i64) -> Result> { - let mut statement = self - .db - .prepare("SELECT relative_path, sha1 FROM files ORDER BY relative_path")?; - let mut result = Vec::new(); - for row in - statement.query_map([], |row| Ok((row.get::<_, String>(0)?.into(), row.get(1)?)))? - { - result.push(row?); + pub fn get_file_hashes(&self, worktree_id: i64) -> Result> { + let mut statement = self.db.prepare( + "SELECT relative_path, sha1 FROM files WHERE worktree_id = ?1 ORDER BY relative_path", + )?; + let mut result: HashMap = HashMap::new(); + for row in statement.query_map(params![worktree_id], |row| { + Ok((row.get::<_, String>(0)?.into(), row.get(1)?)) + })? { + let row = row?; + result.insert(row.0, row.1); } Ok(result) } @@ -204,6 +269,53 @@ impl VectorDatabase { Ok(()) } + pub fn get_documents_by_ids(&self, ids: &[i64]) -> Result> { + let mut statement = self.db.prepare( + " + SELECT + documents.id, files.relative_path, documents.offset, documents.name + FROM + documents, files + WHERE + documents.file_id = files.id AND + documents.id in rarray(?) 
+ ", + )?; + + let result_iter = statement.query_map( + params![std::rc::Rc::new( + ids.iter() + .copied() + .map(|v| rusqlite::types::Value::from(v)) + .collect::>() + )], + |row| { + Ok(( + row.get::<_, i64>(0)?, + row.get::<_, String>(1)?.into(), + row.get(2)?, + row.get(3)?, + )) + }, + )?; + + let mut values_by_id = HashMap::::default(); + for row in result_iter { + let (id, path, offset, name) = row?; + values_by_id.insert(id, (path, offset, name)); + } + + let mut results = Vec::with_capacity(ids.len()); + for id in ids { + let (path, offset, name) = values_by_id + .remove(id) + .ok_or(anyhow!("missing document id {}", id))?; + results.push((path, offset, name)); + } + + Ok(results) + } + pub fn get_documents(&self) -> Result> { let mut query_statement = self .db diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index f34316e950..7e4c29cef6 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -7,15 +7,14 @@ mod search; mod vector_store_tests; use anyhow::{anyhow, Result}; -use db::{VectorDatabase, VECTOR_DB_URL}; -use embedding::{DummyEmbeddings, EmbeddingProvider, OpenAIEmbeddings}; +use db::{FileSha1, VectorDatabase, VECTOR_DB_URL}; +use embedding::{EmbeddingProvider, OpenAIEmbeddings}; use gpui::{AppContext, Entity, ModelContext, ModelHandle, Task}; -use language::LanguageRegistry; +use language::{Language, LanguageRegistry}; use parsing::Document; use project::{Fs, Project}; -use search::{BruteForceSearch, VectorSearch}; use smol::channel; -use std::{cmp::Ordering, path::PathBuf, sync::Arc, time::Instant}; +use std::{cmp::Ordering, collections::HashMap, path::PathBuf, sync::Arc}; use tree_sitter::{Parser, QueryCursor}; use util::{http::HttpClient, ResultExt, TryFutureExt}; use workspace::WorkspaceCreated; @@ -45,7 +44,7 @@ pub fn init( let project = workspace.read(cx).project().clone(); if project.read(cx).is_local() { vector_store.update(cx, |store, cx| { - 
store.add_project(project, cx); + store.add_project(project, cx).detach(); }); } } @@ -57,16 +56,10 @@ pub fn init( #[derive(Debug)] pub struct IndexedFile { path: PathBuf, - sha1: String, + sha1: FileSha1, documents: Vec, } -// struct SearchResult { -// path: PathBuf, -// offset: usize, -// name: String, -// distance: f32, -// } struct VectorStore { fs: Arc, database_url: Arc, @@ -99,20 +92,10 @@ impl VectorStore { cursor: &mut QueryCursor, parser: &mut Parser, embedding_provider: &dyn EmbeddingProvider, - language_registry: &Arc, + language: Arc, file_path: PathBuf, content: String, ) -> Result { - dbg!(&file_path, &content); - - let language = language_registry - .language_for_file(&file_path, None) - .await?; - - if language.name().as_ref() != "Rust" { - Err(anyhow!("unsupported language"))?; - } - let grammar = language.grammar().ok_or_else(|| anyhow!("no grammar"))?; let outline_config = grammar .outline_config @@ -156,9 +139,11 @@ impl VectorStore { document.embedding = embedding; } + let sha1 = FileSha1::from_str(content); + return Ok(IndexedFile { path: file_path, - sha1: String::new(), + sha1, documents, }); } @@ -171,7 +156,13 @@ impl VectorStore { let worktree_scans_complete = project .read(cx) .worktrees(cx) - .map(|worktree| worktree.read(cx).as_local().unwrap().scan_complete()) + .map(|worktree| { + let scan_complete = worktree.read(cx).as_local().unwrap().scan_complete(); + async move { + scan_complete.await; + log::info!("worktree scan completed"); + } + }) .collect::>(); let fs = self.fs.clone(); @@ -182,6 +173,13 @@ impl VectorStore { cx.spawn(|_, cx| async move { futures::future::join_all(worktree_scans_complete).await; + // TODO: remove this after fixing the bug in scan_complete + cx.background() + .timer(std::time::Duration::from_secs(3)) + .await; + + let db = VectorDatabase::new(&database_url)?; + let worktrees = project.read_with(&cx, |project, cx| { project .worktrees(cx) @@ -189,37 +187,74 @@ impl VectorStore { .collect::>() }); - let db 
= VectorDatabase::new(&database_url)?; let worktree_root_paths = worktrees .iter() .map(|worktree| worktree.abs_path().clone()) .collect::>(); - let (db, file_hashes) = cx + + // Here we query the worktree ids, and yet we dont have them elsewhere + // We likely want to clean up these datastructures + let (db, worktree_hashes, worktree_ids) = cx .background() .spawn(async move { - let mut hashes = Vec::new(); + let mut worktree_ids: HashMap = HashMap::new(); + let mut hashes: HashMap> = HashMap::new(); for worktree_root_path in worktree_root_paths { let worktree_id = db.find_or_create_worktree(worktree_root_path.as_ref())?; - hashes.push((worktree_id, db.get_file_hashes(worktree_id)?)); + worktree_ids.insert(worktree_root_path.to_path_buf(), worktree_id); + hashes.insert(worktree_id, db.get_file_hashes(worktree_id)?); } - anyhow::Ok((db, hashes)) + anyhow::Ok((db, hashes, worktree_ids)) }) .await?; - let (paths_tx, paths_rx) = channel::unbounded::<(i64, PathBuf, String)>(); - let (indexed_files_tx, indexed_files_rx) = channel::unbounded::(); + let (paths_tx, paths_rx) = + channel::unbounded::<(i64, PathBuf, String, Arc)>(); + let (indexed_files_tx, indexed_files_rx) = channel::unbounded::<(i64, IndexedFile)>(); cx.background() .spawn({ let fs = fs.clone(); async move { for worktree in worktrees.into_iter() { + let worktree_id = worktree_ids[&worktree.abs_path().to_path_buf()]; + let file_hashes = &worktree_hashes[&worktree_id]; for file in worktree.files(false, 0) { let absolute_path = worktree.absolutize(&file.path); - dbg!(&absolute_path); - if let Some(content) = fs.load(&absolute_path).await.log_err() { - dbg!(&content); - paths_tx.try_send((0, absolute_path, content)).unwrap(); + + if let Ok(language) = language_registry + .language_for_file(&absolute_path, None) + .await + { + if language.name().as_ref() != "Rust" { + continue; + } + + if let Some(content) = fs.load(&absolute_path).await.log_err() { + log::info!("loaded file: {absolute_path:?}"); + + let 
path_buf = file.path.to_path_buf(); + let already_stored = file_hashes + .get(&path_buf) + .map_or(false, |existing_hash| { + existing_hash.equals(&content) + }); + + if !already_stored { + log::info!( + "File Changed (Sending to Parse): {:?}", + &path_buf + ); + paths_tx + .try_send(( + worktree_id, + path_buf, + content, + language, + )) + .unwrap(); + } + } } } } @@ -230,8 +265,8 @@ impl VectorStore { let db_write_task = cx.background().spawn( async move { // Initialize Database, creates database and tables if not exists - while let Ok(indexed_file) = indexed_files_rx.recv().await { - db.insert_file(indexed_file).log_err(); + while let Ok((worktree_id, indexed_file)) = indexed_files_rx.recv().await { + db.insert_file(worktree_id, indexed_file).log_err(); } // ALL OF THE BELOW IS FOR TESTING, @@ -271,29 +306,29 @@ impl VectorStore { .log_err(), ); - let provider = DummyEmbeddings {}; - // let provider = OpenAIEmbeddings { client }; - cx.background() .scoped(|scope| { for _ in 0..cx.background().num_cpus() { scope.spawn(async { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); - while let Ok((worktree_id, file_path, content)) = paths_rx.recv().await + while let Ok((worktree_id, file_path, content, language)) = + paths_rx.recv().await { if let Some(indexed_file) = Self::index_file( &mut cursor, &mut parser, - &provider, - &language_registry, + embedding_provider.as_ref(), + language, file_path, content, ) .await .log_err() { - indexed_files_tx.try_send(indexed_file).unwrap(); + indexed_files_tx + .try_send((worktree_id, indexed_file)) + .unwrap(); } } }); @@ -315,41 +350,42 @@ impl VectorStore { ) -> Task>> { let embedding_provider = self.embedding_provider.clone(); let database_url = self.database_url.clone(); - cx.spawn(|this, cx| async move { + cx.background().spawn(async move { let database = VectorDatabase::new(database_url.as_ref())?; - // let embedding = embedding_provider.embed_batch(vec![&phrase]).await?; - // + let phrase_embedding = 
embedding_provider + .embed_batch(vec![&phrase]) + .await? + .into_iter() + .next() + .unwrap(); + let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1); - database.for_each_document(0, |id, embedding| { - dbg!(id, &embedding); - - let similarity = dot(&embedding.0, &embedding.0); + let similarity = dot(&embedding.0, &phrase_embedding); let ix = match results.binary_search_by(|(_, s)| { - s.partial_cmp(&similarity).unwrap_or(Ordering::Equal) + similarity.partial_cmp(&s).unwrap_or(Ordering::Equal) }) { Ok(ix) => ix, Err(ix) => ix, }; - results.insert(ix, (id, similarity)); results.truncate(limit); })?; - dbg!(&results); - let ids = results.into_iter().map(|(id, _)| id).collect::>(); - // let documents = database.get_documents_by_ids(ids)?; + let documents = database.get_documents_by_ids(&ids)?; - // let search_provider = cx - // .background() - // .spawn(async move { BruteForceSearch::load(&database) }) - // .await?; - - // let results = search_provider.top_k_search(&embedding, limit)) - - anyhow::Ok(vec![]) + anyhow::Ok( + documents + .into_iter() + .map(|(file_path, offset, name)| SearchResult { + name, + offset, + file_path, + }) + .collect(), + ) }) } } diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs index f3d01835e9..c67bb9954f 100644 --- a/crates/vector_store/src/vector_store_tests.rs +++ b/crates/vector_store/src/vector_store_tests.rs @@ -57,20 +57,26 @@ async fn test_vector_store(cx: &mut TestAppContext) { ); languages.add(rust_language); + let db_dir = tempdir::TempDir::new("vector-store").unwrap(); + let db_path = db_dir.path().join("db.sqlite"); + let store = cx.add_model(|_| { VectorStore::new( fs.clone(), - "foo".to_string(), + db_path.to_string_lossy().to_string(), Arc::new(FakeEmbeddingProvider), languages, ) }); let project = Project::test(fs, ["/the-root".as_ref()], cx).await; - store - .update(cx, |store, cx| store.add_project(project, cx)) - .await - .unwrap(); + let add_project = 
store.update(cx, |store, cx| store.add_project(project, cx)); + + // TODO - remove + cx.foreground() + .advance_clock(std::time::Duration::from_secs(3)); + + add_project.await.unwrap(); let search_results = store .update(cx, |store, cx| store.search("aaaa".to_string(), 5, cx)) @@ -78,7 +84,7 @@ async fn test_vector_store(cx: &mut TestAppContext) { .unwrap(); assert_eq!(search_results[0].offset, 0); - assert_eq!(search_results[1].name, "aaa"); + assert_eq!(search_results[0].name, "aaa"); } #[test] @@ -114,9 +120,10 @@ impl EmbeddingProvider for FakeEmbeddingProvider { Ok(spans .iter() .map(|span| { - let mut result = vec![0.0; 26]; + let mut result = vec![1.0; 26]; for letter in span.chars() { - if letter as u32 > 'a' as u32 { + let letter = letter.to_ascii_lowercase(); + if letter as u32 >= 'a' as u32 { let ix = (letter as u32) - ('a' as u32); if ix < 26 { result[ix as usize] += 1.0; From d1bdfa0be6e2a638b5f8dd6e836fd2aa06c3264f Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 27 Jun 2023 15:53:07 -0400 Subject: [PATCH 011/115] Added a dummy action for testing the semantic search functionality in the command palette. 
Co-authored-by: maxbrunsfeld --- crates/vector_store/src/vector_store.rs | 27 +++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 7e4c29cef6..4860bcd2bb 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -9,7 +9,7 @@ mod vector_store_tests; use anyhow::{anyhow, Result}; use db::{FileSha1, VectorDatabase, VECTOR_DB_URL}; use embedding::{EmbeddingProvider, OpenAIEmbeddings}; -use gpui::{AppContext, Entity, ModelContext, ModelHandle, Task}; +use gpui::{actions, AppContext, Entity, ModelContext, ModelHandle, Task, ViewContext}; use language::{Language, LanguageRegistry}; use parsing::Document; use project::{Fs, Project}; @@ -17,7 +17,9 @@ use smol::channel; use std::{cmp::Ordering, collections::HashMap, path::PathBuf, sync::Arc}; use tree_sitter::{Parser, QueryCursor}; use util::{http::HttpClient, ResultExt, TryFutureExt}; -use workspace::WorkspaceCreated; +use workspace::{Workspace, WorkspaceCreated}; + +actions!(semantic_search, [TestSearch]); pub fn init( fs: Arc, @@ -51,6 +53,26 @@ pub fn init( } }) .detach(); + + cx.add_action({ + let vector_store = vector_store.clone(); + move |workspace: &mut Workspace, _: &TestSearch, cx: &mut ViewContext| { + let t0 = std::time::Instant::now(); + let task = vector_store.update(cx, |store, cx| { + store.search("compute embeddings for all of the symbols in the codebase and write them to a database".to_string(), 10, cx) + }); + + cx.spawn(|this, cx| async move { + let results = task.await?; + let duration = t0.elapsed(); + + println!("search took {:?}", duration); + println!("results {:?}", results); + + anyhow::Ok(()) + }).detach() + } + }); } #[derive(Debug)] @@ -67,6 +89,7 @@ struct VectorStore { language_registry: Arc, } +#[derive(Debug)] pub struct SearchResult { pub name: String, pub offset: usize, From 9d19dea7dd858bb49fbbc34ed8eb56b0146d8ed3 Mon Sep 
17 00:00:00 2001 From: KCaverly Date: Wed, 28 Jun 2023 08:58:50 -0400 Subject: [PATCH 012/115] updated vector_store to remove parsing module --- crates/vector_store/src/vector_store.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 4860bcd2bb..d7fd59466f 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -1,6 +1,5 @@ mod db; mod embedding; -mod parsing; mod search; #[cfg(test)] @@ -11,7 +10,6 @@ use db::{FileSha1, VectorDatabase, VECTOR_DB_URL}; use embedding::{EmbeddingProvider, OpenAIEmbeddings}; use gpui::{actions, AppContext, Entity, ModelContext, ModelHandle, Task, ViewContext}; use language::{Language, LanguageRegistry}; -use parsing::Document; use project::{Fs, Project}; use smol::channel; use std::{cmp::Ordering, collections::HashMap, path::PathBuf, sync::Arc}; @@ -21,6 +19,13 @@ use workspace::{Workspace, WorkspaceCreated}; actions!(semantic_search, [TestSearch]); +#[derive(Debug)] +pub struct Document { + pub offset: usize, + pub name: String, + pub embedding: Vec, +} + pub fn init( fs: Arc, http_client: Arc, From 40ff7779bbf858fe4786602ad1edef90ad69ca51 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Wed, 28 Jun 2023 13:27:26 -0400 Subject: [PATCH 013/115] WIP: Working modal, without navigation and search on every keystroke --- Cargo.lock | 2 + crates/vector_store/Cargo.toml | 2 + crates/vector_store/src/modal.rs | 107 ++++++++++++++++++++++++ crates/vector_store/src/vector_store.rs | 54 +++++++----- 4 files changed, 146 insertions(+), 19 deletions(-) create mode 100644 crates/vector_store/src/modal.rs diff --git a/Cargo.lock b/Cargo.lock index 1ea1d1a1b4..2eff8630cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7960,6 +7960,7 @@ dependencies = [ "log", "matrixmultiply", "ndarray", + "picker", "project", "rand 0.8.5", "rusqlite", @@ -7968,6 +7969,7 @@ dependencies = [ "sha-1 0.10.1", "smol", 
"tempdir", + "theme", "tree-sitter", "tree-sitter-rust", "unindent", diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml index edc06bb295..ddfef6927b 100644 --- a/crates/vector_store/Cargo.toml +++ b/crates/vector_store/Cargo.toml @@ -14,6 +14,8 @@ language = { path = "../language" } project = { path = "../project" } workspace = { path = "../workspace" } util = { path = "../util" } +picker = { path = "../picker" } +theme = { path = "../theme" } anyhow.workspace = true futures.workspace = true smol.workspace = true diff --git a/crates/vector_store/src/modal.rs b/crates/vector_store/src/modal.rs new file mode 100644 index 0000000000..48429150cd --- /dev/null +++ b/crates/vector_store/src/modal.rs @@ -0,0 +1,107 @@ +use std::sync::Arc; + +use gpui::{ + actions, elements::*, AnyElement, AppContext, ModelHandle, MouseState, Task, ViewContext, + WeakViewHandle, +}; +use picker::{Picker, PickerDelegate, PickerEvent}; +use project::Project; +use util::ResultExt; +use workspace::Workspace; + +use crate::{SearchResult, VectorStore}; + +actions!(semantic_search, [Toggle]); + +pub type SemanticSearch = Picker; + +pub struct SemanticSearchDelegate { + workspace: WeakViewHandle, + project: ModelHandle, + vector_store: ModelHandle, + selected_match_index: usize, + matches: Vec, +} + +impl SemanticSearchDelegate { + // This is currently searching on every keystroke, + // This is wildly overkill, and has the potential to get expensive + // We will need to update this to throttle searching + pub fn new( + workspace: WeakViewHandle, + project: ModelHandle, + vector_store: ModelHandle, + ) -> Self { + Self { + workspace, + project, + vector_store, + selected_match_index: 0, + matches: vec![], + } + } +} + +impl PickerDelegate for SemanticSearchDelegate { + fn placeholder_text(&self) -> Arc { + "Search repository in natural language...".into() + } + + fn confirm(&mut self, cx: &mut ViewContext) { + todo!() + } + + fn dismissed(&mut self, _cx: &mut ViewContext) 
{} + + fn match_count(&self) -> usize { + self.matches.len() + } + + fn selected_index(&self) -> usize { + self.selected_match_index + } + + fn set_selected_index(&mut self, ix: usize, _cx: &mut ViewContext) { + self.selected_match_index = ix; + } + + fn update_matches(&mut self, query: String, cx: &mut ViewContext) -> Task<()> { + let task = self + .vector_store + .update(cx, |store, cx| store.search(query.to_string(), 10, cx)); + + cx.spawn(|this, mut cx| async move { + let results = task.await.log_err(); + this.update(&mut cx, |this, cx| { + if let Some(results) = results { + let delegate = this.delegate_mut(); + delegate.matches = results; + } + }); + }) + } + + fn render_match( + &self, + ix: usize, + mouse_state: &mut MouseState, + selected: bool, + cx: &AppContext, + ) -> AnyElement> { + let theme = theme::current(cx); + let style = &theme.picker.item; + let current_style = style.style_for(mouse_state, selected); + + let search_result = &self.matches[ix]; + + let mut path = search_result.file_path.to_string_lossy(); + let name = search_result.name.clone(); + + Flex::column() + .with_child(Text::new(name, current_style.label.text.clone()).with_soft_wrap(false)) + .with_child(Label::new(path.to_string(), style.default.label.clone())) + .contained() + .with_style(current_style.container) + .into_any() + } +} diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index d7fd59466f..2dc479045f 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -1,5 +1,6 @@ mod db; mod embedding; +mod modal; mod search; #[cfg(test)] @@ -10,6 +11,7 @@ use db::{FileSha1, VectorDatabase, VECTOR_DB_URL}; use embedding::{EmbeddingProvider, OpenAIEmbeddings}; use gpui::{actions, AppContext, Entity, ModelContext, ModelHandle, Task, ViewContext}; use language::{Language, LanguageRegistry}; +use modal::{SemanticSearch, SemanticSearchDelegate, Toggle}; use project::{Fs, Project}; use smol::channel; use 
std::{cmp::Ordering, collections::HashMap, path::PathBuf, sync::Arc}; @@ -17,8 +19,6 @@ use tree_sitter::{Parser, QueryCursor}; use util::{http::HttpClient, ResultExt, TryFutureExt}; use workspace::{Workspace, WorkspaceCreated}; -actions!(semantic_search, [TestSearch]); - #[derive(Debug)] pub struct Document { pub offset: usize, @@ -60,24 +60,40 @@ pub fn init( .detach(); cx.add_action({ - let vector_store = vector_store.clone(); - move |workspace: &mut Workspace, _: &TestSearch, cx: &mut ViewContext| { - let t0 = std::time::Instant::now(); - let task = vector_store.update(cx, |store, cx| { - store.search("compute embeddings for all of the symbols in the codebase and write them to a database".to_string(), 10, cx) - }); - - cx.spawn(|this, cx| async move { - let results = task.await?; - let duration = t0.elapsed(); - - println!("search took {:?}", duration); - println!("results {:?}", results); - - anyhow::Ok(()) - }).detach() + move |workspace: &mut Workspace, _: &Toggle, cx: &mut ViewContext| { + let vector_store = vector_store.clone(); + workspace.toggle_modal(cx, |workspace, cx| { + let project = workspace.project().clone(); + let workspace = cx.weak_handle(); + cx.add_view(|cx| { + SemanticSearch::new( + SemanticSearchDelegate::new(workspace, project, vector_store), + cx, + ) + }) + }) } }); + SemanticSearch::init(cx); + // cx.add_action({ + // let vector_store = vector_store.clone(); + // move |workspace: &mut Workspace, _: &TestSearch, cx: &mut ViewContext| { + // let t0 = std::time::Instant::now(); + // let task = vector_store.update(cx, |store, cx| { + // store.search("compute embeddings for all of the symbols in the codebase and write them to a database".to_string(), 10, cx) + // }); + + // cx.spawn(|this, cx| async move { + // let results = task.await?; + // let duration = t0.elapsed(); + + // println!("search took {:?}", duration); + // println!("results {:?}", results); + + // anyhow::Ok(()) + // }).detach() + // } + // }); } #[derive(Debug)] @@ -87,7 
+103,7 @@ pub struct IndexedFile { documents: Vec, } -struct VectorStore { +pub struct VectorStore { fs: Arc, database_url: Arc, embedding_provider: Arc, From 400d39740ca505c3b5f143818c0ebe8eeead0e6e Mon Sep 17 00:00:00 2001 From: KCaverly Date: Wed, 28 Jun 2023 16:21:03 -0400 Subject: [PATCH 014/115] updated both indexing and search method for vector store, to maintain both zed worktree ids and db worktree ids Co-authored-by: maxbrunsfeld --- crates/vector_store/src/db.rs | 67 ++++--- crates/vector_store/src/modal.rs | 17 +- crates/vector_store/src/vector_store.rs | 174 ++++++++++-------- crates/vector_store/src/vector_store_tests.rs | 10 +- 4 files changed, 159 insertions(+), 109 deletions(-) diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index f074a7066b..96856936fc 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -1,6 +1,7 @@ use std::{ collections::HashMap, path::{Path, PathBuf}, + rc::Rc, }; use anyhow::{anyhow, Result}; @@ -258,22 +259,34 @@ impl VectorDatabase { pub fn for_each_document( &self, - worktree_id: i64, + worktree_ids: &[i64], mut f: impl FnMut(i64, Embedding), ) -> Result<()> { - let mut query_statement = self.db.prepare("SELECT id, embedding FROM documents")?; + let mut query_statement = self.db.prepare( + " + SELECT + documents.id, documents.embedding + FROM + documents, files + WHERE + documents.file_id = files.id AND + files.worktree_id IN rarray(?) + ", + )?; query_statement - .query_map(params![], |row| Ok((row.get(0)?, row.get(1)?)))? + .query_map(params![ids_to_sql(worktree_ids)], |row| { + Ok((row.get(0)?, row.get(1)?)) + })? 
.filter_map(|row| row.ok()) .for_each(|row| f(row.0, row.1)); Ok(()) } - pub fn get_documents_by_ids(&self, ids: &[i64]) -> Result> { + pub fn get_documents_by_ids(&self, ids: &[i64]) -> Result> { let mut statement = self.db.prepare( " SELECT - documents.id, files.relative_path, documents.offset, documents.name + documents.id, files.worktree_id, files.relative_path, documents.offset, documents.name FROM documents, files WHERE @@ -282,35 +295,28 @@ impl VectorDatabase { ", )?; - let result_iter = statement.query_map( - params![std::rc::Rc::new( - ids.iter() - .copied() - .map(|v| rusqlite::types::Value::from(v)) - .collect::>() - )], - |row| { - Ok(( - row.get::<_, i64>(0)?, - row.get::<_, String>(1)?.into(), - row.get(2)?, - row.get(3)?, - )) - }, - )?; + let result_iter = statement.query_map(params![ids_to_sql(ids)], |row| { + Ok(( + row.get::<_, i64>(0)?, + row.get::<_, i64>(1)?, + row.get::<_, String>(2)?.into(), + row.get(3)?, + row.get(4)?, + )) + })?; - let mut values_by_id = HashMap::::default(); + let mut values_by_id = HashMap::::default(); for row in result_iter { - let (id, path, offset, name) = row?; - values_by_id.insert(id, (path, offset, name)); + let (id, worktree_id, path, offset, name) = row?; + values_by_id.insert(id, (worktree_id, path, offset, name)); } let mut results = Vec::with_capacity(ids.len()); for id in ids { - let (path, offset, name) = values_by_id + let value = values_by_id .remove(id) .ok_or(anyhow!("missing document id {}", id))?; - results.push((path, offset, name)); + results.push(value); } Ok(results) @@ -339,3 +345,12 @@ impl VectorDatabase { return Ok(documents); } } + +fn ids_to_sql(ids: &[i64]) -> Rc> { + Rc::new( + ids.iter() + .copied() + .map(|v| rusqlite::types::Value::from(v)) + .collect::>(), + ) +} diff --git a/crates/vector_store/src/modal.rs b/crates/vector_store/src/modal.rs index 48429150cd..8052277a0b 100644 --- a/crates/vector_store/src/modal.rs +++ b/crates/vector_store/src/modal.rs @@ -48,7 +48,9 @@ impl 
PickerDelegate for SemanticSearchDelegate { } fn confirm(&mut self, cx: &mut ViewContext) { - todo!() + if let Some(search_result) = self.matches.get(self.selected_match_index) { + // search_result.file_path + } } fn dismissed(&mut self, _cx: &mut ViewContext) {} @@ -66,9 +68,9 @@ impl PickerDelegate for SemanticSearchDelegate { } fn update_matches(&mut self, query: String, cx: &mut ViewContext) -> Task<()> { - let task = self - .vector_store - .update(cx, |store, cx| store.search(query.to_string(), 10, cx)); + let task = self.vector_store.update(cx, |store, cx| { + store.search(&self.project, query.to_string(), 10, cx) + }); cx.spawn(|this, mut cx| async move { let results = task.await.log_err(); @@ -90,7 +92,7 @@ impl PickerDelegate for SemanticSearchDelegate { ) -> AnyElement> { let theme = theme::current(cx); let style = &theme.picker.item; - let current_style = style.style_for(mouse_state, selected); + let current_style = style.in_state(selected).style_for(mouse_state); let search_result = &self.matches[ix]; @@ -99,7 +101,10 @@ impl PickerDelegate for SemanticSearchDelegate { Flex::column() .with_child(Text::new(name, current_style.label.text.clone()).with_soft_wrap(false)) - .with_child(Label::new(path.to_string(), style.default.label.clone())) + .with_child(Label::new( + path.to_string(), + style.inactive_state().default.label.clone(), + )) .contained() .with_style(current_style.container) .into_any() diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 2dc479045f..92926b1f75 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -8,11 +8,11 @@ mod vector_store_tests; use anyhow::{anyhow, Result}; use db::{FileSha1, VectorDatabase, VECTOR_DB_URL}; -use embedding::{EmbeddingProvider, OpenAIEmbeddings}; +use embedding::{DummyEmbeddings, EmbeddingProvider, OpenAIEmbeddings}; use gpui::{actions, AppContext, Entity, ModelContext, ModelHandle, Task, ViewContext}; use 
language::{Language, LanguageRegistry}; use modal::{SemanticSearch, SemanticSearchDelegate, Toggle}; -use project::{Fs, Project}; +use project::{Fs, Project, WorktreeId}; use smol::channel; use std::{cmp::Ordering, collections::HashMap, path::PathBuf, sync::Arc}; use tree_sitter::{Parser, QueryCursor}; @@ -36,9 +36,10 @@ pub fn init( VectorStore::new( fs, VECTOR_DB_URL.to_string(), - Arc::new(OpenAIEmbeddings { - client: http_client, - }), + // Arc::new(OpenAIEmbeddings { + // client: http_client, + // }), + Arc::new(DummyEmbeddings {}), language_registry, ) }); @@ -75,25 +76,6 @@ pub fn init( } }); SemanticSearch::init(cx); - // cx.add_action({ - // let vector_store = vector_store.clone(); - // move |workspace: &mut Workspace, _: &TestSearch, cx: &mut ViewContext| { - // let t0 = std::time::Instant::now(); - // let task = vector_store.update(cx, |store, cx| { - // store.search("compute embeddings for all of the symbols in the codebase and write them to a database".to_string(), 10, cx) - // }); - - // cx.spawn(|this, cx| async move { - // let results = task.await?; - // let duration = t0.elapsed(); - - // println!("search took {:?}", duration); - // println!("results {:?}", results); - - // anyhow::Ok(()) - // }).detach() - // } - // }); } #[derive(Debug)] @@ -108,10 +90,12 @@ pub struct VectorStore { database_url: Arc, embedding_provider: Arc, language_registry: Arc, + worktree_db_ids: Vec<(WorktreeId, i64)>, } #[derive(Debug)] pub struct SearchResult { + pub worktree_id: WorktreeId, pub name: String, pub offset: usize, pub file_path: PathBuf, @@ -129,6 +113,7 @@ impl VectorStore { database_url: database_url.into(), embedding_provider, language_registry, + worktree_db_ids: Vec::new(), } } @@ -178,9 +163,11 @@ impl VectorStore { } } - let embeddings = embedding_provider.embed_batch(context_spans).await?; - for (document, embedding) in documents.iter_mut().zip(embeddings) { - document.embedding = embedding; + if !documents.is_empty() { + let embeddings = 
embedding_provider.embed_batch(context_spans).await?; + for (document, embedding) in documents.iter_mut().zip(embeddings) { + document.embedding = embedding; + } } let sha1 = FileSha1::from_str(content); @@ -214,7 +201,7 @@ impl VectorStore { let embedding_provider = self.embedding_provider.clone(); let database_url = self.database_url.clone(); - cx.spawn(|_, cx| async move { + cx.spawn(|this, mut cx| async move { futures::future::join_all(worktree_scans_complete).await; // TODO: remove this after fixing the bug in scan_complete @@ -231,25 +218,24 @@ impl VectorStore { .collect::>() }); - let worktree_root_paths = worktrees - .iter() - .map(|worktree| worktree.abs_path().clone()) - .collect::>(); - // Here we query the worktree ids, and yet we dont have them elsewhere // We likely want to clean up these datastructures - let (db, worktree_hashes, worktree_ids) = cx + let (db, worktree_hashes, worktree_db_ids) = cx .background() - .spawn(async move { - let mut worktree_ids: HashMap = HashMap::new(); - let mut hashes: HashMap> = HashMap::new(); - for worktree_root_path in worktree_root_paths { - let worktree_id = - db.find_or_create_worktree(worktree_root_path.as_ref())?; - worktree_ids.insert(worktree_root_path.to_path_buf(), worktree_id); - hashes.insert(worktree_id, db.get_file_hashes(worktree_id)?); + .spawn({ + let worktrees = worktrees.clone(); + async move { + let mut worktree_db_ids: HashMap = HashMap::new(); + let mut hashes: HashMap> = + HashMap::new(); + for worktree in worktrees { + let worktree_db_id = + db.find_or_create_worktree(worktree.abs_path().as_ref())?; + worktree_db_ids.insert(worktree.id(), worktree_db_id); + hashes.insert(worktree.id(), db.get_file_hashes(worktree_db_id)?); + } + anyhow::Ok((db, hashes, worktree_db_ids)) } - anyhow::Ok((db, hashes, worktree_ids)) }) .await?; @@ -259,10 +245,10 @@ impl VectorStore { cx.background() .spawn({ let fs = fs.clone(); + let worktree_db_ids = worktree_db_ids.clone(); async move { for worktree in 
worktrees.into_iter() { - let worktree_id = worktree_ids[&worktree.abs_path().to_path_buf()]; - let file_hashes = &worktree_hashes[&worktree_id]; + let file_hashes = &worktree_hashes[&worktree.id()]; for file in worktree.files(false, 0) { let absolute_path = worktree.absolutize(&file.path); @@ -291,7 +277,7 @@ impl VectorStore { ); paths_tx .try_send(( - worktree_id, + worktree_db_ids[&worktree.id()], path_buf, content, language, @@ -382,54 +368,92 @@ impl VectorStore { drop(indexed_files_tx); db_write_task.await; + + this.update(&mut cx, |this, _| { + this.worktree_db_ids.extend(worktree_db_ids); + }); + anyhow::Ok(()) }) } pub fn search( &mut self, + project: &ModelHandle, phrase: String, limit: usize, cx: &mut ModelContext, ) -> Task>> { + let project = project.read(cx); + let worktree_db_ids = project + .worktrees(cx) + .filter_map(|worktree| { + let worktree_id = worktree.read(cx).id(); + self.worktree_db_ids.iter().find_map(|(id, db_id)| { + if *id == worktree_id { + Some(*db_id) + } else { + None + } + }) + }) + .collect::>(); + let embedding_provider = self.embedding_provider.clone(); let database_url = self.database_url.clone(); - cx.background().spawn(async move { - let database = VectorDatabase::new(database_url.as_ref())?; + cx.spawn(|this, cx| async move { + let documents = cx + .background() + .spawn(async move { + let database = VectorDatabase::new(database_url.as_ref())?; - let phrase_embedding = embedding_provider - .embed_batch(vec![&phrase]) - .await? - .into_iter() - .next() - .unwrap(); + let phrase_embedding = embedding_provider + .embed_batch(vec![&phrase]) + .await? 
+ .into_iter() + .next() + .unwrap(); - let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1); - database.for_each_document(0, |id, embedding| { - let similarity = dot(&embedding.0, &phrase_embedding); - let ix = match results.binary_search_by(|(_, s)| { - similarity.partial_cmp(&s).unwrap_or(Ordering::Equal) - }) { - Ok(ix) => ix, - Err(ix) => ix, - }; - results.insert(ix, (id, similarity)); - results.truncate(limit); - })?; + let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1); + database.for_each_document(&worktree_db_ids, |id, embedding| { + let similarity = dot(&embedding.0, &phrase_embedding); + let ix = match results.binary_search_by(|(_, s)| { + similarity.partial_cmp(&s).unwrap_or(Ordering::Equal) + }) { + Ok(ix) => ix, + Err(ix) => ix, + }; + results.insert(ix, (id, similarity)); + results.truncate(limit); + })?; - let ids = results.into_iter().map(|(id, _)| id).collect::>(); - let documents = database.get_documents_by_ids(&ids)?; + let ids = results.into_iter().map(|(id, _)| id).collect::>(); + database.get_documents_by_ids(&ids) + }) + .await?; - anyhow::Ok( + let results = this.read_with(&cx, |this, _| { documents .into_iter() - .map(|(file_path, offset, name)| SearchResult { - name, - offset, - file_path, + .filter_map(|(worktree_db_id, file_path, offset, name)| { + let worktree_id = this.worktree_db_ids.iter().find_map(|(id, db_id)| { + if *db_id == worktree_db_id { + Some(*id) + } else { + None + } + })?; + Some(SearchResult { + worktree_id, + name, + offset, + file_path, + }) }) - .collect(), - ) + .collect() + }); + + anyhow::Ok(results) }) } } diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs index c67bb9954f..6f8856c4fb 100644 --- a/crates/vector_store/src/vector_store_tests.rs +++ b/crates/vector_store/src/vector_store_tests.rs @@ -70,7 +70,10 @@ async fn test_vector_store(cx: &mut TestAppContext) { }); let project = Project::test(fs, ["/the-root".as_ref()], cx).await; - 
let add_project = store.update(cx, |store, cx| store.add_project(project, cx)); + let worktree_id = project.read_with(cx, |project, cx| { + project.worktrees(cx).next().unwrap().read(cx).id() + }); + let add_project = store.update(cx, |store, cx| store.add_project(project.clone(), cx)); // TODO - remove cx.foreground() @@ -79,12 +82,15 @@ async fn test_vector_store(cx: &mut TestAppContext) { add_project.await.unwrap(); let search_results = store - .update(cx, |store, cx| store.search("aaaa".to_string(), 5, cx)) + .update(cx, |store, cx| { + store.search(&project, "aaaa".to_string(), 5, cx) + }) .await .unwrap(); assert_eq!(search_results[0].offset, 0); assert_eq!(search_results[0].name, "aaa"); + assert_eq!(search_results[0].worktree_id, worktree_id); } #[test] From 85e71415fea6102001c324b08c8558abea9b07f7 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Wed, 28 Jun 2023 16:25:05 -0400 Subject: [PATCH 015/115] updated embedding database calls to maintain project consistency Co-authored-by: maxbrunsfeld --- crates/vector_store/src/db.rs | 44 ----------------- crates/vector_store/src/search.rs | 66 ------------------------- crates/vector_store/src/vector_store.rs | 1 - 3 files changed, 111 deletions(-) delete mode 100644 crates/vector_store/src/search.rs diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index 96856936fc..f1453141bb 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -236,27 +236,6 @@ impl VectorDatabase { Ok(result) } - pub fn get_files(&self) -> Result> { - let mut query_statement = self - .db - .prepare("SELECT id, relative_path, sha1 FROM files")?; - let result_iter = query_statement.query_map([], |row| { - Ok(FileRecord { - id: row.get(0)?, - relative_path: row.get(1)?, - sha1: row.get(2)?, - }) - })?; - - let mut pages: HashMap = HashMap::new(); - for result in result_iter { - let result = result?; - pages.insert(result.id, result); - } - - Ok(pages) - } - pub fn for_each_document( &self, 
worktree_ids: &[i64], @@ -321,29 +300,6 @@ impl VectorDatabase { Ok(results) } - - pub fn get_documents(&self) -> Result> { - let mut query_statement = self - .db - .prepare("SELECT id, file_id, offset, name, embedding FROM documents")?; - let result_iter = query_statement.query_map([], |row| { - Ok(DocumentRecord { - id: row.get(0)?, - file_id: row.get(1)?, - offset: row.get(2)?, - name: row.get(3)?, - embedding: row.get(4)?, - }) - })?; - - let mut documents: HashMap = HashMap::new(); - for result in result_iter { - let result = result?; - documents.insert(result.id, result); - } - - return Ok(documents); - } } fn ids_to_sql(ids: &[i64]) -> Rc> { diff --git a/crates/vector_store/src/search.rs b/crates/vector_store/src/search.rs deleted file mode 100644 index 90a8d874da..0000000000 --- a/crates/vector_store/src/search.rs +++ /dev/null @@ -1,66 +0,0 @@ -use std::{cmp::Ordering, path::PathBuf}; - -use async_trait::async_trait; -use ndarray::{Array1, Array2}; - -use crate::db::{DocumentRecord, VectorDatabase}; -use anyhow::Result; - -#[async_trait] -pub trait VectorSearch { - // Given a query vector, and a limit to return - // Return a vector of id, distance tuples. 
- async fn top_k_search(&mut self, vec: &Vec, limit: usize) -> Vec<(usize, f32)>; -} - -pub struct BruteForceSearch { - document_ids: Vec, - candidate_array: ndarray::Array2, -} - -impl BruteForceSearch { - pub fn load(db: &VectorDatabase) -> Result { - let documents = db.get_documents()?; - let embeddings: Vec<&DocumentRecord> = documents.values().into_iter().collect(); - let mut document_ids = vec![]; - for i in documents.keys() { - document_ids.push(i.to_owned()); - } - - let mut candidate_array = Array2::::default((documents.len(), 1536)); - for (i, mut row) in candidate_array.axis_iter_mut(ndarray::Axis(0)).enumerate() { - for (j, col) in row.iter_mut().enumerate() { - *col = embeddings[i].embedding.0[j]; - } - } - - return Ok(BruteForceSearch { - document_ids, - candidate_array, - }); - } -} - -#[async_trait] -impl VectorSearch for BruteForceSearch { - async fn top_k_search(&mut self, vec: &Vec, limit: usize) -> Vec<(usize, f32)> { - let target = Array1::from_vec(vec.to_owned()); - - let similarities = self.candidate_array.dot(&target); - - let similarities = similarities.to_vec(); - - // construct a tuple vector from the floats, the tuple being (index,float) - let mut with_indices = similarities - .iter() - .copied() - .enumerate() - .map(|(index, value)| (self.document_ids[index], value)) - .collect::>(); - - // sort the tuple vector by float - with_indices.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); - with_indices.truncate(limit); - with_indices - } -} diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 92926b1f75..a66c2d65ba 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -1,7 +1,6 @@ mod db; mod embedding; mod modal; -mod search; #[cfg(test)] mod vector_store_tests; From fd68a2afaec50423b714b615157857278b038321 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 28 Jun 2023 15:02:20 -0700 Subject: [PATCH 016/115] Debounce 
searches in semantic search modal --- crates/vector_store/src/modal.rs | 31 ++++++++++++++++--------- crates/vector_store/src/vector_store.rs | 11 +++++---- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/crates/vector_store/src/modal.rs b/crates/vector_store/src/modal.rs index 8052277a0b..1ca59c5585 100644 --- a/crates/vector_store/src/modal.rs +++ b/crates/vector_store/src/modal.rs @@ -1,15 +1,16 @@ -use std::sync::Arc; - +use crate::{SearchResult, VectorStore}; use gpui::{ actions, elements::*, AnyElement, AppContext, ModelHandle, MouseState, Task, ViewContext, WeakViewHandle, }; use picker::{Picker, PickerDelegate, PickerEvent}; use project::Project; +use std::{sync::Arc, time::Duration}; use util::ResultExt; use workspace::Workspace; -use crate::{SearchResult, VectorStore}; +const MIN_QUERY_LEN: usize = 5; +const EMBEDDING_DEBOUNCE_INTERVAL: Duration = Duration::from_millis(500); actions!(semantic_search, [Toggle]); @@ -68,18 +69,26 @@ impl PickerDelegate for SemanticSearchDelegate { } fn update_matches(&mut self, query: String, cx: &mut ViewContext) -> Task<()> { - let task = self.vector_store.update(cx, |store, cx| { - store.search(&self.project, query.to_string(), 10, cx) - }); + if query.len() < MIN_QUERY_LEN { + return Task::ready(()); + } + let vector_store = self.vector_store.clone(); + let project = self.project.clone(); cx.spawn(|this, mut cx| async move { - let results = task.await.log_err(); - this.update(&mut cx, |this, cx| { - if let Some(results) = results { + cx.background().timer(EMBEDDING_DEBOUNCE_INTERVAL).await; + + let task = vector_store.update(&mut cx, |store, cx| { + store.search(&project, query.to_string(), 10, cx) + }); + + if let Some(results) = task.await.log_err() { + this.update(&mut cx, |this, _| { let delegate = this.delegate_mut(); delegate.matches = results; - } - }); + }) + .ok(); + } }) } diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index a66c2d65ba..c37a50e4de 
100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -31,14 +31,14 @@ pub fn init( language_registry: Arc, cx: &mut AppContext, ) { - let vector_store = cx.add_model(|cx| { + let vector_store = cx.add_model(|_| { VectorStore::new( fs, VECTOR_DB_URL.to_string(), - // Arc::new(OpenAIEmbeddings { - // client: http_client, - // }), - Arc::new(DummyEmbeddings {}), + // Arc::new(DummyEmbeddings {}), + Arc::new(OpenAIEmbeddings { + client: http_client, + }), language_registry, ) }); @@ -74,6 +74,7 @@ pub fn init( }) } }); + SemanticSearch::init(cx); } From a08d60fc61a307bc838b7f53930bc8be2c6bcb37 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Thu, 29 Jun 2023 11:58:47 -0400 Subject: [PATCH 017/115] added navigation on confirm to semantic search modal --- Cargo.lock | 1 + crates/vector_store/Cargo.toml | 1 + crates/vector_store/src/modal.rs | 36 ++++++++++++++++++++++--- crates/vector_store/src/vector_store.rs | 33 ----------------------- 4 files changed, 35 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fb8c719278..dbdf7f5774 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8041,6 +8041,7 @@ dependencies = [ "anyhow", "async-trait", "bincode", + "editor", "futures 0.3.28", "gpui", "isahc", diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml index ddfef6927b..4ecd46cb92 100644 --- a/crates/vector_store/Cargo.toml +++ b/crates/vector_store/Cargo.toml @@ -16,6 +16,7 @@ workspace = { path = "../workspace" } util = { path = "../util" } picker = { path = "../picker" } theme = { path = "../theme" } +editor = { path = "../editor" } anyhow.workspace = true futures.workspace = true smol.workspace = true diff --git a/crates/vector_store/src/modal.rs b/crates/vector_store/src/modal.rs index 1ca59c5585..e857aa2ab2 100644 --- a/crates/vector_store/src/modal.rs +++ b/crates/vector_store/src/modal.rs @@ -1,11 +1,12 @@ use crate::{SearchResult, VectorStore}; +use 
editor::{scroll::autoscroll::Autoscroll, Editor}; use gpui::{ actions, elements::*, AnyElement, AppContext, ModelHandle, MouseState, Task, ViewContext, WeakViewHandle, }; use picker::{Picker, PickerDelegate, PickerEvent}; -use project::Project; -use std::{sync::Arc, time::Duration}; +use project::{Project, ProjectPath}; +use std::{path::Path, sync::Arc, time::Duration}; use util::ResultExt; use workspace::Workspace; @@ -50,7 +51,34 @@ impl PickerDelegate for SemanticSearchDelegate { fn confirm(&mut self, cx: &mut ViewContext) { if let Some(search_result) = self.matches.get(self.selected_match_index) { - // search_result.file_path + // Open Buffer + let search_result = search_result.clone(); + let buffer = self.project.update(cx, |project, cx| { + project.open_buffer( + ProjectPath { + worktree_id: search_result.worktree_id, + path: search_result.file_path.clone().into(), + }, + cx, + ) + }); + + let workspace = self.workspace.clone(); + let position = search_result.clone().offset; + cx.spawn(|_, mut cx| async move { + let buffer = buffer.await?; + workspace.update(&mut cx, |workspace, cx| { + let editor = workspace.open_project_item::(buffer, cx); + editor.update(cx, |editor, cx| { + editor.change_selections(Some(Autoscroll::center()), cx, |s| { + s.select_ranges([position..position]) + }); + }); + })?; + Ok::<_, anyhow::Error>(()) + }) + .detach_and_log_err(cx); + cx.emit(PickerEvent::Dismiss); } } @@ -78,6 +106,8 @@ impl PickerDelegate for SemanticSearchDelegate { cx.spawn(|this, mut cx| async move { cx.background().timer(EMBEDDING_DEBOUNCE_INTERVAL).await; + log::info!("Searching for {:?}", &query); + let task = vector_store.update(&mut cx, |store, cx| { store.search(&project, query.to_string(), 10, cx) }); diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index c37a50e4de..641fdd86f2 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -294,43 +294,10 @@ impl 
VectorStore { let db_write_task = cx.background().spawn( async move { - // Initialize Database, creates database and tables if not exists while let Ok((worktree_id, indexed_file)) = indexed_files_rx.recv().await { db.insert_file(worktree_id, indexed_file).log_err(); } - // ALL OF THE BELOW IS FOR TESTING, - // This should be removed as we find and appropriate place for evaluate our search. - - // let queries = vec![ - // "compute embeddings for all of the symbols in the codebase, and write them to a database", - // "compute an outline view of all of the symbols in a buffer", - // "scan a directory on the file system and load all of its children into an in-memory snapshot", - // ]; - // let embeddings = embedding_provider.embed_batch(queries.clone()).await?; - - // let t2 = Instant::now(); - // let documents = db.get_documents().unwrap(); - // let files = db.get_files().unwrap(); - // println!("Retrieving all documents from Database: {}", t2.elapsed().as_millis()); - - // let t1 = Instant::now(); - // let mut bfs = BruteForceSearch::load(&db).unwrap(); - // println!("Loading BFS to Memory: {:?}", t1.elapsed().as_millis()); - // for (idx, embed) in embeddings.into_iter().enumerate() { - // let t0 = Instant::now(); - // println!("\nQuery: {:?}", queries[idx]); - // let results = bfs.top_k_search(&embed, 5).await; - // println!("Search Elapsed: {}", t0.elapsed().as_millis()); - // for (id, distance) in results { - // println!(""); - // println!(" distance: {:?}", distance); - // println!(" document: {:?}", documents[&id].name); - // println!(" path: {:?}", files[&documents[&id].file_id].relative_path); - // } - - // } - anyhow::Ok(()) } .log_err(), From 0a7245a583667789dde8d03f9be07117f73e1e31 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Thu, 29 Jun 2023 13:50:49 -0400 Subject: [PATCH 018/115] updated semantic search modal to manage for duplicate queries --- crates/vector_store/src/modal.rs | 45 ++++++++++++++++++------- crates/vector_store/src/vector_store.rs | 6 
++-- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/crates/vector_store/src/modal.rs b/crates/vector_store/src/modal.rs index e857aa2ab2..4d377c6819 100644 --- a/crates/vector_store/src/modal.rs +++ b/crates/vector_store/src/modal.rs @@ -6,7 +6,7 @@ use gpui::{ }; use picker::{Picker, PickerDelegate, PickerEvent}; use project::{Project, ProjectPath}; -use std::{path::Path, sync::Arc, time::Duration}; +use std::{collections::HashMap, sync::Arc, time::Duration}; use util::ResultExt; use workspace::Workspace; @@ -23,6 +23,7 @@ pub struct SemanticSearchDelegate { vector_store: ModelHandle, selected_match_index: usize, matches: Vec, + history: HashMap>, } impl SemanticSearchDelegate { @@ -40,6 +41,7 @@ impl SemanticSearchDelegate { vector_store, selected_match_index: 0, matches: vec![], + history: HashMap::new(), } } } @@ -97,7 +99,9 @@ impl PickerDelegate for SemanticSearchDelegate { } fn update_matches(&mut self, query: String, cx: &mut ViewContext) -> Task<()> { + log::info!("Searching for {:?}...", query); if query.len() < MIN_QUERY_LEN { + log::info!("Query below minimum length"); return Task::ready(()); } @@ -106,18 +110,35 @@ impl PickerDelegate for SemanticSearchDelegate { cx.spawn(|this, mut cx| async move { cx.background().timer(EMBEDDING_DEBOUNCE_INTERVAL).await; - log::info!("Searching for {:?}", &query); - - let task = vector_store.update(&mut cx, |store, cx| { - store.search(&project, query.to_string(), 10, cx) + let retrieved_cached = this.update(&mut cx, |this, _| { + let delegate = this.delegate_mut(); + if delegate.history.contains_key(&query) { + let historic_results = delegate.history.get(&query).unwrap().to_owned(); + delegate.matches = historic_results.clone(); + true + } else { + false + } }); - if let Some(results) = task.await.log_err() { - this.update(&mut cx, |this, _| { - let delegate = this.delegate_mut(); - delegate.matches = results; - }) - .ok(); + if let Some(retrieved) = retrieved_cached.log_err() { + if !retrieved { + 
let task = vector_store.update(&mut cx, |store, cx| { + store.search(&project, query.to_string(), 10, cx) + }); + + if let Some(results) = task.await.log_err() { + log::info!("Not queried previously, searching..."); + this.update(&mut cx, |this, _| { + let delegate = this.delegate_mut(); + delegate.matches = results.clone(); + delegate.history.insert(query, results); + }) + .ok(); + } + } else { + log::info!("Already queried, retrieved directly from cached history"); + } } }) } @@ -135,7 +156,7 @@ impl PickerDelegate for SemanticSearchDelegate { let search_result = &self.matches[ix]; - let mut path = search_result.file_path.to_string_lossy(); + let path = search_result.file_path.to_string_lossy(); let name = search_result.name.clone(); Flex::column() diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 641fdd86f2..b3894f3686 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -7,8 +7,8 @@ mod vector_store_tests; use anyhow::{anyhow, Result}; use db::{FileSha1, VectorDatabase, VECTOR_DB_URL}; -use embedding::{DummyEmbeddings, EmbeddingProvider, OpenAIEmbeddings}; -use gpui::{actions, AppContext, Entity, ModelContext, ModelHandle, Task, ViewContext}; +use embedding::{EmbeddingProvider, OpenAIEmbeddings}; +use gpui::{AppContext, Entity, ModelContext, ModelHandle, Task, ViewContext}; use language::{Language, LanguageRegistry}; use modal::{SemanticSearch, SemanticSearchDelegate, Toggle}; use project::{Fs, Project, WorktreeId}; @@ -93,7 +93,7 @@ pub struct VectorStore { worktree_db_ids: Vec<(WorktreeId, i64)>, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct SearchResult { pub worktree_id: WorktreeId, pub name: String, From 39137fc19f001a5ab3d24d54a2c7ebaa50ca4d06 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Thu, 29 Jun 2023 15:18:32 -0400 Subject: [PATCH 019/115] updated vector_store db to leverage EMBEDDINGS_DIR path --- crates/util/src/paths.rs | 1 + 
crates/vector_store/src/db.rs | 7 +--- crates/vector_store/src/vector_store.rs | 33 +++++++++++++------ crates/vector_store/src/vector_store_tests.rs | 2 +- 4 files changed, 26 insertions(+), 17 deletions(-) diff --git a/crates/util/src/paths.rs b/crates/util/src/paths.rs index 7ef55a9918..5df0ed12e9 100644 --- a/crates/util/src/paths.rs +++ b/crates/util/src/paths.rs @@ -6,6 +6,7 @@ lazy_static::lazy_static! { pub static ref HOME: PathBuf = dirs::home_dir().expect("failed to determine home directory"); pub static ref CONFIG_DIR: PathBuf = HOME.join(".config").join("zed"); pub static ref CONVERSATIONS_DIR: PathBuf = HOME.join(".config/zed/conversations"); + pub static ref EMBEDDINGS_DIR: PathBuf = HOME.join(".config/zed/embeddings"); pub static ref LOGS_DIR: PathBuf = HOME.join("Library/Logs/Zed"); pub static ref SUPPORT_DIR: PathBuf = HOME.join("Library/Application Support/Zed"); pub static ref LANGUAGES_DIR: PathBuf = HOME.join("Library/Application Support/Zed/languages"); diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index f1453141bb..768df8069f 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -15,11 +15,6 @@ use sha1::{Digest, Sha1}; use crate::IndexedFile; -// This is saving to a local database store within the users dev zed path -// Where do we want this to sit? -// Assuming near where the workspace DB sits. 
-pub const VECTOR_DB_URL: &str = "embeddings_db"; - // Note this is not an appropriate document #[derive(Debug)] pub struct DocumentRecord { @@ -109,7 +104,7 @@ pub struct VectorDatabase { } impl VectorDatabase { - pub fn new(path: &str) -> Result { + pub fn new(path: String) -> Result { let this = Self { db: rusqlite::Connection::open(path)?, }; diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index b3894f3686..47d6932685 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -6,16 +6,23 @@ mod modal; mod vector_store_tests; use anyhow::{anyhow, Result}; -use db::{FileSha1, VectorDatabase, VECTOR_DB_URL}; +use db::{FileSha1, VectorDatabase}; use embedding::{EmbeddingProvider, OpenAIEmbeddings}; use gpui::{AppContext, Entity, ModelContext, ModelHandle, Task, ViewContext}; use language::{Language, LanguageRegistry}; use modal::{SemanticSearch, SemanticSearchDelegate, Toggle}; use project::{Fs, Project, WorktreeId}; use smol::channel; -use std::{cmp::Ordering, collections::HashMap, path::PathBuf, sync::Arc}; +use std::{ + cmp::Ordering, + collections::HashMap, + path::{Path, PathBuf}, + sync::Arc, +}; use tree_sitter::{Parser, QueryCursor}; -use util::{http::HttpClient, ResultExt, TryFutureExt}; +use util::{ + channel::RELEASE_CHANNEL_NAME, http::HttpClient, paths::EMBEDDINGS_DIR, ResultExt, TryFutureExt, +}; use workspace::{Workspace, WorkspaceCreated}; #[derive(Debug)] @@ -31,11 +38,14 @@ pub fn init( language_registry: Arc, cx: &mut AppContext, ) { + let db_file_path = EMBEDDINGS_DIR + .join(Path::new(RELEASE_CHANNEL_NAME.as_str())) + .join("embeddings_db"); + let vector_store = cx.add_model(|_| { VectorStore::new( fs, - VECTOR_DB_URL.to_string(), - // Arc::new(DummyEmbeddings {}), + db_file_path, Arc::new(OpenAIEmbeddings { client: http_client, }), @@ -87,7 +97,7 @@ pub struct IndexedFile { pub struct VectorStore { fs: Arc, - database_url: Arc, + database_url: Arc, 
embedding_provider: Arc, language_registry: Arc, worktree_db_ids: Vec<(WorktreeId, i64)>, @@ -104,13 +114,13 @@ pub struct SearchResult { impl VectorStore { fn new( fs: Arc, - database_url: String, + database_url: PathBuf, embedding_provider: Arc, language_registry: Arc, ) -> Self { Self { fs, - database_url: database_url.into(), + database_url: Arc::new(database_url), embedding_provider, language_registry, worktree_db_ids: Vec::new(), @@ -209,7 +219,10 @@ impl VectorStore { .timer(std::time::Duration::from_secs(3)) .await; - let db = VectorDatabase::new(&database_url)?; + if let Some(db_directory) = database_url.parent() { + fs.create_dir(db_directory).await.log_err(); + } + let db = VectorDatabase::new(database_url.to_string_lossy().into())?; let worktrees = project.read_with(&cx, |project, cx| { project @@ -372,7 +385,7 @@ impl VectorStore { let documents = cx .background() .spawn(async move { - let database = VectorDatabase::new(database_url.as_ref())?; + let database = VectorDatabase::new(database_url.to_string_lossy().into())?; let phrase_embedding = embedding_provider .embed_batch(vec![&phrase]) diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs index 6f8856c4fb..e232ba9f21 100644 --- a/crates/vector_store/src/vector_store_tests.rs +++ b/crates/vector_store/src/vector_store_tests.rs @@ -63,7 +63,7 @@ async fn test_vector_store(cx: &mut TestAppContext) { let store = cx.add_model(|_| { VectorStore::new( fs.clone(), - db_path.to_string_lossy().to_string(), + db_path, Arc::new(FakeEmbeddingProvider), languages, ) From e3ab54942ee46b4395f20ccdaba31c838223b4cb Mon Sep 17 00:00:00 2001 From: KCaverly Date: Fri, 30 Jun 2023 10:17:31 -0400 Subject: [PATCH 020/115] removed sleep from directory scanning as fixes upstream appear to be scanning correctly --- crates/vector_store/src/vector_store.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/crates/vector_store/src/vector_store.rs 
b/crates/vector_store/src/vector_store.rs index 47d6932685..9e589e010f 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -214,11 +214,6 @@ impl VectorStore { cx.spawn(|this, mut cx| async move { futures::future::join_all(worktree_scans_complete).await; - // TODO: remove this after fixing the bug in scan_complete - cx.background() - .timer(std::time::Duration::from_secs(3)) - .await; - if let Some(db_directory) = database_url.parent() { fs.create_dir(db_directory).await.log_err(); } From 0db0876289c9bb96706fbd997c52df1a33191b13 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Fri, 30 Jun 2023 11:01:35 -0400 Subject: [PATCH 021/115] implemented file deletes on project indexing --- crates/vector_store/src/db.rs | 9 +++++- crates/vector_store/src/vector_store.rs | 39 ++++++++++++++++--------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index 768df8069f..fec2980550 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -154,9 +154,16 @@ impl VectorDatabase { Ok(()) } + pub fn delete_file(&self, worktree_id: i64, delete_path: PathBuf) -> Result<()> { + self.db.execute( + "DELETE FROM files WHERE worktree_id = ?1 AND relative_path = ?2", + params![worktree_id, delete_path.to_str()], + )?; + Ok(()) + } + pub fn insert_file(&self, worktree_id: i64, indexed_file: IndexedFile) -> Result<()> { // Write to files table, and return generated id. 
- log::info!("Inserting File!"); self.db.execute( " DELETE FROM files WHERE worktree_id = ?1 AND relative_path = ?2; diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 9e589e010f..876a6018b8 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -15,7 +15,7 @@ use project::{Fs, Project, WorktreeId}; use smol::channel; use std::{ cmp::Ordering, - collections::HashMap, + collections::{HashMap, HashSet}, path::{Path, PathBuf}, sync::Arc, }; @@ -201,7 +201,6 @@ impl VectorStore { let scan_complete = worktree.read(cx).as_local().unwrap().scan_complete(); async move { scan_complete.await; - log::info!("worktree scan completed"); } }) .collect::>(); @@ -249,6 +248,7 @@ impl VectorStore { let (paths_tx, paths_rx) = channel::unbounded::<(i64, PathBuf, String, Arc)>(); + let (delete_paths_tx, delete_paths_rx) = channel::unbounded::<(i64, PathBuf)>(); let (indexed_files_tx, indexed_files_rx) = channel::unbounded::<(i64, IndexedFile)>(); cx.background() .spawn({ @@ -257,6 +257,8 @@ impl VectorStore { async move { for worktree in worktrees.into_iter() { let file_hashes = &worktree_hashes[&worktree.id()]; + let mut files_included = + file_hashes.keys().collect::>(); for file in worktree.files(false, 0) { let absolute_path = worktree.absolutize(&file.path); @@ -269,20 +271,16 @@ impl VectorStore { } if let Some(content) = fs.load(&absolute_path).await.log_err() { - log::info!("loaded file: {absolute_path:?}"); - let path_buf = file.path.to_path_buf(); - let already_stored = file_hashes - .get(&path_buf) - .map_or(false, |existing_hash| { + let already_stored = file_hashes.get(&path_buf).map_or( + false, + |existing_hash| { + files_included.remove(&path_buf); existing_hash.equals(&content) - }); + }, + ); if !already_stored { - log::info!( - "File Changed (Sending to Parse): {:?}", - &path_buf - ); paths_tx .try_send(( worktree_db_ids[&worktree.id()], @@ -295,17 +293,30 @@ impl 
VectorStore { } } } + for file in files_included { + delete_paths_tx + .try_send((worktree_db_ids[&worktree.id()], file.to_owned())) + .unwrap(); + } } } }) .detach(); - let db_write_task = cx.background().spawn( + let db_update_task = cx.background().spawn( async move { + // Inserting all new files while let Ok((worktree_id, indexed_file)) = indexed_files_rx.recv().await { + log::info!("Inserting File: {:?}", &indexed_file.path); db.insert_file(worktree_id, indexed_file).log_err(); } + // Deleting all old files + while let Ok((worktree_id, delete_path)) = delete_paths_rx.recv().await { + log::info!("Deleting File: {:?}", &delete_path); + db.delete_file(worktree_id, delete_path).log_err(); + } + anyhow::Ok(()) } .log_err(), @@ -342,7 +353,7 @@ impl VectorStore { .await; drop(indexed_files_tx); - db_write_task.await; + db_update_task.await; this.update(&mut cx, |this, _| { this.worktree_db_ids.extend(worktree_db_ids); From 36907bb4dc604c2715242d7cedfc04cde7cf60ff Mon Sep 17 00:00:00 2001 From: KCaverly Date: Fri, 30 Jun 2023 16:14:11 -0400 Subject: [PATCH 022/115] updated vector store indexing to only use languages with an embedding.scm treesitter query Co-authored-by: maxbrunsfeld --- crates/language/src/language.rs | 44 +++++++++++++++++++ crates/vector_store/src/vector_store.rs | 22 +++++++--- crates/vector_store/src/vector_store_tests.rs | 2 +- crates/zed/src/languages.rs | 1 + crates/zed/src/languages/rust/embedding.scm | 36 +++++++++++++++ 5 files changed, 98 insertions(+), 7 deletions(-) create mode 100644 crates/zed/src/languages/rust/embedding.scm diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index b880cbc8d7..4ef9d25894 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -350,6 +350,7 @@ pub struct LanguageQueries { pub brackets: Option>, pub indents: Option>, pub outline: Option>, + pub embedding: Option>, pub injections: Option>, pub overrides: Option>, } @@ -495,6 +496,7 @@ pub struct 
Grammar { pub(crate) brackets_config: Option, pub(crate) indents_config: Option, pub outline_config: Option, + pub embedding_config: Option, pub(crate) injection_config: Option, pub(crate) override_config: Option, pub(crate) highlight_map: Mutex, @@ -516,6 +518,15 @@ pub struct OutlineConfig { pub extra_context_capture_ix: Option, } +#[derive(Debug)] +pub struct EmbeddingConfig { + pub query: Query, + pub item_capture_ix: u32, + pub name_capture_ix: u32, + pub context_capture_ix: Option, + pub extra_context_capture_ix: Option, +} + struct InjectionConfig { query: Query, content_capture_ix: u32, @@ -1145,6 +1156,7 @@ impl Language { highlights_query: None, brackets_config: None, outline_config: None, + embedding_config: None, indents_config: None, injection_config: None, override_config: None, @@ -1181,6 +1193,9 @@ impl Language { if let Some(query) = queries.outline { self = self.with_outline_query(query.as_ref())?; } + if let Some(query) = queries.embedding { + self = self.with_embedding_query(query.as_ref())?; + } if let Some(query) = queries.injections { self = self.with_injection_query(query.as_ref())?; } @@ -1189,6 +1204,7 @@ impl Language { } Ok(self) } + pub fn with_highlights_query(mut self, source: &str) -> Result { let grammar = self.grammar_mut(); grammar.highlights_query = Some(Query::new(grammar.ts_language, source)?); @@ -1223,6 +1239,34 @@ impl Language { Ok(self) } + pub fn with_embedding_query(mut self, source: &str) -> Result { + let grammar = self.grammar_mut(); + let query = Query::new(grammar.ts_language, source)?; + let mut item_capture_ix = None; + let mut name_capture_ix = None; + let mut context_capture_ix = None; + let mut extra_context_capture_ix = None; + get_capture_indices( + &query, + &mut [ + ("item", &mut item_capture_ix), + ("name", &mut name_capture_ix), + ("context", &mut context_capture_ix), + ("context.extra", &mut extra_context_capture_ix), + ], + ); + if let Some((item_capture_ix, name_capture_ix)) = 
item_capture_ix.zip(name_capture_ix) { + grammar.embedding_config = Some(EmbeddingConfig { + query, + item_capture_ix, + name_capture_ix, + context_capture_ix, + extra_context_capture_ix, + }); + } + Ok(self) + } + pub fn with_brackets_query(mut self, source: &str) -> Result { let grammar = self.grammar_mut(); let query = Query::new(grammar.ts_language, source)?; diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 876a6018b8..35a467b82f 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -136,8 +136,8 @@ impl VectorStore { content: String, ) -> Result { let grammar = language.grammar().ok_or_else(|| anyhow!("no grammar"))?; - let outline_config = grammar - .outline_config + let embedding_config = grammar + .embedding_config .as_ref() .ok_or_else(|| anyhow!("no outline query"))?; @@ -148,13 +148,17 @@ impl VectorStore { let mut documents = Vec::new(); let mut context_spans = Vec::new(); - for mat in cursor.matches(&outline_config.query, tree.root_node(), content.as_bytes()) { + for mat in cursor.matches( + &embedding_config.query, + tree.root_node(), + content.as_bytes(), + ) { let mut item_range = None; let mut name_range = None; for capture in mat.captures { - if capture.index == outline_config.item_capture_ix { + if capture.index == embedding_config.item_capture_ix { item_range = Some(capture.node.byte_range()); - } else if capture.index == outline_config.name_capture_ix { + } else if capture.index == embedding_config.name_capture_ix { name_range = Some(capture.node.byte_range()); } } @@ -266,7 +270,11 @@ impl VectorStore { .language_for_file(&absolute_path, None) .await { - if language.name().as_ref() != "Rust" { + if language + .grammar() + .and_then(|grammar| grammar.embedding_config.as_ref()) + .is_none() + { continue; } @@ -359,6 +367,8 @@ impl VectorStore { this.worktree_db_ids.extend(worktree_db_ids); }); + log::info!("Semantic Indexing Complete!"); + 
anyhow::Ok(()) }) } diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs index e232ba9f21..78470ad4be 100644 --- a/crates/vector_store/src/vector_store_tests.rs +++ b/crates/vector_store/src/vector_store_tests.rs @@ -46,7 +46,7 @@ async fn test_vector_store(cx: &mut TestAppContext) { }, Some(tree_sitter_rust::language()), ) - .with_outline_query( + .with_embedding_query( r#" (function_item name: (identifier) @name diff --git a/crates/zed/src/languages.rs b/crates/zed/src/languages.rs index 44e144e89b..820f564151 100644 --- a/crates/zed/src/languages.rs +++ b/crates/zed/src/languages.rs @@ -170,6 +170,7 @@ fn load_queries(name: &str) -> LanguageQueries { brackets: load_query(name, "/brackets"), indents: load_query(name, "/indents"), outline: load_query(name, "/outline"), + embedding: load_query(name, "/embedding"), injections: load_query(name, "/injections"), overrides: load_query(name, "/overrides"), } diff --git a/crates/zed/src/languages/rust/embedding.scm b/crates/zed/src/languages/rust/embedding.scm new file mode 100644 index 0000000000..ea8bab9f68 --- /dev/null +++ b/crates/zed/src/languages/rust/embedding.scm @@ -0,0 +1,36 @@ +(struct_item + (visibility_modifier)? @context + "struct" @context + name: (_) @name) @item + +(enum_item + (visibility_modifier)? @context + "enum" @context + name: (_) @name) @item + +(impl_item + "impl" @context + trait: (_)? @name + "for"? @context + type: (_) @name) @item + +(trait_item + (visibility_modifier)? @context + "trait" @context + name: (_) @name) @item + +(function_item + (visibility_modifier)? @context + (function_modifiers)? @context + "fn" @context + name: (_) @name) @item + +(function_signature_item + (visibility_modifier)? @context + (function_modifiers)? @context + "fn" @context + name: (_) @name) @item + +(macro_definition + . "macro_rules!" 
@context + name: (_) @name) @item From 3408b98167481aa54c70839f6024bdc5cdfb2aec Mon Sep 17 00:00:00 2001 From: KCaverly Date: Fri, 30 Jun 2023 16:53:23 -0400 Subject: [PATCH 023/115] updated file compare in the semantic indexing engine, to work off of modified system times as opposed to file hashes Co-authored-by: maxbrunsfeld --- Cargo.lock | 25 +------ crates/vector_store/Cargo.toml | 4 +- crates/vector_store/src/db.rs | 99 +++++++++---------------- crates/vector_store/src/vector_store.rs | 76 +++++++++---------- 4 files changed, 74 insertions(+), 130 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 85599036a1..59cf30001e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4232,19 +4232,6 @@ dependencies = [ "tempfile", ] -[[package]] -name = "ndarray" -version = "0.15.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "rawpointer", -] - [[package]] name = "net2" version = "0.2.38" @@ -4353,15 +4340,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "num-complex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" -dependencies = [ - "num-traits", -] - [[package]] name = "num-integer" version = "0.1.45" @@ -8050,14 +8028,13 @@ dependencies = [ "lazy_static", "log", "matrixmultiply", - "ndarray", "picker", "project", "rand 0.8.5", + "rpc", "rusqlite", "serde", "serde_json", - "sha-1 0.10.1", "smol", "tempdir", "theme", diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml index 4ecd46cb92..d1ad8a0f9b 100644 --- a/crates/vector_store/Cargo.toml +++ b/crates/vector_store/Cargo.toml @@ -17,6 +17,7 @@ util = { path = "../util" } picker = { path = "../picker" } theme = { path = "../theme" } editor = { path = "../editor" } +rpc = { path = "../rpc" } 
anyhow.workspace = true futures.workspace = true smol.workspace = true @@ -29,14 +30,13 @@ serde.workspace = true serde_json.workspace = true async-trait.workspace = true bincode = "1.3.3" -ndarray = "0.15.6" -sha-1 = "0.10.1" matrixmultiply = "0.3.7" [dev-dependencies] gpui = { path = "../gpui", features = ["test-support"] } language = { path = "../language", features = ["test-support"] } project = { path = "../project", features = ["test-support"] } +rpc = { path = "../rpc", features = ["test-support"] } workspace = { path = "../workspace", features = ["test-support"] } tree-sitter-rust = "*" rand.workspace = true diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index fec2980550..f822cca77e 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -2,18 +2,17 @@ use std::{ collections::HashMap, path::{Path, PathBuf}, rc::Rc, + time::SystemTime, }; use anyhow::{anyhow, Result}; +use crate::IndexedFile; +use rpc::proto::Timestamp; use rusqlite::{ params, - types::{FromSql, FromSqlResult, ToSqlOutput, ValueRef}, - ToSql, + types::{FromSql, FromSqlResult, ValueRef}, }; -use sha1::{Digest, Sha1}; - -use crate::IndexedFile; // Note this is not an appropriate document #[derive(Debug)] @@ -29,60 +28,7 @@ pub struct DocumentRecord { pub struct FileRecord { pub id: usize, pub relative_path: String, - pub sha1: FileSha1, -} - -#[derive(Debug)] -pub struct FileSha1(pub Vec); - -impl FileSha1 { - pub fn from_str(content: String) -> Self { - let mut hasher = Sha1::new(); - hasher.update(content); - let sha1 = hasher.finalize()[..] - .into_iter() - .map(|val| val.to_owned()) - .collect::>(); - return FileSha1(sha1); - } - - pub fn equals(&self, content: &String) -> bool { - let mut hasher = Sha1::new(); - hasher.update(content); - let sha1 = hasher.finalize()[..] 
- .into_iter() - .map(|val| val.to_owned()) - .collect::>(); - - let equal = self - .0 - .clone() - .into_iter() - .zip(sha1) - .filter(|&(a, b)| a == b) - .count() - == self.0.len(); - - equal - } -} - -impl ToSql for FileSha1 { - fn to_sql(&self) -> rusqlite::Result> { - return self.0.to_sql(); - } -} - -impl FromSql for FileSha1 { - fn column_result(value: ValueRef) -> FromSqlResult { - let bytes = value.as_blob()?; - Ok(FileSha1( - bytes - .into_iter() - .map(|val| val.to_owned()) - .collect::>(), - )) - } + pub mtime: Timestamp, } #[derive(Debug)] @@ -133,7 +79,8 @@ impl VectorDatabase { id INTEGER PRIMARY KEY AUTOINCREMENT, worktree_id INTEGER NOT NULL, relative_path VARCHAR NOT NULL, - sha1 BLOB NOT NULL, + mtime_seconds INTEGER NOT NULL, + mtime_nanos INTEGER NOT NULL, FOREIGN KEY(worktree_id) REFERENCES worktrees(id) ON DELETE CASCADE )", [], @@ -170,11 +117,20 @@ impl VectorDatabase { ", params![worktree_id, indexed_file.path.to_str()], )?; + let mtime = Timestamp::from(indexed_file.mtime); self.db.execute( " - INSERT INTO files (worktree_id, relative_path, sha1) VALUES (?1, ?2, $3); + INSERT INTO files + (worktree_id, relative_path, mtime_seconds, mtime_nanos) + VALUES + (?1, ?2, $3, $4); ", - params![worktree_id, indexed_file.path.to_str(), indexed_file.sha1], + params![ + worktree_id, + indexed_file.path.to_str(), + mtime.seconds, + mtime.nanos + ], )?; let file_id = self.db.last_insert_rowid(); @@ -224,13 +180,24 @@ impl VectorDatabase { Ok(self.db.last_insert_rowid()) } - pub fn get_file_hashes(&self, worktree_id: i64) -> Result> { + pub fn get_file_mtimes(&self, worktree_id: i64) -> Result> { let mut statement = self.db.prepare( - "SELECT relative_path, sha1 FROM files WHERE worktree_id = ?1 ORDER BY relative_path", + " + SELECT relative_path, mtime_seconds, mtime_nanos + FROM files + WHERE worktree_id = ?1 + ORDER BY relative_path", )?; - let mut result: HashMap = HashMap::new(); + let mut result: HashMap = HashMap::new(); for row in 
statement.query_map(params![worktree_id], |row| { - Ok((row.get::<_, String>(0)?.into(), row.get(1)?)) + Ok(( + row.get::<_, String>(0)?.into(), + Timestamp { + seconds: row.get(1)?, + nanos: row.get(2)?, + } + .into(), + )) })? { let row = row?; result.insert(row.0, row.1); diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 35a467b82f..c329206c4b 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -6,7 +6,7 @@ mod modal; mod vector_store_tests; use anyhow::{anyhow, Result}; -use db::{FileSha1, VectorDatabase}; +use db::VectorDatabase; use embedding::{EmbeddingProvider, OpenAIEmbeddings}; use gpui::{AppContext, Entity, ModelContext, ModelHandle, Task, ViewContext}; use language::{Language, LanguageRegistry}; @@ -15,9 +15,10 @@ use project::{Fs, Project, WorktreeId}; use smol::channel; use std::{ cmp::Ordering, - collections::{HashMap, HashSet}, + collections::HashMap, path::{Path, PathBuf}, sync::Arc, + time::SystemTime, }; use tree_sitter::{Parser, QueryCursor}; use util::{ @@ -46,6 +47,7 @@ pub fn init( VectorStore::new( fs, db_file_path, + // Arc::new(embedding::DummyEmbeddings {}), Arc::new(OpenAIEmbeddings { client: http_client, }), @@ -91,7 +93,7 @@ pub fn init( #[derive(Debug)] pub struct IndexedFile { path: PathBuf, - sha1: FileSha1, + mtime: SystemTime, documents: Vec, } @@ -131,9 +133,10 @@ impl VectorStore { cursor: &mut QueryCursor, parser: &mut Parser, embedding_provider: &dyn EmbeddingProvider, + fs: &Arc, language: Arc, file_path: PathBuf, - content: String, + mtime: SystemTime, ) -> Result { let grammar = language.grammar().ok_or_else(|| anyhow!("no grammar"))?; let embedding_config = grammar @@ -141,6 +144,8 @@ impl VectorStore { .as_ref() .ok_or_else(|| anyhow!("no outline query"))?; + let content = fs.load(&file_path).await?; + parser.set_language(grammar.ts_language).unwrap(); let tree = parser .parse(&content, None) @@ -184,11 +189,9 @@ impl 
VectorStore { } } - let sha1 = FileSha1::from_str(content); - return Ok(IndexedFile { path: file_path, - sha1, + mtime, documents, }); } @@ -231,38 +234,36 @@ impl VectorStore { // Here we query the worktree ids, and yet we dont have them elsewhere // We likely want to clean up these datastructures - let (db, worktree_hashes, worktree_db_ids) = cx + let (db, mut worktree_file_times, worktree_db_ids) = cx .background() .spawn({ let worktrees = worktrees.clone(); async move { let mut worktree_db_ids: HashMap = HashMap::new(); - let mut hashes: HashMap> = + let mut file_times: HashMap> = HashMap::new(); for worktree in worktrees { let worktree_db_id = db.find_or_create_worktree(worktree.abs_path().as_ref())?; worktree_db_ids.insert(worktree.id(), worktree_db_id); - hashes.insert(worktree.id(), db.get_file_hashes(worktree_db_id)?); + file_times.insert(worktree.id(), db.get_file_mtimes(worktree_db_id)?); } - anyhow::Ok((db, hashes, worktree_db_ids)) + anyhow::Ok((db, file_times, worktree_db_ids)) } }) .await?; let (paths_tx, paths_rx) = - channel::unbounded::<(i64, PathBuf, String, Arc)>(); + channel::unbounded::<(i64, PathBuf, Arc, SystemTime)>(); let (delete_paths_tx, delete_paths_rx) = channel::unbounded::<(i64, PathBuf)>(); let (indexed_files_tx, indexed_files_rx) = channel::unbounded::<(i64, IndexedFile)>(); cx.background() .spawn({ - let fs = fs.clone(); let worktree_db_ids = worktree_db_ids.clone(); async move { for worktree in worktrees.into_iter() { - let file_hashes = &worktree_hashes[&worktree.id()]; - let mut files_included = - file_hashes.keys().collect::>(); + let mut file_mtimes = + worktree_file_times.remove(&worktree.id()).unwrap(); for file in worktree.files(false, 0) { let absolute_path = worktree.absolutize(&file.path); @@ -278,30 +279,26 @@ impl VectorStore { continue; } - if let Some(content) = fs.load(&absolute_path).await.log_err() { - let path_buf = file.path.to_path_buf(); - let already_stored = file_hashes.get(&path_buf).map_or( - false, - 
|existing_hash| { - files_included.remove(&path_buf); - existing_hash.equals(&content) - }, - ); + let path_buf = file.path.to_path_buf(); + let stored_mtime = file_mtimes.remove(&file.path.to_path_buf()); + let already_stored = stored_mtime + .map_or(false, |existing_mtime| { + existing_mtime == file.mtime + }); - if !already_stored { - paths_tx - .try_send(( - worktree_db_ids[&worktree.id()], - path_buf, - content, - language, - )) - .unwrap(); - } + if !already_stored { + paths_tx + .try_send(( + worktree_db_ids[&worktree.id()], + path_buf, + language, + file.mtime, + )) + .unwrap(); } } } - for file in files_included { + for file in file_mtimes.keys() { delete_paths_tx .try_send((worktree_db_ids[&worktree.id()], file.to_owned())) .unwrap(); @@ -336,16 +333,17 @@ impl VectorStore { scope.spawn(async { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); - while let Ok((worktree_id, file_path, content, language)) = + while let Ok((worktree_id, file_path, language, mtime)) = paths_rx.recv().await { if let Some(indexed_file) = Self::index_file( &mut cursor, &mut parser, embedding_provider.as_ref(), + &fs, language, file_path, - content, + mtime, ) .await .log_err() @@ -395,6 +393,8 @@ impl VectorStore { }) .collect::>(); + log::info!("Searching for: {:?}", phrase); + let embedding_provider = self.embedding_provider.clone(); let database_url = self.database_url.clone(); cx.spawn(|this, cx| async move { From 18a5a47f8ab758d0b4288871457af5aa05d1404b Mon Sep 17 00:00:00 2001 From: KCaverly Date: Fri, 30 Jun 2023 18:41:19 -0400 Subject: [PATCH 024/115] moved semantic search model to dev and preview only. moved db update tasks to long lived persistent task. 
Co-authored-by: maxbrunsfeld --- crates/project/src/project.rs | 5 + crates/vector_store/src/modal.rs | 2 +- crates/vector_store/src/vector_store.rs | 328 ++++++++++++------ crates/vector_store/src/vector_store_tests.rs | 25 +- 4 files changed, 239 insertions(+), 121 deletions(-) diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs index bbb2064da2..eb0004850c 100644 --- a/crates/project/src/project.rs +++ b/crates/project/src/project.rs @@ -260,6 +260,7 @@ pub enum Event { ActiveEntryChanged(Option), WorktreeAdded, WorktreeRemoved(WorktreeId), + WorktreeUpdatedEntries(WorktreeId, UpdatedEntriesSet), DiskBasedDiagnosticsStarted { language_server_id: LanguageServerId, }, @@ -5371,6 +5372,10 @@ impl Project { this.update_local_worktree_buffers(&worktree, changes, cx); this.update_local_worktree_language_servers(&worktree, changes, cx); this.update_local_worktree_settings(&worktree, changes, cx); + cx.emit(Event::WorktreeUpdatedEntries( + worktree.read(cx).id(), + changes.clone(), + )); } worktree::Event::UpdatedGitRepositories(updated_repos) => { this.update_local_worktree_buffers_git_repos(worktree, updated_repos, cx) diff --git a/crates/vector_store/src/modal.rs b/crates/vector_store/src/modal.rs index 4d377c6819..9225fe8786 100644 --- a/crates/vector_store/src/modal.rs +++ b/crates/vector_store/src/modal.rs @@ -124,7 +124,7 @@ impl PickerDelegate for SemanticSearchDelegate { if let Some(retrieved) = retrieved_cached.log_err() { if !retrieved { let task = vector_store.update(&mut cx, |store, cx| { - store.search(&project, query.to_string(), 10, cx) + store.search(project.clone(), query.to_string(), 10, cx) }); if let Some(results) = task.await.log_err() { diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index c329206c4b..3f0a7001ef 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -8,7 +8,11 @@ mod vector_store_tests; use anyhow::{anyhow, Result}; 
use db::VectorDatabase; use embedding::{EmbeddingProvider, OpenAIEmbeddings}; -use gpui::{AppContext, Entity, ModelContext, ModelHandle, Task, ViewContext}; +use futures::{channel::oneshot, Future}; +use gpui::{ + AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, ViewContext, + WeakModelHandle, +}; use language::{Language, LanguageRegistry}; use modal::{SemanticSearch, SemanticSearchDelegate, Toggle}; use project::{Fs, Project, WorktreeId}; @@ -22,7 +26,10 @@ use std::{ }; use tree_sitter::{Parser, QueryCursor}; use util::{ - channel::RELEASE_CHANNEL_NAME, http::HttpClient, paths::EMBEDDINGS_DIR, ResultExt, TryFutureExt, + channel::{ReleaseChannel, RELEASE_CHANNEL, RELEASE_CHANNEL_NAME}, + http::HttpClient, + paths::EMBEDDINGS_DIR, + ResultExt, }; use workspace::{Workspace, WorkspaceCreated}; @@ -39,12 +46,16 @@ pub fn init( language_registry: Arc, cx: &mut AppContext, ) { + if *RELEASE_CHANNEL == ReleaseChannel::Stable { + return; + } + let db_file_path = EMBEDDINGS_DIR .join(Path::new(RELEASE_CHANNEL_NAME.as_str())) .join("embeddings_db"); - let vector_store = cx.add_model(|_| { - VectorStore::new( + cx.spawn(move |mut cx| async move { + let vector_store = VectorStore::new( fs, db_file_path, // Arc::new(embedding::DummyEmbeddings {}), @@ -52,42 +63,49 @@ pub fn init( client: http_client, }), language_registry, + cx.clone(), ) - }); + .await?; - cx.subscribe_global::({ - let vector_store = vector_store.clone(); - move |event, cx| { - let workspace = &event.0; - if let Some(workspace) = workspace.upgrade(cx) { - let project = workspace.read(cx).project().clone(); - if project.read(cx).is_local() { - vector_store.update(cx, |store, cx| { - store.add_project(project, cx).detach(); - }); + cx.update(|cx| { + cx.subscribe_global::({ + let vector_store = vector_store.clone(); + move |event, cx| { + let workspace = &event.0; + if let Some(workspace) = workspace.upgrade(cx) { + let project = workspace.read(cx).project().clone(); + if 
project.read(cx).is_local() { + vector_store.update(cx, |store, cx| { + store.add_project(project, cx).detach(); + }); + } + } } - } - } + }) + .detach(); + + cx.add_action({ + move |workspace: &mut Workspace, _: &Toggle, cx: &mut ViewContext| { + let vector_store = vector_store.clone(); + workspace.toggle_modal(cx, |workspace, cx| { + let project = workspace.project().clone(); + let workspace = cx.weak_handle(); + cx.add_view(|cx| { + SemanticSearch::new( + SemanticSearchDelegate::new(workspace, project, vector_store), + cx, + ) + }) + }) + } + }); + + SemanticSearch::init(cx); + }); + + anyhow::Ok(()) }) .detach(); - - cx.add_action({ - move |workspace: &mut Workspace, _: &Toggle, cx: &mut ViewContext| { - let vector_store = vector_store.clone(); - workspace.toggle_modal(cx, |workspace, cx| { - let project = workspace.project().clone(); - let workspace = cx.weak_handle(); - cx.add_view(|cx| { - SemanticSearch::new( - SemanticSearchDelegate::new(workspace, project, vector_store), - cx, - ) - }) - }) - } - }); - - SemanticSearch::init(cx); } #[derive(Debug)] @@ -102,7 +120,14 @@ pub struct VectorStore { database_url: Arc, embedding_provider: Arc, language_registry: Arc, + db_update_tx: channel::Sender, + _db_update_task: Task<()>, + projects: HashMap, ProjectState>, +} + +struct ProjectState { worktree_db_ids: Vec<(WorktreeId, i64)>, + _subscription: gpui::Subscription, } #[derive(Debug, Clone)] @@ -113,20 +138,81 @@ pub struct SearchResult { pub file_path: PathBuf, } +enum DbWrite { + InsertFile { + worktree_id: i64, + indexed_file: IndexedFile, + }, + Delete { + worktree_id: i64, + path: PathBuf, + }, + FindOrCreateWorktree { + path: PathBuf, + sender: oneshot::Sender>, + }, +} + impl VectorStore { - fn new( + async fn new( fs: Arc, database_url: PathBuf, embedding_provider: Arc, language_registry: Arc, - ) -> Self { - Self { - fs, - database_url: Arc::new(database_url), - embedding_provider, - language_registry, - worktree_db_ids: Vec::new(), - } + mut cx: 
AsyncAppContext, + ) -> Result> { + let database_url = Arc::new(database_url); + + let db = cx + .background() + .spawn({ + let fs = fs.clone(); + let database_url = database_url.clone(); + async move { + if let Some(db_directory) = database_url.parent() { + fs.create_dir(db_directory).await.log_err(); + } + + let db = VectorDatabase::new(database_url.to_string_lossy().to_string())?; + anyhow::Ok(db) + } + }) + .await?; + + Ok(cx.add_model(|cx| { + let (db_update_tx, db_update_rx) = channel::unbounded(); + let _db_update_task = cx.background().spawn(async move { + while let Ok(job) = db_update_rx.recv().await { + match job { + DbWrite::InsertFile { + worktree_id, + indexed_file, + } => { + log::info!("Inserting File: {:?}", &indexed_file.path); + db.insert_file(worktree_id, indexed_file).log_err(); + } + DbWrite::Delete { worktree_id, path } => { + log::info!("Deleting File: {:?}", &path); + db.delete_file(worktree_id, path).log_err(); + } + DbWrite::FindOrCreateWorktree { path, sender } => { + let id = db.find_or_create_worktree(&path); + sender.send(id).ok(); + } + } + } + }); + + Self { + fs, + database_url, + db_update_tx, + embedding_provider, + language_registry, + projects: HashMap::new(), + _db_update_task, + } + })) } async fn index_file( @@ -196,6 +282,14 @@ impl VectorStore { }); } + fn find_or_create_worktree(&self, path: PathBuf) -> impl Future> { + let (tx, rx) = oneshot::channel(); + self.db_update_tx + .try_send(DbWrite::FindOrCreateWorktree { path, sender: tx }) + .unwrap(); + async move { rx.await? 
} + } + fn add_project( &mut self, project: ModelHandle, @@ -211,19 +305,28 @@ impl VectorStore { } }) .collect::>(); + let worktree_db_ids = project + .read(cx) + .worktrees(cx) + .map(|worktree| { + self.find_or_create_worktree(worktree.read(cx).abs_path().to_path_buf()) + }) + .collect::>(); let fs = self.fs.clone(); let language_registry = self.language_registry.clone(); let embedding_provider = self.embedding_provider.clone(); let database_url = self.database_url.clone(); + let db_update_tx = self.db_update_tx.clone(); cx.spawn(|this, mut cx| async move { futures::future::join_all(worktree_scans_complete).await; + let worktree_db_ids = futures::future::join_all(worktree_db_ids).await; + if let Some(db_directory) = database_url.parent() { fs.create_dir(db_directory).await.log_err(); } - let db = VectorDatabase::new(database_url.to_string_lossy().into())?; let worktrees = project.read_with(&cx, |project, cx| { project @@ -234,32 +337,31 @@ impl VectorStore { // Here we query the worktree ids, and yet we dont have them elsewhere // We likely want to clean up these datastructures - let (db, mut worktree_file_times, worktree_db_ids) = cx + let (mut worktree_file_times, db_ids_by_worktree_id) = cx .background() .spawn({ let worktrees = worktrees.clone(); async move { - let mut worktree_db_ids: HashMap = HashMap::new(); + let db = VectorDatabase::new(database_url.to_string_lossy().into())?; + let mut db_ids_by_worktree_id = HashMap::new(); let mut file_times: HashMap> = HashMap::new(); - for worktree in worktrees { - let worktree_db_id = - db.find_or_create_worktree(worktree.abs_path().as_ref())?; - worktree_db_ids.insert(worktree.id(), worktree_db_id); - file_times.insert(worktree.id(), db.get_file_mtimes(worktree_db_id)?); + for (worktree, db_id) in worktrees.iter().zip(worktree_db_ids) { + let db_id = db_id?; + db_ids_by_worktree_id.insert(worktree.id(), db_id); + file_times.insert(worktree.id(), db.get_file_mtimes(db_id)?); } - anyhow::Ok((db, file_times, 
worktree_db_ids)) + anyhow::Ok((file_times, db_ids_by_worktree_id)) } }) .await?; let (paths_tx, paths_rx) = channel::unbounded::<(i64, PathBuf, Arc, SystemTime)>(); - let (delete_paths_tx, delete_paths_rx) = channel::unbounded::<(i64, PathBuf)>(); - let (indexed_files_tx, indexed_files_rx) = channel::unbounded::<(i64, IndexedFile)>(); cx.background() .spawn({ - let worktree_db_ids = worktree_db_ids.clone(); + let db_ids_by_worktree_id = db_ids_by_worktree_id.clone(); + let db_update_tx = db_update_tx.clone(); async move { for worktree in worktrees.into_iter() { let mut file_mtimes = @@ -289,7 +391,7 @@ impl VectorStore { if !already_stored { paths_tx .try_send(( - worktree_db_ids[&worktree.id()], + db_ids_by_worktree_id[&worktree.id()], path_buf, language, file.mtime, @@ -299,8 +401,11 @@ impl VectorStore { } } for file in file_mtimes.keys() { - delete_paths_tx - .try_send((worktree_db_ids[&worktree.id()], file.to_owned())) + db_update_tx + .try_send(DbWrite::Delete { + worktree_id: db_ids_by_worktree_id[&worktree.id()], + path: file.to_owned(), + }) .unwrap(); } } @@ -308,25 +413,6 @@ impl VectorStore { }) .detach(); - let db_update_task = cx.background().spawn( - async move { - // Inserting all new files - while let Ok((worktree_id, indexed_file)) = indexed_files_rx.recv().await { - log::info!("Inserting File: {:?}", &indexed_file.path); - db.insert_file(worktree_id, indexed_file).log_err(); - } - - // Deleting all old files - while let Ok((worktree_id, delete_path)) = delete_paths_rx.recv().await { - log::info!("Deleting File: {:?}", &delete_path); - db.delete_file(worktree_id, delete_path).log_err(); - } - - anyhow::Ok(()) - } - .log_err(), - ); - cx.background() .scoped(|scope| { for _ in 0..cx.background().num_cpus() { @@ -348,8 +434,11 @@ impl VectorStore { .await .log_err() { - indexed_files_tx - .try_send((worktree_id, indexed_file)) + db_update_tx + .try_send(DbWrite::InsertFile { + worktree_id, + indexed_file, + }) .unwrap(); } } @@ -357,12 +446,22 @@ 
impl VectorStore { } }) .await; - drop(indexed_files_tx); - db_update_task.await; + this.update(&mut cx, |this, cx| { + let _subscription = cx.subscribe(&project, |this, project, event, cx| { + if let project::Event::WorktreeUpdatedEntries(worktree_id, changes) = event { + // + log::info!("worktree changes {:?}", changes); + } + }); - this.update(&mut cx, |this, _| { - this.worktree_db_ids.extend(worktree_db_ids); + this.projects.insert( + project.downgrade(), + ProjectState { + worktree_db_ids: db_ids_by_worktree_id.into_iter().collect(), + _subscription, + }, + ); }); log::info!("Semantic Indexing Complete!"); @@ -373,23 +472,32 @@ impl VectorStore { pub fn search( &mut self, - project: &ModelHandle, + project: ModelHandle, phrase: String, limit: usize, cx: &mut ModelContext, ) -> Task>> { - let project = project.read(cx); + let project_state = if let Some(state) = self.projects.get(&project.downgrade()) { + state + } else { + return Task::ready(Err(anyhow!("project not added"))); + }; + let worktree_db_ids = project + .read(cx) .worktrees(cx) .filter_map(|worktree| { let worktree_id = worktree.read(cx).id(); - self.worktree_db_ids.iter().find_map(|(id, db_id)| { - if *id == worktree_id { - Some(*db_id) - } else { - None - } - }) + project_state + .worktree_db_ids + .iter() + .find_map(|(id, db_id)| { + if *id == worktree_id { + Some(*db_id) + } else { + None + } + }) }) .collect::>(); @@ -428,17 +536,27 @@ impl VectorStore { }) .await?; - let results = this.read_with(&cx, |this, _| { - documents + this.read_with(&cx, |this, _| { + let project_state = if let Some(state) = this.projects.get(&project.downgrade()) { + state + } else { + return Err(anyhow!("project not added")); + }; + + Ok(documents .into_iter() .filter_map(|(worktree_db_id, file_path, offset, name)| { - let worktree_id = this.worktree_db_ids.iter().find_map(|(id, db_id)| { - if *db_id == worktree_db_id { - Some(*id) - } else { - None - } - })?; + let worktree_id = + project_state + .worktree_db_ids 
+ .iter() + .find_map(|(id, db_id)| { + if *db_id == worktree_db_id { + Some(*id) + } else { + None + } + })?; Some(SearchResult { worktree_id, name, @@ -446,10 +564,8 @@ impl VectorStore { file_path, }) }) - .collect() - }); - - anyhow::Ok(results) + .collect()) + }) }) } } diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs index 78470ad4be..51065c0ee4 100644 --- a/crates/vector_store/src/vector_store_tests.rs +++ b/crates/vector_store/src/vector_store_tests.rs @@ -5,7 +5,7 @@ use anyhow::Result; use async_trait::async_trait; use gpui::{Task, TestAppContext}; use language::{Language, LanguageConfig, LanguageRegistry}; -use project::{FakeFs, Project}; +use project::{FakeFs, Fs, Project}; use rand::Rng; use serde_json::json; use unindent::Unindent; @@ -60,14 +60,15 @@ async fn test_vector_store(cx: &mut TestAppContext) { let db_dir = tempdir::TempDir::new("vector-store").unwrap(); let db_path = db_dir.path().join("db.sqlite"); - let store = cx.add_model(|_| { - VectorStore::new( - fs.clone(), - db_path, - Arc::new(FakeEmbeddingProvider), - languages, - ) - }); + let store = VectorStore::new( + fs.clone(), + db_path, + Arc::new(FakeEmbeddingProvider), + languages, + cx.to_async(), + ) + .await + .unwrap(); let project = Project::test(fs, ["/the-root".as_ref()], cx).await; let worktree_id = project.read_with(cx, |project, cx| { @@ -75,15 +76,11 @@ async fn test_vector_store(cx: &mut TestAppContext) { }); let add_project = store.update(cx, |store, cx| store.add_project(project.clone(), cx)); - // TODO - remove - cx.foreground() - .advance_clock(std::time::Duration::from_secs(3)); - add_project.await.unwrap(); let search_results = store .update(cx, |store, cx| { - store.search(&project, "aaaa".to_string(), 5, cx) + store.search(project.clone(), "aaaa".to_string(), 5, cx) }) .await .unwrap(); From e45d3a0a635ed7d8846134c7a02514ce3733d727 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 4 Jul 2023 11:46:09 -0400 
Subject: [PATCH 025/115] WIP: initial reindexing logic worked out --- crates/vector_store/src/vector_store.rs | 100 ++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 7 deletions(-) diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 3f0a7001ef..1bdc0127b7 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -58,10 +58,10 @@ pub fn init( let vector_store = VectorStore::new( fs, db_file_path, - // Arc::new(embedding::DummyEmbeddings {}), - Arc::new(OpenAIEmbeddings { - client: http_client, - }), + Arc::new(embedding::DummyEmbeddings {}), + // Arc::new(OpenAIEmbeddings { + // client: http_client, + // }), language_registry, cx.clone(), ) @@ -362,6 +362,8 @@ impl VectorStore { .spawn({ let db_ids_by_worktree_id = db_ids_by_worktree_id.clone(); let db_update_tx = db_update_tx.clone(); + let language_registry = language_registry.clone(); + let paths_tx = paths_tx.clone(); async move { for worktree in worktrees.into_iter() { let mut file_mtimes = @@ -449,9 +451,93 @@ impl VectorStore { this.update(&mut cx, |this, cx| { let _subscription = cx.subscribe(&project, |this, project, event, cx| { - if let project::Event::WorktreeUpdatedEntries(worktree_id, changes) = event { - // - log::info!("worktree changes {:?}", changes); + if let Some(project_state) = this.projects.get(&project.downgrade()) { + let worktree_db_ids = project_state.worktree_db_ids.clone(); + + if let project::Event::WorktreeUpdatedEntries(worktree_id, changes) = event + { + // Iterate through changes + let language_registry = this.language_registry.clone(); + + let db = + VectorDatabase::new(this.database_url.to_string_lossy().into()); + if db.is_err() { + return; + } + let db = db.unwrap(); + + let worktree_db_id: Option = { + let mut found_db_id = None; + for (w_id, db_id) in worktree_db_ids.into_iter() { + if &w_id == worktree_id { + found_db_id = Some(db_id); + } + } + + found_db_id + }; + + if 
worktree_db_id.is_none() { + return; + } + let worktree_db_id = worktree_db_id.unwrap(); + + let file_mtimes = db.get_file_mtimes(worktree_db_id); + if file_mtimes.is_err() { + return; + } + + let file_mtimes = file_mtimes.unwrap(); + + smol::block_on(async move { + for change in changes.into_iter() { + let change_path = change.0.clone(); + log::info!("Change: {:?}", &change_path); + if let Ok(language) = language_registry + .language_for_file(&change_path.to_path_buf(), None) + .await + { + if language + .grammar() + .and_then(|grammar| grammar.embedding_config.as_ref()) + .is_none() + { + continue; + } + log::info!("Language found: {:?}: ", language.name()); + + // TODO: Make this a bit more defensive + let modified_time = + change_path.metadata().unwrap().modified().unwrap(); + let existing_time = + file_mtimes.get(&change_path.to_path_buf()); + let already_stored = + existing_time.map_or(false, |existing_time| { + if &modified_time != existing_time + && existing_time.elapsed().unwrap().as_secs() + > 30 + { + false + } else { + true + } + }); + + if !already_stored { + log::info!("Need to reindex: {:?}", &change_path); + // paths_tx + // .try_send(( + // worktree_db_id, + // change_path.to_path_buf(), + // language, + // modified_time, + // )) + // .unwrap(); + } + } + } + }) + } } }); From b6520a8f1d11d39055273758d59d3647f2864046 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 4 Jul 2023 14:42:12 -0400 Subject: [PATCH 026/115] updated vector_store to reindex on save after timed delay --- crates/vector_store/src/vector_store.rs | 108 +++++++++--------- crates/vector_store/src/vector_store_tests.rs | 2 +- 2 files changed, 57 insertions(+), 53 deletions(-) diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 1bdc0127b7..5189993eee 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -33,6 +33,8 @@ use util::{ }; use workspace::{Workspace, WorkspaceCreated}; +const 
REINDEXING_DELAY: u64 = 30; + #[derive(Debug)] pub struct Document { pub offset: usize, @@ -58,10 +60,10 @@ pub fn init( let vector_store = VectorStore::new( fs, db_file_path, - Arc::new(embedding::DummyEmbeddings {}), - // Arc::new(OpenAIEmbeddings { - // client: http_client, - // }), + // Arc::new(embedding::DummyEmbeddings {}), + Arc::new(OpenAIEmbeddings { + client: http_client, + }), language_registry, cx.clone(), ) @@ -121,7 +123,9 @@ pub struct VectorStore { embedding_provider: Arc, language_registry: Arc, db_update_tx: channel::Sender, + paths_tx: channel::Sender<(i64, PathBuf, Arc, SystemTime)>, _db_update_task: Task<()>, + _paths_update_task: Task<()>, projects: HashMap, ProjectState>, } @@ -203,14 +207,50 @@ impl VectorStore { } }); + let (paths_tx, paths_rx) = + channel::unbounded::<(i64, PathBuf, Arc, SystemTime)>(); + + let fs_clone = fs.clone(); + let db_update_tx_clone = db_update_tx.clone(); + let embedding_provider_clone = embedding_provider.clone(); + + let _paths_update_task = cx.background().spawn(async move { + let mut parser = Parser::new(); + let mut cursor = QueryCursor::new(); + while let Ok((worktree_id, file_path, language, mtime)) = paths_rx.recv().await { + log::info!("Parsing File: {:?}", &file_path); + if let Some(indexed_file) = Self::index_file( + &mut cursor, + &mut parser, + embedding_provider_clone.as_ref(), + &fs_clone, + language, + file_path, + mtime, + ) + .await + .log_err() + { + db_update_tx_clone + .try_send(DbWrite::InsertFile { + worktree_id, + indexed_file, + }) + .unwrap(); + } + } + }); + Self { fs, database_url, db_update_tx, + paths_tx, embedding_provider, language_registry, projects: HashMap::new(), _db_update_task, + _paths_update_task, } })) } @@ -315,9 +355,9 @@ impl VectorStore { let fs = self.fs.clone(); let language_registry = self.language_registry.clone(); - let embedding_provider = self.embedding_provider.clone(); let database_url = self.database_url.clone(); let db_update_tx = self.db_update_tx.clone(); 
+ let paths_tx = self.paths_tx.clone(); cx.spawn(|this, mut cx| async move { futures::future::join_all(worktree_scans_complete).await; @@ -356,8 +396,6 @@ impl VectorStore { }) .await?; - let (paths_tx, paths_rx) = - channel::unbounded::<(i64, PathBuf, Arc, SystemTime)>(); cx.background() .spawn({ let db_ids_by_worktree_id = db_ids_by_worktree_id.clone(); @@ -415,42 +453,8 @@ impl VectorStore { }) .detach(); - cx.background() - .scoped(|scope| { - for _ in 0..cx.background().num_cpus() { - scope.spawn(async { - let mut parser = Parser::new(); - let mut cursor = QueryCursor::new(); - while let Ok((worktree_id, file_path, language, mtime)) = - paths_rx.recv().await - { - if let Some(indexed_file) = Self::index_file( - &mut cursor, - &mut parser, - embedding_provider.as_ref(), - &fs, - language, - file_path, - mtime, - ) - .await - .log_err() - { - db_update_tx - .try_send(DbWrite::InsertFile { - worktree_id, - indexed_file, - }) - .unwrap(); - } - } - }); - } - }) - .await; - this.update(&mut cx, |this, cx| { - let _subscription = cx.subscribe(&project, |this, project, event, cx| { + let _subscription = cx.subscribe(&project, |this, project, event, _cx| { if let Some(project_state) = this.projects.get(&project.downgrade()) { let worktree_db_ids = project_state.worktree_db_ids.clone(); @@ -488,6 +492,7 @@ impl VectorStore { } let file_mtimes = file_mtimes.unwrap(); + let paths_tx = this.paths_tx.clone(); smol::block_on(async move { for change in changes.into_iter() { @@ -504,7 +509,6 @@ impl VectorStore { { continue; } - log::info!("Language found: {:?}: ", language.name()); // TODO: Make this a bit more defensive let modified_time = @@ -515,7 +519,7 @@ impl VectorStore { existing_time.map_or(false, |existing_time| { if &modified_time != existing_time && existing_time.elapsed().unwrap().as_secs() - > 30 + > REINDEXING_DELAY { false } else { @@ -525,14 +529,14 @@ impl VectorStore { if !already_stored { log::info!("Need to reindex: {:?}", &change_path); - // paths_tx - 
// .try_send(( - // worktree_db_id, - // change_path.to_path_buf(), - // language, - // modified_time, - // )) - // .unwrap(); + paths_tx + .try_send(( + worktree_db_id, + change_path.to_path_buf(), + language, + modified_time, + )) + .unwrap(); } } } diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs index 51065c0ee4..e25b737b06 100644 --- a/crates/vector_store/src/vector_store_tests.rs +++ b/crates/vector_store/src/vector_store_tests.rs @@ -5,7 +5,7 @@ use anyhow::Result; use async_trait::async_trait; use gpui::{Task, TestAppContext}; use language::{Language, LanguageConfig, LanguageRegistry}; -use project::{FakeFs, Fs, Project}; +use project::{FakeFs, Project}; use rand::Rng; use serde_json::json; use unindent::Unindent; From eff0ee3b60406c53ac2e6f7ebf6f968264e56b5e Mon Sep 17 00:00:00 2001 From: KCaverly Date: Wed, 5 Jul 2023 10:02:42 -0400 Subject: [PATCH 027/115] enabled batching for embedding calls --- crates/vector_store/src/vector_store.rs | 157 ++++++++++++++++++------ 1 file changed, 120 insertions(+), 37 deletions(-) diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 5189993eee..e072793e25 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -22,7 +22,7 @@ use std::{ collections::HashMap, path::{Path, PathBuf}, sync::Arc, - time::SystemTime, + time::{Instant, SystemTime}, }; use tree_sitter::{Parser, QueryCursor}; use util::{ @@ -34,8 +34,9 @@ use util::{ use workspace::{Workspace, WorkspaceCreated}; const REINDEXING_DELAY: u64 = 30; +const EMBEDDINGS_BATCH_SIZE: usize = 25; -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Document { pub offset: usize, pub name: String, @@ -110,7 +111,7 @@ pub fn init( .detach(); } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct IndexedFile { path: PathBuf, mtime: SystemTime, @@ -126,6 +127,7 @@ pub struct VectorStore { paths_tx: channel::Sender<(i64, 
PathBuf, Arc, SystemTime)>, _db_update_task: Task<()>, _paths_update_task: Task<()>, + _embeddings_update_task: Task<()>, projects: HashMap, ProjectState>, } @@ -184,7 +186,14 @@ impl VectorStore { .await?; Ok(cx.add_model(|cx| { + // paths_tx -> embeddings_tx -> db_update_tx + let (db_update_tx, db_update_rx) = channel::unbounded(); + let (paths_tx, paths_rx) = + channel::unbounded::<(i64, PathBuf, Arc, SystemTime)>(); + let (embeddings_tx, embeddings_rx) = + channel::unbounded::<(i64, IndexedFile, Vec)>(); + let _db_update_task = cx.background().spawn(async move { while let Ok(job) = db_update_rx.recv().await { match job { @@ -192,11 +201,9 @@ impl VectorStore { worktree_id, indexed_file, } => { - log::info!("Inserting File: {:?}", &indexed_file.path); db.insert_file(worktree_id, indexed_file).log_err(); } DbWrite::Delete { worktree_id, path } => { - log::info!("Deleting File: {:?}", &path); db.delete_file(worktree_id, path).log_err(); } DbWrite::FindOrCreateWorktree { path, sender } => { @@ -207,35 +214,116 @@ impl VectorStore { } }); - let (paths_tx, paths_rx) = - channel::unbounded::<(i64, PathBuf, Arc, SystemTime)>(); + async fn embed_batch( + embeddings_queue: Vec<(i64, IndexedFile, Vec)>, + embedding_provider: &Arc, + db_update_tx: channel::Sender, + ) -> Result<()> { + let mut embeddings_queue = embeddings_queue.clone(); + + let mut document_spans = vec![]; + for (_, _, document_span) in embeddings_queue.clone().into_iter() { + document_spans.extend(document_span); + } + + let mut embeddings = embedding_provider + .embed_batch(document_spans.iter().map(|x| &**x).collect()) + .await?; + + // This assumes the embeddings are returned in order + let t0 = Instant::now(); + let mut i = 0; + let mut j = 0; + while let Some(embedding) = embeddings.pop() { + // This has to accomodate for multiple indexed_files in a row without documents + while embeddings_queue[i].1.documents.len() == j { + i += 1; + j = 0; + } + + embeddings_queue[i].1.documents[j].embedding = 
embedding; + j += 1; + } + + for (worktree_id, indexed_file, _) in embeddings_queue.into_iter() { + // TODO: Update this so it doesnt panic + for document in indexed_file.documents.iter() { + assert!( + document.embedding.len() > 0, + "Document Embedding not Complete" + ); + } + + db_update_tx + .send(DbWrite::InsertFile { + worktree_id, + indexed_file, + }) + .await + .unwrap(); + } + + anyhow::Ok(()) + } + + let embedding_provider_clone = embedding_provider.clone(); + + let db_update_tx_clone = db_update_tx.clone(); + let _embeddings_update_task = cx.background().spawn(async move { + let mut queue_len = 0; + let mut embeddings_queue = vec![]; + let mut request_count = 0; + while let Ok((worktree_id, indexed_file, document_spans)) = + embeddings_rx.recv().await + { + queue_len += &document_spans.len(); + embeddings_queue.push((worktree_id, indexed_file, document_spans)); + + if queue_len >= EMBEDDINGS_BATCH_SIZE { + let _ = embed_batch( + embeddings_queue, + &embedding_provider_clone, + db_update_tx_clone.clone(), + ) + .await; + + embeddings_queue = vec![]; + queue_len = 0; + + request_count += 1; + } + } + + if queue_len > 0 { + let _ = embed_batch( + embeddings_queue, + &embedding_provider_clone, + db_update_tx_clone.clone(), + ) + .await; + request_count += 1; + } + }); let fs_clone = fs.clone(); - let db_update_tx_clone = db_update_tx.clone(); - let embedding_provider_clone = embedding_provider.clone(); let _paths_update_task = cx.background().spawn(async move { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); while let Ok((worktree_id, file_path, language, mtime)) = paths_rx.recv().await { - log::info!("Parsing File: {:?}", &file_path); - if let Some(indexed_file) = Self::index_file( + if let Some((indexed_file, document_spans)) = Self::index_file( &mut cursor, &mut parser, - embedding_provider_clone.as_ref(), &fs_clone, language, - file_path, + file_path.clone(), mtime, ) .await .log_err() { - db_update_tx_clone - 
.try_send(DbWrite::InsertFile { - worktree_id, - indexed_file, - }) + embeddings_tx + .try_send((worktree_id, indexed_file, document_spans)) .unwrap(); } } @@ -251,6 +339,7 @@ impl VectorStore { projects: HashMap::new(), _db_update_task, _paths_update_task, + _embeddings_update_task, } })) } @@ -258,12 +347,11 @@ impl VectorStore { async fn index_file( cursor: &mut QueryCursor, parser: &mut Parser, - embedding_provider: &dyn EmbeddingProvider, fs: &Arc, language: Arc, file_path: PathBuf, mtime: SystemTime, - ) -> Result { + ) -> Result<(IndexedFile, Vec)> { let grammar = language.grammar().ok_or_else(|| anyhow!("no grammar"))?; let embedding_config = grammar .embedding_config @@ -298,7 +386,7 @@ impl VectorStore { if let Some((item, name)) = content.get(item_range.clone()).zip(content.get(name_range)) { - context_spans.push(item); + context_spans.push(item.to_string()); documents.push(Document { name: name.to_string(), offset: item_range.start, @@ -308,18 +396,14 @@ impl VectorStore { } } - if !documents.is_empty() { - let embeddings = embedding_provider.embed_batch(context_spans).await?; - for (document, embedding) in documents.iter_mut().zip(embeddings) { - document.embedding = embedding; - } - } - - return Ok(IndexedFile { - path: file_path, - mtime, - documents, - }); + return Ok(( + IndexedFile { + path: file_path, + mtime, + documents, + }, + context_spans, + )); } fn find_or_create_worktree(&self, path: PathBuf) -> impl Future> { @@ -454,6 +538,9 @@ impl VectorStore { .detach(); this.update(&mut cx, |this, cx| { + // The below is managing for updated on save + // Currently each time a file is saved, this code is run, and for all the files that were changed, if the current time is + // greater than the previous embedded time by the REINDEXING_DELAY variable, we will send the file off to be indexed. 
let _subscription = cx.subscribe(&project, |this, project, event, _cx| { if let Some(project_state) = this.projects.get(&project.downgrade()) { let worktree_db_ids = project_state.worktree_db_ids.clone(); @@ -554,8 +641,6 @@ impl VectorStore { ); }); - log::info!("Semantic Indexing Complete!"); - anyhow::Ok(()) }) } @@ -591,8 +676,6 @@ impl VectorStore { }) .collect::>(); - log::info!("Searching for: {:?}", phrase); - let embedding_provider = self.embedding_provider.clone(); let database_url = self.database_url.clone(); cx.spawn(|this, cx| async move { From afccf608f42d9b35d6b1942ae60734f3b3e8d3a9 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Wed, 5 Jul 2023 12:39:08 -0400 Subject: [PATCH 028/115] updated both embed and parsing tasks to be multi-threaded. --- Cargo.lock | 34 +- crates/vector_store/Cargo.toml | 1 + crates/vector_store/src/embedding.rs | 27 +- crates/vector_store/src/vector_store.rs | 411 +++++++++++++----------- 4 files changed, 281 insertions(+), 192 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 59cf30001e..dbc2a1cbb0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -118,7 +118,7 @@ dependencies = [ "settings", "smol", "theme", - "tiktoken-rs", + "tiktoken-rs 0.4.2", "util", "workspace", ] @@ -737,9 +737,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" [[package]] name = "base64" -version = "0.21.0" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" +checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" [[package]] name = "base64ct" @@ -914,9 +914,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09" +checksum = "a246e68bb43f6cd9db24bea052a53e40405417c5fb372e3d1a8a7f770a564ef5" dependencies = [ 
"memchr", "once_cell", @@ -4812,7 +4812,7 @@ version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bd9647b268a3d3e14ff09c23201133a62589c658db02bb7388c7246aafe0590" dependencies = [ - "base64 0.21.0", + "base64 0.21.2", "indexmap", "line-wrap", "quick-xml", @@ -5529,7 +5529,7 @@ version = "0.11.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13293b639a097af28fc8a90f22add145a9c954e49d77da06263d58cf44d5fb91" dependencies = [ - "base64 0.21.0", + "base64 0.21.2", "bytes 1.4.0", "encoding_rs", "futures-core", @@ -5868,7 +5868,7 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b" dependencies = [ - "base64 0.21.0", + "base64 0.21.2", ] [[package]] @@ -7118,7 +7118,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ba161c549e2c0686f35f5d920e63fad5cafba2c28ad2caceaf07e5d9fa6e8c4" dependencies = [ "anyhow", - "base64 0.21.0", + "base64 0.21.2", + "bstr", + "fancy-regex", + "lazy_static", + "parking_lot 0.12.1", + "rustc-hash", +] + +[[package]] +name = "tiktoken-rs" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a99d843674a3468b4a9200a565bbe909a0152f95e82a52feae71e6bf2d4b49d" +dependencies = [ + "anyhow", + "base64 0.21.2", "bstr", "fancy-regex", "lazy_static", @@ -8038,6 +8053,7 @@ dependencies = [ "smol", "tempdir", "theme", + "tiktoken-rs 0.5.0", "tree-sitter", "tree-sitter-rust", "unindent", diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml index d1ad8a0f9b..854afe5b6e 100644 --- a/crates/vector_store/Cargo.toml +++ b/crates/vector_store/Cargo.toml @@ -31,6 +31,7 @@ serde_json.workspace = true async-trait.workspace = true bincode = "1.3.3" matrixmultiply = "0.3.7" +tiktoken-rs = "0.5.0" [dev-dependencies] gpui = { path = "../gpui", features = ["test-support"] } diff --git 
a/crates/vector_store/src/embedding.rs b/crates/vector_store/src/embedding.rs index 86d8494ab4..72b30d9424 100644 --- a/crates/vector_store/src/embedding.rs +++ b/crates/vector_store/src/embedding.rs @@ -5,8 +5,8 @@ use gpui::serde_json; use isahc::prelude::Configurable; use lazy_static::lazy_static; use serde::{Deserialize, Serialize}; -use std::env; use std::sync::Arc; +use std::{env, time::Instant}; use util::http::{HttpClient, Request}; lazy_static! { @@ -60,9 +60,34 @@ impl EmbeddingProvider for DummyEmbeddings { } } +// impl OpenAIEmbeddings { +// async fn truncate(span: &str) -> String { +// let bpe = cl100k_base().unwrap(); +// let mut tokens = bpe.encode_with_special_tokens(span); +// if tokens.len() > 8192 { +// tokens.truncate(8192); +// let result = bpe.decode(tokens); +// if result.is_ok() { +// return result.unwrap(); +// } +// } + +// return span.to_string(); +// } +// } + #[async_trait] impl EmbeddingProvider for OpenAIEmbeddings { async fn embed_batch(&self, spans: Vec<&str>) -> Result>> { + // Truncate spans to 8192 if needed + // let t0 = Instant::now(); + // let mut truncated_spans = vec![]; + // for span in spans { + // truncated_spans.push(Self::truncate(span)); + // } + // let spans = futures::future::join_all(truncated_spans).await; + // log::info!("Truncated Spans in {:?}", t0.elapsed().as_secs()); + let api_key = OPENAI_API_KEY .as_ref() .ok_or_else(|| anyhow!("no api key"))?; diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index e072793e25..a63674bc34 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -34,7 +34,7 @@ use util::{ use workspace::{Workspace, WorkspaceCreated}; const REINDEXING_DELAY: u64 = 30; -const EMBEDDINGS_BATCH_SIZE: usize = 25; +const EMBEDDINGS_BATCH_SIZE: usize = 150; #[derive(Debug, Clone)] pub struct Document { @@ -74,6 +74,7 @@ pub fn init( cx.subscribe_global::({ let vector_store = vector_store.clone(); move |event, 
cx| { + let t0 = Instant::now(); let workspace = &event.0; if let Some(workspace) = workspace.upgrade(cx) { let project = workspace.read(cx).project().clone(); @@ -124,10 +125,14 @@ pub struct VectorStore { embedding_provider: Arc, language_registry: Arc, db_update_tx: channel::Sender, - paths_tx: channel::Sender<(i64, PathBuf, Arc, SystemTime)>, + // embed_batch_tx: channel::Sender)>>, + batch_files_tx: channel::Sender<(i64, IndexedFile, Vec)>, + parsing_files_tx: channel::Sender<(i64, PathBuf, Arc, SystemTime)>, + parsing_files_rx: channel::Receiver<(i64, PathBuf, Arc, SystemTime)>, _db_update_task: Task<()>, - _paths_update_task: Task<()>, - _embeddings_update_task: Task<()>, + _embed_batch_task: Vec>, + _batch_files_task: Task<()>, + _parsing_files_tasks: Vec>, projects: HashMap, ProjectState>, } @@ -188,12 +193,8 @@ impl VectorStore { Ok(cx.add_model(|cx| { // paths_tx -> embeddings_tx -> db_update_tx + //db_update_tx/rx: Updating Database let (db_update_tx, db_update_rx) = channel::unbounded(); - let (paths_tx, paths_rx) = - channel::unbounded::<(i64, PathBuf, Arc, SystemTime)>(); - let (embeddings_tx, embeddings_rx) = - channel::unbounded::<(i64, IndexedFile, Vec)>(); - let _db_update_task = cx.background().spawn(async move { while let Ok(job) = db_update_rx.recv().await { match job { @@ -201,6 +202,7 @@ impl VectorStore { worktree_id, indexed_file, } => { + log::info!("Inserting Data for {:?}", &indexed_file.path); db.insert_file(worktree_id, indexed_file).log_err(); } DbWrite::Delete { worktree_id, path } => { @@ -214,132 +216,137 @@ impl VectorStore { } }); - async fn embed_batch( - embeddings_queue: Vec<(i64, IndexedFile, Vec)>, - embedding_provider: &Arc, - db_update_tx: channel::Sender, - ) -> Result<()> { - let mut embeddings_queue = embeddings_queue.clone(); + // embed_tx/rx: Embed Batch and Send to Database + let (embed_batch_tx, embed_batch_rx) = + channel::unbounded::)>>(); + let mut _embed_batch_task = Vec::new(); + for _ in 
0..cx.background().num_cpus() { + let db_update_tx = db_update_tx.clone(); + let embed_batch_rx = embed_batch_rx.clone(); + let embedding_provider = embedding_provider.clone(); + _embed_batch_task.push(cx.background().spawn(async move { + while let Ok(embeddings_queue) = embed_batch_rx.recv().await { + log::info!("Embedding Batch! "); - let mut document_spans = vec![]; - for (_, _, document_span) in embeddings_queue.clone().into_iter() { - document_spans.extend(document_span); - } + // Construct Batch + let mut embeddings_queue = embeddings_queue.clone(); + let mut document_spans = vec![]; + for (_, _, document_span) in embeddings_queue.clone().into_iter() { + document_spans.extend(document_span); + } - let mut embeddings = embedding_provider - .embed_batch(document_spans.iter().map(|x| &**x).collect()) - .await?; + if let Some(mut embeddings) = embedding_provider + .embed_batch(document_spans.iter().map(|x| &**x).collect()) + .await + .log_err() + { + let mut i = 0; + let mut j = 0; + while let Some(embedding) = embeddings.pop() { + while embeddings_queue[i].1.documents.len() == j { + i += 1; + j = 0; + } - // This assumes the embeddings are returned in order - let t0 = Instant::now(); - let mut i = 0; - let mut j = 0; - while let Some(embedding) = embeddings.pop() { - // This has to accomodate for multiple indexed_files in a row without documents - while embeddings_queue[i].1.documents.len() == j { - i += 1; - j = 0; + embeddings_queue[i].1.documents[j].embedding = embedding; + j += 1; + } + + for (worktree_id, indexed_file, _) in embeddings_queue.into_iter() { + for document in indexed_file.documents.iter() { + // TODO: Update this so it doesn't panic + assert!( + document.embedding.len() > 0, + "Document Embedding Not Complete" + ); + } + + db_update_tx + .send(DbWrite::InsertFile { + worktree_id, + indexed_file, + }) + .await + .unwrap(); + } + } } - - embeddings_queue[i].1.documents[j].embedding = embedding; - j += 1; - } - - for (worktree_id, indexed_file, 
_) in embeddings_queue.into_iter() { - // TODO: Update this so it doesnt panic - for document in indexed_file.documents.iter() { - assert!( - document.embedding.len() > 0, - "Document Embedding not Complete" - ); - } - - db_update_tx - .send(DbWrite::InsertFile { - worktree_id, - indexed_file, - }) - .await - .unwrap(); - } - - anyhow::Ok(()) + })) } - let embedding_provider_clone = embedding_provider.clone(); - - let db_update_tx_clone = db_update_tx.clone(); - let _embeddings_update_task = cx.background().spawn(async move { + // batch_tx/rx: Batch Files to Send for Embeddings + let (batch_files_tx, batch_files_rx) = + channel::unbounded::<(i64, IndexedFile, Vec)>(); + let _batch_files_task = cx.background().spawn(async move { let mut queue_len = 0; let mut embeddings_queue = vec![]; - let mut request_count = 0; while let Ok((worktree_id, indexed_file, document_spans)) = - embeddings_rx.recv().await + batch_files_rx.recv().await { + log::info!("Batching File: {:?}", &indexed_file.path); queue_len += &document_spans.len(); embeddings_queue.push((worktree_id, indexed_file, document_spans)); - if queue_len >= EMBEDDINGS_BATCH_SIZE { - let _ = embed_batch( - embeddings_queue, - &embedding_provider_clone, - db_update_tx_clone.clone(), - ) - .await; - + embed_batch_tx.try_send(embeddings_queue).unwrap(); embeddings_queue = vec![]; queue_len = 0; - - request_count += 1; } } - if queue_len > 0 { - let _ = embed_batch( - embeddings_queue, - &embedding_provider_clone, - db_update_tx_clone.clone(), - ) - .await; - request_count += 1; + embed_batch_tx.try_send(embeddings_queue).unwrap(); } }); - let fs_clone = fs.clone(); + // parsing_files_tx/rx: Parsing Files to Embeddable Documents + let (parsing_files_tx, parsing_files_rx) = + channel::unbounded::<(i64, PathBuf, Arc, SystemTime)>(); - let _paths_update_task = cx.background().spawn(async move { - let mut parser = Parser::new(); - let mut cursor = QueryCursor::new(); - while let Ok((worktree_id, file_path, language, mtime)) 
= paths_rx.recv().await { - if let Some((indexed_file, document_spans)) = Self::index_file( - &mut cursor, - &mut parser, - &fs_clone, - language, - file_path.clone(), - mtime, - ) - .await - .log_err() + let mut _parsing_files_tasks = Vec::new(); + for _ in 0..cx.background().num_cpus() { + let fs = fs.clone(); + let parsing_files_rx = parsing_files_rx.clone(); + let batch_files_tx = batch_files_tx.clone(); + _parsing_files_tasks.push(cx.background().spawn(async move { + let mut parser = Parser::new(); + let mut cursor = QueryCursor::new(); + while let Ok((worktree_id, file_path, language, mtime)) = + parsing_files_rx.recv().await { - embeddings_tx - .try_send((worktree_id, indexed_file, document_spans)) - .unwrap(); + log::info!("Parsing File: {:?}", &file_path); + if let Some((indexed_file, document_spans)) = Self::index_file( + &mut cursor, + &mut parser, + &fs, + language, + file_path.clone(), + mtime, + ) + .await + .log_err() + { + batch_files_tx + .try_send((worktree_id, indexed_file, document_spans)) + .unwrap(); + } } - } - }); + })); + } Self { fs, database_url, - db_update_tx, - paths_tx, embedding_provider, language_registry, - projects: HashMap::new(), + db_update_tx, + // embed_batch_tx, + batch_files_tx, + parsing_files_tx, + parsing_files_rx, _db_update_task, - _paths_update_task, - _embeddings_update_task, + _embed_batch_task, + _batch_files_task, + _parsing_files_tasks, + projects: HashMap::new(), } })) } @@ -441,12 +448,16 @@ impl VectorStore { let language_registry = self.language_registry.clone(); let database_url = self.database_url.clone(); let db_update_tx = self.db_update_tx.clone(); - let paths_tx = self.paths_tx.clone(); + let parsing_files_tx = self.parsing_files_tx.clone(); + let parsing_files_rx = self.parsing_files_rx.clone(); + let batch_files_tx = self.batch_files_tx.clone(); cx.spawn(|this, mut cx| async move { + let t0 = Instant::now(); futures::future::join_all(worktree_scans_complete).await; let worktree_db_ids = 
futures::future::join_all(worktree_db_ids).await; + log::info!("Worktree Scanning Done in {:?}", t0.elapsed().as_millis()); if let Some(db_directory) = database_url.parent() { fs.create_dir(db_directory).await.log_err(); @@ -485,8 +496,9 @@ impl VectorStore { let db_ids_by_worktree_id = db_ids_by_worktree_id.clone(); let db_update_tx = db_update_tx.clone(); let language_registry = language_registry.clone(); - let paths_tx = paths_tx.clone(); + let parsing_files_tx = parsing_files_tx.clone(); async move { + let t0 = Instant::now(); for worktree in worktrees.into_iter() { let mut file_mtimes = worktree_file_times.remove(&worktree.id()).unwrap(); @@ -513,7 +525,7 @@ impl VectorStore { }); if !already_stored { - paths_tx + parsing_files_tx .try_send(( db_ids_by_worktree_id[&worktree.id()], path_buf, @@ -533,10 +545,45 @@ impl VectorStore { .unwrap(); } } + log::info!( + "Parsing Worktree Completed in {:?}", + t0.elapsed().as_millis() + ); } }) .detach(); + // cx.background() + // .scoped(|scope| { + // for _ in 0..cx.background().num_cpus() { + // scope.spawn(async { + // let mut parser = Parser::new(); + // let mut cursor = QueryCursor::new(); + // while let Ok((worktree_id, file_path, language, mtime)) = + // parsing_files_rx.recv().await + // { + // log::info!("Parsing File: {:?}", &file_path); + // if let Some((indexed_file, document_spans)) = Self::index_file( + // &mut cursor, + // &mut parser, + // &fs, + // language, + // file_path.clone(), + // mtime, + // ) + // .await + // .log_err() + // { + // batch_files_tx + // .try_send((worktree_id, indexed_file, document_spans)) + // .unwrap(); + // } + // } + // }); + // } + // }) + // .await; + this.update(&mut cx, |this, cx| { // The below is managing for updated on save // Currently each time a file is saved, this code is run, and for all the files that were changed, if the current time is @@ -545,90 +592,90 @@ impl VectorStore { if let Some(project_state) = this.projects.get(&project.downgrade()) { let 
worktree_db_ids = project_state.worktree_db_ids.clone(); - if let project::Event::WorktreeUpdatedEntries(worktree_id, changes) = event - { - // Iterate through changes - let language_registry = this.language_registry.clone(); + // if let project::Event::WorktreeUpdatedEntries(worktree_id, changes) = event + // { + // // Iterate through changes + // let language_registry = this.language_registry.clone(); - let db = - VectorDatabase::new(this.database_url.to_string_lossy().into()); - if db.is_err() { - return; - } - let db = db.unwrap(); + // let db = + // VectorDatabase::new(this.database_url.to_string_lossy().into()); + // if db.is_err() { + // return; + // } + // let db = db.unwrap(); - let worktree_db_id: Option = { - let mut found_db_id = None; - for (w_id, db_id) in worktree_db_ids.into_iter() { - if &w_id == worktree_id { - found_db_id = Some(db_id); - } - } + // let worktree_db_id: Option = { + // let mut found_db_id = None; + // for (w_id, db_id) in worktree_db_ids.into_iter() { + // if &w_id == worktree_id { + // found_db_id = Some(db_id); + // } + // } - found_db_id - }; + // found_db_id + // }; - if worktree_db_id.is_none() { - return; - } - let worktree_db_id = worktree_db_id.unwrap(); + // if worktree_db_id.is_none() { + // return; + // } + // let worktree_db_id = worktree_db_id.unwrap(); - let file_mtimes = db.get_file_mtimes(worktree_db_id); - if file_mtimes.is_err() { - return; - } + // let file_mtimes = db.get_file_mtimes(worktree_db_id); + // if file_mtimes.is_err() { + // return; + // } - let file_mtimes = file_mtimes.unwrap(); - let paths_tx = this.paths_tx.clone(); + // let file_mtimes = file_mtimes.unwrap(); + // let paths_tx = this.paths_tx.clone(); - smol::block_on(async move { - for change in changes.into_iter() { - let change_path = change.0.clone(); - log::info!("Change: {:?}", &change_path); - if let Ok(language) = language_registry - .language_for_file(&change_path.to_path_buf(), None) - .await - { - if language - .grammar() - 
.and_then(|grammar| grammar.embedding_config.as_ref()) - .is_none() - { - continue; - } + // smol::block_on(async move { + // for change in changes.into_iter() { + // let change_path = change.0.clone(); + // log::info!("Change: {:?}", &change_path); + // if let Ok(language) = language_registry + // .language_for_file(&change_path.to_path_buf(), None) + // .await + // { + // if language + // .grammar() + // .and_then(|grammar| grammar.embedding_config.as_ref()) + // .is_none() + // { + // continue; + // } - // TODO: Make this a bit more defensive - let modified_time = - change_path.metadata().unwrap().modified().unwrap(); - let existing_time = - file_mtimes.get(&change_path.to_path_buf()); - let already_stored = - existing_time.map_or(false, |existing_time| { - if &modified_time != existing_time - && existing_time.elapsed().unwrap().as_secs() - > REINDEXING_DELAY - { - false - } else { - true - } - }); + // // TODO: Make this a bit more defensive + // let modified_time = + // change_path.metadata().unwrap().modified().unwrap(); + // let existing_time = + // file_mtimes.get(&change_path.to_path_buf()); + // let already_stored = + // existing_time.map_or(false, |existing_time| { + // if &modified_time != existing_time + // && existing_time.elapsed().unwrap().as_secs() + // > REINDEXING_DELAY + // { + // false + // } else { + // true + // } + // }); - if !already_stored { - log::info!("Need to reindex: {:?}", &change_path); - paths_tx - .try_send(( - worktree_db_id, - change_path.to_path_buf(), - language, - modified_time, - )) - .unwrap(); - } - } - } - }) - } + // if !already_stored { + // log::info!("Need to reindex: {:?}", &change_path); + // paths_tx + // .try_send(( + // worktree_db_id, + // change_path.to_path_buf(), + // language, + // modified_time, + // )) + // .unwrap(); + // } + // } + // } + // }) + // } } }); From a86b6c42c77c7fe5e3721ba3fe4df0fbe91eb268 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Thu, 6 Jul 2023 11:11:39 -0400 Subject: [PATCH 029/115] 
corrected batching order and managed for open ai embedding errors --- crates/vector_store/Cargo.toml | 1 + crates/vector_store/src/embedding.rs | 138 ++++++++++------ crates/vector_store/src/vector_store.rs | 203 ++++++++++-------------- 3 files changed, 169 insertions(+), 173 deletions(-) diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml index 854afe5b6e..35a6a689ae 100644 --- a/crates/vector_store/Cargo.toml +++ b/crates/vector_store/Cargo.toml @@ -32,6 +32,7 @@ async-trait.workspace = true bincode = "1.3.3" matrixmultiply = "0.3.7" tiktoken-rs = "0.5.0" +rand.workspace = true [dev-dependencies] gpui = { path = "../gpui", features = ["test-support"] } diff --git a/crates/vector_store/src/embedding.rs b/crates/vector_store/src/embedding.rs index 72b30d9424..029a6cdf61 100644 --- a/crates/vector_store/src/embedding.rs +++ b/crates/vector_store/src/embedding.rs @@ -2,15 +2,20 @@ use anyhow::{anyhow, Result}; use async_trait::async_trait; use futures::AsyncReadExt; use gpui::serde_json; +use isahc::http::StatusCode; use isahc::prelude::Configurable; +use isahc::{AsyncBody, Response}; use lazy_static::lazy_static; use serde::{Deserialize, Serialize}; +use std::env; use std::sync::Arc; -use std::{env, time::Instant}; +use std::time::Duration; +use tiktoken_rs::{cl100k_base, CoreBPE}; use util::http::{HttpClient, Request}; lazy_static! 
{ static ref OPENAI_API_KEY: Option = env::var("OPENAI_API_KEY").ok(); + static ref OPENAI_BPE_TOKENIZER: CoreBPE = cl100k_base().unwrap(); } #[derive(Clone)] @@ -60,69 +65,100 @@ impl EmbeddingProvider for DummyEmbeddings { } } -// impl OpenAIEmbeddings { -// async fn truncate(span: &str) -> String { -// let bpe = cl100k_base().unwrap(); -// let mut tokens = bpe.encode_with_special_tokens(span); -// if tokens.len() > 8192 { -// tokens.truncate(8192); -// let result = bpe.decode(tokens); -// if result.is_ok() { -// return result.unwrap(); -// } -// } +impl OpenAIEmbeddings { + async fn truncate(span: String) -> String { + let mut tokens = OPENAI_BPE_TOKENIZER.encode_with_special_tokens(span.as_ref()); + if tokens.len() > 8190 { + tokens.truncate(8190); + let result = OPENAI_BPE_TOKENIZER.decode(tokens.clone()); + if result.is_ok() { + let transformed = result.unwrap(); + // assert_ne!(transformed, span); + return transformed; + } + } -// return span.to_string(); -// } -// } - -#[async_trait] -impl EmbeddingProvider for OpenAIEmbeddings { - async fn embed_batch(&self, spans: Vec<&str>) -> Result>> { - // Truncate spans to 8192 if needed - // let t0 = Instant::now(); - // let mut truncated_spans = vec![]; - // for span in spans { - // truncated_spans.push(Self::truncate(span)); - // } - // let spans = futures::future::join_all(truncated_spans).await; - // log::info!("Truncated Spans in {:?}", t0.elapsed().as_secs()); - - let api_key = OPENAI_API_KEY - .as_ref() - .ok_or_else(|| anyhow!("no api key"))?; + return span.to_string(); + } + async fn send_request(&self, api_key: &str, spans: Vec<&str>) -> Result> { let request = Request::post("https://api.openai.com/v1/embeddings") .redirect_policy(isahc::config::RedirectPolicy::Follow) .header("Content-Type", "application/json") .header("Authorization", format!("Bearer {}", api_key)) .body( serde_json::to_string(&OpenAIEmbeddingRequest { - input: spans, + input: spans.clone(), model: "text-embedding-ada-002", }) .unwrap() 
.into(), )?; - let mut response = self.client.send(request).await?; - if !response.status().is_success() { - return Err(anyhow!("openai embedding failed {}", response.status())); - } - - let mut body = String::new(); - response.body_mut().read_to_string(&mut body).await?; - let response: OpenAIEmbeddingResponse = serde_json::from_str(&body)?; - - log::info!( - "openai embedding completed. tokens: {:?}", - response.usage.total_tokens - ); - - Ok(response - .data - .into_iter() - .map(|embedding| embedding.embedding) - .collect()) + Ok(self.client.send(request).await?) + } +} + +#[async_trait] +impl EmbeddingProvider for OpenAIEmbeddings { + async fn embed_batch(&self, spans: Vec<&str>) -> Result>> { + const BACKOFF_SECONDS: [usize; 3] = [65, 180, 360]; + const MAX_RETRIES: usize = 3; + + let api_key = OPENAI_API_KEY + .as_ref() + .ok_or_else(|| anyhow!("no api key"))?; + + let mut request_number = 0; + let mut response: Response; + let mut spans: Vec = spans.iter().map(|x| x.to_string()).collect(); + while request_number < MAX_RETRIES { + response = self + .send_request(api_key, spans.iter().map(|x| &**x).collect()) + .await?; + request_number += 1; + + if request_number + 1 == MAX_RETRIES && response.status() != StatusCode::OK { + return Err(anyhow!( + "openai max retries, error: {:?}", + &response.status() + )); + } + + match response.status() { + StatusCode::TOO_MANY_REQUESTS => { + let delay = Duration::from_secs(BACKOFF_SECONDS[request_number - 1] as u64); + std::thread::sleep(delay); + } + StatusCode::BAD_REQUEST => { + log::info!("BAD REQUEST: {:?}", &response.status()); + // Don't worry about delaying bad request, as we can assume + // we haven't been rate limited yet. 
+ for span in spans.iter_mut() { + *span = Self::truncate(span.to_string()).await; + } + } + StatusCode::OK => { + let mut body = String::new(); + response.body_mut().read_to_string(&mut body).await?; + let response: OpenAIEmbeddingResponse = serde_json::from_str(&body)?; + + log::info!( + "openai embedding completed. tokens: {:?}", + response.usage.total_tokens + ); + return Ok(response + .data + .into_iter() + .map(|embedding| embedding.embedding) + .collect()); + } + _ => { + return Err(anyhow!("openai embedding failed {}", response.status())); + } + } + } + + Err(anyhow!("openai embedding failed")) } } diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index a63674bc34..5141451e64 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -74,7 +74,6 @@ pub fn init( cx.subscribe_global::({ let vector_store = vector_store.clone(); move |event, cx| { - let t0 = Instant::now(); let workspace = &event.0; if let Some(workspace) = workspace.upgrade(cx) { let project = workspace.read(cx).project().clone(); @@ -126,9 +125,7 @@ pub struct VectorStore { language_registry: Arc, db_update_tx: channel::Sender, // embed_batch_tx: channel::Sender)>>, - batch_files_tx: channel::Sender<(i64, IndexedFile, Vec)>, parsing_files_tx: channel::Sender<(i64, PathBuf, Arc, SystemTime)>, - parsing_files_rx: channel::Receiver<(i64, PathBuf, Arc, SystemTime)>, _db_update_task: Task<()>, _embed_batch_task: Vec>, _batch_files_task: Task<()>, @@ -220,14 +217,13 @@ impl VectorStore { let (embed_batch_tx, embed_batch_rx) = channel::unbounded::)>>(); let mut _embed_batch_task = Vec::new(); - for _ in 0..cx.background().num_cpus() { + for _ in 0..1 { + //cx.background().num_cpus() { let db_update_tx = db_update_tx.clone(); let embed_batch_rx = embed_batch_rx.clone(); let embedding_provider = embedding_provider.clone(); _embed_batch_task.push(cx.background().spawn(async move { while let Ok(embeddings_queue) = 
embed_batch_rx.recv().await { - log::info!("Embedding Batch! "); - // Construct Batch let mut embeddings_queue = embeddings_queue.clone(); let mut document_spans = vec![]; @@ -235,20 +231,20 @@ impl VectorStore { document_spans.extend(document_span); } - if let Some(mut embeddings) = embedding_provider + if let Ok(embeddings) = embedding_provider .embed_batch(document_spans.iter().map(|x| &**x).collect()) .await - .log_err() { let mut i = 0; let mut j = 0; - while let Some(embedding) = embeddings.pop() { + + for embedding in embeddings.iter() { while embeddings_queue[i].1.documents.len() == j { i += 1; j = 0; } - embeddings_queue[i].1.documents[j].embedding = embedding; + embeddings_queue[i].1.documents[j].embedding = embedding.to_owned(); j += 1; } @@ -283,7 +279,6 @@ impl VectorStore { while let Ok((worktree_id, indexed_file, document_spans)) = batch_files_rx.recv().await { - log::info!("Batching File: {:?}", &indexed_file.path); queue_len += &document_spans.len(); embeddings_queue.push((worktree_id, indexed_file, document_spans)); if queue_len >= EMBEDDINGS_BATCH_SIZE { @@ -338,10 +333,7 @@ impl VectorStore { embedding_provider, language_registry, db_update_tx, - // embed_batch_tx, - batch_files_tx, parsing_files_tx, - parsing_files_rx, _db_update_task, _embed_batch_task, _batch_files_task, @@ -449,8 +441,6 @@ impl VectorStore { let database_url = self.database_url.clone(); let db_update_tx = self.db_update_tx.clone(); let parsing_files_tx = self.parsing_files_tx.clone(); - let parsing_files_rx = self.parsing_files_rx.clone(); - let batch_files_tx = self.batch_files_tx.clone(); cx.spawn(|this, mut cx| async move { let t0 = Instant::now(); @@ -553,37 +543,6 @@ impl VectorStore { }) .detach(); - // cx.background() - // .scoped(|scope| { - // for _ in 0..cx.background().num_cpus() { - // scope.spawn(async { - // let mut parser = Parser::new(); - // let mut cursor = QueryCursor::new(); - // while let Ok((worktree_id, file_path, language, mtime)) = - // 
parsing_files_rx.recv().await - // { - // log::info!("Parsing File: {:?}", &file_path); - // if let Some((indexed_file, document_spans)) = Self::index_file( - // &mut cursor, - // &mut parser, - // &fs, - // language, - // file_path.clone(), - // mtime, - // ) - // .await - // .log_err() - // { - // batch_files_tx - // .try_send((worktree_id, indexed_file, document_spans)) - // .unwrap(); - // } - // } - // }); - // } - // }) - // .await; - this.update(&mut cx, |this, cx| { // The below is managing for updated on save // Currently each time a file is saved, this code is run, and for all the files that were changed, if the current time is @@ -592,90 +551,90 @@ impl VectorStore { if let Some(project_state) = this.projects.get(&project.downgrade()) { let worktree_db_ids = project_state.worktree_db_ids.clone(); - // if let project::Event::WorktreeUpdatedEntries(worktree_id, changes) = event - // { - // // Iterate through changes - // let language_registry = this.language_registry.clone(); + if let project::Event::WorktreeUpdatedEntries(worktree_id, changes) = event + { + // Iterate through changes + let language_registry = this.language_registry.clone(); - // let db = - // VectorDatabase::new(this.database_url.to_string_lossy().into()); - // if db.is_err() { - // return; - // } - // let db = db.unwrap(); + let db = + VectorDatabase::new(this.database_url.to_string_lossy().into()); + if db.is_err() { + return; + } + let db = db.unwrap(); - // let worktree_db_id: Option = { - // let mut found_db_id = None; - // for (w_id, db_id) in worktree_db_ids.into_iter() { - // if &w_id == worktree_id { - // found_db_id = Some(db_id); - // } - // } + let worktree_db_id: Option = { + let mut found_db_id = None; + for (w_id, db_id) in worktree_db_ids.into_iter() { + if &w_id == worktree_id { + found_db_id = Some(db_id); + } + } - // found_db_id - // }; + found_db_id + }; - // if worktree_db_id.is_none() { - // return; - // } - // let worktree_db_id = worktree_db_id.unwrap(); + if 
worktree_db_id.is_none() { + return; + } + let worktree_db_id = worktree_db_id.unwrap(); - // let file_mtimes = db.get_file_mtimes(worktree_db_id); - // if file_mtimes.is_err() { - // return; - // } + let file_mtimes = db.get_file_mtimes(worktree_db_id); + if file_mtimes.is_err() { + return; + } - // let file_mtimes = file_mtimes.unwrap(); - // let paths_tx = this.paths_tx.clone(); + let file_mtimes = file_mtimes.unwrap(); + let parsing_files_tx = this.parsing_files_tx.clone(); - // smol::block_on(async move { - // for change in changes.into_iter() { - // let change_path = change.0.clone(); - // log::info!("Change: {:?}", &change_path); - // if let Ok(language) = language_registry - // .language_for_file(&change_path.to_path_buf(), None) - // .await - // { - // if language - // .grammar() - // .and_then(|grammar| grammar.embedding_config.as_ref()) - // .is_none() - // { - // continue; - // } + smol::block_on(async move { + for change in changes.into_iter() { + let change_path = change.0.clone(); + log::info!("Change: {:?}", &change_path); + if let Ok(language) = language_registry + .language_for_file(&change_path.to_path_buf(), None) + .await + { + if language + .grammar() + .and_then(|grammar| grammar.embedding_config.as_ref()) + .is_none() + { + continue; + } - // // TODO: Make this a bit more defensive - // let modified_time = - // change_path.metadata().unwrap().modified().unwrap(); - // let existing_time = - // file_mtimes.get(&change_path.to_path_buf()); - // let already_stored = - // existing_time.map_or(false, |existing_time| { - // if &modified_time != existing_time - // && existing_time.elapsed().unwrap().as_secs() - // > REINDEXING_DELAY - // { - // false - // } else { - // true - // } - // }); + // TODO: Make this a bit more defensive + let modified_time = + change_path.metadata().unwrap().modified().unwrap(); + let existing_time = + file_mtimes.get(&change_path.to_path_buf()); + let already_stored = + existing_time.map_or(false, |existing_time| { + if 
&modified_time != existing_time + && existing_time.elapsed().unwrap().as_secs() + > REINDEXING_DELAY + { + false + } else { + true + } + }); - // if !already_stored { - // log::info!("Need to reindex: {:?}", &change_path); - // paths_tx - // .try_send(( - // worktree_db_id, - // change_path.to_path_buf(), - // language, - // modified_time, - // )) - // .unwrap(); - // } - // } - // } - // }) - // } + if !already_stored { + log::info!("Need to reindex: {:?}", &change_path); + parsing_files_tx + .try_send(( + worktree_db_id, + change_path.to_path_buf(), + language, + modified_time, + )) + .unwrap(); + } + } + } + }) + } } }); From e57f6f21fe11e1bb585202c346cb5c28360c935f Mon Sep 17 00:00:00 2001 From: KCaverly Date: Thu, 6 Jul 2023 15:26:43 -0400 Subject: [PATCH 030/115] reindexing update to appropriately accomodate for buffer delay and persistent pending files list --- crates/vector_store/src/vector_store.rs | 234 ++++++++++++++++-------- 1 file changed, 160 insertions(+), 74 deletions(-) diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 5141451e64..57277e39af 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -18,11 +18,13 @@ use modal::{SemanticSearch, SemanticSearchDelegate, Toggle}; use project::{Fs, Project, WorktreeId}; use smol::channel; use std::{ + cell::RefCell, cmp::Ordering, collections::HashMap, path::{Path, PathBuf}, + rc::Rc, sync::Arc, - time::{Instant, SystemTime}, + time::{Duration, Instant, SystemTime}, }; use tree_sitter::{Parser, QueryCursor}; use util::{ @@ -33,7 +35,7 @@ use util::{ }; use workspace::{Workspace, WorkspaceCreated}; -const REINDEXING_DELAY: u64 = 30; +const REINDEXING_DELAY_SECONDS: u64 = 3; const EMBEDDINGS_BATCH_SIZE: usize = 150; #[derive(Debug, Clone)] @@ -124,20 +126,62 @@ pub struct VectorStore { embedding_provider: Arc, language_registry: Arc, db_update_tx: channel::Sender, - // embed_batch_tx: channel::Sender)>>, - 
parsing_files_tx: channel::Sender<(i64, PathBuf, Arc, SystemTime)>, + parsing_files_tx: channel::Sender, _db_update_task: Task<()>, _embed_batch_task: Vec>, _batch_files_task: Task<()>, _parsing_files_tasks: Vec>, - projects: HashMap, ProjectState>, + projects: HashMap, Rc>>, } struct ProjectState { worktree_db_ids: Vec<(WorktreeId, i64)>, + pending_files: HashMap, _subscription: gpui::Subscription, } +impl ProjectState { + fn update_pending_files(&mut self, pending_file: PendingFile, indexing_time: SystemTime) { + // If Pending File Already Exists, Replace it with the new one + // but keep the old indexing time + if let Some(old_file) = self.pending_files.remove(&pending_file.path.clone()) { + self.pending_files + .insert(pending_file.path.clone(), (pending_file, old_file.1)); + } else { + self.pending_files + .insert(pending_file.path.clone(), (pending_file, indexing_time)); + }; + } + + fn get_outstanding_files(&mut self) -> Vec { + let mut outstanding_files = vec![]; + let mut remove_keys = vec![]; + for key in self.pending_files.keys().into_iter() { + if let Some(pending_details) = self.pending_files.get(key) { + let (pending_file, index_time) = pending_details; + if index_time <= &SystemTime::now() { + outstanding_files.push(pending_file.clone()); + remove_keys.push(key.clone()); + } + } + } + + for key in remove_keys.iter() { + self.pending_files.remove(key); + } + + return outstanding_files; + } +} + +#[derive(Clone, Debug)] +struct PendingFile { + worktree_db_id: i64, + path: PathBuf, + language: Arc, + modified_time: SystemTime, +} + #[derive(Debug, Clone)] pub struct SearchResult { pub worktree_id: WorktreeId, @@ -293,8 +337,7 @@ impl VectorStore { }); // parsing_files_tx/rx: Parsing Files to Embeddable Documents - let (parsing_files_tx, parsing_files_rx) = - channel::unbounded::<(i64, PathBuf, Arc, SystemTime)>(); + let (parsing_files_tx, parsing_files_rx) = channel::unbounded::(); let mut _parsing_files_tasks = Vec::new(); for _ in 
0..cx.background().num_cpus() { @@ -304,23 +347,25 @@ impl VectorStore { _parsing_files_tasks.push(cx.background().spawn(async move { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); - while let Ok((worktree_id, file_path, language, mtime)) = - parsing_files_rx.recv().await - { - log::info!("Parsing File: {:?}", &file_path); + while let Ok(pending_file) = parsing_files_rx.recv().await { + log::info!("Parsing File: {:?}", &pending_file.path); if let Some((indexed_file, document_spans)) = Self::index_file( &mut cursor, &mut parser, &fs, - language, - file_path.clone(), - mtime, + pending_file.language, + pending_file.path.clone(), + pending_file.modified_time, ) .await .log_err() { batch_files_tx - .try_send((worktree_id, indexed_file, document_spans)) + .try_send(( + pending_file.worktree_db_id, + indexed_file, + document_spans, + )) .unwrap(); } } @@ -516,12 +561,13 @@ impl VectorStore { if !already_stored { parsing_files_tx - .try_send(( - db_ids_by_worktree_id[&worktree.id()], - path_buf, + .try_send(PendingFile { + worktree_db_id: db_ids_by_worktree_id + [&worktree.id()], + path: path_buf, language, - file.mtime, - )) + modified_time: file.mtime, + }) .unwrap(); } } @@ -543,54 +589,82 @@ impl VectorStore { }) .detach(); + // let mut pending_files: Vec<(PathBuf, ((i64, PathBuf, Arc, SystemTime), SystemTime))> = vec![]; this.update(&mut cx, |this, cx| { // The below is managing for updated on save // Currently each time a file is saved, this code is run, and for all the files that were changed, if the current time is // greater than the previous embedded time by the REINDEXING_DELAY variable, we will send the file off to be indexed. 
- let _subscription = cx.subscribe(&project, |this, project, event, _cx| { + let _subscription = cx.subscribe(&project, |this, project, event, cx| { if let Some(project_state) = this.projects.get(&project.downgrade()) { + let mut project_state = project_state.borrow_mut(); let worktree_db_ids = project_state.worktree_db_ids.clone(); if let project::Event::WorktreeUpdatedEntries(worktree_id, changes) = event { - // Iterate through changes - let language_registry = this.language_registry.clone(); - - let db = - VectorDatabase::new(this.database_url.to_string_lossy().into()); - if db.is_err() { + // Get Worktree Object + let worktree = + project.read(cx).worktree_for_id(worktree_id.clone(), cx); + if worktree.is_none() { return; } - let db = db.unwrap(); + let worktree = worktree.unwrap(); - let worktree_db_id: Option = { - let mut found_db_id = None; - for (w_id, db_id) in worktree_db_ids.into_iter() { - if &w_id == worktree_id { - found_db_id = Some(db_id); + // Get Database + let db_values = { + if let Ok(db) = + VectorDatabase::new(this.database_url.to_string_lossy().into()) + { + let worktree_db_id: Option = { + let mut found_db_id = None; + for (w_id, db_id) in worktree_db_ids.into_iter() { + if &w_id == &worktree.read(cx).id() { + found_db_id = Some(db_id) + } + } + found_db_id + }; + if worktree_db_id.is_none() { + return; } - } + let worktree_db_id = worktree_db_id.unwrap(); - found_db_id + let file_mtimes = db.get_file_mtimes(worktree_db_id); + if file_mtimes.is_err() { + return; + } + + let file_mtimes = file_mtimes.unwrap(); + Some((file_mtimes, worktree_db_id)) + } else { + return; + } }; - if worktree_db_id.is_none() { - return; - } - let worktree_db_id = worktree_db_id.unwrap(); - - let file_mtimes = db.get_file_mtimes(worktree_db_id); - if file_mtimes.is_err() { + if db_values.is_none() { return; } - let file_mtimes = file_mtimes.unwrap(); + let (file_mtimes, worktree_db_id) = db_values.unwrap(); + + // Iterate Through Changes + let language_registry = 
this.language_registry.clone(); let parsing_files_tx = this.parsing_files_tx.clone(); smol::block_on(async move { for change in changes.into_iter() { let change_path = change.0.clone(); - log::info!("Change: {:?}", &change_path); + // Skip if git ignored or symlink + if let Some(entry) = worktree.read(cx).entry_for_id(change.1) { + if entry.is_ignored || entry.is_symlink { + continue; + } else { + log::info!( + "Testing for Reindexing: {:?}", + &change_path + ); + } + }; + if let Ok(language) = language_registry .language_for_file(&change_path.to_path_buf(), None) .await @@ -603,47 +677,59 @@ impl VectorStore { continue; } - // TODO: Make this a bit more defensive - let modified_time = - change_path.metadata().unwrap().modified().unwrap(); - let existing_time = - file_mtimes.get(&change_path.to_path_buf()); - let already_stored = - existing_time.map_or(false, |existing_time| { - if &modified_time != existing_time - && existing_time.elapsed().unwrap().as_secs() - > REINDEXING_DELAY - { - false + if let Some(modified_time) = { + let metadata = change_path.metadata(); + if metadata.is_err() { + None + } else { + let mtime = metadata.unwrap().modified(); + if mtime.is_err() { + None } else { - true + Some(mtime.unwrap()) } - }); + } + } { + let existing_time = + file_mtimes.get(&change_path.to_path_buf()); + let already_stored = existing_time + .map_or(false, |existing_time| { + &modified_time != existing_time + }); - if !already_stored { - log::info!("Need to reindex: {:?}", &change_path); - parsing_files_tx - .try_send(( - worktree_db_id, - change_path.to_path_buf(), - language, - modified_time, - )) - .unwrap(); + let reindex_time = modified_time + + Duration::from_secs(REINDEXING_DELAY_SECONDS); + + if !already_stored { + project_state.update_pending_files( + PendingFile { + path: change_path.to_path_buf(), + modified_time, + worktree_db_id, + language: language.clone(), + }, + reindex_time, + ); + + for file in project_state.get_outstanding_files() { + 
parsing_files_tx.try_send(file).unwrap(); + } + } } } } - }) - } + }); + }; } }); this.projects.insert( project.downgrade(), - ProjectState { + Rc::new(RefCell::new(ProjectState { + pending_files: HashMap::new(), worktree_db_ids: db_ids_by_worktree_id.into_iter().collect(), _subscription, - }, + })), ); }); @@ -659,7 +745,7 @@ impl VectorStore { cx: &mut ModelContext, ) -> Task>> { let project_state = if let Some(state) = self.projects.get(&project.downgrade()) { - state + state.borrow() } else { return Task::ready(Err(anyhow!("project not added"))); }; @@ -717,7 +803,7 @@ impl VectorStore { this.read_with(&cx, |this, _| { let project_state = if let Some(state) = this.projects.get(&project.downgrade()) { - state + state.borrow() } else { return Err(anyhow!("project not added")); }; From 7d634f66e2b2b7196b3e9141c0664d8641251323 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Thu, 6 Jul 2023 16:33:54 -0400 Subject: [PATCH 031/115] updated vector_store to include extra context for semantic search modal --- crates/vector_store/src/vector_store.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 57277e39af..065dfb51f2 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -418,21 +418,34 @@ impl VectorStore { ) { let mut item_range = None; let mut name_range = None; + let mut context_range = None; for capture in mat.captures { if capture.index == embedding_config.item_capture_ix { item_range = Some(capture.node.byte_range()); } else if capture.index == embedding_config.name_capture_ix { name_range = Some(capture.node.byte_range()); } + if let Some(context_capture_ix) = embedding_config.context_capture_ix { + if capture.index == context_capture_ix { + context_range = Some(capture.node.byte_range()); + } + } } if let Some((item_range, name_range)) = item_range.zip(name_range) { + let mut context_data = 
String::new(); + if let Some(context_range) = context_range { + if let Some(context) = content.get(context_range.clone()) { + context_data.push_str(context); + } + } + if let Some((item, name)) = content.get(item_range.clone()).zip(content.get(name_range)) { context_spans.push(item.to_string()); documents.push(Document { - name: name.to_string(), + name: format!("{} {}", context_data.to_string(), name.to_string()), offset: item_range.start, embedding: Vec::new(), }); From 6f1e988cb92aa76bc31c841e0009884576370219 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Thu, 6 Jul 2023 16:36:28 -0400 Subject: [PATCH 032/115] updated embedding treesitter query for python --- crates/zed/src/languages/python/embedding.scm | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 crates/zed/src/languages/python/embedding.scm diff --git a/crates/zed/src/languages/python/embedding.scm b/crates/zed/src/languages/python/embedding.scm new file mode 100644 index 0000000000..e3efb3dbf6 --- /dev/null +++ b/crates/zed/src/languages/python/embedding.scm @@ -0,0 +1,9 @@ +(class_definition + "class" @context + name: (identifier) @name + ) @item + +(function_definition + "async"? 
@context + "def" @context + name: (_) @name) @item From c03dda1a0cc9f99f841622dc95358e8b9dc39ea8 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Thu, 6 Jul 2023 17:15:41 -0400 Subject: [PATCH 033/115] fixed bug on absolute vs relative path --- crates/vector_store/src/vector_store.rs | 39 ++++++++++++++++--------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 065dfb51f2..baab05bec2 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -144,12 +144,19 @@ impl ProjectState { fn update_pending_files(&mut self, pending_file: PendingFile, indexing_time: SystemTime) { // If Pending File Already Exists, Replace it with the new one // but keep the old indexing time - if let Some(old_file) = self.pending_files.remove(&pending_file.path.clone()) { - self.pending_files - .insert(pending_file.path.clone(), (pending_file, old_file.1)); + if let Some(old_file) = self + .pending_files + .remove(&pending_file.relative_path.clone()) + { + self.pending_files.insert( + pending_file.relative_path.clone(), + (pending_file, old_file.1), + ); } else { - self.pending_files - .insert(pending_file.path.clone(), (pending_file, indexing_time)); + self.pending_files.insert( + pending_file.relative_path.clone(), + (pending_file, indexing_time), + ); }; } @@ -177,7 +184,8 @@ impl ProjectState { #[derive(Clone, Debug)] struct PendingFile { worktree_db_id: i64, - path: PathBuf, + relative_path: PathBuf, + absolute_path: PathBuf, language: Arc, modified_time: SystemTime, } @@ -348,13 +356,14 @@ impl VectorStore { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); while let Ok(pending_file) = parsing_files_rx.recv().await { - log::info!("Parsing File: {:?}", &pending_file.path); + log::info!("Parsing File: {:?}", &pending_file.relative_path); if let Some((indexed_file, document_spans)) = Self::index_file( &mut cursor, &mut parser, 
&fs, pending_file.language, - pending_file.path.clone(), + pending_file.relative_path.clone(), + pending_file.absolute_path.clone(), pending_file.modified_time, ) .await @@ -393,7 +402,8 @@ impl VectorStore { parser: &mut Parser, fs: &Arc, language: Arc, - file_path: PathBuf, + relative_file_path: PathBuf, + absolute_file_path: PathBuf, mtime: SystemTime, ) -> Result<(IndexedFile, Vec)> { let grammar = language.grammar().ok_or_else(|| anyhow!("no grammar"))?; @@ -402,7 +412,7 @@ impl VectorStore { .as_ref() .ok_or_else(|| anyhow!("no outline query"))?; - let content = fs.load(&file_path).await?; + let content = fs.load(&absolute_file_path).await?; parser.set_language(grammar.ts_language).unwrap(); let tree = parser @@ -455,7 +465,7 @@ impl VectorStore { return Ok(( IndexedFile { - path: file_path, + path: relative_file_path, mtime, documents, }, @@ -577,7 +587,8 @@ impl VectorStore { .try_send(PendingFile { worktree_db_id: db_ids_by_worktree_id [&worktree.id()], - path: path_buf, + relative_path: path_buf, + absolute_path, language, modified_time: file.mtime, }) @@ -666,6 +677,7 @@ impl VectorStore { smol::block_on(async move { for change in changes.into_iter() { let change_path = change.0.clone(); + let absolute_path = worktree.read(cx).absolutize(&change_path); // Skip if git ignored or symlink if let Some(entry) = worktree.read(cx).entry_for_id(change.1) { if entry.is_ignored || entry.is_symlink { @@ -716,7 +728,8 @@ impl VectorStore { if !already_stored { project_state.update_pending_files( PendingFile { - path: change_path.to_path_buf(), + relative_path: change_path.to_path_buf(), + absolute_path, modified_time, worktree_db_id, language: language.clone(), From cb24cb1ea5d5cf36a61eb2450ee12f4ca8a685d6 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Fri, 7 Jul 2023 18:36:55 +0200 Subject: [PATCH 034/115] vcs: Add 'create branch' button --- crates/collab_ui/src/branch_list.rs | 133 +++++++++++++++++++++------- 
crates/fs/src/repository.rs | 9 ++ 2 files changed, 108 insertions(+), 34 deletions(-) diff --git a/crates/collab_ui/src/branch_list.rs b/crates/collab_ui/src/branch_list.rs index 16fefbd2eb..e6f1504503 100644 --- a/crates/collab_ui/src/branch_list.rs +++ b/crates/collab_ui/src/branch_list.rs @@ -1,6 +1,8 @@ use anyhow::{anyhow, bail}; use fuzzy::{StringMatch, StringMatchCandidate}; -use gpui::{elements::*, AppContext, MouseState, Task, ViewContext, ViewHandle}; +use gpui::{ + elements::*, platform::MouseButton, AppContext, MouseState, Task, ViewContext, ViewHandle, +}; use picker::{Picker, PickerDelegate, PickerEvent}; use std::{ops::Not, sync::Arc}; use util::ResultExt; @@ -35,6 +37,14 @@ pub struct BranchListDelegate { last_query: String, } +impl BranchListDelegate { + fn display_error_toast(&self, message: String, cx: &mut ViewContext) { + const GIT_CHECKOUT_FAILURE_ID: usize = 2048; + self.workspace.update(cx, |model, ctx| { + model.show_toast(Toast::new(GIT_CHECKOUT_FAILURE_ID, message), ctx) + }); + } +} impl PickerDelegate for BranchListDelegate { fn placeholder_text(&self) -> Arc { "Select branch...".into() @@ -136,40 +146,39 @@ impl PickerDelegate for BranchListDelegate { let current_pick = self.selected_index(); let current_pick = self.matches[current_pick].string.clone(); cx.spawn(|picker, mut cx| async move { - picker.update(&mut cx, |this, cx| { - let project = this.delegate().workspace.read(cx).project().read(cx); - let mut cwd = project - .visible_worktrees(cx) - .next() - .ok_or_else(|| anyhow!("There are no visisible worktrees."))? - .read(cx) - .abs_path() - .to_path_buf(); - cwd.push(".git"); - let status = project - .fs() - .open_repo(&cwd) - .ok_or_else(|| anyhow!("Could not open repository at path `{}`", cwd.as_os_str().to_string_lossy()))? 
- .lock() - .change_branch(¤t_pick); - if status.is_err() { - const GIT_CHECKOUT_FAILURE_ID: usize = 2048; - this.delegate().workspace.update(cx, |model, ctx| { - model.show_toast( - Toast::new( - GIT_CHECKOUT_FAILURE_ID, - format!("Failed to checkout branch '{current_pick}', check for conflicts or unstashed files"), - ), - ctx, - ) - }); - status?; - } - cx.emit(PickerEvent::Dismiss); + picker + .update(&mut cx, |this, cx| { + let project = this.delegate().workspace.read(cx).project().read(cx); + let mut cwd = project + .visible_worktrees(cx) + .next() + .ok_or_else(|| anyhow!("There are no visisible worktrees."))? + .read(cx) + .abs_path() + .to_path_buf(); + cwd.push(".git"); + let status = project + .fs() + .open_repo(&cwd) + .ok_or_else(|| { + anyhow!( + "Could not open repository at path `{}`", + cwd.as_os_str().to_string_lossy() + ) + })? + .lock() + .change_branch(¤t_pick); + if status.is_err() { + this.delegate().display_error_toast(format!("Failed to checkout branch '{current_pick}', check for conflicts or unstashed files"), cx); + status?; + } + cx.emit(PickerEvent::Dismiss); - Ok::<(), anyhow::Error>(()) - }).log_err(); - }).detach(); + Ok::<(), anyhow::Error>(()) + }) + .log_err(); + }) + .detach(); } fn dismissed(&mut self, cx: &mut ViewContext>) { @@ -235,4 +244,60 @@ impl PickerDelegate for BranchListDelegate { }; Some(label.into_any()) } + fn render_footer( + &self, + cx: &mut ViewContext>, + ) -> Option>> { + if !self.last_query.is_empty() { + let theme = &theme::current(cx); + let style = theme.picker.footer.clone(); + enum BranchCreateButton {} + Some( + Flex::row().with_child(MouseEventHandler::::new(0, cx, |_, _| { + Label::new("Create branch", style.label.clone()) + .contained() + .with_style(style.container) + .aligned() + .right() + }) + .on_down(MouseButton::Left, |_, _, cx| { + cx.spawn(|picker, mut cx| async move { + picker.update(&mut cx, |this, cx| { + let project = this.delegate().workspace.read(cx).project().read(cx); + let 
current_pick = &this.delegate().last_query; + let mut cwd = project + .visible_worktrees(cx) + .next() + .ok_or_else(|| anyhow!("There are no visisible worktrees."))? + .read(cx) + .abs_path() + .to_path_buf(); + cwd.push(".git"); + let repo = project + .fs() + .open_repo(&cwd) + .ok_or_else(|| anyhow!("Could not open repository at path `{}`", cwd.as_os_str().to_string_lossy()))?; + let repo = repo + .lock(); + let status = repo + .create_branch(¤t_pick); + if status.is_err() { + this.delegate().display_error_toast(format!("Failed to create branch '{current_pick}', check for conflicts or unstashed files"), cx); + status?; + } + let status = repo.change_branch(¤t_pick); + if status.is_err() { + this.delegate().display_error_toast(format!("Failed to chec branch '{current_pick}', check for conflicts or unstashed files"), cx); + status?; + } + Ok::<(), anyhow::Error>(()) + }) + }).detach(); + })) + .into_any(), + ) + } else { + None + } + } } diff --git a/crates/fs/src/repository.rs b/crates/fs/src/repository.rs index 0e5fd8343f..ed9aa85a89 100644 --- a/crates/fs/src/repository.rs +++ b/crates/fs/src/repository.rs @@ -39,6 +39,9 @@ pub trait GitRepository: Send { fn change_branch(&self, _: &str) -> Result<()> { Ok(()) } + fn create_branch(&self, _: &str) -> Result<()> { + Ok(()) + } } impl std::fmt::Debug for dyn GitRepository { @@ -152,6 +155,12 @@ impl GitRepository for LibGitRepository { )?; Ok(()) } + fn create_branch(&self, name: &str) -> Result<()> { + let current_commit = self.head()?.peel_to_commit()?; + self.branch(name, ¤t_commit, false)?; + + Ok(()) + } } fn read_status(status: git2::Status) -> Option { From 4a69c711671b0fe826358b994199288cfbc0d860 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Fri, 7 Jul 2023 18:37:53 +0200 Subject: [PATCH 035/115] fixup! 
vcs: Add 'create branch' button --- crates/collab_ui/src/branch_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/collab_ui/src/branch_list.rs b/crates/collab_ui/src/branch_list.rs index e6f1504503..8772f88958 100644 --- a/crates/collab_ui/src/branch_list.rs +++ b/crates/collab_ui/src/branch_list.rs @@ -290,6 +290,7 @@ impl PickerDelegate for BranchListDelegate { this.delegate().display_error_toast(format!("Failed to chec branch '{current_pick}', check for conflicts or unstashed files"), cx); status?; } + cx.emit(PickerEvent::Dismiss); Ok::<(), anyhow::Error>(()) }) }).detach(); From 01897424979b51cd4f5cf52dd909c807e229324f Mon Sep 17 00:00:00 2001 From: KCaverly Date: Mon, 10 Jul 2023 10:06:07 -0400 Subject: [PATCH 036/115] pulled treesitter parsing to own file for ease of testing and management --- crates/vector_store/src/db.rs | 4 +- crates/vector_store/src/parsing.rs | 94 ++++++++++++++++++ crates/vector_store/src/vector_store.rs | 121 +++--------------------- 3 files changed, 110 insertions(+), 109 deletions(-) create mode 100644 crates/vector_store/src/parsing.rs diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index f822cca77e..4882db443b 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -7,7 +7,7 @@ use std::{ use anyhow::{anyhow, Result}; -use crate::IndexedFile; +use crate::parsing::ParsedFile; use rpc::proto::Timestamp; use rusqlite::{ params, @@ -109,7 +109,7 @@ impl VectorDatabase { Ok(()) } - pub fn insert_file(&self, worktree_id: i64, indexed_file: IndexedFile) -> Result<()> { + pub fn insert_file(&self, worktree_id: i64, indexed_file: ParsedFile) -> Result<()> { // Write to files table, and return generated id. 
self.db.execute( " diff --git a/crates/vector_store/src/parsing.rs b/crates/vector_store/src/parsing.rs new file mode 100644 index 0000000000..6a8742fedd --- /dev/null +++ b/crates/vector_store/src/parsing.rs @@ -0,0 +1,94 @@ +use std::{ops::Range, path::PathBuf, sync::Arc, time::SystemTime}; + +use anyhow::{anyhow, Ok, Result}; +use project::Fs; +use tree_sitter::{Parser, QueryCursor}; + +use crate::PendingFile; + +#[derive(Debug, PartialEq, Clone)] +pub struct Document { + pub offset: usize, + pub name: String, + pub embedding: Vec, +} + +#[derive(Debug, PartialEq, Clone)] +pub struct ParsedFile { + pub path: PathBuf, + pub mtime: SystemTime, + pub documents: Vec, +} + +pub struct CodeContextRetriever { + pub parser: Parser, + pub cursor: QueryCursor, + pub fs: Arc, +} + +impl CodeContextRetriever { + pub async fn parse_file( + &mut self, + pending_file: PendingFile, + ) -> Result<(ParsedFile, Vec)> { + let grammar = pending_file + .language + .grammar() + .ok_or_else(|| anyhow!("no grammar for language"))?; + let embedding_config = grammar + .embedding_config + .as_ref() + .ok_or_else(|| anyhow!("no embedding queries"))?; + + let content = self.fs.load(&pending_file.absolute_path).await?; + + self.parser.set_language(grammar.ts_language).unwrap(); + + let tree = self + .parser + .parse(&content, None) + .ok_or_else(|| anyhow!("parsing failed"))?; + + let mut documents = Vec::new(); + let mut context_spans = Vec::new(); + + // Iterate through query matches + for mat in self.cursor.matches( + &embedding_config.query, + tree.root_node(), + content.as_bytes(), + ) { + let mut item_range: Option> = None; + let mut name_range: Option> = None; + for capture in mat.captures { + if capture.index == embedding_config.item_capture_ix { + item_range = Some(capture.node.byte_range()); + } else if capture.index == embedding_config.name_capture_ix { + name_range = Some(capture.node.byte_range()); + } + } + + if let Some((item_range, name_range)) = item_range.zip(name_range) { + 
if let Some((item, name)) = + content.get(item_range.clone()).zip(content.get(name_range)) + { + context_spans.push(item.to_string()); + documents.push(Document { + name: name.to_string(), + offset: item_range.start, + embedding: Vec::new(), + }); + } + } + } + + return Ok(( + ParsedFile { + path: pending_file.relative_path, + mtime: pending_file.modified_time, + documents, + }, + context_spans, + )); + } +} diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index baab05bec2..92557fd801 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -1,6 +1,7 @@ mod db; mod embedding; mod modal; +mod parsing; #[cfg(test)] mod vector_store_tests; @@ -15,6 +16,7 @@ use gpui::{ }; use language::{Language, LanguageRegistry}; use modal::{SemanticSearch, SemanticSearchDelegate, Toggle}; +use parsing::{CodeContextRetriever, ParsedFile}; use project::{Fs, Project, WorktreeId}; use smol::channel; use std::{ @@ -38,13 +40,6 @@ use workspace::{Workspace, WorkspaceCreated}; const REINDEXING_DELAY_SECONDS: u64 = 3; const EMBEDDINGS_BATCH_SIZE: usize = 150; -#[derive(Debug, Clone)] -pub struct Document { - pub offset: usize, - pub name: String, - pub embedding: Vec, -} - pub fn init( fs: Arc, http_client: Arc, @@ -113,13 +108,6 @@ pub fn init( .detach(); } -#[derive(Debug, Clone)] -pub struct IndexedFile { - path: PathBuf, - mtime: SystemTime, - documents: Vec, -} - pub struct VectorStore { fs: Arc, database_url: Arc, @@ -182,7 +170,7 @@ impl ProjectState { } #[derive(Clone, Debug)] -struct PendingFile { +pub struct PendingFile { worktree_db_id: i64, relative_path: PathBuf, absolute_path: PathBuf, @@ -201,7 +189,7 @@ pub struct SearchResult { enum DbWrite { InsertFile { worktree_id: i64, - indexed_file: IndexedFile, + indexed_file: ParsedFile, }, Delete { worktree_id: i64, @@ -267,7 +255,7 @@ impl VectorStore { // embed_tx/rx: Embed Batch and Send to Database let (embed_batch_tx, embed_batch_rx) = 
- channel::unbounded::)>>(); + channel::unbounded::)>>(); let mut _embed_batch_task = Vec::new(); for _ in 0..1 { //cx.background().num_cpus() { @@ -324,13 +312,14 @@ impl VectorStore { // batch_tx/rx: Batch Files to Send for Embeddings let (batch_files_tx, batch_files_rx) = - channel::unbounded::<(i64, IndexedFile, Vec)>(); + channel::unbounded::<(i64, ParsedFile, Vec)>(); let _batch_files_task = cx.background().spawn(async move { let mut queue_len = 0; let mut embeddings_queue = vec![]; while let Ok((worktree_id, indexed_file, document_spans)) = batch_files_rx.recv().await { + dbg!("Batching in while loop"); queue_len += &document_spans.len(); embeddings_queue.push((worktree_id, indexed_file, document_spans)); if queue_len >= EMBEDDINGS_BATCH_SIZE { @@ -339,6 +328,7 @@ impl VectorStore { queue_len = 0; } } + // TODO: This is never getting called, We've gotta manage for how to clear the embedding batch if its less than the necessary batch size. if queue_len > 0 { embed_batch_tx.try_send(embeddings_queue).unwrap(); } @@ -353,21 +343,14 @@ impl VectorStore { let parsing_files_rx = parsing_files_rx.clone(); let batch_files_tx = batch_files_tx.clone(); _parsing_files_tasks.push(cx.background().spawn(async move { - let mut parser = Parser::new(); - let mut cursor = QueryCursor::new(); + let parser = Parser::new(); + let cursor = QueryCursor::new(); + let mut retriever = CodeContextRetriever { parser, cursor, fs }; while let Ok(pending_file) = parsing_files_rx.recv().await { log::info!("Parsing File: {:?}", &pending_file.relative_path); - if let Some((indexed_file, document_spans)) = Self::index_file( - &mut cursor, - &mut parser, - &fs, - pending_file.language, - pending_file.relative_path.clone(), - pending_file.absolute_path.clone(), - pending_file.modified_time, - ) - .await - .log_err() + + if let Some((indexed_file, document_spans)) = + retriever.parse_file(pending_file.clone()).await.log_err() { batch_files_tx .try_send(( @@ -397,82 +380,6 @@ impl VectorStore { 
})) } - async fn index_file( - cursor: &mut QueryCursor, - parser: &mut Parser, - fs: &Arc, - language: Arc, - relative_file_path: PathBuf, - absolute_file_path: PathBuf, - mtime: SystemTime, - ) -> Result<(IndexedFile, Vec)> { - let grammar = language.grammar().ok_or_else(|| anyhow!("no grammar"))?; - let embedding_config = grammar - .embedding_config - .as_ref() - .ok_or_else(|| anyhow!("no outline query"))?; - - let content = fs.load(&absolute_file_path).await?; - - parser.set_language(grammar.ts_language).unwrap(); - let tree = parser - .parse(&content, None) - .ok_or_else(|| anyhow!("parsing failed"))?; - - let mut documents = Vec::new(); - let mut context_spans = Vec::new(); - for mat in cursor.matches( - &embedding_config.query, - tree.root_node(), - content.as_bytes(), - ) { - let mut item_range = None; - let mut name_range = None; - let mut context_range = None; - for capture in mat.captures { - if capture.index == embedding_config.item_capture_ix { - item_range = Some(capture.node.byte_range()); - } else if capture.index == embedding_config.name_capture_ix { - name_range = Some(capture.node.byte_range()); - } - if let Some(context_capture_ix) = embedding_config.context_capture_ix { - if capture.index == context_capture_ix { - context_range = Some(capture.node.byte_range()); - } - } - } - - if let Some((item_range, name_range)) = item_range.zip(name_range) { - let mut context_data = String::new(); - if let Some(context_range) = context_range { - if let Some(context) = content.get(context_range.clone()) { - context_data.push_str(context); - } - } - - if let Some((item, name)) = - content.get(item_range.clone()).zip(content.get(name_range)) - { - context_spans.push(item.to_string()); - documents.push(Document { - name: format!("{} {}", context_data.to_string(), name.to_string()), - offset: item_range.start, - embedding: Vec::new(), - }); - } - } - } - - return Ok(( - IndexedFile { - path: relative_file_path, - mtime, - documents, - }, - context_spans, - )); 
- } - fn find_or_create_worktree(&self, path: PathBuf) -> impl Future> { let (tx, rx) = oneshot::channel(); self.db_update_tx From 92a0a4e3678b5d3bb2840bc3ba854cdaa16a141b Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Mon, 10 Jul 2023 16:51:18 +0200 Subject: [PATCH 037/115] Add styles for branch create button --- crates/collab_ui/src/branch_list.rs | 7 +++-- crates/theme/src/theme.rs | 2 +- styles/src/style_tree/picker.ts | 42 +++++++++++++++++++++++------ 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/crates/collab_ui/src/branch_list.rs b/crates/collab_ui/src/branch_list.rs index 8772f88958..9b06e54d4e 100644 --- a/crates/collab_ui/src/branch_list.rs +++ b/crates/collab_ui/src/branch_list.rs @@ -253,12 +253,11 @@ impl PickerDelegate for BranchListDelegate { let style = theme.picker.footer.clone(); enum BranchCreateButton {} Some( - Flex::row().with_child(MouseEventHandler::::new(0, cx, |_, _| { + Flex::row().with_child(MouseEventHandler::::new(0, cx, |state, _| { + let style = style.style_for(state); Label::new("Create branch", style.label.clone()) .contained() .with_style(style.container) - .aligned() - .right() }) .on_down(MouseButton::Left, |_, _, cx| { cx.spawn(|picker, mut cx| async move { @@ -294,7 +293,7 @@ impl PickerDelegate for BranchListDelegate { Ok::<(), anyhow::Error>(()) }) }).detach(); - })) + })).aligned().right() .into_any(), ) } else { diff --git a/crates/theme/src/theme.rs b/crates/theme/src/theme.rs index 1949a5d9bb..a47d97e002 100644 --- a/crates/theme/src/theme.rs +++ b/crates/theme/src/theme.rs @@ -586,7 +586,7 @@ pub struct Picker { pub no_matches: ContainedLabel, pub item: Toggleable>, pub header: ContainedLabel, - pub footer: ContainedLabel, + pub footer: Interactive, } #[derive(Clone, Debug, Deserialize, Default, JsonSchema)] diff --git a/styles/src/style_tree/picker.ts b/styles/src/style_tree/picker.ts index bbd664397f..b8817a25e9 100644 --- 
a/styles/src/style_tree/picker.ts +++ b/styles/src/style_tree/picker.ts @@ -119,14 +119,40 @@ export default function picker(): any { right: 8, }, }, - footer: { - text: text(theme.lowest, "sans", "variant", { size: "xs" }), - margin: { - top: 1, - left: 8, - right: 8, + footer: interactive({ + base: { + text: text(theme.lowest, "sans", "variant", { size: "xs" }), + padding: { + bottom: 4, + left: 12, + right: 12, + top: 4, + }, + margin: { + top: 1, + left: 4, + right: 4, + }, + corner_radius: 8, + background: with_opacity( + background(theme.lowest, "active"), + 0.5 + ), }, - - } + state: { + hovered: { + background: with_opacity( + background(theme.lowest, "hovered"), + 0.5 + ), + }, + clicked: { + background: with_opacity( + background(theme.lowest, "pressed"), + 0.5 + ), + }, + } + }), } } From e00e73f60848334006c2a42a1364291ba40b8e25 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Mon, 10 Jul 2023 17:18:12 +0200 Subject: [PATCH 038/115] branches: Add a modal branch list. Extract branch list into a separate vcs_menu crate akin to recent_projects. Add current bind for a modal branch to branch popover's tooltip. 
Z-2555 --- Cargo.lock | 14 ++++++++ Cargo.toml | 1 + assets/keymaps/default.json | 1 + assets/keymaps/textmate.json | 1 + crates/collab_ui/Cargo.toml | 1 + crates/collab_ui/src/collab_titlebar_item.rs | 9 ++--- crates/collab_ui/src/collab_ui.rs | 3 +- crates/vcs_menu/Cargo.toml | 16 +++++++++ .../branch_list.rs => vcs_menu/src/lib.rs} | 36 +++++++++++++++++-- 9 files changed, 71 insertions(+), 11 deletions(-) create mode 100644 crates/vcs_menu/Cargo.toml rename crates/{collab_ui/src/branch_list.rs => vcs_menu/src/lib.rs} (89%) diff --git a/Cargo.lock b/Cargo.lock index 60ed830683..ba24a756d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1491,6 +1491,7 @@ dependencies = [ "theme", "theme_selector", "util", + "vcs_menu", "workspace", "zed-actions", ] @@ -8377,6 +8378,19 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "vcs_menu" +version = "0.1.0" +dependencies = [ + "anyhow", + "fuzzy", + "gpui", + "picker", + "theme", + "util", + "workspace", +] + [[package]] name = "version_check" version = "0.9.4" diff --git a/Cargo.toml b/Cargo.toml index 1708ccfc0a..5757871962 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,6 +64,7 @@ members = [ "crates/theme_selector", "crates/util", "crates/vim", + "crates/vcs_menu", "crates/workspace", "crates/welcome", "crates/xtask", diff --git a/assets/keymaps/default.json b/assets/keymaps/default.json index 6fc06198fe..8c3a1f407c 100644 --- a/assets/keymaps/default.json +++ b/assets/keymaps/default.json @@ -39,6 +39,7 @@ "cmd-shift-n": "workspace::NewWindow", "cmd-o": "workspace::Open", "alt-cmd-o": "projects::OpenRecent", + "alt-cmd-b": "branches::OpenRecent", "ctrl-~": "workspace::NewTerminal", "ctrl-`": "terminal_panel::ToggleFocus", "shift-escape": "workspace::ToggleZoom" diff --git a/assets/keymaps/textmate.json b/assets/keymaps/textmate.json index 591d6e443f..1f28c05158 100644 --- 
a/assets/keymaps/textmate.json +++ b/assets/keymaps/textmate.json @@ -2,6 +2,7 @@ { "bindings": { "cmd-shift-o": "projects::OpenRecent", + "cmd-shift-b": "branches::OpenRecent", "cmd-alt-tab": "project_panel::ToggleFocus" } }, diff --git a/crates/collab_ui/Cargo.toml b/crates/collab_ui/Cargo.toml index f81885c07a..4a38c2691c 100644 --- a/crates/collab_ui/Cargo.toml +++ b/crates/collab_ui/Cargo.toml @@ -39,6 +39,7 @@ recent_projects = {path = "../recent_projects"} settings = { path = "../settings" } theme = { path = "../theme" } theme_selector = { path = "../theme_selector" } +vcs_menu = { path = "../vcs_menu" } util = { path = "../util" } workspace = { path = "../workspace" } zed-actions = {path = "../zed-actions"} diff --git a/crates/collab_ui/src/collab_titlebar_item.rs b/crates/collab_ui/src/collab_titlebar_item.rs index 57e3ea711d..6cfc9d8e30 100644 --- a/crates/collab_ui/src/collab_titlebar_item.rs +++ b/crates/collab_ui/src/collab_titlebar_item.rs @@ -1,8 +1,5 @@ use crate::{ - branch_list::{build_branch_list, BranchList}, - contact_notification::ContactNotification, - contacts_popover, - face_pile::FacePile, + contact_notification::ContactNotification, contacts_popover, face_pile::FacePile, toggle_deafen, toggle_mute, toggle_screen_sharing, LeaveCall, ToggleDeafen, ToggleMute, ToggleScreenSharing, }; @@ -27,6 +24,7 @@ use recent_projects::{build_recent_projects, RecentProjects}; use std::{ops::Range, sync::Arc}; use theme::{AvatarStyle, Theme}; use util::ResultExt; +use vcs_menu::{build_branch_list, BranchList, OpenRecent as ToggleVcsMenu}; use workspace::{FollowNextCollaborator, Workspace, WORKSPACE_DB}; const MAX_PROJECT_NAME_LENGTH: usize = 40; @@ -37,7 +35,6 @@ actions!( [ ToggleContactsMenu, ToggleUserMenu, - ToggleVcsMenu, ToggleProjectMenu, SwitchBranch, ShareProject, @@ -286,7 +283,7 @@ impl CollabTitlebarItem { .with_tooltip::( 0, "Recent branches".into(), - None, + Some(Box::new(ToggleVcsMenu)), theme.tooltip.clone(), cx, ) diff --git 
a/crates/collab_ui/src/collab_ui.rs b/crates/collab_ui/src/collab_ui.rs index 26d9c70a43..76f2e26571 100644 --- a/crates/collab_ui/src/collab_ui.rs +++ b/crates/collab_ui/src/collab_ui.rs @@ -1,4 +1,3 @@ -mod branch_list; mod collab_titlebar_item; mod contact_finder; mod contact_list; @@ -29,7 +28,7 @@ actions!( ); pub fn init(app_state: &Arc, cx: &mut AppContext) { - branch_list::init(cx); + vcs_menu::init(cx); collab_titlebar_item::init(cx); contact_list::init(cx); contact_finder::init(cx); diff --git a/crates/vcs_menu/Cargo.toml b/crates/vcs_menu/Cargo.toml new file mode 100644 index 0000000000..4ddf1214d0 --- /dev/null +++ b/crates/vcs_menu/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "vcs_menu" +version = "0.1.0" +edition = "2021" +publish = false +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +fuzzy = {path = "../fuzzy"} +gpui = {path = "../gpui"} +picker = {path = "../picker"} +util = {path = "../util"} +theme = {path = "../theme"} +workspace = {path = "../workspace"} + +anyhow.workspace = true diff --git a/crates/collab_ui/src/branch_list.rs b/crates/vcs_menu/src/lib.rs similarity index 89% rename from crates/collab_ui/src/branch_list.rs rename to crates/vcs_menu/src/lib.rs index 16fefbd2eb..b5b1036b36 100644 --- a/crates/collab_ui/src/branch_list.rs +++ b/crates/vcs_menu/src/lib.rs @@ -1,15 +1,17 @@ -use anyhow::{anyhow, bail}; +use anyhow::{anyhow, bail, Result}; use fuzzy::{StringMatch, StringMatchCandidate}; -use gpui::{elements::*, AppContext, MouseState, Task, ViewContext, ViewHandle}; +use gpui::{actions, elements::*, AppContext, MouseState, Task, ViewContext, ViewHandle}; use picker::{Picker, PickerDelegate, PickerEvent}; use std::{ops::Not, sync::Arc}; use util::ResultExt; use workspace::{Toast, Workspace}; +actions!(branches, [OpenRecent]); + pub fn init(cx: &mut AppContext) { Picker::::init(cx); + cx.add_async_action(toggle); } - pub type BranchList = Picker; pub fn 
build_branch_list( @@ -28,6 +30,34 @@ pub fn build_branch_list( .with_theme(|theme| theme.picker.clone()) } +fn toggle( + _: &mut Workspace, + _: &OpenRecent, + cx: &mut ViewContext, +) -> Option>> { + Some(cx.spawn(|workspace, mut cx| async move { + workspace.update(&mut cx, |workspace, cx| { + workspace.toggle_modal(cx, |_, cx| { + let workspace = cx.handle(); + cx.add_view(|cx| { + Picker::new( + BranchListDelegate { + matches: vec![], + workspace, + selected_index: 0, + last_query: String::default(), + }, + cx, + ) + .with_theme(|theme| theme.picker.clone()) + .with_max_size(800., 1200.) + }) + }); + })?; + Ok(()) + })) +} + pub struct BranchListDelegate { matches: Vec, workspace: ViewHandle, From a6d713eb3d476e56aec450d338167d3e6d25822d Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Mon, 10 Jul 2023 17:44:23 +0200 Subject: [PATCH 039/115] editor: Keep scrollbar up if there are selections Z-2556 --- crates/editor/src/element.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/editor/src/element.rs b/crates/editor/src/element.rs index e96f1efe92..bd662c039b 100644 --- a/crates/editor/src/element.rs +++ b/crates/editor/src/element.rs @@ -2149,6 +2149,9 @@ impl Element for EditorElement { ShowScrollbar::Auto => { // Git (is_singleton && scrollbar_settings.git_diff && snapshot.buffer_snapshot.has_git_diffs()) + || + // Selections + (is_singleton && scrollbar_settings.selections && !highlighted_ranges.is_empty()) // Scrollmanager || editor.scroll_manager.scrollbars_visible() } From 82079dd422613b98c8b1c6edfedaac1187ab2536 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Mon, 10 Jul 2023 16:33:14 -0400 Subject: [PATCH 040/115] Updated batching to accomodate for full flushes, and cleaned up reindexing. 
Co-authored-by: maxbrunsfeld --- crates/vector_store/src/embedding.rs | 4 +- crates/vector_store/src/vector_store.rs | 300 ++++++++++++------------ 2 files changed, 150 insertions(+), 154 deletions(-) diff --git a/crates/vector_store/src/embedding.rs b/crates/vector_store/src/embedding.rs index 029a6cdf61..ea349c8afa 100644 --- a/crates/vector_store/src/embedding.rs +++ b/crates/vector_store/src/embedding.rs @@ -1,6 +1,7 @@ use anyhow::{anyhow, Result}; use async_trait::async_trait; use futures::AsyncReadExt; +use gpui::executor::Background; use gpui::serde_json; use isahc::http::StatusCode; use isahc::prelude::Configurable; @@ -21,6 +22,7 @@ lazy_static! { #[derive(Clone)] pub struct OpenAIEmbeddings { pub client: Arc, + pub executor: Arc, } #[derive(Serialize)] @@ -128,7 +130,7 @@ impl EmbeddingProvider for OpenAIEmbeddings { match response.status() { StatusCode::TOO_MANY_REQUESTS => { let delay = Duration::from_secs(BACKOFF_SECONDS[request_number - 1] as u64); - std::thread::sleep(delay); + self.executor.timer(delay).await; } StatusCode::BAD_REQUEST => { log::info!("BAD REQUEST: {:?}", &response.status()); diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 92557fd801..c27c4992f3 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -17,14 +17,12 @@ use gpui::{ use language::{Language, LanguageRegistry}; use modal::{SemanticSearch, SemanticSearchDelegate, Toggle}; use parsing::{CodeContextRetriever, ParsedFile}; -use project::{Fs, Project, WorktreeId}; +use project::{Fs, PathChange, Project, ProjectEntryId, WorktreeId}; use smol::channel; use std::{ - cell::RefCell, cmp::Ordering, collections::HashMap, path::{Path, PathBuf}, - rc::Rc, sync::Arc, time::{Duration, Instant, SystemTime}, }; @@ -61,6 +59,7 @@ pub fn init( // Arc::new(embedding::DummyEmbeddings {}), Arc::new(OpenAIEmbeddings { client: http_client, + executor: cx.background(), }), language_registry, 
cx.clone(), @@ -119,7 +118,7 @@ pub struct VectorStore { _embed_batch_task: Vec>, _batch_files_task: Task<()>, _parsing_files_tasks: Vec>, - projects: HashMap, Rc>>, + projects: HashMap, ProjectState>, } struct ProjectState { @@ -201,6 +200,15 @@ enum DbWrite { }, } +enum EmbeddingJob { + Enqueue { + worktree_id: i64, + parsed_file: ParsedFile, + document_spans: Vec, + }, + Flush, +} + impl VectorStore { async fn new( fs: Arc, @@ -309,29 +317,32 @@ impl VectorStore { } })) } - // batch_tx/rx: Batch Files to Send for Embeddings - let (batch_files_tx, batch_files_rx) = - channel::unbounded::<(i64, ParsedFile, Vec)>(); + let (batch_files_tx, batch_files_rx) = channel::unbounded::(); let _batch_files_task = cx.background().spawn(async move { let mut queue_len = 0; let mut embeddings_queue = vec![]; - while let Ok((worktree_id, indexed_file, document_spans)) = - batch_files_rx.recv().await - { - dbg!("Batching in while loop"); - queue_len += &document_spans.len(); - embeddings_queue.push((worktree_id, indexed_file, document_spans)); - if queue_len >= EMBEDDINGS_BATCH_SIZE { + + while let Ok(job) = batch_files_rx.recv().await { + let should_flush = match job { + EmbeddingJob::Enqueue { + document_spans, + worktree_id, + parsed_file, + } => { + queue_len += &document_spans.len(); + embeddings_queue.push((worktree_id, parsed_file, document_spans)); + queue_len >= EMBEDDINGS_BATCH_SIZE + } + EmbeddingJob::Flush => true, + }; + + if should_flush { embed_batch_tx.try_send(embeddings_queue).unwrap(); embeddings_queue = vec![]; queue_len = 0; } } - // TODO: This is never getting called, We've gotta manage for how to clear the embedding batch if its less than the necessary batch size. 
- if queue_len > 0 { - embed_batch_tx.try_send(embeddings_queue).unwrap(); - } }); // parsing_files_tx/rx: Parsing Files to Embeddable Documents @@ -353,13 +364,17 @@ impl VectorStore { retriever.parse_file(pending_file.clone()).await.log_err() { batch_files_tx - .try_send(( - pending_file.worktree_db_id, - indexed_file, + .try_send(EmbeddingJob::Enqueue { + worktree_id: pending_file.worktree_db_id, + parsed_file: indexed_file, document_spans, - )) + }) .unwrap(); } + + if parsing_files_rx.len() == 0 { + batch_files_tx.try_send(EmbeddingJob::Flush).unwrap(); + } } })); } @@ -526,143 +541,18 @@ impl VectorStore { // Currently each time a file is saved, this code is run, and for all the files that were changed, if the current time is // greater than the previous embedded time by the REINDEXING_DELAY variable, we will send the file off to be indexed. let _subscription = cx.subscribe(&project, |this, project, event, cx| { - if let Some(project_state) = this.projects.get(&project.downgrade()) { - let mut project_state = project_state.borrow_mut(); - let worktree_db_ids = project_state.worktree_db_ids.clone(); - - if let project::Event::WorktreeUpdatedEntries(worktree_id, changes) = event - { - // Get Worktree Object - let worktree = - project.read(cx).worktree_for_id(worktree_id.clone(), cx); - if worktree.is_none() { - return; - } - let worktree = worktree.unwrap(); - - // Get Database - let db_values = { - if let Ok(db) = - VectorDatabase::new(this.database_url.to_string_lossy().into()) - { - let worktree_db_id: Option = { - let mut found_db_id = None; - for (w_id, db_id) in worktree_db_ids.into_iter() { - if &w_id == &worktree.read(cx).id() { - found_db_id = Some(db_id) - } - } - found_db_id - }; - if worktree_db_id.is_none() { - return; - } - let worktree_db_id = worktree_db_id.unwrap(); - - let file_mtimes = db.get_file_mtimes(worktree_db_id); - if file_mtimes.is_err() { - return; - } - - let file_mtimes = file_mtimes.unwrap(); - Some((file_mtimes, worktree_db_id)) 
- } else { - return; - } - }; - - if db_values.is_none() { - return; - } - - let (file_mtimes, worktree_db_id) = db_values.unwrap(); - - // Iterate Through Changes - let language_registry = this.language_registry.clone(); - let parsing_files_tx = this.parsing_files_tx.clone(); - - smol::block_on(async move { - for change in changes.into_iter() { - let change_path = change.0.clone(); - let absolute_path = worktree.read(cx).absolutize(&change_path); - // Skip if git ignored or symlink - if let Some(entry) = worktree.read(cx).entry_for_id(change.1) { - if entry.is_ignored || entry.is_symlink { - continue; - } else { - log::info!( - "Testing for Reindexing: {:?}", - &change_path - ); - } - }; - - if let Ok(language) = language_registry - .language_for_file(&change_path.to_path_buf(), None) - .await - { - if language - .grammar() - .and_then(|grammar| grammar.embedding_config.as_ref()) - .is_none() - { - continue; - } - - if let Some(modified_time) = { - let metadata = change_path.metadata(); - if metadata.is_err() { - None - } else { - let mtime = metadata.unwrap().modified(); - if mtime.is_err() { - None - } else { - Some(mtime.unwrap()) - } - } - } { - let existing_time = - file_mtimes.get(&change_path.to_path_buf()); - let already_stored = existing_time - .map_or(false, |existing_time| { - &modified_time != existing_time - }); - - let reindex_time = modified_time - + Duration::from_secs(REINDEXING_DELAY_SECONDS); - - if !already_stored { - project_state.update_pending_files( - PendingFile { - relative_path: change_path.to_path_buf(), - absolute_path, - modified_time, - worktree_db_id, - language: language.clone(), - }, - reindex_time, - ); - - for file in project_state.get_outstanding_files() { - parsing_files_tx.try_send(file).unwrap(); - } - } - } - } - } - }); - }; + if let project::Event::WorktreeUpdatedEntries(worktree_id, changes) = event { + this.project_entries_changed(project, changes, cx, worktree_id); } }); this.projects.insert( project.downgrade(), - 
Rc::new(RefCell::new(ProjectState { + ProjectState { pending_files: HashMap::new(), worktree_db_ids: db_ids_by_worktree_id.into_iter().collect(), _subscription, - })), + }, ); }); @@ -678,7 +568,7 @@ impl VectorStore { cx: &mut ModelContext, ) -> Task>> { let project_state = if let Some(state) = self.projects.get(&project.downgrade()) { - state.borrow() + state } else { return Task::ready(Err(anyhow!("project not added"))); }; @@ -736,7 +626,7 @@ impl VectorStore { this.read_with(&cx, |this, _| { let project_state = if let Some(state) = this.projects.get(&project.downgrade()) { - state.borrow() + state } else { return Err(anyhow!("project not added")); }; @@ -766,6 +656,110 @@ impl VectorStore { }) }) } + + fn project_entries_changed( + &mut self, + project: ModelHandle, + changes: &[(Arc, ProjectEntryId, PathChange)], + cx: &mut ModelContext<'_, VectorStore>, + worktree_id: &WorktreeId, + ) -> Option<()> { + let project_state = self.projects.get_mut(&project.downgrade())?; + let worktree_db_ids = project_state.worktree_db_ids.clone(); + let worktree = project.read(cx).worktree_for_id(worktree_id.clone(), cx)?; + + // Get Database + let (file_mtimes, worktree_db_id) = { + if let Ok(db) = VectorDatabase::new(self.database_url.to_string_lossy().into()) { + let worktree_db_id = { + let mut found_db_id = None; + for (w_id, db_id) in worktree_db_ids.into_iter() { + if &w_id == &worktree.read(cx).id() { + found_db_id = Some(db_id) + } + } + found_db_id + }?; + + let file_mtimes = db.get_file_mtimes(worktree_db_id).log_err()?; + + Some((file_mtimes, worktree_db_id)) + } else { + return None; + } + }?; + + // Iterate Through Changes + let language_registry = self.language_registry.clone(); + let parsing_files_tx = self.parsing_files_tx.clone(); + + smol::block_on(async move { + for change in changes.into_iter() { + let change_path = change.0.clone(); + let absolute_path = worktree.read(cx).absolutize(&change_path); + // Skip if git ignored or symlink + if let Some(entry) = 
worktree.read(cx).entry_for_id(change.1) { + if entry.is_ignored || entry.is_symlink { + continue; + } else { + log::info!("Testing for Reindexing: {:?}", &change_path); + } + }; + + if let Ok(language) = language_registry + .language_for_file(&change_path.to_path_buf(), None) + .await + { + if language + .grammar() + .and_then(|grammar| grammar.embedding_config.as_ref()) + .is_none() + { + continue; + } + + if let Some(modified_time) = { + let metadata = change_path.metadata(); + if metadata.is_err() { + None + } else { + let mtime = metadata.unwrap().modified(); + if mtime.is_err() { + None + } else { + Some(mtime.unwrap()) + } + } + } { + let existing_time = file_mtimes.get(&change_path.to_path_buf()); + let already_stored = existing_time + .map_or(false, |existing_time| &modified_time != existing_time); + + let reindex_time = + modified_time + Duration::from_secs(REINDEXING_DELAY_SECONDS); + + if !already_stored { + project_state.update_pending_files( + PendingFile { + relative_path: change_path.to_path_buf(), + absolute_path, + modified_time, + worktree_db_id, + language: language.clone(), + }, + reindex_time, + ); + + for file in project_state.get_outstanding_files() { + parsing_files_tx.try_send(file).unwrap(); + } + } + } + } + } + }); + Some(()) + } } impl Entity for VectorStore { From 307d8d9c8d26ecaf4ecd2a3bddf58ec00be7a666 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Mon, 10 Jul 2023 17:50:19 -0400 Subject: [PATCH 041/115] Reduced redundant database connections on each worktree change. 
Co-authored-by: maxbrunsfeld --- crates/vector_store/src/db.rs | 78 +++++-- crates/vector_store/src/vector_store.rs | 282 ++++++++++-------------- 2 files changed, 182 insertions(+), 178 deletions(-) diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index 4882db443b..197e7d5696 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -1,4 +1,5 @@ use std::{ + cmp::Ordering, collections::HashMap, path::{Path, PathBuf}, rc::Rc, @@ -14,16 +15,6 @@ use rusqlite::{ types::{FromSql, FromSqlResult, ValueRef}, }; -// Note this is not an appropriate document -#[derive(Debug)] -pub struct DocumentRecord { - pub id: usize, - pub file_id: usize, - pub offset: usize, - pub name: String, - pub embedding: Embedding, -} - #[derive(Debug)] pub struct FileRecord { pub id: usize, @@ -32,7 +23,7 @@ pub struct FileRecord { } #[derive(Debug)] -pub struct Embedding(pub Vec); +struct Embedding(pub Vec); impl FromSql for Embedding { fn column_result(value: ValueRef) -> FromSqlResult { @@ -205,10 +196,35 @@ impl VectorDatabase { Ok(result) } - pub fn for_each_document( + pub fn top_k_search( &self, worktree_ids: &[i64], - mut f: impl FnMut(i64, Embedding), + query_embedding: &Vec, + limit: usize, + ) -> Result> { + let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1); + self.for_each_document(&worktree_ids, |id, embedding| { + eprintln!("document {id} {embedding:?}"); + + let similarity = dot(&embedding, &query_embedding); + let ix = match results + .binary_search_by(|(_, s)| similarity.partial_cmp(&s).unwrap_or(Ordering::Equal)) + { + Ok(ix) => ix, + Err(ix) => ix, + }; + results.insert(ix, (id, similarity)); + results.truncate(limit); + })?; + + let ids = results.into_iter().map(|(id, _)| id).collect::>(); + self.get_documents_by_ids(&ids) + } + + fn for_each_document( + &self, + worktree_ids: &[i64], + mut f: impl FnMut(i64, Vec), ) -> Result<()> { let mut query_statement = self.db.prepare( " @@ -221,16 +237,20 @@ impl 
VectorDatabase { files.worktree_id IN rarray(?) ", )?; + query_statement .query_map(params![ids_to_sql(worktree_ids)], |row| { - Ok((row.get(0)?, row.get(1)?)) + Ok((row.get(0)?, row.get::<_, Embedding>(1)?)) })? .filter_map(|row| row.ok()) - .for_each(|row| f(row.0, row.1)); + .for_each(|(id, embedding)| { + dbg!("id"); + f(id, embedding.0) + }); Ok(()) } - pub fn get_documents_by_ids(&self, ids: &[i64]) -> Result> { + fn get_documents_by_ids(&self, ids: &[i64]) -> Result> { let mut statement = self.db.prepare( " SELECT @@ -279,3 +299,29 @@ fn ids_to_sql(ids: &[i64]) -> Rc> { .collect::>(), ) } + +pub(crate) fn dot(vec_a: &[f32], vec_b: &[f32]) -> f32 { + let len = vec_a.len(); + assert_eq!(len, vec_b.len()); + + let mut result = 0.0; + unsafe { + matrixmultiply::sgemm( + 1, + len, + 1, + 1.0, + vec_a.as_ptr(), + len as isize, + 1, + vec_b.as_ptr(), + 1, + len as isize, + 0.0, + &mut result as *mut f32, + 1, + 1, + ); + } + result +} diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index c27c4992f3..c42b7ab129 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -20,7 +20,6 @@ use parsing::{CodeContextRetriever, ParsedFile}; use project::{Fs, PathChange, Project, ProjectEntryId, WorktreeId}; use smol::channel; use std::{ - cmp::Ordering, collections::HashMap, path::{Path, PathBuf}, sync::Arc, @@ -112,10 +111,10 @@ pub struct VectorStore { database_url: Arc, embedding_provider: Arc, language_registry: Arc, - db_update_tx: channel::Sender, + db_update_tx: channel::Sender, parsing_files_tx: channel::Sender, _db_update_task: Task<()>, - _embed_batch_task: Vec>, + _embed_batch_task: Task<()>, _batch_files_task: Task<()>, _parsing_files_tasks: Vec>, projects: HashMap, ProjectState>, @@ -128,6 +127,30 @@ struct ProjectState { } impl ProjectState { + fn db_id_for_worktree_id(&self, id: WorktreeId) -> Option { + self.worktree_db_ids + .iter() + .find_map(|(worktree_id, db_id)| { 
+ if *worktree_id == id { + Some(*db_id) + } else { + None + } + }) + } + + fn worktree_id_for_db_id(&self, id: i64) -> Option { + self.worktree_db_ids + .iter() + .find_map(|(worktree_id, db_id)| { + if *db_id == id { + Some(*worktree_id) + } else { + None + } + }) + } + fn update_pending_files(&mut self, pending_file: PendingFile, indexing_time: SystemTime) { // If Pending File Already Exists, Replace it with the new one // but keep the old indexing time @@ -185,7 +208,7 @@ pub struct SearchResult { pub file_path: PathBuf, } -enum DbWrite { +enum DbOperation { InsertFile { worktree_id: i64, indexed_file: ParsedFile, @@ -198,6 +221,10 @@ enum DbWrite { path: PathBuf, sender: oneshot::Sender>, }, + FileMTimes { + worktree_id: i64, + sender: oneshot::Sender>>, + }, } enum EmbeddingJob { @@ -243,20 +270,27 @@ impl VectorStore { let _db_update_task = cx.background().spawn(async move { while let Ok(job) = db_update_rx.recv().await { match job { - DbWrite::InsertFile { + DbOperation::InsertFile { worktree_id, indexed_file, } => { log::info!("Inserting Data for {:?}", &indexed_file.path); db.insert_file(worktree_id, indexed_file).log_err(); } - DbWrite::Delete { worktree_id, path } => { + DbOperation::Delete { worktree_id, path } => { db.delete_file(worktree_id, path).log_err(); } - DbWrite::FindOrCreateWorktree { path, sender } => { + DbOperation::FindOrCreateWorktree { path, sender } => { let id = db.find_or_create_worktree(&path); sender.send(id).ok(); } + DbOperation::FileMTimes { + worktree_id: worktree_db_id, + sender, + } => { + let file_mtimes = db.get_file_mtimes(worktree_db_id); + sender.send(file_mtimes).ok(); + } } } }); @@ -264,24 +298,18 @@ impl VectorStore { // embed_tx/rx: Embed Batch and Send to Database let (embed_batch_tx, embed_batch_rx) = channel::unbounded::)>>(); - let mut _embed_batch_task = Vec::new(); - for _ in 0..1 { - //cx.background().num_cpus() { + let _embed_batch_task = cx.background().spawn({ let db_update_tx = db_update_tx.clone(); - 
let embed_batch_rx = embed_batch_rx.clone(); let embedding_provider = embedding_provider.clone(); - _embed_batch_task.push(cx.background().spawn(async move { - while let Ok(embeddings_queue) = embed_batch_rx.recv().await { + async move { + while let Ok(mut embeddings_queue) = embed_batch_rx.recv().await { // Construct Batch - let mut embeddings_queue = embeddings_queue.clone(); let mut document_spans = vec![]; - for (_, _, document_span) in embeddings_queue.clone().into_iter() { - document_spans.extend(document_span); + for (_, _, document_span) in embeddings_queue.iter() { + document_spans.extend(document_span.iter().map(|s| s.as_str())); } - if let Ok(embeddings) = embedding_provider - .embed_batch(document_spans.iter().map(|x| &**x).collect()) - .await + if let Ok(embeddings) = embedding_provider.embed_batch(document_spans).await { let mut i = 0; let mut j = 0; @@ -306,7 +334,7 @@ impl VectorStore { } db_update_tx - .send(DbWrite::InsertFile { + .send(DbOperation::InsertFile { worktree_id, indexed_file, }) @@ -315,8 +343,9 @@ impl VectorStore { } } } - })) - } + } + }); + // batch_tx/rx: Batch Files to Send for Embeddings let (batch_files_tx, batch_files_rx) = channel::unbounded::(); let _batch_files_task = cx.background().spawn(async move { @@ -398,7 +427,21 @@ impl VectorStore { fn find_or_create_worktree(&self, path: PathBuf) -> impl Future> { let (tx, rx) = oneshot::channel(); self.db_update_tx - .try_send(DbWrite::FindOrCreateWorktree { path, sender: tx }) + .try_send(DbOperation::FindOrCreateWorktree { path, sender: tx }) + .unwrap(); + async move { rx.await? } + } + + fn get_file_mtimes( + &self, + worktree_id: i64, + ) -> impl Future>> { + let (tx, rx) = oneshot::channel(); + self.db_update_tx + .try_send(DbOperation::FileMTimes { + worktree_id, + sender: tx, + }) .unwrap(); async move { rx.await? 
} } @@ -450,26 +493,17 @@ impl VectorStore { .collect::>() }); - // Here we query the worktree ids, and yet we dont have them elsewhere - // We likely want to clean up these datastructures - let (mut worktree_file_times, db_ids_by_worktree_id) = cx - .background() - .spawn({ - let worktrees = worktrees.clone(); - async move { - let db = VectorDatabase::new(database_url.to_string_lossy().into())?; - let mut db_ids_by_worktree_id = HashMap::new(); - let mut file_times: HashMap> = - HashMap::new(); - for (worktree, db_id) in worktrees.iter().zip(worktree_db_ids) { - let db_id = db_id?; - db_ids_by_worktree_id.insert(worktree.id(), db_id); - file_times.insert(worktree.id(), db.get_file_mtimes(db_id)?); - } - anyhow::Ok((file_times, db_ids_by_worktree_id)) - } - }) - .await?; + let mut worktree_file_times = HashMap::new(); + let mut db_ids_by_worktree_id = HashMap::new(); + for (worktree, db_id) in worktrees.iter().zip(worktree_db_ids) { + let db_id = db_id?; + db_ids_by_worktree_id.insert(worktree.id(), db_id); + worktree_file_times.insert( + worktree.id(), + this.read_with(&cx, |this, _| this.get_file_mtimes(db_id)) + .await?, + ); + } cx.background() .spawn({ @@ -520,7 +554,7 @@ impl VectorStore { } for file in file_mtimes.keys() { db_update_tx - .try_send(DbWrite::Delete { + .try_send(DbOperation::Delete { worktree_id: db_ids_by_worktree_id[&worktree.id()], path: file.to_owned(), }) @@ -542,7 +576,7 @@ impl VectorStore { // greater than the previous embedded time by the REINDEXING_DELAY variable, we will send the file off to be indexed. 
let _subscription = cx.subscribe(&project, |this, project, event, cx| { if let project::Event::WorktreeUpdatedEntries(worktree_id, changes) = event { - this.project_entries_changed(project, changes, cx, worktree_id); + this.project_entries_changed(project, changes.clone(), cx, worktree_id); } }); @@ -578,16 +612,7 @@ impl VectorStore { .worktrees(cx) .filter_map(|worktree| { let worktree_id = worktree.read(cx).id(); - project_state - .worktree_db_ids - .iter() - .find_map(|(id, db_id)| { - if *id == worktree_id { - Some(*db_id) - } else { - None - } - }) + project_state.db_id_for_worktree_id(worktree_id) }) .collect::>(); @@ -606,24 +631,12 @@ impl VectorStore { .next() .unwrap(); - let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1); - database.for_each_document(&worktree_db_ids, |id, embedding| { - let similarity = dot(&embedding.0, &phrase_embedding); - let ix = match results.binary_search_by(|(_, s)| { - similarity.partial_cmp(&s).unwrap_or(Ordering::Equal) - }) { - Ok(ix) => ix, - Err(ix) => ix, - }; - results.insert(ix, (id, similarity)); - results.truncate(limit); - })?; - - let ids = results.into_iter().map(|(id, _)| id).collect::>(); - database.get_documents_by_ids(&ids) + database.top_k_search(&worktree_db_ids, &phrase_embedding, limit) }) .await?; + dbg!(&documents); + this.read_with(&cx, |this, _| { let project_state = if let Some(state) = this.projects.get(&project.downgrade()) { state @@ -634,17 +647,7 @@ impl VectorStore { Ok(documents .into_iter() .filter_map(|(worktree_db_id, file_path, offset, name)| { - let worktree_id = - project_state - .worktree_db_ids - .iter() - .find_map(|(id, db_id)| { - if *db_id == worktree_db_id { - Some(*id) - } else { - None - } - })?; + let worktree_id = project_state.worktree_id_for_db_id(worktree_db_id)?; Some(SearchResult { worktree_id, name, @@ -660,51 +663,36 @@ impl VectorStore { fn project_entries_changed( &mut self, project: ModelHandle, - changes: &[(Arc, ProjectEntryId, PathChange)], + changes: 
Arc<[(Arc, ProjectEntryId, PathChange)]>, cx: &mut ModelContext<'_, VectorStore>, worktree_id: &WorktreeId, ) -> Option<()> { - let project_state = self.projects.get_mut(&project.downgrade())?; - let worktree_db_ids = project_state.worktree_db_ids.clone(); - let worktree = project.read(cx).worktree_for_id(worktree_id.clone(), cx)?; + let worktree = project + .read(cx) + .worktree_for_id(worktree_id.clone(), cx)? + .read(cx) + .snapshot(); - // Get Database - let (file_mtimes, worktree_db_id) = { - if let Ok(db) = VectorDatabase::new(self.database_url.to_string_lossy().into()) { - let worktree_db_id = { - let mut found_db_id = None; - for (w_id, db_id) in worktree_db_ids.into_iter() { - if &w_id == &worktree.read(cx).id() { - found_db_id = Some(db_id) - } - } - found_db_id - }?; + let worktree_db_id = self + .projects + .get(&project.downgrade())? + .db_id_for_worktree_id(worktree.id())?; + let file_mtimes = self.get_file_mtimes(worktree_db_id); - let file_mtimes = db.get_file_mtimes(worktree_db_id).log_err()?; - - Some((file_mtimes, worktree_db_id)) - } else { - return None; - } - }?; - - // Iterate Through Changes let language_registry = self.language_registry.clone(); - let parsing_files_tx = self.parsing_files_tx.clone(); - smol::block_on(async move { + cx.spawn(|this, mut cx| async move { + let file_mtimes = file_mtimes.await.log_err()?; + for change in changes.into_iter() { let change_path = change.0.clone(); - let absolute_path = worktree.read(cx).absolutize(&change_path); + let absolute_path = worktree.absolutize(&change_path); // Skip if git ignored or symlink - if let Some(entry) = worktree.read(cx).entry_for_id(change.1) { - if entry.is_ignored || entry.is_symlink { + if let Some(entry) = worktree.entry_for_id(change.1) { + if entry.is_ignored || entry.is_symlink || entry.is_external { continue; - } else { - log::info!("Testing for Reindexing: {:?}", &change_path); } - }; + } if let Ok(language) = language_registry 
.language_for_file(&change_path.to_path_buf(), None) @@ -718,27 +706,18 @@ impl VectorStore { continue; } - if let Some(modified_time) = { - let metadata = change_path.metadata(); - if metadata.is_err() { - None - } else { - let mtime = metadata.unwrap().modified(); - if mtime.is_err() { - None - } else { - Some(mtime.unwrap()) - } - } - } { - let existing_time = file_mtimes.get(&change_path.to_path_buf()); - let already_stored = existing_time - .map_or(false, |existing_time| &modified_time != existing_time); + let modified_time = change_path.metadata().log_err()?.modified().log_err()?; - let reindex_time = - modified_time + Duration::from_secs(REINDEXING_DELAY_SECONDS); + let existing_time = file_mtimes.get(&change_path.to_path_buf()); + let already_stored = existing_time + .map_or(false, |existing_time| &modified_time != existing_time); - if !already_stored { + if !already_stored { + this.update(&mut cx, |this, _| { + let reindex_time = + modified_time + Duration::from_secs(REINDEXING_DELAY_SECONDS); + + let project_state = this.projects.get_mut(&project.downgrade())?; project_state.update_pending_files( PendingFile { relative_path: change_path.to_path_buf(), @@ -751,13 +730,18 @@ impl VectorStore { ); for file in project_state.get_outstanding_files() { - parsing_files_tx.try_send(file).unwrap(); + this.parsing_files_tx.try_send(file).unwrap(); } - } + Some(()) + }); } } } - }); + + Some(()) + }) + .detach(); + Some(()) } } @@ -765,29 +749,3 @@ impl VectorStore { impl Entity for VectorStore { type Event = (); } - -fn dot(vec_a: &[f32], vec_b: &[f32]) -> f32 { - let len = vec_a.len(); - assert_eq!(len, vec_b.len()); - - let mut result = 0.0; - unsafe { - matrixmultiply::sgemm( - 1, - len, - 1, - 1.0, - vec_a.as_ptr(), - len as isize, - 1, - vec_b.as_ptr(), - 1, - len as isize, - 0.0, - &mut result as *mut f32, - 1, - 1, - ); - } - result -} From dce72a1ce71ddf16b0d900e4d673fee49204888a Mon Sep 17 00:00:00 2001 From: KCaverly Date: Mon, 10 Jul 2023 18:19:29 -0400 
Subject: [PATCH 042/115] updated tests to accommodate new dot location --- crates/vector_store/src/vector_store_tests.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs index e25b737b06..b1756b7964 100644 --- a/crates/vector_store/src/vector_store_tests.rs +++ b/crates/vector_store/src/vector_store_tests.rs @@ -1,6 +1,4 @@ -use std::sync::Arc; - -use crate::{dot, embedding::EmbeddingProvider, VectorStore}; +use crate::{db::dot, embedding::EmbeddingProvider, VectorStore}; use anyhow::Result; use async_trait::async_trait; use gpui::{Task, TestAppContext}; @@ -8,6 +6,7 @@ use language::{Language, LanguageConfig, LanguageRegistry}; use project::{FakeFs, Project}; use rand::Rng; use serde_json::json; +use std::sync::Arc; use unindent::Unindent; #[gpui::test] From 4f606798610e513924a3bd055b90f4a0de378b6b Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Mon, 10 Jul 2023 22:51:04 +0200 Subject: [PATCH 043/115] Highlight only search results --- crates/editor/src/editor.rs | 41 ++++++++++++++++++++++++++++++++++++ crates/editor/src/element.rs | 12 ++++++----- crates/editor/src/items.rs | 2 +- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index 8d7b8ffad6..98fb887ffd 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -7222,6 +7222,47 @@ impl Editor { } results } + pub fn background_highlights_in_range_for_key( + &self, + search_range: Range, + display_snapshot: &DisplaySnapshot, + theme: &Theme, + ) -> Vec<(Range, Color)> { + let mut results = Vec::new(); + let buffer = &display_snapshot.buffer_snapshot; + let Some((color_fetcher, ranges)) = self.background_highlights + .get(&TypeId::of::()) else { + return vec![]; + }; + + let color = color_fetcher(theme); + let start_ix = match ranges.binary_search_by(|probe|
{ + let cmp = probe.end.cmp(&search_range.start, buffer); + if cmp.is_gt() { + Ordering::Greater + } else { + Ordering::Less + } + }) { + Ok(i) | Err(i) => i, + }; + for range in &ranges[start_ix..] { + if range.start.cmp(&search_range.end, buffer).is_ge() { + break; + } + let start = range + .start + .to_point(buffer) + .to_display_point(display_snapshot); + let end = range + .end + .to_point(buffer) + .to_display_point(display_snapshot); + results.push((start..end, color)) + } + + results + } pub fn highlight_text( &mut self, diff --git a/crates/editor/src/element.rs b/crates/editor/src/element.rs index e96f1efe92..e79110c81e 100644 --- a/crates/editor/src/element.rs +++ b/crates/editor/src/element.rs @@ -1086,11 +1086,13 @@ impl EditorElement { }) } }; - for (row, _) in &editor.background_highlights_in_range( - start_anchor..end_anchor, - &layout.position_map.snapshot, - &theme, - ) { + for (row, _) in &editor + .background_highlights_in_range_for_key::( + start_anchor..end_anchor, + &layout.position_map.snapshot, + &theme, + ) + { let start_display = row.start; let end_display = row.end; diff --git a/crates/editor/src/items.rs b/crates/editor/src/items.rs index 74b8e0ddb6..431ccf0bfe 100644 --- a/crates/editor/src/items.rs +++ b/crates/editor/src/items.rs @@ -883,7 +883,7 @@ impl ProjectItem for Editor { } } -enum BufferSearchHighlights {} +pub(crate) enum BufferSearchHighlights {} impl SearchableItem for Editor { type Match = Range; From e83afdc5abcea6cd3a78c6bd0a41a9c6c1de2360 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Tue, 11 Jul 2023 09:31:08 +0200 Subject: [PATCH 044/115] Rename background_highlights_in_range_for_key to background_highlights_in_range_for --- crates/editor/src/editor.rs | 2 +- crates/editor/src/element.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index 98fb887ffd..28edd2a460 100644 --- 
a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -7222,7 +7222,7 @@ impl Editor { } results } - pub fn background_highlights_in_range_for_key( + pub fn background_highlights_in_range_for( &self, search_range: Range, display_snapshot: &DisplaySnapshot, diff --git a/crates/editor/src/element.rs b/crates/editor/src/element.rs index e79110c81e..c07d18767c 100644 --- a/crates/editor/src/element.rs +++ b/crates/editor/src/element.rs @@ -1087,7 +1087,7 @@ impl EditorElement { } }; for (row, _) in &editor - .background_highlights_in_range_for_key::( + .background_highlights_in_range_for::( start_anchor..end_anchor, &layout.position_map.snapshot, &theme, From f164eb5289109699077bcef7d99bce9dac005c30 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Tue, 11 Jul 2023 15:23:17 +0200 Subject: [PATCH 045/115] recent_projects: Perform fuzzy search on compacted paths. Match highlighting for recent projects picker was off, because the path representation was compacted - for a path '/Users/hiro/Projects/zed' we compact it to use a tilde instead of home directory. However, the highlight positions were always calculated for a full path, leading to a mismatch in highlights. This commit addresses this by running fuzzy search on compacted paths instead of using long paths. This might lead to a slight performance hit, but given that recent projects modal shouldn't have that many items in the first place, it should be okay. 
Z-2546 --- crates/recent_projects/src/recent_projects.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/recent_projects/src/recent_projects.rs b/crates/recent_projects/src/recent_projects.rs index 4ba6103167..f04dab9edc 100644 --- a/crates/recent_projects/src/recent_projects.rs +++ b/crates/recent_projects/src/recent_projects.rs @@ -134,7 +134,10 @@ impl PickerDelegate for RecentProjectsDelegate { let combined_string = location .paths() .iter() - .map(|path| path.to_string_lossy().to_owned()) + .map(|path| { + let compact = util::paths::compact(&path); + compact.to_string_lossy().into_owned() + }) .collect::>() .join(""); StringMatchCandidate::new(id, combined_string) From 15010e94fda315d5743f0e572563fc2357a18de2 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Tue, 11 Jul 2023 15:29:15 +0200 Subject: [PATCH 046/115] fixup! recent_projects: Perform fuzzy search on compacted paths. --- crates/recent_projects/src/recent_projects.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/crates/recent_projects/src/recent_projects.rs b/crates/recent_projects/src/recent_projects.rs index f04dab9edc..cd512f1e57 100644 --- a/crates/recent_projects/src/recent_projects.rs +++ b/crates/recent_projects/src/recent_projects.rs @@ -134,10 +134,7 @@ impl PickerDelegate for RecentProjectsDelegate { let combined_string = location .paths() .iter() - .map(|path| { - let compact = util::paths::compact(&path); - compact.to_string_lossy().into_owned() - }) + .map(|path| util::paths::compact(&path).to_string_lossy().into_owned()) .collect::>() .join(""); StringMatchCandidate::new(id, combined_string) From 91832c8cd8de4743a5c8dad87005a67d9601d7e5 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Tue, 11 Jul 2023 13:20:02 +0300 Subject: [PATCH 047/115] Fix language servers improper restarts Language servers mixed `initialization_options` from hardcodes and user settings, fix that to ensure we 
restart servers on their settings changes only. --- crates/language/src/language.rs | 28 +++++++++++++++++++++++++++- crates/project/src/project.rs | 17 +++++------------ 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index e8450344b8..d186bf630d 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -90,7 +90,8 @@ pub struct LanguageServerName(pub Arc); /// once at startup, and caches the results. pub struct CachedLspAdapter { pub name: LanguageServerName, - pub initialization_options: Option, + initialization_options: Option, + initialization_overrides: Mutex>, pub disk_based_diagnostic_sources: Vec, pub disk_based_diagnostics_progress_token: Option, pub language_ids: HashMap, @@ -109,6 +110,7 @@ impl CachedLspAdapter { Arc::new(CachedLspAdapter { name, initialization_options, + initialization_overrides: Mutex::new(None), disk_based_diagnostic_sources, disk_based_diagnostics_progress_token, language_ids, @@ -208,6 +210,30 @@ impl CachedLspAdapter { ) -> Option { self.adapter.label_for_symbol(name, kind, language).await } + + pub fn update_initialization_overrides(&self, new: Option<&Value>) -> bool { + let mut current = self.initialization_overrides.lock(); + if current.as_ref() != new { + *current = new.cloned(); + true + } else { + false + } + } + + pub fn initialization_options(&self) -> Option { + let initialization_options = self.initialization_options.as_ref(); + let override_options = self.initialization_overrides.lock().clone(); + match (initialization_options, override_options) { + (None, override_options) => override_options, + (initialization_options, None) => initialization_options.cloned(), + (Some(initialization_options), Some(override_options)) => { + let mut initialization_options = initialization_options.clone(); + merge_json_value_into(override_options, &mut initialization_options); + Some(initialization_options) + } + } + } } pub 
trait LspAdapterDelegate: Send + Sync { diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs index 81db0c7ed7..dc4c8852dd 100644 --- a/crates/project/src/project.rs +++ b/crates/project/src/project.rs @@ -78,8 +78,8 @@ use std::{ use terminals::Terminals; use text::Anchor; use util::{ - debug_panic, defer, http::HttpClient, merge_json_value_into, - paths::LOCAL_SETTINGS_RELATIVE_PATH, post_inc, ResultExt, TryFutureExt as _, + debug_panic, defer, http::HttpClient, paths::LOCAL_SETTINGS_RELATIVE_PATH, post_inc, ResultExt, + TryFutureExt as _, }; pub use fs::*; @@ -800,7 +800,7 @@ impl Project { .lsp .get(&adapter.name.0) .and_then(|s| s.initialization_options.as_ref()); - if adapter.initialization_options.as_ref() != new_lsp_settings { + if adapter.update_initialization_overrides(new_lsp_settings) { language_servers_to_restart.push((worktree, Arc::clone(language))); } } @@ -2545,20 +2545,13 @@ impl Project { let project_settings = settings::get::(cx); let lsp = project_settings.lsp.get(&adapter.name.0); let override_options = lsp.map(|s| s.initialization_options.clone()).flatten(); - - let mut initialization_options = adapter.initialization_options.clone(); - match (&mut initialization_options, override_options) { - (Some(initialization_options), Some(override_options)) => { - merge_json_value_into(override_options, initialization_options); - } - (None, override_options) => initialization_options = override_options, - _ => {} - } + adapter.update_initialization_overrides(override_options.as_ref()); let server_id = pending_server.server_id; let container_dir = pending_server.container_dir.clone(); let state = LanguageServerState::Starting({ let adapter = adapter.clone(); + let initialization_options = adapter.initialization_options(); let server_name = adapter.name.0.clone(); let languages = self.languages.clone(); let language = language.clone(); From f5fec559308a12391bca54e1d981c5be7c846d1e Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 11 
Jul 2023 10:03:53 -0400 Subject: [PATCH 048/115] updated vector_store to handle for removed files --- crates/vector_store/src/vector_store.rs | 95 ++++++++++++++----------- 1 file changed, 54 insertions(+), 41 deletions(-) diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index c42b7ab129..9b21073998 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -635,8 +635,6 @@ impl VectorStore { }) .await?; - dbg!(&documents); - this.read_with(&cx, |this, _| { let project_state = if let Some(state) = this.projects.get(&project.downgrade()) { state @@ -687,6 +685,7 @@ impl VectorStore { for change in changes.into_iter() { let change_path = change.0.clone(); let absolute_path = worktree.absolutize(&change_path); + // Skip if git ignored or symlink if let Some(entry) = worktree.entry_for_id(change.1) { if entry.is_ignored || entry.is_symlink || entry.is_external { @@ -694,46 +693,60 @@ impl VectorStore { } } - if let Ok(language) = language_registry - .language_for_file(&change_path.to_path_buf(), None) - .await - { - if language - .grammar() - .and_then(|grammar| grammar.embedding_config.as_ref()) - .is_none() - { - continue; - } - - let modified_time = change_path.metadata().log_err()?.modified().log_err()?; - - let existing_time = file_mtimes.get(&change_path.to_path_buf()); - let already_stored = existing_time - .map_or(false, |existing_time| &modified_time != existing_time); - - if !already_stored { - this.update(&mut cx, |this, _| { - let reindex_time = - modified_time + Duration::from_secs(REINDEXING_DELAY_SECONDS); - - let project_state = this.projects.get_mut(&project.downgrade())?; - project_state.update_pending_files( - PendingFile { - relative_path: change_path.to_path_buf(), - absolute_path, - modified_time, - worktree_db_id, - language: language.clone(), - }, - reindex_time, - ); - - for file in project_state.get_outstanding_files() { - 
this.parsing_files_tx.try_send(file).unwrap(); + match change.2 { + PathChange::Removed => this.update(&mut cx, |this, _| { + this.db_update_tx + .try_send(DbOperation::Delete { + worktree_id: worktree_db_id, + path: absolute_path, + }) + .unwrap(); + }), + _ => { + if let Ok(language) = language_registry + .language_for_file(&change_path.to_path_buf(), None) + .await + { + if language + .grammar() + .and_then(|grammar| grammar.embedding_config.as_ref()) + .is_none() + { + continue; } - Some(()) - }); + + let modified_time = + change_path.metadata().log_err()?.modified().log_err()?; + + let existing_time = file_mtimes.get(&change_path.to_path_buf()); + let already_stored = existing_time + .map_or(false, |existing_time| &modified_time != existing_time); + + if !already_stored { + this.update(&mut cx, |this, _| { + let reindex_time = modified_time + + Duration::from_secs(REINDEXING_DELAY_SECONDS); + + let project_state = + this.projects.get_mut(&project.downgrade())?; + project_state.update_pending_files( + PendingFile { + relative_path: change_path.to_path_buf(), + absolute_path, + modified_time, + worktree_db_id, + language: language.clone(), + }, + reindex_time, + ); + + for file in project_state.get_outstanding_files() { + this.parsing_files_tx.try_send(file).unwrap(); + } + Some(()) + }); + } + } } } } From 748e7af5a2c77e572474d836f9a4292dfd589780 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Tue, 11 Jul 2023 17:06:02 +0300 Subject: [PATCH 049/115] Add a test --- crates/editor/src/editor_tests.rs | 160 +++++++++++++++++++++++++- crates/editor/src/element.rs | 4 +- crates/editor/src/inlay_hint_cache.rs | 10 +- crates/language/src/language.rs | 6 + 4 files changed, 168 insertions(+), 12 deletions(-) diff --git a/crates/editor/src/editor_tests.rs b/crates/editor/src/editor_tests.rs index 9e726d6cc4..7b36287dca 100644 --- a/crates/editor/src/editor_tests.rs +++ b/crates/editor/src/editor_tests.rs @@ -22,7 +22,10 @@ use language::{ BracketPairConfig, 
FakeLspAdapter, LanguageConfig, LanguageRegistry, Point, }; use parking_lot::Mutex; +use project::project_settings::{LspSettings, ProjectSettings}; use project::FakeFs; +use std::sync::atomic; +use std::sync::atomic::AtomicUsize; use std::{cell::RefCell, future::Future, rc::Rc, time::Instant}; use unindent::Unindent; use util::{ @@ -1796,7 +1799,7 @@ async fn test_newline_comments(cx: &mut gpui::TestAppContext) { "}); } // Ensure that comment continuations can be disabled. - update_test_settings(cx, |settings| { + update_test_language_settings(cx, |settings| { settings.defaults.extend_comment_on_newline = Some(false); }); let mut cx = EditorTestContext::new(cx).await; @@ -4546,7 +4549,7 @@ async fn test_document_format_during_save(cx: &mut gpui::TestAppContext) { assert!(!cx.read(|cx| editor.is_dirty(cx))); // Set rust language override and assert overridden tabsize is sent to language server - update_test_settings(cx, |settings| { + update_test_language_settings(cx, |settings| { settings.languages.insert( "Rust".into(), LanguageSettingsContent { @@ -4660,7 +4663,7 @@ async fn test_range_format_during_save(cx: &mut gpui::TestAppContext) { assert!(!cx.read(|cx| editor.is_dirty(cx))); // Set rust language override and assert overridden tabsize is sent to language server - update_test_settings(cx, |settings| { + update_test_language_settings(cx, |settings| { settings.languages.insert( "Rust".into(), LanguageSettingsContent { @@ -7084,6 +7087,142 @@ async fn test_on_type_formatting_not_triggered(cx: &mut gpui::TestAppContext) { }); } +#[gpui::test] +async fn test_language_server_restart_due_to_settings_change(cx: &mut gpui::TestAppContext) { + init_test(cx, |_| {}); + + let language_name: Arc = "Rust".into(); + let mut language = Language::new( + LanguageConfig { + name: Arc::clone(&language_name), + path_suffixes: vec!["rs".to_string()], + ..Default::default() + }, + Some(tree_sitter_rust::language()), + ); + + let server_restarts = Arc::new(AtomicUsize::new(0)); + 
let closure_restarts = Arc::clone(&server_restarts); + let language_server_name = "test language server"; + let mut fake_servers = language + .set_fake_lsp_adapter(Arc::new(FakeLspAdapter { + name: language_server_name, + initialization_options: Some(json!({ + "testOptionValue": true + })), + initializer: Some(Box::new(move |fake_server| { + let task_restarts = Arc::clone(&closure_restarts); + fake_server.handle_request::(move |_, _| { + task_restarts.fetch_add(1, atomic::Ordering::Release); + futures::future::ready(Ok(())) + }); + })), + ..Default::default() + })) + .await; + + let fs = FakeFs::new(cx.background()); + fs.insert_tree( + "/a", + json!({ + "main.rs": "fn main() { let a = 5; }", + "other.rs": "// Test file", + }), + ) + .await; + let project = Project::test(fs, ["/a".as_ref()], cx).await; + project.update(cx, |project, _| project.languages().add(Arc::new(language))); + let (_, _workspace) = cx.add_window(|cx| Workspace::test_new(project.clone(), cx)); + let _buffer = project + .update(cx, |project, cx| { + project.open_local_buffer("/a/main.rs", cx) + }) + .await + .unwrap(); + let _fake_server = fake_servers.next().await.unwrap(); + update_test_language_settings(cx, |language_settings| { + language_settings.languages.insert( + Arc::clone(&language_name), + LanguageSettingsContent { + tab_size: NonZeroU32::new(8), + ..Default::default() + }, + ); + }); + cx.foreground().run_until_parked(); + assert_eq!( + server_restarts.load(atomic::Ordering::Acquire), + 0, + "Should not restart LSP server on an unrelated change" + ); + + update_test_project_settings(cx, |project_settings| { + project_settings.lsp.insert( + "Some other server name".into(), + LspSettings { + initialization_options: Some(json!({ + "some other init value": false + })), + }, + ); + }); + cx.foreground().run_until_parked(); + assert_eq!( + server_restarts.load(atomic::Ordering::Acquire), + 0, + "Should not restart LSP server on an unrelated LSP settings change" + ); + + 
update_test_project_settings(cx, |project_settings| { + project_settings.lsp.insert( + language_server_name.into(), + LspSettings { + initialization_options: Some(json!({ + "anotherInitValue": false + })), + }, + ); + }); + cx.foreground().run_until_parked(); + assert_eq!( + server_restarts.load(atomic::Ordering::Acquire), + 1, + "Should restart LSP server on a related LSP settings change" + ); + + update_test_project_settings(cx, |project_settings| { + project_settings.lsp.insert( + language_server_name.into(), + LspSettings { + initialization_options: Some(json!({ + "anotherInitValue": false + })), + }, + ); + }); + cx.foreground().run_until_parked(); + assert_eq!( + server_restarts.load(atomic::Ordering::Acquire), + 1, + "Should not restart LSP server on a related LSP settings change that is the same" + ); + + update_test_project_settings(cx, |project_settings| { + project_settings.lsp.insert( + language_server_name.into(), + LspSettings { + initialization_options: None, + }, + ); + }); + cx.foreground().run_until_parked(); + assert_eq!( + server_restarts.load(atomic::Ordering::Acquire), + 2, + "Should restart LSP server on another related LSP settings change" + ); +} + fn empty_range(row: usize, column: usize) -> Range { let point = DisplayPoint::new(row as u32, column as u32); point..point @@ -7203,7 +7342,7 @@ fn handle_copilot_completion_request( }); } -pub(crate) fn update_test_settings( +pub(crate) fn update_test_language_settings( cx: &mut TestAppContext, f: impl Fn(&mut AllLanguageSettingsContent), ) { @@ -7214,6 +7353,17 @@ pub(crate) fn update_test_settings( }); } +pub(crate) fn update_test_project_settings( + cx: &mut TestAppContext, + f: impl Fn(&mut ProjectSettings), +) { + cx.update(|cx| { + cx.update_global::(|store, cx| { + store.update_user_settings::(cx, f); + }); + }); +} + pub(crate) fn init_test(cx: &mut TestAppContext, f: fn(&mut AllLanguageSettingsContent)) { cx.foreground().forbid_parking(); @@ -7227,5 +7377,5 @@ pub(crate) fn 
init_test(cx: &mut TestAppContext, f: fn(&mut AllLanguageSettingsC crate::init(cx); }); - update_test_settings(cx, f); + update_test_language_settings(cx, f); } diff --git a/crates/editor/src/element.rs b/crates/editor/src/element.rs index fafbc33189..f0bae9533b 100644 --- a/crates/editor/src/element.rs +++ b/crates/editor/src/element.rs @@ -2916,7 +2916,7 @@ mod tests { use super::*; use crate::{ display_map::{BlockDisposition, BlockProperties}, - editor_tests::{init_test, update_test_settings}, + editor_tests::{init_test, update_test_language_settings}, Editor, MultiBuffer, }; use gpui::TestAppContext; @@ -3113,7 +3113,7 @@ mod tests { let resize_step = 10.0; let mut editor_width = 200.0; while editor_width <= 1000.0 { - update_test_settings(cx, |s| { + update_test_language_settings(cx, |s| { s.defaults.tab_size = NonZeroU32::new(tab_size); s.defaults.show_whitespaces = Some(ShowWhitespaceSetting::All); s.defaults.preferred_line_length = Some(editor_width as u32); diff --git a/crates/editor/src/inlay_hint_cache.rs b/crates/editor/src/inlay_hint_cache.rs index 70fb372504..52473f9971 100644 --- a/crates/editor/src/inlay_hint_cache.rs +++ b/crates/editor/src/inlay_hint_cache.rs @@ -847,7 +847,7 @@ mod tests { use text::Point; use workspace::Workspace; - use crate::editor_tests::update_test_settings; + use crate::editor_tests::update_test_language_settings; use super::*; @@ -1476,7 +1476,7 @@ mod tests { ), ] { edits_made += 1; - update_test_settings(cx, |settings| { + update_test_language_settings(cx, |settings| { settings.defaults.inlay_hints = Some(InlayHintSettings { enabled: true, show_type_hints: new_allowed_hint_kinds.contains(&Some(InlayHintKind::Type)), @@ -1520,7 +1520,7 @@ mod tests { edits_made += 1; let another_allowed_hint_kinds = HashSet::from_iter([Some(InlayHintKind::Type)]); - update_test_settings(cx, |settings| { + update_test_language_settings(cx, |settings| { settings.defaults.inlay_hints = Some(InlayHintSettings { enabled: false, 
show_type_hints: another_allowed_hint_kinds.contains(&Some(InlayHintKind::Type)), @@ -1577,7 +1577,7 @@ mod tests { let final_allowed_hint_kinds = HashSet::from_iter([Some(InlayHintKind::Parameter)]); edits_made += 1; - update_test_settings(cx, |settings| { + update_test_language_settings(cx, |settings| { settings.defaults.inlay_hints = Some(InlayHintSettings { enabled: true, show_type_hints: final_allowed_hint_kinds.contains(&Some(InlayHintKind::Type)), @@ -2269,7 +2269,7 @@ unedited (2nd) buffer should have the same hint"); crate::init(cx); }); - update_test_settings(cx, f); + update_test_language_settings(cx, f); } async fn prepare_test_objects( diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index d186bf630d..642f5469cd 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -453,6 +453,7 @@ fn deserialize_regex<'de, D: Deserializer<'de>>(d: D) -> Result, D #[cfg(any(test, feature = "test-support"))] pub struct FakeLspAdapter { pub name: &'static str, + pub initialization_options: Option, pub capabilities: lsp::ServerCapabilities, pub initializer: Option>, pub disk_based_diagnostics_progress_token: Option, @@ -1663,6 +1664,7 @@ impl Default for FakeLspAdapter { capabilities: lsp::LanguageServer::full_capabilities(), initializer: None, disk_based_diagnostics_progress_token: None, + initialization_options: None, disk_based_diagnostics_sources: Vec::new(), } } @@ -1712,6 +1714,10 @@ impl LspAdapter for Arc { async fn disk_based_diagnostics_progress_token(&self) -> Option { self.disk_based_diagnostics_progress_token.clone() } + + async fn initialization_options(&self) -> Option { + self.initialization_options.clone() + } } fn get_capture_indices(query: &Query, captures: &mut [(&str, &mut Option)]) { From 298c2213a0afa68f1dbaf04dc8b90420303743a9 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 11 Jul 2023 12:03:56 -0400 Subject: [PATCH 050/115] added opt-in default settings for vector store --- 
Cargo.lock | 2 ++ assets/settings/default.json | 6 ++++ crates/vector_store/Cargo.toml | 3 ++ crates/vector_store/src/db.rs | 7 +--- crates/vector_store/src/vector_store.rs | 29 ++++++++++------- .../vector_store/src/vector_store_settings.rs | 32 +++++++++++++++++++ crates/vector_store/src/vector_store_tests.rs | 10 +++++- 7 files changed, 70 insertions(+), 19 deletions(-) create mode 100644 crates/vector_store/src/vector_store_settings.rs diff --git a/Cargo.lock b/Cargo.lock index 22df4083fd..cd92d0003a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8503,8 +8503,10 @@ dependencies = [ "rand 0.8.5", "rpc", "rusqlite", + "schemars", "serde", "serde_json", + "settings", "smol", "tempdir", "theme", diff --git a/assets/settings/default.json b/assets/settings/default.json index 9ae5c916b5..cf8f630dfb 100644 --- a/assets/settings/default.json +++ b/assets/settings/default.json @@ -291,6 +291,12 @@ // the terminal will default to matching the buffer's font family. // "font_family": "Zed Mono" }, + // Difference settings for vector_store + "vector_store": { + "enable": false, + "reindexing_delay_seconds": 600, + "embedding_batch_size": 150 + }, // Different settings for specific languages. 
"languages": { "Plain Text": { diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml index 35a6a689ae..40bff8b95c 100644 --- a/crates/vector_store/Cargo.toml +++ b/crates/vector_store/Cargo.toml @@ -18,6 +18,7 @@ picker = { path = "../picker" } theme = { path = "../theme" } editor = { path = "../editor" } rpc = { path = "../rpc" } +settings = { path = "../settings" } anyhow.workspace = true futures.workspace = true smol.workspace = true @@ -33,6 +34,7 @@ bincode = "1.3.3" matrixmultiply = "0.3.7" tiktoken-rs = "0.5.0" rand.workspace = true +schemars.workspace = true [dev-dependencies] gpui = { path = "../gpui", features = ["test-support"] } @@ -40,6 +42,7 @@ language = { path = "../language", features = ["test-support"] } project = { path = "../project", features = ["test-support"] } rpc = { path = "../rpc", features = ["test-support"] } workspace = { path = "../workspace", features = ["test-support"] } +settings = { path = "../settings", features = ["test-support"]} tree-sitter-rust = "*" rand.workspace = true unindent.workspace = true diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index 197e7d5696..79d90e87bf 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -204,8 +204,6 @@ impl VectorDatabase { ) -> Result> { let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1); self.for_each_document(&worktree_ids, |id, embedding| { - eprintln!("document {id} {embedding:?}"); - let similarity = dot(&embedding, &query_embedding); let ix = match results .binary_search_by(|(_, s)| similarity.partial_cmp(&s).unwrap_or(Ordering::Equal)) @@ -243,10 +241,7 @@ impl VectorDatabase { Ok((row.get(0)?, row.get::<_, Embedding>(1)?)) })? 
.filter_map(|row| row.ok()) - .for_each(|(id, embedding)| { - dbg!("id"); - f(id, embedding.0) - }); + .for_each(|(id, embedding)| f(id, embedding.0)); Ok(()) } diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 9b21073998..4b5f6b636f 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -2,22 +2,25 @@ mod db; mod embedding; mod modal; mod parsing; +mod vector_store_settings; #[cfg(test)] mod vector_store_tests; +use crate::vector_store_settings::VectorStoreSettings; use anyhow::{anyhow, Result}; use db::VectorDatabase; use embedding::{EmbeddingProvider, OpenAIEmbeddings}; use futures::{channel::oneshot, Future}; use gpui::{ - AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, ViewContext, - WeakModelHandle, + AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Subscription, Task, + ViewContext, WeakModelHandle, }; use language::{Language, LanguageRegistry}; use modal::{SemanticSearch, SemanticSearchDelegate, Toggle}; use parsing::{CodeContextRetriever, ParsedFile}; use project::{Fs, PathChange, Project, ProjectEntryId, WorktreeId}; +use settings::SettingsStore; use smol::channel; use std::{ collections::HashMap, @@ -34,9 +37,6 @@ use util::{ }; use workspace::{Workspace, WorkspaceCreated}; -const REINDEXING_DELAY_SECONDS: u64 = 3; -const EMBEDDINGS_BATCH_SIZE: usize = 150; - pub fn init( fs: Arc, http_client: Arc, @@ -47,6 +47,12 @@ pub fn init( return; } + settings::register::(cx); + + if !settings::get::(cx).enable { + return; + } + let db_file_path = EMBEDDINGS_DIR .join(Path::new(RELEASE_CHANNEL_NAME.as_str())) .join("embeddings_db"); @@ -83,6 +89,7 @@ pub fn init( .detach(); cx.add_action({ + // "semantic search: Toggle" move |workspace: &mut Workspace, _: &Toggle, cx: &mut ViewContext| { let vector_store = vector_store.clone(); workspace.toggle_modal(cx, |workspace, cx| { @@ -274,7 +281,6 @@ impl VectorStore { worktree_id, 
indexed_file, } => { - log::info!("Inserting Data for {:?}", &indexed_file.path); db.insert_file(worktree_id, indexed_file).log_err(); } DbOperation::Delete { worktree_id, path } => { @@ -347,6 +353,7 @@ impl VectorStore { }); // batch_tx/rx: Batch Files to Send for Embeddings + let batch_size = settings::get::(cx).embedding_batch_size; let (batch_files_tx, batch_files_rx) = channel::unbounded::(); let _batch_files_task = cx.background().spawn(async move { let mut queue_len = 0; @@ -361,7 +368,7 @@ impl VectorStore { } => { queue_len += &document_spans.len(); embeddings_queue.push((worktree_id, parsed_file, document_spans)); - queue_len >= EMBEDDINGS_BATCH_SIZE + queue_len >= batch_size } EmbeddingJob::Flush => true, }; @@ -387,8 +394,6 @@ impl VectorStore { let cursor = QueryCursor::new(); let mut retriever = CodeContextRetriever { parser, cursor, fs }; while let Ok(pending_file) = parsing_files_rx.recv().await { - log::info!("Parsing File: {:?}", &pending_file.relative_path); - if let Some((indexed_file, document_spans)) = retriever.parse_file(pending_file.clone()).await.log_err() { @@ -476,11 +481,9 @@ impl VectorStore { let parsing_files_tx = self.parsing_files_tx.clone(); cx.spawn(|this, mut cx| async move { - let t0 = Instant::now(); futures::future::join_all(worktree_scans_complete).await; let worktree_db_ids = futures::future::join_all(worktree_db_ids).await; - log::info!("Worktree Scanning Done in {:?}", t0.elapsed().as_millis()); if let Some(db_directory) = database_url.parent() { fs.create_dir(db_directory).await.log_err(); @@ -665,6 +668,8 @@ impl VectorStore { cx: &mut ModelContext<'_, VectorStore>, worktree_id: &WorktreeId, ) -> Option<()> { + let reindexing_delay = settings::get::(cx).reindexing_delay_seconds; + let worktree = project .read(cx) .worktree_for_id(worktree_id.clone(), cx)? 
@@ -725,7 +730,7 @@ impl VectorStore { if !already_stored { this.update(&mut cx, |this, _| { let reindex_time = modified_time - + Duration::from_secs(REINDEXING_DELAY_SECONDS); + + Duration::from_secs(reindexing_delay as u64); let project_state = this.projects.get_mut(&project.downgrade())?; diff --git a/crates/vector_store/src/vector_store_settings.rs b/crates/vector_store/src/vector_store_settings.rs new file mode 100644 index 0000000000..0bde07dd65 --- /dev/null +++ b/crates/vector_store/src/vector_store_settings.rs @@ -0,0 +1,32 @@ +use anyhow; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use settings::Setting; + +#[derive(Deserialize, Debug)] +pub struct VectorStoreSettings { + pub enable: bool, + pub reindexing_delay_seconds: usize, + pub embedding_batch_size: usize, +} + +#[derive(Clone, Default, Serialize, Deserialize, JsonSchema, Debug)] +pub struct VectorStoreSettingsContent { + pub enable: Option, + pub reindexing_delay_seconds: Option, + pub embedding_batch_size: Option, +} + +impl Setting for VectorStoreSettings { + const KEY: Option<&'static str> = Some("vector_store"); + + type FileContent = VectorStoreSettingsContent; + + fn load( + default_value: &Self::FileContent, + user_values: &[&Self::FileContent], + _: &gpui::AppContext, + ) -> anyhow::Result { + Self::load_via_json_merge(default_value, user_values) + } +} diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs index b1756b7964..a3a40722ea 100644 --- a/crates/vector_store/src/vector_store_tests.rs +++ b/crates/vector_store/src/vector_store_tests.rs @@ -1,4 +1,6 @@ -use crate::{db::dot, embedding::EmbeddingProvider, VectorStore}; +use crate::{ + db::dot, embedding::EmbeddingProvider, vector_store_settings::VectorStoreSettings, VectorStore, +}; use anyhow::Result; use async_trait::async_trait; use gpui::{Task, TestAppContext}; @@ -6,11 +8,17 @@ use language::{Language, LanguageConfig, LanguageRegistry}; use 
project::{FakeFs, Project}; use rand::Rng; use serde_json::json; +use settings::SettingsStore; use std::sync::Arc; use unindent::Unindent; #[gpui::test] async fn test_vector_store(cx: &mut TestAppContext) { + cx.update(|cx| { + cx.set_global(SettingsStore::test(cx)); + settings::register::(cx); + }); + let fs = FakeFs::new(cx.background()); fs.insert_tree( "/the-root", From efe8b8b6d0ddeed8bba92a38e04f8b7f32e387a9 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Tue, 11 Jul 2023 20:46:45 +0300 Subject: [PATCH 051/115] Revert "Fix language servers improper restarts" This reverts commit 91832c8cd8de4743a5c8dad87005a67d9601d7e5. --- crates/language/src/language.rs | 28 +--------------------------- crates/project/src/project.rs | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 32 deletions(-) diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 642f5469cd..976d8062ea 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -90,8 +90,7 @@ pub struct LanguageServerName(pub Arc); /// once at startup, and caches the results. 
pub struct CachedLspAdapter { pub name: LanguageServerName, - initialization_options: Option, - initialization_overrides: Mutex>, + pub initialization_options: Option, pub disk_based_diagnostic_sources: Vec, pub disk_based_diagnostics_progress_token: Option, pub language_ids: HashMap, @@ -110,7 +109,6 @@ impl CachedLspAdapter { Arc::new(CachedLspAdapter { name, initialization_options, - initialization_overrides: Mutex::new(None), disk_based_diagnostic_sources, disk_based_diagnostics_progress_token, language_ids, @@ -210,30 +208,6 @@ impl CachedLspAdapter { ) -> Option { self.adapter.label_for_symbol(name, kind, language).await } - - pub fn update_initialization_overrides(&self, new: Option<&Value>) -> bool { - let mut current = self.initialization_overrides.lock(); - if current.as_ref() != new { - *current = new.cloned(); - true - } else { - false - } - } - - pub fn initialization_options(&self) -> Option { - let initialization_options = self.initialization_options.as_ref(); - let override_options = self.initialization_overrides.lock().clone(); - match (initialization_options, override_options) { - (None, override_options) => override_options, - (initialization_options, None) => initialization_options.cloned(), - (Some(initialization_options), Some(override_options)) => { - let mut initialization_options = initialization_options.clone(); - merge_json_value_into(override_options, &mut initialization_options); - Some(initialization_options) - } - } - } } pub trait LspAdapterDelegate: Send + Sync { diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs index dc4c8852dd..81db0c7ed7 100644 --- a/crates/project/src/project.rs +++ b/crates/project/src/project.rs @@ -78,8 +78,8 @@ use std::{ use terminals::Terminals; use text::Anchor; use util::{ - debug_panic, defer, http::HttpClient, paths::LOCAL_SETTINGS_RELATIVE_PATH, post_inc, ResultExt, - TryFutureExt as _, + debug_panic, defer, http::HttpClient, merge_json_value_into, + 
paths::LOCAL_SETTINGS_RELATIVE_PATH, post_inc, ResultExt, TryFutureExt as _, }; pub use fs::*; @@ -800,7 +800,7 @@ impl Project { .lsp .get(&adapter.name.0) .and_then(|s| s.initialization_options.as_ref()); - if adapter.update_initialization_overrides(new_lsp_settings) { + if adapter.initialization_options.as_ref() != new_lsp_settings { language_servers_to_restart.push((worktree, Arc::clone(language))); } } @@ -2545,13 +2545,20 @@ impl Project { let project_settings = settings::get::(cx); let lsp = project_settings.lsp.get(&adapter.name.0); let override_options = lsp.map(|s| s.initialization_options.clone()).flatten(); - adapter.update_initialization_overrides(override_options.as_ref()); + + let mut initialization_options = adapter.initialization_options.clone(); + match (&mut initialization_options, override_options) { + (Some(initialization_options), Some(override_options)) => { + merge_json_value_into(override_options, initialization_options); + } + (None, override_options) => initialization_options = override_options, + _ => {} + } let server_id = pending_server.server_id; let container_dir = pending_server.container_dir.clone(); let state = LanguageServerState::Starting({ let adapter = adapter.clone(); - let initialization_options = adapter.initialization_options(); let server_name = adapter.name.0.clone(); let languages = self.languages.clone(); let language = language.clone(); From 98a0113ac398495e10ecc5e6fbbc708df41bc396 Mon Sep 17 00:00:00 2001 From: "Joseph T. 
Lyons" Date: Tue, 11 Jul 2023 13:58:55 -0400 Subject: [PATCH 052/115] Add call events Co-Authored-By: Max Brunsfeld --- crates/call/src/call.rs | 43 ++++++++++++++++++- crates/client/src/telemetry.rs | 4 ++ crates/collab/src/tests/integration_tests.rs | 6 +-- .../src/tests/randomized_integration_tests.rs | 2 +- crates/collab_ui/src/collab_ui.rs | 15 ++----- .../src/incoming_call_notification.rs | 4 +- crates/editor/src/editor.rs | 4 +- 7 files changed, 57 insertions(+), 21 deletions(-) diff --git a/crates/call/src/call.rs b/crates/call/src/call.rs index 0a8f150194..ed5e560218 100644 --- a/crates/call/src/call.rs +++ b/crates/call/src/call.rs @@ -4,7 +4,7 @@ pub mod room; use std::sync::Arc; use anyhow::{anyhow, Result}; -use client::{proto, Client, TypedEnvelope, User, UserStore}; +use client::{proto, ClickhouseEvent, Client, TelemetrySettings, TypedEnvelope, User, UserStore}; use collections::HashSet; use futures::{future::Shared, FutureExt}; use postage::watch; @@ -198,6 +198,7 @@ impl ActiveCall { let result = invite.await; this.update(&mut cx, |this, cx| { this.pending_invites.remove(&called_user_id); + this.report_call_event("invite", cx); cx.notify(); }); result @@ -243,21 +244,26 @@ impl ActiveCall { }; let join = Room::join(&call, self.client.clone(), self.user_store.clone(), cx); + cx.spawn(|this, mut cx| async move { let room = join.await?; this.update(&mut cx, |this, cx| this.set_room(Some(room.clone()), cx)) .await?; + this.update(&mut cx, |this, cx| { + this.report_call_event("accept incoming", cx) + }); Ok(()) }) } - pub fn decline_incoming(&mut self) -> Result<()> { + pub fn decline_incoming(&mut self, cx: &mut ModelContext) -> Result<()> { let call = self .incoming_call .0 .borrow_mut() .take() .ok_or_else(|| anyhow!("no incoming call"))?; + self.report_call_event_for_room("decline incoming", call.room_id, cx); self.client.send(proto::DeclineCall { room_id: call.room_id, })?; @@ -266,6 +272,7 @@ impl ActiveCall { pub fn hang_up(&mut self, cx: &mut 
ModelContext) -> Task> { cx.notify(); + self.report_call_event("hang up", cx); if let Some((room, _)) = self.room.take() { room.update(cx, |room, cx| room.leave(cx)) } else { @@ -273,12 +280,28 @@ impl ActiveCall { } } + pub fn toggle_screen_sharing(&self, cx: &mut AppContext) { + if let Some(room) = self.room().cloned() { + let toggle_screen_sharing = room.update(cx, |room, cx| { + if room.is_screen_sharing() { + self.report_call_event("disable screen share", cx); + Task::ready(room.unshare_screen(cx)) + } else { + self.report_call_event("enable screen share", cx); + room.share_screen(cx) + } + }); + toggle_screen_sharing.detach_and_log_err(cx); + } + } + pub fn share_project( &mut self, project: ModelHandle, cx: &mut ModelContext, ) -> Task> { if let Some((room, _)) = self.room.as_ref() { + self.report_call_event("share project", cx); room.update(cx, |room, cx| room.share_project(project, cx)) } else { Task::ready(Err(anyhow!("no active call"))) @@ -291,6 +314,7 @@ impl ActiveCall { cx: &mut ModelContext, ) -> Result<()> { if let Some((room, _)) = self.room.as_ref() { + self.report_call_event("unshare project", cx); room.update(cx, |room, cx| room.unshare_project(project, cx)) } else { Err(anyhow!("no active call")) @@ -352,4 +376,19 @@ impl ActiveCall { pub fn pending_invites(&self) -> &HashSet { &self.pending_invites } + + fn report_call_event(&self, operation: &'static str, cx: &AppContext) { + if let Some(room) = self.room() { + self.report_call_event_for_room(operation, room.read(cx).id(), cx) + } + } + + fn report_call_event_for_room(&self, operation: &'static str, room_id: u64, cx: &AppContext) { + let telemetry = self.client.telemetry(); + let telemetry_settings = *settings::get::(cx); + + let event = ClickhouseEvent::Call { operation, room_id }; + + telemetry.report_clickhouse_event(event, telemetry_settings); + } } diff --git a/crates/client/src/telemetry.rs b/crates/client/src/telemetry.rs index 9c4e187dbc..959f4cc783 100644 --- 
a/crates/client/src/telemetry.rs +++ b/crates/client/src/telemetry.rs @@ -70,6 +70,10 @@ pub enum ClickhouseEvent { suggestion_accepted: bool, file_extension: Option, }, + Call { + operation: &'static str, + room_id: u64, + }, } #[cfg(debug_assertions)] diff --git a/crates/collab/src/tests/integration_tests.rs b/crates/collab/src/tests/integration_tests.rs index 66dc19d690..c32129818f 100644 --- a/crates/collab/src/tests/integration_tests.rs +++ b/crates/collab/src/tests/integration_tests.rs @@ -157,7 +157,7 @@ async fn test_basic_calls( // User C receives the call, but declines it. let call_c = incoming_call_c.next().await.unwrap().unwrap(); assert_eq!(call_c.calling_user.github_login, "user_b"); - active_call_c.update(cx_c, |call, _| call.decline_incoming().unwrap()); + active_call_c.update(cx_c, |call, cx| call.decline_incoming(cx).unwrap()); assert!(incoming_call_c.next().await.unwrap().is_none()); deterministic.run_until_parked(); @@ -1080,7 +1080,7 @@ async fn test_calls_on_multiple_connections( // User B declines the call on one of the two connections, causing both connections // to stop ringing. 
- active_call_b2.update(cx_b2, |call, _| call.decline_incoming().unwrap()); + active_call_b2.update(cx_b2, |call, cx| call.decline_incoming(cx).unwrap()); deterministic.run_until_parked(); assert!(incoming_call_b1.next().await.unwrap().is_none()); assert!(incoming_call_b2.next().await.unwrap().is_none()); @@ -5945,7 +5945,7 @@ async fn test_contacts( [("user_b".to_string(), "online", "busy")] ); - active_call_b.update(cx_b, |call, _| call.decline_incoming().unwrap()); + active_call_b.update(cx_b, |call, cx| call.decline_incoming(cx).unwrap()); deterministic.run_until_parked(); assert_eq!( contacts(&client_a, cx_a), diff --git a/crates/collab/src/tests/randomized_integration_tests.rs b/crates/collab/src/tests/randomized_integration_tests.rs index f5dfe17d6f..8062a12b83 100644 --- a/crates/collab/src/tests/randomized_integration_tests.rs +++ b/crates/collab/src/tests/randomized_integration_tests.rs @@ -365,7 +365,7 @@ async fn apply_client_operation( } log::info!("{}: declining incoming call", client.username); - active_call.update(cx, |call, _| call.decline_incoming())?; + active_call.update(cx, |call, cx| call.decline_incoming(cx))?; } ClientOperation::LeaveCall => { diff --git a/crates/collab_ui/src/collab_ui.rs b/crates/collab_ui/src/collab_ui.rs index 76f2e26571..3f5ca17a20 100644 --- a/crates/collab_ui/src/collab_ui.rs +++ b/crates/collab_ui/src/collab_ui.rs @@ -11,7 +11,7 @@ mod sharing_status_indicator; use call::{ActiveCall, Room}; pub use collab_titlebar_item::{CollabTitlebarItem, ToggleContactsMenu}; -use gpui::{actions, AppContext, Task}; +use gpui::{actions, AppContext}; use std::sync::Arc; use util::ResultExt; use workspace::AppState; @@ -44,16 +44,9 @@ pub fn init(app_state: &Arc, cx: &mut AppContext) { } pub fn toggle_screen_sharing(_: &ToggleScreenSharing, cx: &mut AppContext) { - if let Some(room) = ActiveCall::global(cx).read(cx).room().cloned() { - let toggle_screen_sharing = room.update(cx, |room, cx| { - if room.is_screen_sharing() { - 
Task::ready(room.unshare_screen(cx)) - } else { - room.share_screen(cx) - } - }); - toggle_screen_sharing.detach_and_log_err(cx); - } + ActiveCall::global(cx).update(cx, |call, cx| { + call.toggle_screen_sharing(cx); + }); } pub fn toggle_mute(_: &ToggleMute, cx: &mut AppContext) { diff --git a/crates/collab_ui/src/incoming_call_notification.rs b/crates/collab_ui/src/incoming_call_notification.rs index 12fad467e3..4066b5b229 100644 --- a/crates/collab_ui/src/incoming_call_notification.rs +++ b/crates/collab_ui/src/incoming_call_notification.rs @@ -99,8 +99,8 @@ impl IncomingCallNotification { }) .detach_and_log_err(cx); } else { - active_call.update(cx, |active_call, _| { - active_call.decline_incoming().log_err(); + active_call.update(cx, |active_call, cx| { + active_call.decline_incoming(cx).log_err(); }); } } diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index 28edd2a460..85a428d801 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -7565,7 +7565,7 @@ impl Editor { fn report_editor_event( &self, - name: &'static str, + operation: &'static str, file_extension: Option, cx: &AppContext, ) { @@ -7602,7 +7602,7 @@ impl Editor { let event = ClickhouseEvent::Editor { file_extension, vim_mode, - operation: name, + operation, copilot_enabled, copilot_enabled_for_language, }; From 1649cf81de4bc3cc506b3d118c2454693758088f Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 11 Jul 2023 14:42:03 -0400 Subject: [PATCH 053/115] added versioning to files table --- crates/vector_store/src/db.rs | 9 ++++++--- crates/vector_store/src/vector_store.rs | 7 ++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs index 79d90e87bf..a91a1872b5 100644 --- a/crates/vector_store/src/db.rs +++ b/crates/vector_store/src/db.rs @@ -9,6 +9,7 @@ use std::{ use anyhow::{anyhow, Result}; use crate::parsing::ParsedFile; +use crate::VECTOR_STORE_VERSION; use 
rpc::proto::Timestamp; use rusqlite::{ params, @@ -72,6 +73,7 @@ impl VectorDatabase { relative_path VARCHAR NOT NULL, mtime_seconds INTEGER NOT NULL, mtime_nanos INTEGER NOT NULL, + vector_store_version INTEGER NOT NULL, FOREIGN KEY(worktree_id) REFERENCES worktrees(id) ON DELETE CASCADE )", [], @@ -112,15 +114,16 @@ impl VectorDatabase { self.db.execute( " INSERT INTO files - (worktree_id, relative_path, mtime_seconds, mtime_nanos) + (worktree_id, relative_path, mtime_seconds, mtime_nanos, vector_store_version) VALUES - (?1, ?2, $3, $4); + (?1, ?2, $3, $4, $5); ", params![ worktree_id, indexed_file.path.to_str(), mtime.seconds, - mtime.nanos + mtime.nanos, + VECTOR_STORE_VERSION ], )?; diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 4b5f6b636f..6f63f07b88 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -13,14 +13,13 @@ use db::VectorDatabase; use embedding::{EmbeddingProvider, OpenAIEmbeddings}; use futures::{channel::oneshot, Future}; use gpui::{ - AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Subscription, Task, - ViewContext, WeakModelHandle, + AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, ViewContext, + WeakModelHandle, }; use language::{Language, LanguageRegistry}; use modal::{SemanticSearch, SemanticSearchDelegate, Toggle}; use parsing::{CodeContextRetriever, ParsedFile}; use project::{Fs, PathChange, Project, ProjectEntryId, WorktreeId}; -use settings::SettingsStore; use smol::channel; use std::{ collections::HashMap, @@ -37,6 +36,8 @@ use util::{ }; use workspace::{Workspace, WorkspaceCreated}; +const VECTOR_STORE_VERSION: usize = 0; + pub fn init( fs: Arc, http_client: Arc, From 4b4d049b0a7b5ce278051202c82dec09c898ae50 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Tue, 11 Jul 2023 21:29:47 +0300 Subject: [PATCH 054/115] Refactor LSP restart logic Instead of storing `initialization_options` in every LSP 
adapter as before, store previous LSP settings in `Project` entirely. This way, we can later have use multiple different project configurations per single LSP with its associated adapter. co-authored-by: Max Brunsfeld --- crates/command_palette/src/command_palette.rs | 1 + crates/project/src/project.rs | 30 ++++++++++++++----- crates/terminal_view/src/terminal_view.rs | 1 + crates/workspace/src/pane.rs | 1 + 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/crates/command_palette/src/command_palette.rs b/crates/command_palette/src/command_palette.rs index aec876bd78..77dde09875 100644 --- a/crates/command_palette/src/command_palette.rs +++ b/crates/command_palette/src/command_palette.rs @@ -369,6 +369,7 @@ mod tests { editor::init(cx); workspace::init(app_state.clone(), cx); init(cx); + Project::init_settings(cx); app_state }) } diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs index 81db0c7ed7..364b19e3a9 100644 --- a/crates/project/src/project.rs +++ b/crates/project/src/project.rs @@ -50,7 +50,7 @@ use lsp::{ }; use lsp_command::*; use postage::watch; -use project_settings::ProjectSettings; +use project_settings::{LspSettings, ProjectSettings}; use rand::prelude::*; use search::SearchQuery; use serde::Serialize; @@ -149,6 +149,7 @@ pub struct Project { _maintain_workspace_config: Task<()>, terminals: Terminals, copilot_enabled: bool, + current_lsp_settings: HashMap, LspSettings>, } struct DelayedDebounced { @@ -614,6 +615,7 @@ impl Project { local_handles: Vec::new(), }, copilot_enabled: Copilot::global(cx).is_some(), + current_lsp_settings: settings::get::(cx).lsp.clone(), } }) } @@ -706,6 +708,7 @@ impl Project { local_handles: Vec::new(), }, copilot_enabled: Copilot::global(cx).is_some(), + current_lsp_settings: settings::get::(cx).lsp.clone(), }; for worktree in worktrees { let _ = this.add_worktree(&worktree, cx); @@ -779,7 +782,9 @@ impl Project { let mut language_servers_to_stop = Vec::new(); let mut 
language_servers_to_restart = Vec::new(); let languages = self.languages.to_vec(); - let project_settings = settings::get::(cx).clone(); + + let new_lsp_settings = settings::get::(cx).lsp.clone(); + let current_lsp_settings = &self.current_lsp_settings; for (worktree_id, started_lsp_name) in self.language_server_ids.keys() { let language = languages.iter().find_map(|l| { let adapter = l @@ -796,16 +801,25 @@ impl Project { if !language_settings(Some(language), file.as_ref(), cx).enable_language_server { language_servers_to_stop.push((*worktree_id, started_lsp_name.clone())); } else if let Some(worktree) = worktree { - let new_lsp_settings = project_settings - .lsp - .get(&adapter.name.0) - .and_then(|s| s.initialization_options.as_ref()); - if adapter.initialization_options.as_ref() != new_lsp_settings { - language_servers_to_restart.push((worktree, Arc::clone(language))); + let server_name = &adapter.name.0; + match ( + current_lsp_settings.get(server_name), + new_lsp_settings.get(server_name), + ) { + (None, None) => {} + (Some(_), None) | (None, Some(_)) => { + language_servers_to_restart.push((worktree, Arc::clone(language))); + } + (Some(current_lsp_settings), Some(new_lsp_settings)) => { + if current_lsp_settings != new_lsp_settings { + language_servers_to_restart.push((worktree, Arc::clone(language))); + } + } } } } } + self.current_lsp_settings = new_lsp_settings; // Stop all newly-disabled language servers. 
for (worktree_id, adapter_name) in language_servers_to_stop { diff --git a/crates/terminal_view/src/terminal_view.rs b/crates/terminal_view/src/terminal_view.rs index c40a1a7ccd..f7963f6e5f 100644 --- a/crates/terminal_view/src/terminal_view.rs +++ b/crates/terminal_view/src/terminal_view.rs @@ -907,6 +907,7 @@ mod tests { let params = cx.update(AppState::test); cx.update(|cx| { theme::init((), cx); + Project::init_settings(cx); language::init(cx); }); diff --git a/crates/workspace/src/pane.rs b/crates/workspace/src/pane.rs index 6a20fab9a2..8e6e107488 100644 --- a/crates/workspace/src/pane.rs +++ b/crates/workspace/src/pane.rs @@ -2316,6 +2316,7 @@ mod tests { cx.set_global(SettingsStore::test(cx)); theme::init((), cx); crate::init_settings(cx); + Project::init_settings(cx); }); } From be881369fafce53fd43cdcf2e67f4b9966db37d4 Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Tue, 11 Jul 2023 12:12:37 -0700 Subject: [PATCH 055/115] Fix a bug where the terminal panel's items wouldn't be hooked up properly to workspace actions --- crates/terminal_view/src/terminal_panel.rs | 10 ++++++++++ crates/terminal_view/src/terminal_view.rs | 2 +- crates/workspace/src/item.rs | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/crates/terminal_view/src/terminal_panel.rs b/crates/terminal_view/src/terminal_panel.rs index 11f8f7abde..2670226e26 100644 --- a/crates/terminal_view/src/terminal_panel.rs +++ b/crates/terminal_view/src/terminal_panel.rs @@ -221,6 +221,16 @@ impl TerminalPanel { pane::Event::ZoomIn => cx.emit(Event::ZoomIn), pane::Event::ZoomOut => cx.emit(Event::ZoomOut), pane::Event::Focus => cx.emit(Event::Focus), + + pane::Event::AddItem { item } => { + if let Some(workspace) = self.workspace.upgrade(cx) { + let pane = self.pane.clone(); + workspace.update(cx, |workspace, cx| { + item.added_to_pane(workspace,pane, cx) + }) + } + }, + _ => {} } } diff --git a/crates/terminal_view/src/terminal_view.rs b/crates/terminal_view/src/terminal_view.rs index 
c40a1a7ccd..d1219d53e0 100644 --- a/crates/terminal_view/src/terminal_view.rs +++ b/crates/terminal_view/src/terminal_view.rs @@ -275,7 +275,7 @@ impl TerminalView { cx.spawn(|this, mut cx| async move { Timer::after(CURSOR_BLINK_INTERVAL).await; this.update(&mut cx, |this, cx| this.resume_cursor_blinking(epoch, cx)) - .log_err(); + .ok(); }) .detach(); } diff --git a/crates/workspace/src/item.rs b/crates/workspace/src/item.rs index a3e3ab9299..0c7a478e31 100644 --- a/crates/workspace/src/item.rs +++ b/crates/workspace/src/item.rs @@ -27,7 +27,7 @@ use std::{ }; use theme::Theme; -#[derive(Eq, PartialEq, Hash)] +#[derive(Eq, PartialEq, Hash, Debug)] pub enum ItemEvent { CloseItem, UpdateTab, From 550aa2d6bdc71123b6352355a42055ad265906fd Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Tue, 11 Jul 2023 12:17:50 -0700 Subject: [PATCH 056/115] fmt --- crates/terminal_view/src/terminal_panel.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crates/terminal_view/src/terminal_panel.rs b/crates/terminal_view/src/terminal_panel.rs index 2670226e26..ad61903a9d 100644 --- a/crates/terminal_view/src/terminal_panel.rs +++ b/crates/terminal_view/src/terminal_panel.rs @@ -225,11 +225,9 @@ impl TerminalPanel { pane::Event::AddItem { item } => { if let Some(workspace) = self.workspace.upgrade(cx) { let pane = self.pane.clone(); - workspace.update(cx, |workspace, cx| { - item.added_to_pane(workspace,pane, cx) - }) + workspace.update(cx, |workspace, cx| item.added_to_pane(workspace, pane, cx)) } - }, + } _ => {} } From 02f523094be66efdcc9f6ca6b072ce787f8860c8 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 11 Jul 2023 15:58:33 -0400 Subject: [PATCH 057/115] expanded embeddable context to accomodate for struct context and file paths --- crates/vector_store/src/parsing.rs | 46 ++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/crates/vector_store/src/parsing.rs b/crates/vector_store/src/parsing.rs index 
6a8742fedd..a91e87aa22 100644 --- a/crates/vector_store/src/parsing.rs +++ b/crates/vector_store/src/parsing.rs @@ -20,6 +20,9 @@ pub struct ParsedFile { pub documents: Vec, } +const CODE_CONTEXT_TEMPLATE: &str = + "The below code snippet is from file ''\n\n```\n\n```"; + pub struct CodeContextRetriever { pub parser: Parser, pub cursor: QueryCursor, @@ -58,27 +61,40 @@ impl CodeContextRetriever { tree.root_node(), content.as_bytes(), ) { - let mut item_range: Option> = None; - let mut name_range: Option> = None; + let mut name: Vec<&str> = vec![]; + let mut item: Option<&str> = None; + let mut offset: Option = None; for capture in mat.captures { if capture.index == embedding_config.item_capture_ix { - item_range = Some(capture.node.byte_range()); + offset = Some(capture.node.byte_range().start); + item = content.get(capture.node.byte_range()); } else if capture.index == embedding_config.name_capture_ix { - name_range = Some(capture.node.byte_range()); + if let Some(name_content) = content.get(capture.node.byte_range()) { + name.push(name_content); + } + } + + if let Some(context_capture_ix) = embedding_config.context_capture_ix { + if capture.index == context_capture_ix { + if let Some(context) = content.get(capture.node.byte_range()) { + name.push(context); + } + } } } - if let Some((item_range, name_range)) = item_range.zip(name_range) { - if let Some((item, name)) = - content.get(item_range.clone()).zip(content.get(name_range)) - { - context_spans.push(item.to_string()); - documents.push(Document { - name: name.to_string(), - offset: item_range.start, - embedding: Vec::new(), - }); - } + if item.is_some() && offset.is_some() && name.len() > 0 { + let context_span = CODE_CONTEXT_TEMPLATE + .replace("", pending_file.relative_path.to_str().unwrap()) + .replace("", &pending_file.language.name().to_lowercase()) + .replace("", item.unwrap()); + + context_spans.push(context_span); + documents.push(Document { + name: name.join(" "), + offset: offset.unwrap(), + 
embedding: Vec::new(), + }) } } From debe6f107e44c4c9a5b07c9286135d474508da88 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 11 Jul 2023 16:22:40 -0400 Subject: [PATCH 058/115] updated embedding queries for tsx and typescript --- crates/zed/src/languages/tsx/embedding.scm | 59 +++++++++++++++++++ .../src/languages/typescript/embedding.scm | 59 +++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 crates/zed/src/languages/tsx/embedding.scm create mode 100644 crates/zed/src/languages/typescript/embedding.scm diff --git a/crates/zed/src/languages/tsx/embedding.scm b/crates/zed/src/languages/tsx/embedding.scm new file mode 100644 index 0000000000..a8cde61b9e --- /dev/null +++ b/crates/zed/src/languages/tsx/embedding.scm @@ -0,0 +1,59 @@ +; (internal_module +; "namespace" @context + name: (_) @name) @item + +(enum_declaration + "enum" @context + name: (_) @name) @item + +; (type_alias_declaration +; "type" @context + name: (_) @name) @item + +(function_declaration + "async"? 
@context + "function" @context + name: (_) @name) @item + +(interface_declaration + "interface" @context + name: (_) @name) @item + +; (export_statement +; (lexical_declaration +; ["let" "const"] @context +; (variable_declarator +; name: (_) @name) @item)) + +(program + (lexical_declaration + ["let" "const"] @context + (variable_declarator + name: (_) @name) @item)) + +(class_declaration + "class" @context + name: (_) @name) @item + +(method_definition + [ + "get" + "set" + "async" + "*" + "readonly" + "static" + (override_modifier) + (accessibility_modifier) + ]* @context + name: (_) @name) @item + +; (public_field_definition +; [ +; "declare" +; "readonly" +; "abstract" +; "static" +; (accessibility_modifier) +; ]* @context +; name: (_) @name) @item diff --git a/crates/zed/src/languages/typescript/embedding.scm b/crates/zed/src/languages/typescript/embedding.scm new file mode 100644 index 0000000000..f261a0a565 --- /dev/null +++ b/crates/zed/src/languages/typescript/embedding.scm @@ -0,0 +1,59 @@ +; (internal_module +; "namespace" @context +; name: (_) @name) @item + +(enum_declaration + "enum" @context + name: (_) @name) @item + +; (type_alias_declaration +; "type" @context +; name: (_) @name) @item + +(function_declaration + "async"? 
@context + "function" @context + name: (_) @name) @item + +(interface_declaration + "interface" @context + name: (_) @name) @item + +; (export_statement +; (lexical_declaration +; ["let" "const"] @context +; (variable_declarator +; name: (_) @name) @item)) + +(program + (lexical_declaration + ["let" "const"] @context + (variable_declarator + name: (_) @name) @item)) + +(class_declaration + "class" @context + name: (_) @name) @item + +(method_definition + [ + "get" + "set" + "async" + "*" + "readonly" + "static" + (override_modifier) + (accessibility_modifier) + ]* @context + name: (_) @name) @item + +; (public_field_definition +; [ +; "declare" +; "readonly" +; "abstract" +; "static" +; (accessibility_modifier) +; ]* @context +; name: (_) @name) @item From 2ca4b3f4cc399ac5773514a7b76962cfc0aa568f Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 11 Jul 2023 16:41:08 -0400 Subject: [PATCH 059/115] cleaned up warnings and added javascript --- crates/vector_store/src/parsing.rs | 10 +++- crates/vector_store/src/vector_store.rs | 3 +- .../src/languages/javascript/embedding.scm | 56 +++++++++++++++++++ crates/zed/src/languages/tsx/embedding.scm | 2 +- 4 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 crates/zed/src/languages/javascript/embedding.scm diff --git a/crates/vector_store/src/parsing.rs b/crates/vector_store/src/parsing.rs index a91e87aa22..91dcf699f8 100644 --- a/crates/vector_store/src/parsing.rs +++ b/crates/vector_store/src/parsing.rs @@ -1,4 +1,4 @@ -use std::{ops::Range, path::PathBuf, sync::Arc, time::SystemTime}; +use std::{path::PathBuf, sync::Arc, time::SystemTime}; use anyhow::{anyhow, Ok, Result}; use project::Fs; @@ -61,6 +61,8 @@ impl CodeContextRetriever { tree.root_node(), content.as_bytes(), ) { + // log::info!("-----MATCH-----"); + let mut name: Vec<&str> = vec![]; let mut item: Option<&str> = None; let mut offset: Option = None; @@ -89,6 +91,12 @@ impl CodeContextRetriever { .replace("", 
&pending_file.language.name().to_lowercase()) .replace("", item.unwrap()); + let mut truncated_span = context_span.clone(); + truncated_span.truncate(100); + + // log::info!("Name: {:?}", name); + // log::info!("Span: {:?}", truncated_span); + context_spans.push(context_span); documents.push(Document { name: name.join(" "), diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 6f63f07b88..a2ca90e84e 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -386,7 +386,8 @@ impl VectorStore { let (parsing_files_tx, parsing_files_rx) = channel::unbounded::(); let mut _parsing_files_tasks = Vec::new(); - for _ in 0..cx.background().num_cpus() { + // for _ in 0..cx.background().num_cpus() { + for _ in 0..1 { let fs = fs.clone(); let parsing_files_rx = parsing_files_rx.clone(); let batch_files_tx = batch_files_tx.clone(); diff --git a/crates/zed/src/languages/javascript/embedding.scm b/crates/zed/src/languages/javascript/embedding.scm new file mode 100644 index 0000000000..ec6eb5ab1a --- /dev/null +++ b/crates/zed/src/languages/javascript/embedding.scm @@ -0,0 +1,56 @@ +; (internal_module +; "namespace" @context +; name: (_) @name) @item + +(enum_declaration + "enum" @context + name: (_) @name) @item + +(function_declaration + "async"? 
@context + "function" @context + name: (_) @name) @item + +(interface_declaration + "interface" @context + name: (_) @name) @item + +; (program +; (export_statement +; (lexical_declaration +; ["let" "const"] @context +; (variable_declarator +; name: (_) @name) @item))) + +(program + (lexical_declaration + ["let" "const"] @context + (variable_declarator + name: (_) @name) @item)) + +(class_declaration + "class" @context + name: (_) @name) @item + +(method_definition + [ + "get" + "set" + "async" + "*" + "readonly" + "static" + (override_modifier) + (accessibility_modifier) + ]* @context + name: (_) @name) @item + +; (public_field_definition +; [ +; "declare" +; "readonly" +; "abstract" +; "static" +; (accessibility_modifier) +; ]* @context +; name: (_) @name) @item diff --git a/crates/zed/src/languages/tsx/embedding.scm b/crates/zed/src/languages/tsx/embedding.scm index a8cde61b9e..96c56abe9f 100644 --- a/crates/zed/src/languages/tsx/embedding.scm +++ b/crates/zed/src/languages/tsx/embedding.scm @@ -1,6 +1,6 @@ ; (internal_module ; "namespace" @context - name: (_) @name) @item + ; name: (_) @name) @item (enum_declaration "enum" @context From ef296e46cbbcfeacac8f194ab1a343fd984c4729 Mon Sep 17 00:00:00 2001 From: Julia Date: Tue, 11 Jul 2023 16:49:53 -0400 Subject: [PATCH 060/115] Avoid user menu toggle button overlapping with tab bar top border --- styles/src/style_tree/titlebar.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/styles/src/style_tree/titlebar.ts b/styles/src/style_tree/titlebar.ts index 60894b08f6..177a8c5bd8 100644 --- a/styles/src/style_tree/titlebar.ts +++ b/styles/src/style_tree/titlebar.ts @@ -84,7 +84,7 @@ function user_menu() { base: { corner_radius: 6, height: button_height, - width: online ? 
37 : 24, + width: 20, padding: { top: 2, bottom: 2, @@ -153,6 +153,7 @@ function user_menu() { }, } } + return { user_menu_button_online: build_button({ online: true }), user_menu_button_offline: build_button({ online: false }), From af7b2f17ae28699fc20bbe88513db00450390fa3 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 11 Jul 2023 17:13:58 -0400 Subject: [PATCH 061/115] added initial keymap for toggle semantic search Co-authored-by: maxbrunsfeld --- assets/keymaps/default.json | 1 + crates/vector_store/src/vector_store.rs | 42 +++++++++++++------------ 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/assets/keymaps/default.json b/assets/keymaps/default.json index 8c3a1f407c..3f0b545ebc 100644 --- a/assets/keymaps/default.json +++ b/assets/keymaps/default.json @@ -405,6 +405,7 @@ "cmd-k cmd-t": "theme_selector::Toggle", "cmd-k cmd-s": "zed::OpenKeymap", "cmd-t": "project_symbols::Toggle", + "cmd-alt-t": "semantic_search::Toggle", "cmd-p": "file_finder::Toggle", "cmd-shift-p": "command_palette::Toggle", "cmd-shift-m": "diagnostics::Deploy", diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index a2ca90e84e..d3f89d568a 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -49,7 +49,6 @@ pub fn init( } settings::register::(cx); - if !settings::get::(cx).enable { return; } @@ -58,6 +57,27 @@ pub fn init( .join(Path::new(RELEASE_CHANNEL_NAME.as_str())) .join("embeddings_db"); + SemanticSearch::init(cx); + cx.add_action( + |workspace: &mut Workspace, _: &Toggle, cx: &mut ViewContext| { + eprintln!("semantic_search::Toggle action"); + + if cx.has_global::>() { + let vector_store = cx.global::>().clone(); + workspace.toggle_modal(cx, |workspace, cx| { + let project = workspace.project().clone(); + let workspace = cx.weak_handle(); + cx.add_view(|cx| { + SemanticSearch::new( + SemanticSearchDelegate::new(workspace, project, vector_store), + cx, + ) + }) + }); + } 
+ }, + ); + cx.spawn(move |mut cx| async move { let vector_store = VectorStore::new( fs, @@ -73,6 +93,7 @@ pub fn init( .await?; cx.update(|cx| { + cx.set_global(vector_store.clone()); cx.subscribe_global::({ let vector_store = vector_store.clone(); move |event, cx| { @@ -88,25 +109,6 @@ pub fn init( } }) .detach(); - - cx.add_action({ - // "semantic search: Toggle" - move |workspace: &mut Workspace, _: &Toggle, cx: &mut ViewContext| { - let vector_store = vector_store.clone(); - workspace.toggle_modal(cx, |workspace, cx| { - let project = workspace.project().clone(); - let workspace = cx.weak_handle(); - cx.add_view(|cx| { - SemanticSearch::new( - SemanticSearchDelegate::new(workspace, project, vector_store), - cx, - ) - }) - }) - } - }); - - SemanticSearch::init(cx); }); anyhow::Ok(()) From 08e24bbbae8de4f8db3d0bdc68c2c1e3293958f6 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Jul 2023 14:29:06 -0700 Subject: [PATCH 062/115] Use cmd-ctrl-t for semantic search key binding Co-authored-by: Kyle --- assets/keymaps/default.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/keymaps/default.json b/assets/keymaps/default.json index 3f0b545ebc..4726c67aea 100644 --- a/assets/keymaps/default.json +++ b/assets/keymaps/default.json @@ -405,7 +405,7 @@ "cmd-k cmd-t": "theme_selector::Toggle", "cmd-k cmd-s": "zed::OpenKeymap", "cmd-t": "project_symbols::Toggle", - "cmd-alt-t": "semantic_search::Toggle", + "cmd-ctrl-t": "semantic_search::Toggle", "cmd-p": "file_finder::Toggle", "cmd-shift-p": "command_palette::Toggle", "cmd-shift-m": "diagnostics::Deploy", From badf94b097e7fc5c158b624d4ef2907ac0ae1b0e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Jul 2023 14:29:48 -0700 Subject: [PATCH 063/115] Update dot product test to use larger vectors Co-authored-by: Kyle --- crates/vector_store/src/vector_store_tests.rs | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git 
a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs index a3a40722ea..ede43b9ff8 100644 --- a/crates/vector_store/src/vector_store_tests.rs +++ b/crates/vector_store/src/vector_store_tests.rs @@ -6,7 +6,7 @@ use async_trait::async_trait; use gpui::{Task, TestAppContext}; use language::{Language, LanguageConfig, LanguageRegistry}; use project::{FakeFs, Project}; -use rand::Rng; +use rand::{rngs::StdRng, Rng}; use serde_json::json; use settings::SettingsStore; use std::sync::Arc; @@ -97,18 +97,23 @@ async fn test_vector_store(cx: &mut TestAppContext) { assert_eq!(search_results[0].worktree_id, worktree_id); } -#[test] -fn test_dot_product() { +#[gpui::test] +fn test_dot_product(mut rng: StdRng) { assert_eq!(dot(&[1., 0., 0., 0., 0.], &[0., 1., 0., 0., 0.]), 0.); assert_eq!(dot(&[2., 0., 0., 0., 0.], &[3., 1., 0., 0., 0.]), 6.); for _ in 0..100 { - let mut rng = rand::thread_rng(); - let a: [f32; 32] = rng.gen(); - let b: [f32; 32] = rng.gen(); + let size = 1536; + let mut a = vec![0.; size]; + let mut b = vec![0.; size]; + for (a, b) in a.iter_mut().zip(b.iter_mut()) { + *a = rng.gen(); + *b = rng.gen(); + } + assert_eq!( - round_to_decimals(dot(&a, &b), 3), - round_to_decimals(reference_dot(&a, &b), 3) + round_to_decimals(dot(&a, &b), 1), + round_to_decimals(reference_dot(&a, &b), 1) ); } From d244c0fcea07bc936baaab71e21a40638f24f383 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Jul 2023 14:30:11 -0700 Subject: [PATCH 064/115] Get vector store test passing - wait for indexing Co-authored-by: Kyle --- crates/vector_store/src/vector_store_tests.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs index ede43b9ff8..8c5a667c7d 100644 --- a/crates/vector_store/src/vector_store_tests.rs +++ b/crates/vector_store/src/vector_store_tests.rs @@ -81,9 +81,11 @@ async fn test_vector_store(cx: &mut 
TestAppContext) { let worktree_id = project.read_with(cx, |project, cx| { project.worktrees(cx).next().unwrap().read(cx).id() }); - let add_project = store.update(cx, |store, cx| store.add_project(project.clone(), cx)); - - add_project.await.unwrap(); + store + .update(cx, |store, cx| store.add_project(project.clone(), cx)) + .await + .unwrap(); + cx.foreground().run_until_parked(); let search_results = store .update(cx, |store, cx| { From 4a4dd398750add1b93c7db41f7e4739405043e22 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Jul 2023 15:02:19 -0700 Subject: [PATCH 065/115] Fix TSX embedding query --- crates/zed/src/languages/tsx/embedding.scm | 24 ---------------------- 1 file changed, 24 deletions(-) diff --git a/crates/zed/src/languages/tsx/embedding.scm b/crates/zed/src/languages/tsx/embedding.scm index 96c56abe9f..305f634e04 100644 --- a/crates/zed/src/languages/tsx/embedding.scm +++ b/crates/zed/src/languages/tsx/embedding.scm @@ -1,15 +1,7 @@ -; (internal_module -; "namespace" @context - ; name: (_) @name) @item - (enum_declaration "enum" @context name: (_) @name) @item -; (type_alias_declaration -; "type" @context - name: (_) @name) @item - (function_declaration "async"? 
@context "function" @context @@ -19,12 +11,6 @@ "interface" @context name: (_) @name) @item -; (export_statement -; (lexical_declaration -; ["let" "const"] @context -; (variable_declarator -; name: (_) @name) @item)) - (program (lexical_declaration ["let" "const"] @context @@ -47,13 +33,3 @@ (accessibility_modifier) ]* @context name: (_) @name) @item - -; (public_field_definition -; [ -; "declare" -; "readonly" -; "abstract" -; "static" -; (accessibility_modifier) -; ]* @context -; name: (_) @name) @item From 4b3bb2c6611eda1eed0ba88e9d3ef731f1439a62 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Jul 2023 15:02:43 -0700 Subject: [PATCH 066/115] Define semantic search action regardless of whether the feature is enabled --- crates/vector_store/src/vector_store.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index d3f89d568a..87e70230ee 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -44,14 +44,7 @@ pub fn init( language_registry: Arc, cx: &mut AppContext, ) { - if *RELEASE_CHANNEL == ReleaseChannel::Stable { - return; - } - settings::register::(cx); - if !settings::get::(cx).enable { - return; - } let db_file_path = EMBEDDINGS_DIR .join(Path::new(RELEASE_CHANNEL_NAME.as_str())) @@ -60,8 +53,6 @@ pub fn init( SemanticSearch::init(cx); cx.add_action( |workspace: &mut Workspace, _: &Toggle, cx: &mut ViewContext| { - eprintln!("semantic_search::Toggle action"); - if cx.has_global::>() { let vector_store = cx.global::>().clone(); workspace.toggle_modal(cx, |workspace, cx| { @@ -78,6 +69,12 @@ pub fn init( }, ); + if *RELEASE_CHANNEL == ReleaseChannel::Stable + || !settings::get::(cx).enable + { + return; + } + cx.spawn(move |mut cx| async move { let vector_store = VectorStore::new( fs, From b68cd58a3b9ac1aa4a13955bae4a8c2fc08ce279 Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 11 Jul 
2023 19:54:03 -0400 Subject: [PATCH 067/115] updated vector store settings to remove batch embeddings size --- assets/settings/default.json | 5 ++--- crates/vector_store/src/vector_store.rs | 6 +++--- crates/vector_store/src/vector_store_settings.rs | 6 ++---- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/assets/settings/default.json b/assets/settings/default.json index cf8f630dfb..1f8d12a3d9 100644 --- a/assets/settings/default.json +++ b/assets/settings/default.json @@ -293,9 +293,8 @@ }, // Difference settings for vector_store "vector_store": { - "enable": false, - "reindexing_delay_seconds": 600, - "embedding_batch_size": 150 + "enabled": false, + "reindexing_delay_seconds": 600 }, // Different settings for specific languages. "languages": { diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs index 87e70230ee..0a197bc406 100644 --- a/crates/vector_store/src/vector_store.rs +++ b/crates/vector_store/src/vector_store.rs @@ -37,6 +37,7 @@ use util::{ use workspace::{Workspace, WorkspaceCreated}; const VECTOR_STORE_VERSION: usize = 0; +const EMBEDDINGS_BATCH_SIZE: usize = 150; pub fn init( fs: Arc, @@ -70,7 +71,7 @@ pub fn init( ); if *RELEASE_CHANNEL == ReleaseChannel::Stable - || !settings::get::(cx).enable + || !settings::get::(cx).enabled { return; } @@ -353,7 +354,6 @@ impl VectorStore { }); // batch_tx/rx: Batch Files to Send for Embeddings - let batch_size = settings::get::(cx).embedding_batch_size; let (batch_files_tx, batch_files_rx) = channel::unbounded::(); let _batch_files_task = cx.background().spawn(async move { let mut queue_len = 0; @@ -368,7 +368,7 @@ impl VectorStore { } => { queue_len += &document_spans.len(); embeddings_queue.push((worktree_id, parsed_file, document_spans)); - queue_len >= batch_size + queue_len >= EMBEDDINGS_BATCH_SIZE } EmbeddingJob::Flush => true, }; diff --git a/crates/vector_store/src/vector_store_settings.rs b/crates/vector_store/src/vector_store_settings.rs index 
0bde07dd65..e1fa7cc05a 100644 --- a/crates/vector_store/src/vector_store_settings.rs +++ b/crates/vector_store/src/vector_store_settings.rs @@ -5,16 +5,14 @@ use settings::Setting; #[derive(Deserialize, Debug)] pub struct VectorStoreSettings { - pub enable: bool, + pub enabled: bool, pub reindexing_delay_seconds: usize, - pub embedding_batch_size: usize, } #[derive(Clone, Default, Serialize, Deserialize, JsonSchema, Debug)] pub struct VectorStoreSettingsContent { - pub enable: Option, + pub enabled: Option, pub reindexing_delay_seconds: Option, - pub embedding_batch_size: Option, } impl Setting for VectorStoreSettings { From 33e2b52a01fce046082b4aa8f7933b73343ecd6c Mon Sep 17 00:00:00 2001 From: KCaverly Date: Tue, 11 Jul 2023 20:12:43 -0400 Subject: [PATCH 068/115] added test registration for project settings --- crates/vector_store/src/vector_store_tests.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs index 8c5a667c7d..b6e47e7a23 100644 --- a/crates/vector_store/src/vector_store_tests.rs +++ b/crates/vector_store/src/vector_store_tests.rs @@ -5,7 +5,7 @@ use anyhow::Result; use async_trait::async_trait; use gpui::{Task, TestAppContext}; use language::{Language, LanguageConfig, LanguageRegistry}; -use project::{FakeFs, Project}; +use project::{project_settings::ProjectSettings, FakeFs, Project}; use rand::{rngs::StdRng, Rng}; use serde_json::json; use settings::SettingsStore; @@ -17,6 +17,7 @@ async fn test_vector_store(cx: &mut TestAppContext) { cx.update(|cx| { cx.set_global(SettingsStore::test(cx)); settings::register::(cx); + settings::register::(cx); }); let fs = FakeFs::new(cx.background()); From 5086e37e73bf93b65a2da784f72559d9b7dde967 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Wed, 12 Jul 2023 13:27:14 +0200 Subject: [PATCH 069/115] chore: Bump ipc-channel to 0.16.1. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kevin Hovsäter reported a crash in cli when running 'cargo run -po cli -- --bundle-path target/debug/Zed'. It was caused by unaligned pointer access in ipc-channel library; rustc started generating debug_asserts for pointer alignment starting with 1.70, which we have oh-so-conveniently upgraded to shortly before Kevin noticed a fix. Rust 1.70 did not introduce this panic, it merely started triggering on UB that was previously ignored. --- Cargo.lock | 62 ++++++++++++------------------------------------------ 1 file changed, 13 insertions(+), 49 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 60ed830683..e2af61b810 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -482,7 +482,7 @@ dependencies = [ "async-global-executor", "async-io", "async-lock", - "crossbeam-utils 0.8.15", + "crossbeam-utils", "futures-channel", "futures-core", "futures-io", @@ -1550,7 +1550,7 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62ec6771ecfa0762d24683ee5a32ad78487a3d3afdc0fb8cae19d2c5deb50b7c" dependencies = [ - "crossbeam-utils 0.8.15", + "crossbeam-utils", ] [[package]] @@ -1863,16 +1863,6 @@ dependencies = [ "cfg-if 1.0.0", ] -[[package]] -name = "crossbeam-channel" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b153fe7cbef478c567df0f972e02e6d736db11affe43dfc9c56a9374d1adfb87" -dependencies = [ - "crossbeam-utils 0.7.2", - "maybe-uninit", -] - [[package]] name = "crossbeam-channel" version = "0.5.8" @@ -1880,7 +1870,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" dependencies = [ "cfg-if 1.0.0", - "crossbeam-utils 0.8.15", + "crossbeam-utils", ] [[package]] @@ -1891,7 +1881,7 @@ checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" dependencies = [ "cfg-if 1.0.0", 
"crossbeam-epoch", - "crossbeam-utils 0.8.15", + "crossbeam-utils", ] [[package]] @@ -1902,7 +1892,7 @@ checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" dependencies = [ "autocfg 1.1.0", "cfg-if 1.0.0", - "crossbeam-utils 0.8.15", + "crossbeam-utils", "memoffset 0.8.0", "scopeguard", ] @@ -1914,18 +1904,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" dependencies = [ "cfg-if 1.0.0", - "crossbeam-utils 0.8.15", -] - -[[package]] -name = "crossbeam-utils" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" -dependencies = [ - "autocfg 1.1.0", - "cfg-if 0.1.10", - "lazy_static", + "crossbeam-utils", ] [[package]] @@ -3521,12 +3500,12 @@ dependencies = [ [[package]] name = "ipc-channel" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cb1d9211085f0ea6f1379d944b93c4d07e8207aa3bcf49f37eda12b85081887" +checksum = "342d636452fbc2895574e0b319b23c014fd01c9ed71dcd87f6a4a8e2f948db4b" dependencies = [ "bincode", - "crossbeam-channel 0.4.4", + "crossbeam-channel", "fnv", "lazy_static", "libc", @@ -3534,7 +3513,7 @@ dependencies = [ "rand 0.7.3", "serde", "tempfile", - "uuid 0.8.2", + "uuid 1.3.2", "winapi 0.3.9", ] @@ -3576,7 +3555,7 @@ checksum = "334e04b4d781f436dc315cb1e7515bd96826426345d498149e4bde36b67f8ee9" dependencies = [ "async-channel", "castaway", - "crossbeam-utils 0.8.15", + "crossbeam-utils", "curl", "curl-sys", "encoding_rs", @@ -4148,12 +4127,6 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4facc753ae494aeb6e3c22f839b158aebd4f9270f55cd3c79906c45476c47ab4" -[[package]] -name = "maybe-uninit" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" - [[package]] name = "md-5" version = "0.10.5" @@ -5677,9 +5650,9 @@ version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" dependencies = [ - "crossbeam-channel 0.5.8", + "crossbeam-channel", "crossbeam-deque", - "crossbeam-utils 0.8.15", + "crossbeam-utils", "num_cpus", ] @@ -8332,15 +8305,6 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bcc7e3b898aa6f6c08e5295b6c89258d1331e9ac578cc992fb818759951bdc22" -[[package]] -name = "uuid" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" -dependencies = [ - "getrandom 0.2.9", -] - [[package]] name = "uuid" version = "1.3.2" From 78c83246982553451eadcb36263322993b4a3f86 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Wed, 12 Jul 2023 16:53:01 +0200 Subject: [PATCH 070/115] chore: Disable http2 feature in isahc. This removes transitive dependency on libnghttp2, which is pretty heavy. 
--- Cargo.lock | 11 ----------- Cargo.toml | 3 ++- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 60ed830683..4a66ea1e89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1990,7 +1990,6 @@ checksum = "14d05c10f541ae6f3bc5b3d923c20001f47db7d5f0b2bc6ad16490133842db79" dependencies = [ "cc", "libc", - "libnghttp2-sys", "libz-sys", "openssl-sys", "pkg-config", @@ -3906,16 +3905,6 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" -[[package]] -name = "libnghttp2-sys" -version = "0.1.7+1.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57ed28aba195b38d5ff02b9170cbff627e336a20925e43b4945390401c5dc93f" -dependencies = [ - "cc", - "libc", -] - [[package]] name = "libsqlite3-sys" version = "0.24.2" diff --git a/Cargo.toml b/Cargo.toml index 1708ccfc0a..4adebc0ba7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,7 +81,8 @@ env_logger = { version = "0.9" } futures = { version = "0.3" } globset = { version = "0.4" } indoc = "1" -isahc = "1.7.2" +# We explicitly disable a http2 support in isahc. 
+isahc = { version = "1.7.2", default-features = false, features = ["static-curl", "text-decoding"] } lazy_static = { version = "1.4.0" } log = { version = "0.4.16", features = ["kv_unstable_serde"] } ordered-float = { version = "2.1.1" } From 6d96c6ef51e011b487088b3df119ff1a22811acf Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Wed, 12 Jul 2023 18:32:03 +0300 Subject: [PATCH 071/115] Draft the postfix completions support --- crates/editor/src/editor_tests.rs | 91 +++++++++++++++++++++++++++++++ crates/project/src/lsp_command.rs | 20 +++---- 2 files changed, 101 insertions(+), 10 deletions(-) diff --git a/crates/editor/src/editor_tests.rs b/crates/editor/src/editor_tests.rs index 7b36287dca..890d34af13 100644 --- a/crates/editor/src/editor_tests.rs +++ b/crates/editor/src/editor_tests.rs @@ -7223,6 +7223,97 @@ async fn test_language_server_restart_due_to_settings_change(cx: &mut gpui::Test ); } +#[gpui::test] +async fn test_completions_with_extra_edits(cx: &mut gpui::TestAppContext) { + init_test(cx, |_| {}); + + let mut cx = EditorLspTestContext::new_rust( + lsp::ServerCapabilities { + completion_provider: Some(lsp::CompletionOptions { + trigger_characters: Some(vec![".".to_string()]), + ..Default::default() + }), + ..Default::default() + }, + cx, + ) + .await; + + cx.set_state(indoc! 
{"fn main() { let a = 2ˇ; }"}); + cx.simulate_keystroke("."); + let completion_item = lsp::CompletionItem { + label: "some".into(), + kind: Some(lsp::CompletionItemKind::SNIPPET), + detail: Some("Wrap the expression in an `Option::Some`".to_string()), + documentation: Some(lsp::Documentation::MarkupContent(lsp::MarkupContent { + kind: lsp::MarkupKind::Markdown, + value: "```rust\nSome(2)\n```".to_string(), + })), + deprecated: Some(false), + sort_text: Some("fffffff2".to_string()), + filter_text: Some("some".to_string()), + insert_text_format: Some(lsp::InsertTextFormat::SNIPPET), + text_edit: Some(lsp::CompletionTextEdit::Edit(lsp::TextEdit { + range: lsp::Range { + start: lsp::Position { + line: 0, + character: 22, + }, + end: lsp::Position { + line: 0, + character: 22, + }, + }, + new_text: "Some(2)".to_string(), + })), + additional_text_edits: Some(vec![lsp::TextEdit { + range: lsp::Range { + start: lsp::Position { + line: 0, + character: 20, + }, + end: lsp::Position { + line: 0, + character: 22, + }, + }, + new_text: "".to_string(), + }]), + ..Default::default() + }; + + let closure_completion_item = completion_item.clone(); + let mut request = cx.handle_request::(move |_, _, _| { + let task_completion_item = closure_completion_item.clone(); + async move { + Ok(Some(lsp::CompletionResponse::Array(vec![ + task_completion_item, + ]))) + } + }); + + request.next().await; + + cx.condition(|editor, _| editor.context_menu_visible()) + .await; + let apply_additional_edits = cx.update_editor(|editor, cx| { + editor + .confirm_completion(&ConfirmCompletion::default(), cx) + .unwrap() + }); + cx.assert_editor_state(indoc! {"fn main() { let a = 2.Some(2)ˇ; }"}); + + cx.handle_request::(move |_, _, _| { + let task_completion_item = completion_item.clone(); + async move { Ok(task_completion_item) } + }) + .next() + .await + .unwrap(); + apply_additional_edits.await.unwrap(); + cx.assert_editor_state(indoc! 
{"fn main() { let a = Some(2)ˇ; }"}); +} + fn empty_range(row: usize, column: usize) -> Range { let point = DisplayPoint::new(row as u32, column as u32); point..point diff --git a/crates/project/src/lsp_command.rs b/crates/project/src/lsp_command.rs index eec64beb5a..56a6c4e88d 100644 --- a/crates/project/src/lsp_command.rs +++ b/crates/project/src/lsp_command.rs @@ -1349,7 +1349,6 @@ impl LspCommand for GetCompletions { } else { Default::default() }; - let completions = buffer.read_with(&cx, |buffer, _| { let language = buffer.language().cloned(); let snapshot = buffer.snapshot(); @@ -1358,15 +1357,16 @@ impl LspCommand for GetCompletions { completions .into_iter() .filter_map(move |mut lsp_completion| { - // For now, we can only handle additional edits if they are returned - // when resolving the completion, not if they are present initially. - if lsp_completion - .additional_text_edits - .as_ref() - .map_or(false, |edits| !edits.is_empty()) - { - return None; - } + // TODO kb store these? at least, should only allow this when we have resolve + // // For now, we can only handle additional edits if they are returned + // // when resolving the completion, not if they are present initially. + // if lsp_completion + // .additional_text_edits + // .as_ref() + // .map_or(false, |edits| !edits.is_empty()) + // { + // return None; + // } let (old_range, mut new_text) = match lsp_completion.text_edit.as_ref() { // If the language server provides a range to overwrite, then From 6260d977fb628253c338dd5f572f9497983784b1 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Wed, 12 Jul 2023 17:58:00 +0200 Subject: [PATCH 072/115] Increase trailoff limit for modal branch picker. 
Z-2601 --- crates/vcs_menu/src/lib.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/crates/vcs_menu/src/lib.rs b/crates/vcs_menu/src/lib.rs index b5b1036b36..3858960f36 100644 --- a/crates/vcs_menu/src/lib.rs +++ b/crates/vcs_menu/src/lib.rs @@ -24,6 +24,7 @@ pub fn build_branch_list( workspace, selected_index: 0, last_query: String::default(), + branch_name_trailoff_after: 29, }, cx, ) @@ -46,6 +47,8 @@ fn toggle( workspace, selected_index: 0, last_query: String::default(), + /// Modal branch picker has a longer trailoff than a popover one. + branch_name_trailoff_after: 70, }, cx, ) @@ -63,6 +66,8 @@ pub struct BranchListDelegate { workspace: ViewHandle, selected_index: usize, last_query: String, + /// Max length of branch name before we truncate it and add a trailing `...`. + branch_name_trailoff_after: usize, } impl PickerDelegate for BranchListDelegate { @@ -213,15 +218,15 @@ impl PickerDelegate for BranchListDelegate { selected: bool, cx: &gpui::AppContext, ) -> AnyElement> { - const DISPLAYED_MATCH_LEN: usize = 29; let theme = &theme::current(cx); let hit = &self.matches[ix]; - let shortened_branch_name = util::truncate_and_trailoff(&hit.string, DISPLAYED_MATCH_LEN); + let shortened_branch_name = + util::truncate_and_trailoff(&hit.string, self.branch_name_trailoff_after); let highlights = hit .positions .iter() .copied() - .filter(|index| index < &DISPLAYED_MATCH_LEN) + .filter(|index| index < &self.branch_name_trailoff_after) .collect(); let style = theme.picker.item.in_state(selected).style_for(mouse_state); Flex::row() From 001e8483936c8afac8b88135236652f191887fdd Mon Sep 17 00:00:00 2001 From: Nate Butler Date: Wed, 12 Jul 2023 12:40:37 -0400 Subject: [PATCH 073/115] Update picker footer button style Co-Authored-By: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> --- crates/vcs_menu/src/lib.rs | 7 +++++-- styles/src/style_tree/picker.ts | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git 
a/crates/vcs_menu/src/lib.rs b/crates/vcs_menu/src/lib.rs index 180eb463bd..184c1f4733 100644 --- a/crates/vcs_menu/src/lib.rs +++ b/crates/vcs_menu/src/lib.rs @@ -1,8 +1,10 @@ use anyhow::{anyhow, bail, Result}; use fuzzy::{StringMatch, StringMatchCandidate}; use gpui::{ - actions, elements::*, platform::MouseButton, AppContext, MouseState, Task, ViewContext, - ViewHandle, + actions, + elements::*, + platform::{CursorStyle, MouseButton}, + AppContext, MouseState, Task, ViewContext, ViewHandle, }; use picker::{Picker, PickerDelegate, PickerEvent}; use std::{ops::Not, sync::Arc}; @@ -290,6 +292,7 @@ impl PickerDelegate for BranchListDelegate { .contained() .with_style(style.container) }) + .with_cursor_style(CursorStyle::PointingHand) .on_down(MouseButton::Left, |_, _, cx| { cx.spawn(|picker, mut cx| async move { picker.update(&mut cx, |this, cx| { diff --git a/styles/src/style_tree/picker.ts b/styles/src/style_tree/picker.ts index b8817a25e9..28ae854787 100644 --- a/styles/src/style_tree/picker.ts +++ b/styles/src/style_tree/picker.ts @@ -121,7 +121,7 @@ export default function picker(): any { }, footer: interactive({ base: { - text: text(theme.lowest, "sans", "variant", { size: "xs" }), + text: text(theme.lowest, "sans", "base", { size: "xs" }), padding: { bottom: 4, left: 12, From 2cb7d8aa96289de75c4e736fed5f72403119977e Mon Sep 17 00:00:00 2001 From: Derek Briggs Date: Wed, 12 Jul 2023 10:51:09 -0600 Subject: [PATCH 074/115] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6908cebf24..f51f0ac03b 100644 --- a/README.md +++ b/README.md @@ -23,9 +23,10 @@ Welcome to Zed, a lightning-fast, collaborative code editor that makes your drea git clone https://github.com/zed-industries/zed.dev ``` -* Initialize submodules +* Return to Zed project directory and Initialize submodules ``` + cd zed git submodule update --init --recursive ``` From dc09a11090885239a950044a28ad60033d9e75b9 Mon Sep 17 
00:00:00 2001 From: Derek Briggs Date: Wed, 12 Jul 2023 10:58:39 -0600 Subject: [PATCH 075/115] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f51f0ac03b..375c4a7ed6 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ Welcome to Zed, a lightning-fast, collaborative code editor that makes your drea * Set up a local `zed` database and seed it with some initial users: - Create a personal GitHub token to run `script/bootstrap` once successfully: the token needs to have an access to private repositories for the script to work (`repo` OAuth scope). + [Create a personal GitHub token](https://github.com/settings/tokens/new) to run `script/bootstrap` once successfully: the token needs to have an access to private repositories for the script to work (`repo` OAuth scope). Then delete that token. ``` From c732aa1617a084cfe849d6aecacf5e43243532db Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Wed, 12 Jul 2023 19:29:20 +0300 Subject: [PATCH 076/115] Do not resolve completions if extra edits are available --- crates/editor/src/editor_tests.rs | 86 +++++++++++++++++++++++++++++++ crates/project/src/lsp_command.rs | 12 +---- crates/project/src/project.rs | 15 ++++-- 3 files changed, 98 insertions(+), 15 deletions(-) diff --git a/crates/editor/src/editor_tests.rs b/crates/editor/src/editor_tests.rs index 890d34af13..525623738c 100644 --- a/crates/editor/src/editor_tests.rs +++ b/crates/editor/src/editor_tests.rs @@ -7294,6 +7294,92 @@ async fn test_completions_with_extra_edits(cx: &mut gpui::TestAppContext) { request.next().await; + cx.condition(|editor, _| editor.context_menu_visible()) + .await; + let apply_additional_edits = cx.update_editor(|editor, cx| { + editor + .confirm_completion(&ConfirmCompletion::default(), cx) + .unwrap() + }); + cx.assert_editor_state(indoc! {"fn main() { let a = 2.Some(2)ˇ; }"}); + apply_additional_edits.await.unwrap(); + cx.assert_editor_state(indoc! 
{"fn main() { let a = Some(2)ˇ; }"}); +} + +#[gpui::test] +async fn test_completions_with_extra_resolved_edits(cx: &mut gpui::TestAppContext) { + init_test(cx, |_| {}); + + let mut cx = EditorLspTestContext::new_rust( + lsp::ServerCapabilities { + completion_provider: Some(lsp::CompletionOptions { + trigger_characters: Some(vec![".".to_string()]), + ..Default::default() + }), + ..Default::default() + }, + cx, + ) + .await; + + cx.set_state(indoc! {"fn main() { let a = 2ˇ; }"}); + cx.simulate_keystroke("."); + let completion_item = lsp::CompletionItem { + label: "some".into(), + kind: Some(lsp::CompletionItemKind::SNIPPET), + detail: Some("Wrap the expression in an `Option::Some`".to_string()), + documentation: Some(lsp::Documentation::MarkupContent(lsp::MarkupContent { + kind: lsp::MarkupKind::Markdown, + value: "```rust\nSome(2)\n```".to_string(), + })), + deprecated: Some(false), + sort_text: Some("fffffff2".to_string()), + filter_text: Some("some".to_string()), + insert_text_format: Some(lsp::InsertTextFormat::SNIPPET), + text_edit: Some(lsp::CompletionTextEdit::Edit(lsp::TextEdit { + range: lsp::Range { + start: lsp::Position { + line: 0, + character: 22, + }, + end: lsp::Position { + line: 0, + character: 22, + }, + }, + new_text: "Some(2)".to_string(), + })), + additional_text_edits: Some(vec![lsp::TextEdit { + range: lsp::Range { + start: lsp::Position { + line: 0, + character: 20, + }, + end: lsp::Position { + line: 0, + character: 22, + }, + }, + new_text: "".to_string(), + }]), + ..Default::default() + }; + + let closure_completion_item = completion_item.clone(); + let mut request = cx.handle_request::(move |_, _, _| { + let task_completion_item = closure_completion_item.clone(); + async move { + Ok(Some(lsp::CompletionResponse::Array(vec![ + lsp::CompletionItem { + additional_text_edits: None, + ..task_completion_item + }, + ]))) + } + }); + + request.next().await; + cx.condition(|editor, _| editor.context_menu_visible()) .await; let 
apply_additional_edits = cx.update_editor(|editor, cx| { diff --git a/crates/project/src/lsp_command.rs b/crates/project/src/lsp_command.rs index 56a6c4e88d..08261b64f1 100644 --- a/crates/project/src/lsp_command.rs +++ b/crates/project/src/lsp_command.rs @@ -1349,6 +1349,7 @@ impl LspCommand for GetCompletions { } else { Default::default() }; + let completions = buffer.read_with(&cx, |buffer, _| { let language = buffer.language().cloned(); let snapshot = buffer.snapshot(); @@ -1357,17 +1358,6 @@ impl LspCommand for GetCompletions { completions .into_iter() .filter_map(move |mut lsp_completion| { - // TODO kb store these? at least, should only allow this when we have resolve - // // For now, we can only handle additional edits if they are returned - // // when resolving the completion, not if they are present initially. - // if lsp_completion - // .additional_text_edits - // .as_ref() - // .map_or(false, |edits| !edits.is_empty()) - // { - // return None; - // } - let (old_range, mut new_text) = match lsp_completion.text_edit.as_ref() { // If the language server provides a range to overwrite, then // check that the range is valid. diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs index 364b19e3a9..fd933f11c6 100644 --- a/crates/project/src/project.rs +++ b/crates/project/src/project.rs @@ -4446,11 +4446,18 @@ impl Project { }; cx.spawn(|this, mut cx| async move { - let resolved_completion = lang_server - .request::(completion.lsp_completion) - .await?; + let additional_text_edits = if let Some(edits) = + completion.lsp_completion.additional_text_edits.as_ref() + { + Some(edits.clone()) + } else { + lang_server + .request::(completion.lsp_completion) + .await? 
+ .additional_text_edits + }; - if let Some(edits) = resolved_completion.additional_text_edits { + if let Some(edits) = additional_text_edits { let edits = this .update(&mut cx, |this, cx| { this.edits_from_lsp( From af9506b21d02bef0c22f66f5bbedf23c7a194d0f Mon Sep 17 00:00:00 2001 From: "Joseph T. Lyons" Date: Wed, 12 Jul 2023 13:30:28 -0400 Subject: [PATCH 077/115] v0.96.x dev --- Cargo.lock | 2 +- crates/zed/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a03291f253..0ac6a2ee89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9455,7 +9455,7 @@ dependencies = [ [[package]] name = "zed" -version = "0.95.0" +version = "0.96.0" dependencies = [ "activity_indicator", "ai", diff --git a/crates/zed/Cargo.toml b/crates/zed/Cargo.toml index 151696dc98..597e40161f 100644 --- a/crates/zed/Cargo.toml +++ b/crates/zed/Cargo.toml @@ -3,7 +3,7 @@ authors = ["Nathan Sobo "] description = "The fast, collaborative code editor." edition = "2021" name = "zed" -version = "0.95.0" +version = "0.96.0" publish = false [lib] From 0e600ad2a4a009e3cbbc51644a8811bbbcd4bd9c Mon Sep 17 00:00:00 2001 From: Derek Briggs Date: Wed, 12 Jul 2023 11:35:38 -0600 Subject: [PATCH 078/115] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 375c4a7ed6..8849f1aa73 100644 --- a/README.md +++ b/README.md @@ -16,11 +16,13 @@ Welcome to Zed, a lightning-fast, collaborative code editor that makes your drea brew install foreman ``` -* Ensure the Zed.dev website is checked out in a sibling directory: +* Ensure the Zed.dev website is checked out in a sibling directory and install it's dependencies: ``` cd .. 
git clone https://github.com/zed-industries/zed.dev + cd zed.dev && npm install + npm install -g vercel ``` * Return to Zed project directory and Initialize submodules From 6297675055e963c9a262be9ebcf2351167b96ad5 Mon Sep 17 00:00:00 2001 From: Nate Butler Date: Wed, 12 Jul 2023 14:09:21 -0400 Subject: [PATCH 079/115] Update building-zed.md Co-Authored-By: Derek Briggs <1648941+PixelJanitor@users.noreply.github.com> --- docs/building-zed.md | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/docs/building-zed.md b/docs/building-zed.md index 78653571ad..6981913285 100644 --- a/docs/building-zed.md +++ b/docs/building-zed.md @@ -4,6 +4,11 @@ How to build Zed from source for the first time. +## Prerequisites + +- Be added to the GitHub organization +- Be added to the Vercel team + ## Process Expect this to take 30min to an hour! Some of these steps will take quite a while based on your connection speed, and how long your first build will be. @@ -13,11 +18,17 @@ Expect this to take 30min to an hour! Some of these steps will take quite a whil 1. Clone the `zed` repo - `gh repo clone zed-industries/zed` 1. Install Xcode from the macOS App Store +1. Install Xcode command line tools + - `xcode-select --install` + - If xcode-select --print-path prints /Library/Developer/CommandLineTools… run `sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer.` 1. Install [Postgres](https://postgresapp.com) 1. Install rust/rustup - `curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh` 1. Install the wasm toolchain - `rustup target add wasm32-wasi` +1. Install Livekit & Foreman + - `brew install livekit` + - `brew install foreman` 1. Generate an GitHub API Key - Go to https://github.com/settings/tokens and Generate new token - GitHub currently provides two kinds of tokens: @@ -25,12 +36,26 @@ Expect this to take 30min to an hour! 
Some of these steps will take quite a whil Unfortunately, unselecting `repo` scope and selecting every its inner scope instead does not allow the token users to read from private repositories - (not applicable) Fine-grained Tokens, at the moment of writing, did not allow any kind of access of non-owned private repos - Keep the token in the browser tab/editor for the next two steps +1. (Optional but reccomended) Add your GITHUB_TOKEN to your `.zshrc` or `.bashrc` like this: `export GITHUB_TOKEN=yourGithubAPIToken` +1. Ensure the Zed.dev website is checked out in a sibling directory and install it's dependencies: + ``` + cd .. + git clone https://github.com/zed-industries/zed.dev + cd zed.dev && npm install + npm install -g vercel + ``` +1. Link your zed.dev project to Vercel + - `vercel link` + - Select the `zed-industries` team. If you don't have this get someone on the team to add you to it. + - Select the `zed.dev` project +1. Run `vercel pull` to pull down the environment variables and project info from Vercel 1. Open Postgres.app 1. From `./path/to/zed/`: - - Run: - - `GITHUB_TOKEN={yourGithubAPIToken} script/bootstrap` - - Replace `{yourGithubAPIToken}` with the API token you generated above. - - Consider removing the token (if it's fine for you to crecreate such tokens during occasional migrations) or store this token somewhere safe (like your Zed 1Password vault). + - Run: + - `GITHUB_TOKEN={yourGithubAPIToken} script/bootstrap` + - Replace `{yourGithubAPIToken}` with the API token you generated above. + - You don't need to include the GITHUB_TOKEN if you exported it above. + - Consider removing the token (if it's fine for you to recreate such tokens during occasional migrations) or store this token somewhere safe (like your Zed 1Password vault). - If you get: - ```bash Error: Cannot install in Homebrew on ARM processor in Intel default prefix (/usr/local)! @@ -51,6 +76,7 @@ Expect this to take 30min to an hour! 
Some of these steps will take quite a whil - `cargo run --release` - If you need to run the collaboration server locally: - `script/zed-with-local-servers` + - If you need to test collaboration with mutl ## Troubleshooting From 0c7949bdeec2fa2ec48a9cf65f9f6cee74e85c57 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Wed, 12 Jul 2023 21:10:01 +0300 Subject: [PATCH 080/115] Force resolve all completions, to ensure their edits are up-to-date co-authored-by: Max Brunsfeld --- crates/editor/src/editor_tests.rs | 88 +------------------------------ crates/project/src/project.rs | 15 ++---- 2 files changed, 5 insertions(+), 98 deletions(-) diff --git a/crates/editor/src/editor_tests.rs b/crates/editor/src/editor_tests.rs index 525623738c..260b0ccc40 100644 --- a/crates/editor/src/editor_tests.rs +++ b/crates/editor/src/editor_tests.rs @@ -7224,7 +7224,7 @@ async fn test_language_server_restart_due_to_settings_change(cx: &mut gpui::Test } #[gpui::test] -async fn test_completions_with_extra_edits(cx: &mut gpui::TestAppContext) { +async fn test_completions_with_additional_edits(cx: &mut gpui::TestAppContext) { init_test(cx, |_| {}); let mut cx = EditorLspTestContext::new_rust( @@ -7294,92 +7294,6 @@ async fn test_completions_with_extra_edits(cx: &mut gpui::TestAppContext) { request.next().await; - cx.condition(|editor, _| editor.context_menu_visible()) - .await; - let apply_additional_edits = cx.update_editor(|editor, cx| { - editor - .confirm_completion(&ConfirmCompletion::default(), cx) - .unwrap() - }); - cx.assert_editor_state(indoc! {"fn main() { let a = 2.Some(2)ˇ; }"}); - apply_additional_edits.await.unwrap(); - cx.assert_editor_state(indoc! 
{"fn main() { let a = Some(2)ˇ; }"}); -} - -#[gpui::test] -async fn test_completions_with_extra_resolved_edits(cx: &mut gpui::TestAppContext) { - init_test(cx, |_| {}); - - let mut cx = EditorLspTestContext::new_rust( - lsp::ServerCapabilities { - completion_provider: Some(lsp::CompletionOptions { - trigger_characters: Some(vec![".".to_string()]), - ..Default::default() - }), - ..Default::default() - }, - cx, - ) - .await; - - cx.set_state(indoc! {"fn main() { let a = 2ˇ; }"}); - cx.simulate_keystroke("."); - let completion_item = lsp::CompletionItem { - label: "some".into(), - kind: Some(lsp::CompletionItemKind::SNIPPET), - detail: Some("Wrap the expression in an `Option::Some`".to_string()), - documentation: Some(lsp::Documentation::MarkupContent(lsp::MarkupContent { - kind: lsp::MarkupKind::Markdown, - value: "```rust\nSome(2)\n```".to_string(), - })), - deprecated: Some(false), - sort_text: Some("fffffff2".to_string()), - filter_text: Some("some".to_string()), - insert_text_format: Some(lsp::InsertTextFormat::SNIPPET), - text_edit: Some(lsp::CompletionTextEdit::Edit(lsp::TextEdit { - range: lsp::Range { - start: lsp::Position { - line: 0, - character: 22, - }, - end: lsp::Position { - line: 0, - character: 22, - }, - }, - new_text: "Some(2)".to_string(), - })), - additional_text_edits: Some(vec![lsp::TextEdit { - range: lsp::Range { - start: lsp::Position { - line: 0, - character: 20, - }, - end: lsp::Position { - line: 0, - character: 22, - }, - }, - new_text: "".to_string(), - }]), - ..Default::default() - }; - - let closure_completion_item = completion_item.clone(); - let mut request = cx.handle_request::(move |_, _, _| { - let task_completion_item = closure_completion_item.clone(); - async move { - Ok(Some(lsp::CompletionResponse::Array(vec![ - lsp::CompletionItem { - additional_text_edits: None, - ..task_completion_item - }, - ]))) - } - }); - - request.next().await; - cx.condition(|editor, _| editor.context_menu_visible()) .await; let 
apply_additional_edits = cx.update_editor(|editor, cx| { diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs index fd933f11c6..7ad8f121b7 100644 --- a/crates/project/src/project.rs +++ b/crates/project/src/project.rs @@ -4446,17 +4446,10 @@ impl Project { }; cx.spawn(|this, mut cx| async move { - let additional_text_edits = if let Some(edits) = - completion.lsp_completion.additional_text_edits.as_ref() - { - Some(edits.clone()) - } else { - lang_server - .request::(completion.lsp_completion) - .await? - .additional_text_edits - }; - + let additional_text_edits = lang_server + .request::(completion.lsp_completion) + .await? + .additional_text_edits; if let Some(edits) = additional_text_edits { let edits = this .update(&mut cx, |this, cx| { From 7cbcc28b1b4d0c85b49c552c32c89107f76a7b0e Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Wed, 12 Jul 2023 22:17:13 +0300 Subject: [PATCH 081/115] Update checkout actions --- .github/workflows/ci.yml | 10 +++++----- .github/workflows/randomized_tests.yml | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a906c8b82d..fe89801f04 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,7 +29,7 @@ jobs: rustup update stable - name: Checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: clean: false submodules: 'recursive' @@ -54,12 +54,12 @@ jobs: cargo install cargo-nextest - name: Install Node - uses: actions/setup-node@v2 + uses: actions/setup-node@v3 with: node-version: '18' - name: Checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: clean: false submodules: 'recursive' @@ -104,12 +104,12 @@ jobs: rustup target add wasm32-wasi - name: Install Node - uses: actions/setup-node@v2 + uses: actions/setup-node@v3 with: node-version: '18' - name: Checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: clean: false submodules: 'recursive' diff --git 
a/.github/workflows/randomized_tests.yml b/.github/workflows/randomized_tests.yml index aaef0b536d..d1b8ddfdfb 100644 --- a/.github/workflows/randomized_tests.yml +++ b/.github/workflows/randomized_tests.yml @@ -29,12 +29,12 @@ jobs: rustup update stable - name: Install Node - uses: actions/setup-node@v2 + uses: actions/setup-node@v3 with: node-version: '18' - name: Checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: clean: false submodules: 'recursive' From 488b41826b90102180a3c60c3a088ff1195e0749 Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Wed, 12 Jul 2023 12:46:56 -0700 Subject: [PATCH 082/115] WIP --- crates/workspace/src/adjustable_flex.rs | 83 ++++++++++++++++++++++++ crates/workspace/src/dock.rs | 3 + crates/workspace/src/pane_group.rs | 84 ++++++++++++++++--------- crates/workspace/src/workspace.rs | 5 +- 4 files changed, 140 insertions(+), 35 deletions(-) create mode 100644 crates/workspace/src/adjustable_flex.rs diff --git a/crates/workspace/src/adjustable_flex.rs b/crates/workspace/src/adjustable_flex.rs new file mode 100644 index 0000000000..4ea1b719f1 --- /dev/null +++ b/crates/workspace/src/adjustable_flex.rs @@ -0,0 +1,83 @@ +use gpui::{Element, View, Axis, AnyElement}; + +// Model for the center group: AdjustableGroup of AdjustableGroups +// Implementation notes +// - These have two representations: Exact pixel widths and ratios of elements compared to whole space +// - We have a constraint of minimum sizes for things. +// - If The space is smaller than allowed, things run off the edge +// - When doing Drag resize, we update the pixel width representation, causing a recalc of the ratios +// - If dragging past minimum, take space from next item, until out of space +// - When doing a reflow (e.g. 
layout) we read off the ratios and calculate pixels from that +// - When adding / removing items in an Adjustable flex, reset to default ratios (1:1) +// - By default, every item takes up as much space as possible +// + + +struct AdjustableFlex { + axis: Axis, + handle_size: f32, + items: Vec<(AnyElement, f32)> +} + +impl AdjustableFlex { + fn new(axis: Axis) -> Self { + AdjustableFlex { + axis, + handle_size: 2., + items: Vec::new(), + } + } + + fn add_item() +} + +impl Element for AdjustableFlex { + type LayoutState = (); + + type PaintState = (); + + fn layout( + &mut self, + constraint: gpui::SizeConstraint, + view: &mut V, + cx: &mut gpui::LayoutContext, + ) -> (gpui::geometry::vector::Vector2F, Self::LayoutState) { + todo!() + } + + fn paint( + &mut self, + scene: &mut gpui::SceneBuilder, + bounds: gpui::geometry::rect::RectF, + visible_bounds: gpui::geometry::rect::RectF, + layout: &mut Self::LayoutState, + view: &mut V, + cx: &mut gpui::ViewContext, + ) -> Self::PaintState { + todo!() + } + + fn rect_for_text_range( + &self, + range_utf16: std::ops::Range, + bounds: gpui::geometry::rect::RectF, + visible_bounds: gpui::geometry::rect::RectF, + layout: &Self::LayoutState, + paint: &Self::PaintState, + view: &V, + cx: &gpui::ViewContext, + ) -> Option { + todo!() + } + + fn debug( + &self, + bounds: gpui::geometry::rect::RectF, + layout: &Self::LayoutState, + paint: &Self::PaintState, + view: &V, + cx: &gpui::ViewContext, + ) -> serde_json::Value { + todo!() + } +} diff --git a/crates/workspace/src/dock.rs b/crates/workspace/src/dock.rs index ebaf399e22..259e343248 100644 --- a/crates/workspace/src/dock.rs +++ b/crates/workspace/src/dock.rs @@ -408,6 +408,9 @@ impl View for Dock { } fn render(&mut self, cx: &mut ViewContext) -> AnyElement { + + + if let Some(active_entry) = self.visible_entry() { let style = self.style(cx); ChildView::new(active_entry.panel.as_any(), cx) diff --git a/crates/workspace/src/pane_group.rs b/crates/workspace/src/pane_group.rs index 
5e5a5a98ba..8160a770a3 100644 --- a/crates/workspace/src/pane_group.rs +++ b/crates/workspace/src/pane_group.rs @@ -9,6 +9,7 @@ use gpui::{ platform::{CursorStyle, MouseButton}, AnyViewHandle, Axis, Border, ModelHandle, ViewContext, ViewHandle, }; +use itertools::Itertools; use project::Project; use serde::Deserialize; use theme::Theme; @@ -385,40 +386,61 @@ impl PaneAxis { app_state: &Arc, cx: &mut ViewContext, ) -> AnyElement { - let last_member_ix = self.members.len() - 1; - Flex::new(self.axis) - .with_children(self.members.iter().enumerate().map(|(ix, member)| { - let mut flex = 1.0; - if member.contains(active_pane) { - flex = settings::get::(cx).active_pane_magnification; + let mut flex_container = Flex::new(self.axis); + + let mut members = self.members.iter().enumerate().peekable(); + while let Some((ix, member)) = members.next() { + let last = members.peek().is_none(); + + let mut flex = 1.0; + if member.contains(active_pane) { + flex = settings::get::(cx).active_pane_magnification; + } + + let mut member = member.render( + project, + theme, + follower_state, + active_call, + active_pane, + zoomed, + app_state, + cx, + ); + if !last { + let mut border = theme.workspace.pane_divider; + border.left = false; + border.right = false; + border.top = false; + border.bottom = false; + + match self.axis { + Axis::Vertical => border.bottom = true, + Axis::Horizontal => border.right = true, } - let mut member = member.render( - project, - theme, - follower_state, - active_call, - active_pane, - zoomed, - app_state, - cx, - ); - if ix < last_member_ix { - let mut border = theme.workspace.pane_divider; - border.left = false; - border.right = false; - border.top = false; - border.bottom = false; - match self.axis { - Axis::Vertical => border.bottom = true, - Axis::Horizontal => border.right = true, - } - member = member.contained().with_border(border).into_any(); - } + let side = match self.axis { + Axis::Horizontal => HandleSide::Right, + Axis::Vertical => 
HandleSide::Bottom, + }; - FlexItem::new(member).flex(flex, true) - })) - .into_any() + member = member.contained().with_border(border) + .resizable(side, 1., |workspace, size, cx| { + dbg!("resize", size); + }) + .into_any(); + + + } + + flex_container = flex_container.with_child( + FlexItem::new(member) + .flex(flex, true) + .into_any() + ); + } + + flex_container.into_any() } } diff --git a/crates/workspace/src/workspace.rs b/crates/workspace/src/workspace.rs index 01d80d141c..cafcd191a3 100644 --- a/crates/workspace/src/workspace.rs +++ b/crates/workspace/src/workspace.rs @@ -1,13 +1,10 @@ pub mod dock; -/// NOTE: Focus only 'takes' after an update has flushed_effects. -/// -/// This may cause issues when you're trying to write tests that use workspace focus to add items at -/// specific locations. pub mod item; pub mod notifications; pub mod pane; pub mod pane_group; mod persistence; +mod adjustable_flex; pub mod searchable; pub mod shared_screen; mod status_bar; From 6da5008f3250c930e29c0c401bf875d02d24b6c5 Mon Sep 17 00:00:00 2001 From: "Joseph T. 
Lyons" Date: Wed, 12 Jul 2023 16:09:39 -0400 Subject: [PATCH 083/115] Fix screen sharing panic introduced by call events Co-Authored-By: Max Brunsfeld --- crates/call/src/call.rs | 19 +++++++++++++------ crates/collab_ui/src/collab_ui.rs | 29 +++++++++++++++++++++++++---- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/crates/call/src/call.rs b/crates/call/src/call.rs index ed5e560218..cf6dd1799c 100644 --- a/crates/call/src/call.rs +++ b/crates/call/src/call.rs @@ -263,7 +263,7 @@ impl ActiveCall { .borrow_mut() .take() .ok_or_else(|| anyhow!("no incoming call"))?; - self.report_call_event_for_room("decline incoming", call.room_id, cx); + Self::report_call_event_for_room("decline incoming", call.room_id, &self.client, cx); self.client.send(proto::DeclineCall { room_id: call.room_id, })?; @@ -373,22 +373,29 @@ impl ActiveCall { self.room.as_ref().map(|(room, _)| room) } + pub fn client(&self) -> Arc { + self.client.clone() + } + pub fn pending_invites(&self) -> &HashSet { &self.pending_invites } fn report_call_event(&self, operation: &'static str, cx: &AppContext) { if let Some(room) = self.room() { - self.report_call_event_for_room(operation, room.read(cx).id(), cx) + Self::report_call_event_for_room(operation, room.read(cx).id(), &self.client, cx) } } - fn report_call_event_for_room(&self, operation: &'static str, room_id: u64, cx: &AppContext) { - let telemetry = self.client.telemetry(); + pub fn report_call_event_for_room( + operation: &'static str, + room_id: u64, + client: &Arc, + cx: &AppContext, + ) { + let telemetry = client.telemetry(); let telemetry_settings = *settings::get::(cx); - let event = ClickhouseEvent::Call { operation, room_id }; - telemetry.report_clickhouse_event(event, telemetry_settings); } } diff --git a/crates/collab_ui/src/collab_ui.rs b/crates/collab_ui/src/collab_ui.rs index 3f5ca17a20..7608fdbfee 100644 --- a/crates/collab_ui/src/collab_ui.rs +++ b/crates/collab_ui/src/collab_ui.rs @@ -11,7 +11,7 @@ mod 
sharing_status_indicator; use call::{ActiveCall, Room}; pub use collab_titlebar_item::{CollabTitlebarItem, ToggleContactsMenu}; -use gpui::{actions, AppContext}; +use gpui::{actions, AppContext, Task}; use std::sync::Arc; use util::ResultExt; use workspace::AppState; @@ -44,9 +44,30 @@ pub fn init(app_state: &Arc, cx: &mut AppContext) { } pub fn toggle_screen_sharing(_: &ToggleScreenSharing, cx: &mut AppContext) { - ActiveCall::global(cx).update(cx, |call, cx| { - call.toggle_screen_sharing(cx); - }); + let call = ActiveCall::global(cx).read(cx); + if let Some(room) = call.room().cloned() { + let client = call.client(); + let toggle_screen_sharing = room.update(cx, |room, cx| { + if room.is_screen_sharing() { + ActiveCall::report_call_event_for_room( + "disable screen share", + room.id(), + &client, + cx, + ); + Task::ready(room.unshare_screen(cx)) + } else { + ActiveCall::report_call_event_for_room( + "enable screen share", + room.id(), + &client, + cx, + ); + room.share_screen(cx) + } + }); + toggle_screen_sharing.detach_and_log_err(cx); + } } pub fn toggle_mute(_: &ToggleMute, cx: &mut AppContext) { From 5385ca411bc3b6392df023562b3ec28e492f0368 Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Wed, 12 Jul 2023 17:53:01 -0700 Subject: [PATCH 084/115] Added the new elements and wired through the pointers to update the pane axis ratios --- crates/workspace/src/adjustable_flex.rs | 83 ----- crates/workspace/src/pane_group.rs | 397 ++++++++++++++++++++-- crates/workspace/src/persistence/model.rs | 1 + crates/workspace/src/workspace.rs | 3 +- 4 files changed, 375 insertions(+), 109 deletions(-) delete mode 100644 crates/workspace/src/adjustable_flex.rs diff --git a/crates/workspace/src/adjustable_flex.rs b/crates/workspace/src/adjustable_flex.rs deleted file mode 100644 index 4ea1b719f1..0000000000 --- a/crates/workspace/src/adjustable_flex.rs +++ /dev/null @@ -1,83 +0,0 @@ -use gpui::{Element, View, Axis, AnyElement}; - -// Model for the center group: AdjustableGroup 
of AdjustableGroups -// Implementation notes -// - These have two representations: Exact pixel widths and ratios of elements compared to whole space -// - We have a constraint of minimum sizes for things. -// - If The space is smaller than allowed, things run off the edge -// - When doing Drag resize, we update the pixel width representation, causing a recalc of the ratios -// - If dragging past minimum, take space from next item, until out of space -// - When doing a reflow (e.g. layout) we read off the ratios and calculate pixels from that -// - When adding / removing items in an Adjustable flex, reset to default ratios (1:1) -// - By default, every item takes up as much space as possible -// - - -struct AdjustableFlex { - axis: Axis, - handle_size: f32, - items: Vec<(AnyElement, f32)> -} - -impl AdjustableFlex { - fn new(axis: Axis) -> Self { - AdjustableFlex { - axis, - handle_size: 2., - items: Vec::new(), - } - } - - fn add_item() -} - -impl Element for AdjustableFlex { - type LayoutState = (); - - type PaintState = (); - - fn layout( - &mut self, - constraint: gpui::SizeConstraint, - view: &mut V, - cx: &mut gpui::LayoutContext, - ) -> (gpui::geometry::vector::Vector2F, Self::LayoutState) { - todo!() - } - - fn paint( - &mut self, - scene: &mut gpui::SceneBuilder, - bounds: gpui::geometry::rect::RectF, - visible_bounds: gpui::geometry::rect::RectF, - layout: &mut Self::LayoutState, - view: &mut V, - cx: &mut gpui::ViewContext, - ) -> Self::PaintState { - todo!() - } - - fn rect_for_text_range( - &self, - range_utf16: std::ops::Range, - bounds: gpui::geometry::rect::RectF, - visible_bounds: gpui::geometry::rect::RectF, - layout: &Self::LayoutState, - paint: &Self::PaintState, - view: &V, - cx: &gpui::ViewContext, - ) -> Option { - todo!() - } - - fn debug( - &self, - bounds: gpui::geometry::rect::RectF, - layout: &Self::LayoutState, - paint: &Self::PaintState, - view: &V, - cx: &gpui::ViewContext, - ) -> serde_json::Value { - todo!() - } -} diff --git 
a/crates/workspace/src/pane_group.rs b/crates/workspace/src/pane_group.rs index 8160a770a3..7198dff3bf 100644 --- a/crates/workspace/src/pane_group.rs +++ b/crates/workspace/src/pane_group.rs @@ -1,6 +1,6 @@ -use std::sync::Arc; +use std::{cell::RefCell, rc::Rc, sync::Arc}; -use crate::{AppState, FollowerStatesByLeader, Pane, Workspace, WorkspaceSettings}; +use crate::{AppState, FollowerStatesByLeader, Pane, Workspace}; use anyhow::{anyhow, Result}; use call::{ActiveCall, ParticipantLocation}; use gpui::{ @@ -9,12 +9,13 @@ use gpui::{ platform::{CursorStyle, MouseButton}, AnyViewHandle, Axis, Border, ModelHandle, ViewContext, ViewHandle, }; -use itertools::Itertools; use project::Project; use serde::Deserialize; use theme::Theme; -#[derive(Clone, Debug, Eq, PartialEq)] +use self::adjustable_group::{AdjustableGroupElement, AdjustableGroupItem}; + +#[derive(Clone, Debug, PartialEq)] pub struct PaneGroup { pub(crate) root: Member, } @@ -78,6 +79,7 @@ impl PaneGroup { ) -> AnyElement { self.root.render( project, + 0, theme, follower_states, active_call, @@ -95,7 +97,7 @@ impl PaneGroup { } } -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub(crate) enum Member { Axis(PaneAxis), Pane(ViewHandle), @@ -120,7 +122,11 @@ impl Member { Down | Right => vec![Member::Pane(old_pane), Member::Pane(new_pane)], }; - Member::Axis(PaneAxis { axis, members }) + Member::Axis(PaneAxis { + axis, + members, + ratios: Default::default(), + }) } fn contains(&self, needle: &ViewHandle) -> bool { @@ -133,6 +139,7 @@ impl Member { pub fn render( &self, project: &ModelHandle, + basis: usize, theme: &Theme, follower_states: &FollowerStatesByLeader, active_call: Option<&ModelHandle>, @@ -273,6 +280,7 @@ impl Member { } Member::Axis(axis) => axis.render( project, + basis + 1, theme, follower_states, active_call, @@ -296,10 +304,11 @@ impl Member { } } -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub(crate) struct PaneAxis { pub axis: 
Axis, pub members: Vec, + pub ratios: Rc>>, } impl PaneAxis { @@ -378,6 +387,7 @@ impl PaneAxis { fn render( &self, project: &ModelHandle, + basis: usize, theme: &Theme, follower_state: &FollowerStatesByLeader, active_call: Option<&ModelHandle>, @@ -386,19 +396,29 @@ impl PaneAxis { app_state: &Arc, cx: &mut ViewContext, ) -> AnyElement { - let mut flex_container = Flex::new(self.axis); + let ratios = self.ratios.clone(); + let mut flex_container = AdjustableGroupElement::new(self.axis, 2., basis, move |new_flexes| { + let mut borrow = ratios.borrow_mut(); + borrow.extend(new_flexes); + borrow.truncate(10); + dbg!(borrow); + }); + let next_basis = basis + self.members.len(); let mut members = self.members.iter().enumerate().peekable(); - while let Some((ix, member)) = members.next() { + while let Some((_ix, member)) = members.next() { let last = members.peek().is_none(); let mut flex = 1.0; - if member.contains(active_pane) { - flex = settings::get::(cx).active_pane_magnification; - } + // TODO: Include minimum sizes + // TODO: Restore this + // if member.contains(active_pane) { + // flex = settings::get::(cx).active_pane_magnification; + // } let mut member = member.render( project, + next_basis, theme, follower_state, active_call, @@ -424,20 +444,11 @@ impl PaneAxis { Axis::Vertical => HandleSide::Bottom, }; - member = member.contained().with_border(border) - .resizable(side, 1., |workspace, size, cx| { - dbg!("resize", size); - }) - .into_any(); - - + member = member.contained().with_border(border).into_any(); } - flex_container = flex_container.with_child( - FlexItem::new(member) - .flex(flex, true) - .into_any() - ); + flex_container = + flex_container.with_child(AdjustableGroupItem::new(member, flex).into_any()); } flex_container.into_any() @@ -496,3 +507,341 @@ impl SplitDirection { } } } + +mod adjustable_group { + + use std::{any::Any, ops::Range, rc::Rc}; + + use gpui::{ + color::Color, + geometry::{ + rect::RectF, + vector::{vec2f, Vector2F}, + }, + 
json::{self, ToJson}, + platform::{CursorStyle, MouseButton}, + AnyElement, Axis, CursorRegion, Element, LayoutContext, MouseRegion, Quad, SceneBuilder, + SizeConstraint, Vector2FExt, View, ViewContext, + }; + use serde_json::Value; + + struct AdjustableFlexData { + flex: f32, + } + + pub struct AdjustableGroupElement { + axis: Axis, + handle_size: f32, + basis: usize, + callback: Rc)>, + children: Vec>, + } + + impl AdjustableGroupElement { + pub fn new( + axis: Axis, + handle_size: f32, + basis: usize, + callback: impl Fn(Vec) + 'static, + ) -> Self { + Self { + axis, + handle_size, + basis, + callback: Rc::new(callback), + children: Default::default(), + } + } + + fn layout_flex_children( + &mut self, + constraint: SizeConstraint, + remaining_space: &mut f32, + remaining_flex: &mut f32, + cross_axis_max: &mut f32, + view: &mut V, + cx: &mut LayoutContext, + ) { + let cross_axis = self.axis.invert(); + let last_ix = self.children.len() - 1; + for (ix, child) in self.children.iter_mut().enumerate() { + let flex = child.metadata::().unwrap().flex; + + let handle_size = if ix == last_ix { 0. 
} else { self.handle_size }; + + let child_size = if *remaining_flex == 0.0 { + *remaining_space + } else { + let space_per_flex = *remaining_space / *remaining_flex; + space_per_flex * flex + } - handle_size; + + let child_constraint = match self.axis { + Axis::Horizontal => SizeConstraint::new( + vec2f(child_size, constraint.min.y()), + vec2f(child_size, constraint.max.y()), + ), + Axis::Vertical => SizeConstraint::new( + vec2f(constraint.min.x(), child_size), + vec2f(constraint.max.x(), child_size), + ), + }; + let child_size = child.layout(child_constraint, view, cx); + *remaining_space -= child_size.along(self.axis) + handle_size; + *remaining_flex -= flex; + *cross_axis_max = cross_axis_max.max(child_size.along(cross_axis)); + } + } + } + + impl Extend> for AdjustableGroupElement { + fn extend>>(&mut self, children: T) { + self.children.extend(children); + } + } + + impl Element for AdjustableGroupElement { + type LayoutState = f32; + type PaintState = (); + + fn layout( + &mut self, + constraint: SizeConstraint, + view: &mut V, + cx: &mut LayoutContext, + ) -> (Vector2F, Self::LayoutState) { + let mut remaining_flex = 0.; + + let mut cross_axis_max: f32 = 0.0; + for child in &mut self.children { + let metadata = child.metadata::(); + let flex = metadata + .map(|metadata| metadata.flex) + .expect("All children of an adjustable flex must be AdjustableFlexItems"); + remaining_flex += flex; + } + + let mut remaining_space = constraint.max_along(self.axis); + + if remaining_space.is_infinite() { + panic!("flex contains flexible children but has an infinite constraint along the flex axis"); + } + + self.layout_flex_children( + constraint, + &mut remaining_space, + &mut remaining_flex, + &mut cross_axis_max, + view, + cx, + ); + + let mut size = match self.axis { + Axis::Horizontal => vec2f(constraint.max.x() - remaining_space, cross_axis_max), + Axis::Vertical => vec2f(cross_axis_max, constraint.max.y() - remaining_space), + }; + + if 
constraint.min.x().is_finite() { + size.set_x(size.x().max(constraint.min.x())); + } + if constraint.min.y().is_finite() { + size.set_y(size.y().max(constraint.min.y())); + } + + if size.x() > constraint.max.x() { + size.set_x(constraint.max.x()); + } + if size.y() > constraint.max.y() { + size.set_y(constraint.max.y()); + } + + (size, remaining_space) + } + + fn paint( + &mut self, + scene: &mut SceneBuilder, + bounds: RectF, + visible_bounds: RectF, + remaining_space: &mut Self::LayoutState, + view: &mut V, + cx: &mut ViewContext, + ) -> Self::PaintState { + let visible_bounds = bounds.intersection(visible_bounds).unwrap_or_default(); + + let overflowing = *remaining_space < 0.; + if overflowing { + scene.push_layer(Some(visible_bounds)); + } + + let mut child_origin = bounds.origin(); + + let last_ix = self.children.len() - 1; + for (ix, child) in self.children.iter_mut().enumerate() { + child.paint(scene, child_origin, visible_bounds, view, cx); + + match self.axis { + Axis::Horizontal => child_origin += vec2f(child.size().x(), 0.0), + Axis::Vertical => child_origin += vec2f(0.0, child.size().y()), + } + + if ix != last_ix { + let bounds = match self.axis { + Axis::Horizontal => RectF::new( + child_origin, + vec2f(self.handle_size, visible_bounds.height()), + ), + Axis::Vertical => RectF::new( + child_origin, + vec2f(visible_bounds.width(), self.handle_size), + ), + }; + + scene.push_quad(Quad { + bounds, + background: Some(Color::red()), + ..Default::default() + }); + + let style = match self.axis { + Axis::Horizontal => CursorStyle::ResizeLeftRight, + Axis::Vertical => CursorStyle::ResizeUpDown, + }; + + scene.push_cursor_region(CursorRegion { bounds, style }); + + enum ResizeHandle {} + let callback = self.callback.clone(); + let axis = self.axis; + let mut mouse_region = + MouseRegion::new::(cx.view_id(), self.basis + ix, bounds); + mouse_region = + mouse_region.on_drag(MouseButton::Left, move |drag, v: &mut V, cx| { + dbg!(drag); + callback({ + match axis 
{ + Axis::Horizontal => vec![0., 1., 2.], + Axis::Vertical => vec![3., 2., 1.], + } + }) + }); + scene.push_mouse_region(mouse_region); + + match self.axis { + Axis::Horizontal => child_origin += vec2f(self.handle_size, 0.0), + Axis::Vertical => child_origin += vec2f(0.0, self.handle_size), + } + } + } + + if overflowing { + scene.pop_layer(); + } + } + + fn rect_for_text_range( + &self, + range_utf16: Range, + _: RectF, + _: RectF, + _: &Self::LayoutState, + _: &Self::PaintState, + view: &V, + cx: &ViewContext, + ) -> Option { + self.children + .iter() + .find_map(|child| child.rect_for_text_range(range_utf16.clone(), view, cx)) + } + + fn debug( + &self, + bounds: RectF, + _: &Self::LayoutState, + _: &Self::PaintState, + view: &V, + cx: &ViewContext, + ) -> json::Value { + serde_json::json!({ + "type": "Flex", + "bounds": bounds.to_json(), + "axis": self.axis.to_json(), + "children": self.children.iter().map(|child| child.debug(view, cx)).collect::>() + }) + } + } + + pub struct AdjustableGroupItem { + metadata: AdjustableFlexData, + child: AnyElement, + } + + impl AdjustableGroupItem { + pub fn new(child: impl Element, flex: f32) -> Self { + Self { + metadata: AdjustableFlexData { flex }, + child: child.into_any(), + } + } + } + + impl Element for AdjustableGroupItem { + type LayoutState = (); + type PaintState = (); + + fn layout( + &mut self, + constraint: SizeConstraint, + view: &mut V, + cx: &mut LayoutContext, + ) -> (Vector2F, Self::LayoutState) { + let size = self.child.layout(constraint, view, cx); + (size, ()) + } + + fn paint( + &mut self, + scene: &mut SceneBuilder, + bounds: RectF, + visible_bounds: RectF, + _: &mut Self::LayoutState, + view: &mut V, + cx: &mut ViewContext, + ) -> Self::PaintState { + self.child + .paint(scene, bounds.origin(), visible_bounds, view, cx) + } + + fn rect_for_text_range( + &self, + range_utf16: Range, + _: RectF, + _: RectF, + _: &Self::LayoutState, + _: &Self::PaintState, + view: &V, + cx: &ViewContext, + ) -> Option { 
+ self.child.rect_for_text_range(range_utf16, view, cx) + } + + fn metadata(&self) -> Option<&dyn Any> { + Some(&self.metadata) + } + + fn debug( + &self, + _: RectF, + _: &Self::LayoutState, + _: &Self::PaintState, + view: &V, + cx: &ViewContext, + ) -> Value { + serde_json::json!({ + "type": "Flexible", + "flex": self.metadata.flex, + "child": self.child.debug(view, cx) + }) + } + } +} diff --git a/crates/workspace/src/persistence/model.rs b/crates/workspace/src/persistence/model.rs index 1075061853..c159ff0f42 100644 --- a/crates/workspace/src/persistence/model.rs +++ b/crates/workspace/src/persistence/model.rs @@ -187,6 +187,7 @@ impl SerializedPaneGroup { Member::Axis(PaneAxis { axis: *axis, members, + ratios: Default::default() }), current_active_pane, items, diff --git a/crates/workspace/src/workspace.rs b/crates/workspace/src/workspace.rs index cafcd191a3..f91204c51a 100644 --- a/crates/workspace/src/workspace.rs +++ b/crates/workspace/src/workspace.rs @@ -4,7 +4,6 @@ pub mod notifications; pub mod pane; pub mod pane_group; mod persistence; -mod adjustable_flex; pub mod searchable; pub mod shared_screen; mod status_bar; @@ -2924,7 +2923,7 @@ impl Workspace { cx: &AppContext, ) -> SerializedPaneGroup { match pane_group { - Member::Axis(PaneAxis { axis, members }) => SerializedPaneGroup::Group { + Member::Axis(PaneAxis { axis, members, .. 
}) => SerializedPaneGroup::Group { axis: *axis, children: members .iter() From 26b9be628ebaf238c683bb925ca155518126e5ca Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Wed, 12 Jul 2023 22:34:33 -0700 Subject: [PATCH 085/115] Add the math for pane resizing --- crates/gpui/src/app/window.rs | 13 +++ crates/gpui/src/gpui.rs | 2 +- crates/workspace/src/dock.rs | 3 - crates/workspace/src/pane_group.rs | 120 ++++++++++++++++------ crates/workspace/src/persistence/model.rs | 6 +- 5 files changed, 103 insertions(+), 41 deletions(-) diff --git a/crates/gpui/src/app/window.rs b/crates/gpui/src/app/window.rs index 58d7bb4c40..49b12d823e 100644 --- a/crates/gpui/src/app/window.rs +++ b/crates/gpui/src/app/window.rs @@ -1268,6 +1268,19 @@ impl Vector2FExt for Vector2F { } } +pub trait RectFExt { + fn length_along(self, axis: Axis) -> f32; +} + +impl RectFExt for RectF { + fn length_along(self, axis: Axis) -> f32 { + match axis { + Axis::Horizontal => self.width(), + Axis::Vertical => self.height(), + } + } +} + #[derive(Copy, Clone, Debug)] pub struct SizeConstraint { pub min: Vector2F, diff --git a/crates/gpui/src/gpui.rs b/crates/gpui/src/gpui.rs index 3442934b3a..c79c793dda 100644 --- a/crates/gpui/src/gpui.rs +++ b/crates/gpui/src/gpui.rs @@ -27,7 +27,7 @@ pub mod json; pub mod keymap_matcher; pub mod platform; pub use gpui_macros::{test, Element}; -pub use window::{Axis, SizeConstraint, Vector2FExt, WindowContext}; +pub use window::{Axis, RectFExt, SizeConstraint, Vector2FExt, WindowContext}; pub use anyhow; pub use serde_json; diff --git a/crates/workspace/src/dock.rs b/crates/workspace/src/dock.rs index 259e343248..ebaf399e22 100644 --- a/crates/workspace/src/dock.rs +++ b/crates/workspace/src/dock.rs @@ -408,9 +408,6 @@ impl View for Dock { } fn render(&mut self, cx: &mut ViewContext) -> AnyElement { - - - if let Some(active_entry) = self.visible_entry() { let style = self.style(cx); ChildView::new(active_entry.panel.as_any(), cx) diff --git 
a/crates/workspace/src/pane_group.rs b/crates/workspace/src/pane_group.rs index 7198dff3bf..fdda67ad22 100644 --- a/crates/workspace/src/pane_group.rs +++ b/crates/workspace/src/pane_group.rs @@ -308,10 +308,19 @@ impl Member { pub(crate) struct PaneAxis { pub axis: Axis, pub members: Vec, - pub ratios: Rc>>, + ratios: Rc>>, } impl PaneAxis { + pub fn new(axis: Axis, members: Vec) -> Self { + let ratios = Rc::new(RefCell::new(vec![1.; members.len()])); + Self { + axis, + members, + ratios, + } + } + fn split( &mut self, old_pane: &ViewHandle, @@ -397,20 +406,24 @@ impl PaneAxis { cx: &mut ViewContext, ) -> AnyElement { let ratios = self.ratios.clone(); - let mut flex_container = AdjustableGroupElement::new(self.axis, 2., basis, move |new_flexes| { - let mut borrow = ratios.borrow_mut(); - borrow.extend(new_flexes); - borrow.truncate(10); - dbg!(borrow); - }); + let mut flex_container = + AdjustableGroupElement::new(self.axis, 2., basis, move |new_flexes, _, cx| { + let mut borrow = ratios.borrow_mut(); + for (ix, flex) in new_flexes { + if let Some(el) = borrow.get_mut(ix) { + *el = flex; + } + } + cx.notify(); + }); + + let ratios_borrow = self.ratios.borrow(); let next_basis = basis + self.members.len(); - let mut members = self.members.iter().enumerate().peekable(); - while let Some((_ix, member)) = members.next() { + let mut members = self.members.iter().zip(ratios_borrow.iter()).peekable(); + while let Some((member, flex)) = members.next() { let last = members.peek().is_none(); - let mut flex = 1.0; - // TODO: Include minimum sizes // TODO: Restore this // if member.contains(active_pane) { // flex = settings::get::(cx).active_pane_magnification; @@ -439,16 +452,11 @@ impl PaneAxis { Axis::Horizontal => border.right = true, } - let side = match self.axis { - Axis::Horizontal => HandleSide::Right, - Axis::Vertical => HandleSide::Bottom, - }; - member = member.contained().with_border(border).into_any(); } flex_container = - 
flex_container.with_child(AdjustableGroupItem::new(member, flex).into_any()); + flex_container.with_child(AdjustableGroupItem::new(member, *flex).into_any()); } flex_container.into_any() @@ -520,10 +528,11 @@ mod adjustable_group { }, json::{self, ToJson}, platform::{CursorStyle, MouseButton}, - AnyElement, Axis, CursorRegion, Element, LayoutContext, MouseRegion, Quad, SceneBuilder, - SizeConstraint, Vector2FExt, View, ViewContext, + AnyElement, Axis, CursorRegion, Element, EventContext, LayoutContext, MouseRegion, Quad, + RectFExt, SceneBuilder, SizeConstraint, Vector2FExt, View, ViewContext, }; use serde_json::Value; + use smallvec::SmallVec; struct AdjustableFlexData { flex: f32, @@ -533,7 +542,7 @@ mod adjustable_group { axis: Axis, handle_size: f32, basis: usize, - callback: Rc)>, + callback: Rc, &mut V, &mut EventContext)>, children: Vec>, } @@ -542,7 +551,7 @@ mod adjustable_group { axis: Axis, handle_size: f32, basis: usize, - callback: impl Fn(Vec) + 'static, + callback: impl Fn(SmallVec<[(usize, f32); 2]>, &mut V, &mut EventContext) + 'static, ) -> Self { Self { axis, @@ -676,8 +685,9 @@ mod adjustable_group { let mut child_origin = bounds.origin(); - let last_ix = self.children.len() - 1; - for (ix, child) in self.children.iter_mut().enumerate() { + let mut children_iter = self.children.iter_mut().enumerate().peekable(); + while let Some((ix, child)) = children_iter.next() { + let child_start = child_origin.clone(); child.paint(scene, child_origin, visible_bounds, view, cx); match self.axis { @@ -685,7 +695,7 @@ mod adjustable_group { Axis::Vertical => child_origin += vec2f(0.0, child.size().y()), } - if ix != last_ix { + if let Some((next_ix, next_child)) = children_iter.peek() { let bounds = match self.axis { Axis::Horizontal => RectF::new( child_origin, @@ -710,20 +720,66 @@ mod adjustable_group { scene.push_cursor_region(CursorRegion { bounds, style }); - enum ResizeHandle {} let callback = self.callback.clone(); let axis = self.axis; + let 
child_size = child.size(); + let next_child_size = next_child.size(); + let mut drag_bounds = visible_bounds.clone(); + // Unsure why this should be needed.... + drag_bounds.set_origin_y(0.); + let current_flex = child.metadata::().unwrap().flex; + let next_flex = next_child.metadata::().unwrap().flex; + let next_ix = *next_ix; + const HORIZONTAL_MIN_SIZE: f32 = 80.; + const VERTICAL_MIN_SIZE: f32 = 100.; + enum ResizeHandle {} let mut mouse_region = MouseRegion::new::(cx.view_id(), self.basis + ix, bounds); mouse_region = mouse_region.on_drag(MouseButton::Left, move |drag, v: &mut V, cx| { - dbg!(drag); - callback({ - match axis { - Axis::Horizontal => vec![0., 1., 2.], - Axis::Vertical => vec![3., 2., 1.], - } - }) + let min_size = match axis { + Axis::Horizontal => HORIZONTAL_MIN_SIZE, + Axis::Vertical => VERTICAL_MIN_SIZE, + }; + // Don't allow resizing to less than the minimum size, if elements are already too small + if min_size - 1. > child_size.along(axis) + || min_size - 1. > next_child_size.along(axis) + { + return; + } + + let flex_position = drag.position - drag_bounds.origin(); + let mut current_target_size = (flex_position - child_start).along(axis); + let proposed_current_pixel_change = + current_target_size - child_size.along(axis); + + if proposed_current_pixel_change < 0. { + current_target_size = current_target_size.max(min_size); + } else if proposed_current_pixel_change > 0. 
{ + // TODO: cascade this size change down, collect into a vec + let next_target_size = (next_child_size.along(axis) + - proposed_current_pixel_change) + .max(min_size); + current_target_size = current_target_size.min( + child_size.along(axis) + next_child_size.along(axis) + - next_target_size, + ); + } + + let current_pixel_change = current_target_size - child_size.along(axis); + let flex_change = current_pixel_change / drag_bounds.length_along(axis); + + let current_target_flex = current_flex + flex_change; + let next_target_flex = next_flex - flex_change; + + callback( + smallvec::smallvec![ + (ix, current_target_flex), + (next_ix, next_target_flex), + ], + v, + cx, + ) }); scene.push_mouse_region(mouse_region); diff --git a/crates/workspace/src/persistence/model.rs b/crates/workspace/src/persistence/model.rs index c159ff0f42..762d7171de 100644 --- a/crates/workspace/src/persistence/model.rs +++ b/crates/workspace/src/persistence/model.rs @@ -184,11 +184,7 @@ impl SerializedPaneGroup { } Some(( - Member::Axis(PaneAxis { - axis: *axis, - members, - ratios: Default::default() - }), + Member::Axis(PaneAxis::new(*axis, members)), current_active_pane, items, )) From 8cce403c112d43a6380f3419b38f7b9245d967e3 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Thu, 13 Jul 2023 11:52:35 +0300 Subject: [PATCH 086/115] Update another deprecated plugin --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fe89801f04..eb9b6d1f7c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -148,8 +148,8 @@ jobs: - name: Create app bundle run: script/bundle - - name: Upload app bundle to workflow run if main branch or specifi label - uses: actions/upload-artifact@v2 + - name: Upload app bundle to workflow run if main branch or specific label + uses: actions/upload-artifact@v3 if: ${{ github.ref == 'refs/heads/main' }} || 
contains(github.event.pull_request.labels.*.name, 'run-build-dmg') }} with: name: Zed_${{ github.event.pull_request.head.sha || github.sha }}.dmg From d5f7ad08fa48cd4997de64b3f98522970a2b9f9a Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Thu, 13 Jul 2023 11:28:21 -0700 Subject: [PATCH 087/115] Styled and refined behavior for split resizing --- crates/editor/src/element.rs | 6 +- crates/workspace/src/pane_group.rs | 259 +++++++++-------------------- 2 files changed, 86 insertions(+), 179 deletions(-) diff --git a/crates/editor/src/element.rs b/crates/editor/src/element.rs index f0bae9533b..074a96dfc1 100644 --- a/crates/editor/src/element.rs +++ b/crates/editor/src/element.rs @@ -1182,8 +1182,10 @@ impl EditorElement { }); scene.push_mouse_region( MouseRegion::new::(cx.view_id(), cx.view_id(), track_bounds) - .on_move(move |_, editor: &mut Editor, cx| { - editor.scroll_manager.show_scrollbar(cx); + .on_move(move |event, editor: &mut Editor, cx| { + if event.pressed_button.is_none() { + editor.scroll_manager.show_scrollbar(cx); + } }) .on_down(MouseButton::Left, { let row_range = row_range.clone(); diff --git a/crates/workspace/src/pane_group.rs b/crates/workspace/src/pane_group.rs index fdda67ad22..372bfd8ef4 100644 --- a/crates/workspace/src/pane_group.rs +++ b/crates/workspace/src/pane_group.rs @@ -1,6 +1,8 @@ use std::{cell::RefCell, rc::Rc, sync::Arc}; -use crate::{AppState, FollowerStatesByLeader, Pane, Workspace}; +use crate::{ + pane_group::element::PaneAxisElement, AppState, FollowerStatesByLeader, Pane, Workspace, +}; use anyhow::{anyhow, Result}; use call::{ActiveCall, ParticipantLocation}; use gpui::{ @@ -13,8 +15,6 @@ use project::Project; use serde::Deserialize; use theme::Theme; -use self::adjustable_group::{AdjustableGroupElement, AdjustableGroupItem}; - #[derive(Clone, Debug, PartialEq)] pub struct PaneGroup { pub(crate) root: Member, @@ -122,11 +122,7 @@ impl Member { Down | Right => vec![Member::Pane(old_pane), Member::Pane(new_pane)], }; - 
Member::Axis(PaneAxis { - axis, - members, - ratios: Default::default(), - }) + Member::Axis(PaneAxis::new(axis, members)) } fn contains(&self, needle: &ViewHandle) -> bool { @@ -308,16 +304,16 @@ impl Member { pub(crate) struct PaneAxis { pub axis: Axis, pub members: Vec, - ratios: Rc>>, + flexes: Rc>>, } impl PaneAxis { pub fn new(axis: Axis, members: Vec) -> Self { - let ratios = Rc::new(RefCell::new(vec![1.; members.len()])); + let flexes = Rc::new(RefCell::new(vec![1.; members.len()])); Self { axis, members, - ratios, + flexes, } } @@ -342,6 +338,7 @@ impl PaneAxis { } self.members.insert(idx, Member::Pane(new_pane.clone())); + *self.flexes.borrow_mut() = vec![1.; self.members.len()]; } else { *member = Member::new_axis(old_pane.clone(), new_pane.clone(), direction); @@ -381,6 +378,7 @@ impl PaneAxis { if found_pane { if let Some(idx) = remove_member { self.members.remove(idx); + *self.flexes.borrow_mut() = vec![1.; self.members.len()]; } if self.members.len() == 1 { @@ -405,23 +403,17 @@ impl PaneAxis { app_state: &Arc, cx: &mut ViewContext, ) -> AnyElement { - let ratios = self.ratios.clone(); - let mut flex_container = - AdjustableGroupElement::new(self.axis, 2., basis, move |new_flexes, _, cx| { - let mut borrow = ratios.borrow_mut(); - for (ix, flex) in new_flexes { - if let Some(el) = borrow.get_mut(ix) { - *el = flex; - } - } + debug_assert!(self.members.len() == self.flexes.borrow().len()); - cx.notify(); - }); + // TODO: SImplify further by just passing in the flexes pointer directly, no need to generify! 
+ let mut flex_container = PaneAxisElement::new(self.axis, basis, self.flexes.clone()); - let ratios_borrow = self.ratios.borrow(); - let next_basis = basis + self.members.len(); - let mut members = self.members.iter().zip(ratios_borrow.iter()).peekable(); - while let Some((member, flex)) = members.next() { + let mut members = self + .members + .iter() + .enumerate() + .peekable(); + while let Some((ix, member)) = members.next() { let last = members.peek().is_none(); // TODO: Restore this @@ -431,7 +423,7 @@ impl PaneAxis { let mut member = member.render( project, - next_basis, + (basis + ix) * 10, theme, follower_state, active_call, @@ -440,6 +432,7 @@ impl PaneAxis { app_state, cx, ); + if !last { let mut border = theme.workspace.pane_divider; border.left = false; @@ -455,8 +448,7 @@ impl PaneAxis { member = member.contained().with_border(border).into_any(); } - flex_container = - flex_container.with_child(AdjustableGroupItem::new(member, *flex).into_any()); + flex_container = flex_container.with_child(member.into_any()); } flex_container.into_any() @@ -516,48 +508,34 @@ impl SplitDirection { } } -mod adjustable_group { - - use std::{any::Any, ops::Range, rc::Rc}; +// TODO: PaneAxis element here +mod element { + use std::{cell::RefCell, ops::Range, rc::Rc}; use gpui::{ - color::Color, geometry::{ rect::RectF, vector::{vec2f, Vector2F}, }, json::{self, ToJson}, platform::{CursorStyle, MouseButton}, - AnyElement, Axis, CursorRegion, Element, EventContext, LayoutContext, MouseRegion, Quad, - RectFExt, SceneBuilder, SizeConstraint, Vector2FExt, View, ViewContext, + AnyElement, Axis, CursorRegion, Element, LayoutContext, MouseRegion, RectFExt, + SceneBuilder, SizeConstraint, Vector2FExt, View, ViewContext, }; - use serde_json::Value; - use smallvec::SmallVec; - struct AdjustableFlexData { - flex: f32, - } - - pub struct AdjustableGroupElement { + pub struct PaneAxisElement { axis: Axis, - handle_size: f32, basis: usize, - callback: Rc, &mut V, &mut EventContext)>, + 
flexes: Rc>>, children: Vec>, } - impl AdjustableGroupElement { - pub fn new( - axis: Axis, - handle_size: f32, - basis: usize, - callback: impl Fn(SmallVec<[(usize, f32); 2]>, &mut V, &mut EventContext) + 'static, - ) -> Self { + impl PaneAxisElement { + pub fn new(axis: Axis, basis: usize, flexes: Rc>>) -> Self { Self { axis, - handle_size, basis, - callback: Rc::new(callback), + flexes, children: Default::default(), } } @@ -571,19 +549,17 @@ mod adjustable_group { view: &mut V, cx: &mut LayoutContext, ) { + let flexes = self.flexes.borrow(); let cross_axis = self.axis.invert(); - let last_ix = self.children.len() - 1; for (ix, child) in self.children.iter_mut().enumerate() { - let flex = child.metadata::().unwrap().flex; - - let handle_size = if ix == last_ix { 0. } else { self.handle_size }; + let flex = flexes[ix]; let child_size = if *remaining_flex == 0.0 { *remaining_space } else { let space_per_flex = *remaining_space / *remaining_flex; space_per_flex * flex - } - handle_size; + }; let child_constraint = match self.axis { Axis::Horizontal => SizeConstraint::new( @@ -596,20 +572,20 @@ mod adjustable_group { ), }; let child_size = child.layout(child_constraint, view, cx); - *remaining_space -= child_size.along(self.axis) + handle_size; + *remaining_space -= child_size.along(self.axis); *remaining_flex -= flex; *cross_axis_max = cross_axis_max.max(child_size.along(cross_axis)); } } } - impl Extend> for AdjustableGroupElement { + impl Extend> for PaneAxisElement { fn extend>>(&mut self, children: T) { self.children.extend(children); } } - impl Element for AdjustableGroupElement { + impl Element for PaneAxisElement { type LayoutState = f32; type PaintState = (); @@ -619,14 +595,11 @@ mod adjustable_group { view: &mut V, cx: &mut LayoutContext, ) -> (Vector2F, Self::LayoutState) { + debug_assert!(self.children.len() == self.flexes.borrow().len()); let mut remaining_flex = 0.; let mut cross_axis_max: f32 = 0.0; - for child in &mut self.children { - let metadata = 
child.metadata::(); - let flex = metadata - .map(|metadata| metadata.flex) - .expect("All children of an adjustable flex must be AdjustableFlexItems"); + for flex in self.flexes.borrow().iter() { remaining_flex += flex; } @@ -695,48 +668,61 @@ mod adjustable_group { Axis::Vertical => child_origin += vec2f(0.0, child.size().y()), } + const HANDLE_HITBOX_SIZE: f32 = 4.0; if let Some((next_ix, next_child)) = children_iter.peek() { - let bounds = match self.axis { + scene.push_stacking_context(None, None); + + let handle_origin = match self.axis { + Axis::Horizontal => child_origin - vec2f(HANDLE_HITBOX_SIZE / 2., 0.0), + Axis::Vertical => child_origin - vec2f(0.0, HANDLE_HITBOX_SIZE / 2.), + }; + + let handle_bounds = match self.axis { Axis::Horizontal => RectF::new( - child_origin, - vec2f(self.handle_size, visible_bounds.height()), + handle_origin, + vec2f(HANDLE_HITBOX_SIZE, visible_bounds.height()), ), Axis::Vertical => RectF::new( - child_origin, - vec2f(visible_bounds.width(), self.handle_size), + handle_origin, + vec2f(visible_bounds.width(), HANDLE_HITBOX_SIZE), ), }; - scene.push_quad(Quad { - bounds, - background: Some(Color::red()), - ..Default::default() - }); + // use gpui::color::Color, + // scene.push_quad(Quad { + // bounds: handle_bounds, + // background: Some(Color::red()), + // ..Default::default() + // }); let style = match self.axis { Axis::Horizontal => CursorStyle::ResizeLeftRight, Axis::Vertical => CursorStyle::ResizeUpDown, }; - scene.push_cursor_region(CursorRegion { bounds, style }); + scene.push_cursor_region(CursorRegion { + bounds: handle_bounds, + style, + }); - let callback = self.callback.clone(); let axis = self.axis; let child_size = child.size(); let next_child_size = next_child.size(); - let mut drag_bounds = visible_bounds.clone(); - // Unsure why this should be needed.... 
- drag_bounds.set_origin_y(0.); - let current_flex = child.metadata::().unwrap().flex; - let next_flex = next_child.metadata::().unwrap().flex; + let drag_bounds = visible_bounds.clone(); + let flexes = self.flexes.clone(); + let current_flex = flexes.borrow()[ix]; let next_ix = *next_ix; + let next_flex = flexes.borrow()[next_ix]; const HORIZONTAL_MIN_SIZE: f32 = 80.; const VERTICAL_MIN_SIZE: f32 = 100.; enum ResizeHandle {} - let mut mouse_region = - MouseRegion::new::(cx.view_id(), self.basis + ix, bounds); + let mut mouse_region = MouseRegion::new::( + cx.view_id(), + self.basis + ix, + handle_bounds, + ); mouse_region = - mouse_region.on_drag(MouseButton::Left, move |drag, v: &mut V, cx| { + mouse_region.on_drag(MouseButton::Left, move |drag, _: &mut V, cx| { let min_size = match axis { Axis::Horizontal => HORIZONTAL_MIN_SIZE, Axis::Vertical => VERTICAL_MIN_SIZE, @@ -748,15 +734,15 @@ mod adjustable_group { return; } - let flex_position = drag.position - drag_bounds.origin(); - let mut current_target_size = (flex_position - child_start).along(axis); + let mut current_target_size = (drag.position - child_start).along(axis); + let proposed_current_pixel_change = current_target_size - child_size.along(axis); if proposed_current_pixel_change < 0. { current_target_size = current_target_size.max(min_size); } else if proposed_current_pixel_change > 0. 
{ - // TODO: cascade this size change down, collect into a vec + // TODO: cascade this size change down, collect all changes into a vec let next_target_size = (next_child_size.along(axis) - proposed_current_pixel_change) .max(min_size); @@ -768,25 +754,18 @@ mod adjustable_group { let current_pixel_change = current_target_size - child_size.along(axis); let flex_change = current_pixel_change / drag_bounds.length_along(axis); - let current_target_flex = current_flex + flex_change; let next_target_flex = next_flex - flex_change; - callback( - smallvec::smallvec![ - (ix, current_target_flex), - (next_ix, next_target_flex), - ], - v, - cx, - ) + let mut borrow = flexes.borrow_mut(); + *borrow.get_mut(ix).unwrap() = current_target_flex; + *borrow.get_mut(next_ix).unwrap() = next_target_flex; + + cx.notify(); }); scene.push_mouse_region(mouse_region); - match self.axis { - Axis::Horizontal => child_origin += vec2f(self.handle_size, 0.0), - Axis::Vertical => child_origin += vec2f(0.0, self.handle_size), - } + scene.pop_stacking_context(); } } @@ -819,85 +798,11 @@ mod adjustable_group { cx: &ViewContext, ) -> json::Value { serde_json::json!({ - "type": "Flex", + "type": "PaneAxis", "bounds": bounds.to_json(), "axis": self.axis.to_json(), "children": self.children.iter().map(|child| child.debug(view, cx)).collect::>() }) } } - - pub struct AdjustableGroupItem { - metadata: AdjustableFlexData, - child: AnyElement, - } - - impl AdjustableGroupItem { - pub fn new(child: impl Element, flex: f32) -> Self { - Self { - metadata: AdjustableFlexData { flex }, - child: child.into_any(), - } - } - } - - impl Element for AdjustableGroupItem { - type LayoutState = (); - type PaintState = (); - - fn layout( - &mut self, - constraint: SizeConstraint, - view: &mut V, - cx: &mut LayoutContext, - ) -> (Vector2F, Self::LayoutState) { - let size = self.child.layout(constraint, view, cx); - (size, ()) - } - - fn paint( - &mut self, - scene: &mut SceneBuilder, - bounds: RectF, - visible_bounds: 
RectF, - _: &mut Self::LayoutState, - view: &mut V, - cx: &mut ViewContext, - ) -> Self::PaintState { - self.child - .paint(scene, bounds.origin(), visible_bounds, view, cx) - } - - fn rect_for_text_range( - &self, - range_utf16: Range, - _: RectF, - _: RectF, - _: &Self::LayoutState, - _: &Self::PaintState, - view: &V, - cx: &ViewContext, - ) -> Option { - self.child.rect_for_text_range(range_utf16, view, cx) - } - - fn metadata(&self) -> Option<&dyn Any> { - Some(&self.metadata) - } - - fn debug( - &self, - _: RectF, - _: &Self::LayoutState, - _: &Self::PaintState, - view: &V, - cx: &ViewContext, - ) -> Value { - serde_json::json!({ - "type": "Flexible", - "flex": self.metadata.flex, - "child": self.child.debug(view, cx) - }) - } - } } From 00b04f1c85301c17592deed99b08e539106931e4 Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Thu, 13 Jul 2023 13:10:36 -0700 Subject: [PATCH 088/115] Restore active pane magnification --- crates/workspace/src/pane_group.rs | 80 ++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 26 deletions(-) diff --git a/crates/workspace/src/pane_group.rs b/crates/workspace/src/pane_group.rs index 372bfd8ef4..3e4ce21694 100644 --- a/crates/workspace/src/pane_group.rs +++ b/crates/workspace/src/pane_group.rs @@ -15,6 +15,10 @@ use project::Project; use serde::Deserialize; use theme::Theme; +const HANDLE_HITBOX_SIZE: f32 = 4.0; +const HORIZONTAL_MIN_SIZE: f32 = 80.; +const VERTICAL_MIN_SIZE: f32 = 100.; + #[derive(Clone, Debug, PartialEq)] pub struct PaneGroup { pub(crate) root: Member, @@ -405,21 +409,17 @@ impl PaneAxis { ) -> AnyElement { debug_assert!(self.members.len() == self.flexes.borrow().len()); - // TODO: SImplify further by just passing in the flexes pointer directly, no need to generify! 
let mut flex_container = PaneAxisElement::new(self.axis, basis, self.flexes.clone()); + let mut active_pane_ix = None; - let mut members = self - .members - .iter() - .enumerate() - .peekable(); + let mut members = self.members.iter().enumerate().peekable(); while let Some((ix, member)) = members.next() { let last = members.peek().is_none(); // TODO: Restore this - // if member.contains(active_pane) { - // flex = settings::get::(cx).active_pane_magnification; - // } + if member.contains(active_pane) { + active_pane_ix = Some(ix); + } let mut member = member.render( project, @@ -450,7 +450,7 @@ impl PaneAxis { flex_container = flex_container.with_child(member.into_any()); } - + flex_container.set_active_pane(active_pane_ix); flex_container.into_any() } } @@ -508,7 +508,6 @@ impl SplitDirection { } } -// TODO: PaneAxis element here mod element { use std::{cell::RefCell, ops::Range, rc::Rc}; @@ -523,9 +522,15 @@ mod element { SceneBuilder, SizeConstraint, Vector2FExt, View, ViewContext, }; + use crate::{ + pane_group::{HANDLE_HITBOX_SIZE, HORIZONTAL_MIN_SIZE, VERTICAL_MIN_SIZE}, + WorkspaceSettings, + }; + pub struct PaneAxisElement { axis: Axis, basis: usize, + active_pane_ix: Option, flexes: Rc>>, children: Vec>, } @@ -536,12 +541,18 @@ mod element { axis, basis, flexes, + active_pane_ix: None, children: Default::default(), } } + pub fn set_active_pane(&mut self, active_pane_ix: Option) { + self.active_pane_ix = active_pane_ix; + } + fn layout_flex_children( &mut self, + active_pane_magnification: f32, constraint: SizeConstraint, remaining_space: &mut f32, remaining_flex: &mut f32, @@ -552,7 +563,19 @@ mod element { let flexes = self.flexes.borrow(); let cross_axis = self.axis.invert(); for (ix, child) in self.children.iter_mut().enumerate() { - let flex = flexes[ix]; + let flex = if active_pane_magnification != 1. { + if let Some(active_pane_ix) = self.active_pane_ix { + if ix == active_pane_ix { + active_pane_magnification + } else { + 1. + } + } else { + 1. 
+ } + } else { + flexes[ix] + }; let child_size = if *remaining_flex == 0.0 { *remaining_space @@ -596,13 +619,25 @@ mod element { cx: &mut LayoutContext, ) -> (Vector2F, Self::LayoutState) { debug_assert!(self.children.len() == self.flexes.borrow().len()); + + let active_pane_magnification = + settings::get::(cx).active_pane_magnification; + let mut remaining_flex = 0.; - let mut cross_axis_max: f32 = 0.0; - for flex in self.flexes.borrow().iter() { - remaining_flex += flex; + if active_pane_magnification != 1. { + let active_pane_flex = self + .active_pane_ix + .map(|_| active_pane_magnification) + .unwrap_or(1.); + remaining_flex += self.children.len() as f32 - 1. + active_pane_flex; + } else { + for flex in self.flexes.borrow().iter() { + remaining_flex += flex; + } } + let mut cross_axis_max: f32 = 0.0; let mut remaining_space = constraint.max_along(self.axis); if remaining_space.is_infinite() { @@ -610,6 +645,7 @@ mod element { } self.layout_flex_children( + active_pane_magnification, constraint, &mut remaining_space, &mut remaining_flex, @@ -649,6 +685,7 @@ mod element { view: &mut V, cx: &mut ViewContext, ) -> Self::PaintState { + let can_resize = settings::get::(cx).active_pane_magnification == 1.; let visible_bounds = bounds.intersection(visible_bounds).unwrap_or_default(); let overflowing = *remaining_space < 0.; @@ -668,8 +705,8 @@ mod element { Axis::Vertical => child_origin += vec2f(0.0, child.size().y()), } - const HANDLE_HITBOX_SIZE: f32 = 4.0; - if let Some((next_ix, next_child)) = children_iter.peek() { + if let Some(Some((next_ix, next_child))) = can_resize.then(|| children_iter.peek()) + { scene.push_stacking_context(None, None); let handle_origin = match self.axis { @@ -688,13 +725,6 @@ mod element { ), }; - // use gpui::color::Color, - // scene.push_quad(Quad { - // bounds: handle_bounds, - // background: Some(Color::red()), - // ..Default::default() - // }); - let style = match self.axis { Axis::Horizontal => CursorStyle::ResizeLeftRight, 
Axis::Vertical => CursorStyle::ResizeUpDown, @@ -713,8 +743,6 @@ mod element { let current_flex = flexes.borrow()[ix]; let next_ix = *next_ix; let next_flex = flexes.borrow()[next_ix]; - const HORIZONTAL_MIN_SIZE: f32 = 80.; - const VERTICAL_MIN_SIZE: f32 = 100.; enum ResizeHandle {} let mut mouse_region = MouseRegion::new::( cx.view_id(), From 5797282b981ddc8b527f03ef70793d1b6a534457 Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Thu, 13 Jul 2023 14:21:14 -0700 Subject: [PATCH 089/115] Add resising serialization --- crates/workspace/src/pane_group.rs | 67 ++++++---- crates/workspace/src/persistence.rs | 152 ++++++++++++++-------- crates/workspace/src/persistence/model.rs | 9 +- crates/workspace/src/workspace.rs | 16 ++- 4 files changed, 159 insertions(+), 85 deletions(-) diff --git a/crates/workspace/src/pane_group.rs b/crates/workspace/src/pane_group.rs index 3e4ce21694..e58b95d6b3 100644 --- a/crates/workspace/src/pane_group.rs +++ b/crates/workspace/src/pane_group.rs @@ -308,7 +308,7 @@ impl Member { pub(crate) struct PaneAxis { pub axis: Axis, pub members: Vec, - flexes: Rc>>, + pub flexes: Rc>>, } impl PaneAxis { @@ -321,6 +321,18 @@ impl PaneAxis { } } + pub fn load(axis: Axis, members: Vec, flexes: Option>) -> Self { + let flexes = flexes.unwrap_or_else(|| vec![1.; members.len()]); + debug_assert!(members.len() == flexes.len()); + + let flexes = Rc::new(RefCell::new(flexes)); + Self { + axis, + members, + flexes, + } + } + fn split( &mut self, old_pane: &ViewHandle, @@ -519,23 +531,23 @@ mod element { json::{self, ToJson}, platform::{CursorStyle, MouseButton}, AnyElement, Axis, CursorRegion, Element, LayoutContext, MouseRegion, RectFExt, - SceneBuilder, SizeConstraint, Vector2FExt, View, ViewContext, + SceneBuilder, SizeConstraint, Vector2FExt, ViewContext, }; use crate::{ pane_group::{HANDLE_HITBOX_SIZE, HORIZONTAL_MIN_SIZE, VERTICAL_MIN_SIZE}, - WorkspaceSettings, + WorkspaceSettings, Workspace, }; - pub struct PaneAxisElement { + pub struct 
PaneAxisElement { axis: Axis, basis: usize, active_pane_ix: Option, flexes: Rc>>, - children: Vec>, + children: Vec>, } - impl PaneAxisElement { + impl PaneAxisElement { pub fn new(axis: Axis, basis: usize, flexes: Rc>>) -> Self { Self { axis, @@ -557,8 +569,8 @@ mod element { remaining_space: &mut f32, remaining_flex: &mut f32, cross_axis_max: &mut f32, - view: &mut V, - cx: &mut LayoutContext, + view: &mut Workspace, + cx: &mut LayoutContext, ) { let flexes = self.flexes.borrow(); let cross_axis = self.axis.invert(); @@ -602,21 +614,21 @@ mod element { } } - impl Extend> for PaneAxisElement { - fn extend>>(&mut self, children: T) { + impl Extend> for PaneAxisElement { + fn extend>>(&mut self, children: T) { self.children.extend(children); } } - impl Element for PaneAxisElement { + impl Element for PaneAxisElement { type LayoutState = f32; type PaintState = (); fn layout( &mut self, constraint: SizeConstraint, - view: &mut V, - cx: &mut LayoutContext, + view: &mut Workspace, + cx: &mut LayoutContext, ) -> (Vector2F, Self::LayoutState) { debug_assert!(self.children.len() == self.flexes.borrow().len()); @@ -682,8 +694,8 @@ mod element { bounds: RectF, visible_bounds: RectF, remaining_space: &mut Self::LayoutState, - view: &mut V, - cx: &mut ViewContext, + view: &mut Workspace, + cx: &mut ViewContext, ) -> Self::PaintState { let can_resize = settings::get::(cx).active_pane_magnification == 1.; let visible_bounds = bounds.intersection(visible_bounds).unwrap_or_default(); @@ -750,7 +762,7 @@ mod element { handle_bounds, ); mouse_region = - mouse_region.on_drag(MouseButton::Left, move |drag, _: &mut V, cx| { + mouse_region.on_drag(MouseButton::Left, move |drag, workspace: &mut Workspace, cx| { let min_size = match axis { Axis::Horizontal => HORIZONTAL_MIN_SIZE, Axis::Vertical => VERTICAL_MIN_SIZE, @@ -768,13 +780,15 @@ mod element { current_target_size - child_size.along(axis); if proposed_current_pixel_change < 0. 
{ - current_target_size = current_target_size.max(min_size); + current_target_size = f32::max(current_target_size, min_size); } else if proposed_current_pixel_change > 0. { - // TODO: cascade this size change down, collect all changes into a vec - let next_target_size = (next_child_size.along(axis) - - proposed_current_pixel_change) - .max(min_size); - current_target_size = current_target_size.min( + // TODO: cascade this change to other children if current item is at min size + let next_target_size = f32::max( + next_child_size.along(axis) - proposed_current_pixel_change, + min_size, + ); + current_target_size = f32::min( + current_target_size, child_size.along(axis) + next_child_size.along(axis) - next_target_size, ); @@ -789,6 +803,7 @@ mod element { *borrow.get_mut(ix).unwrap() = current_target_flex; *borrow.get_mut(next_ix).unwrap() = next_target_flex; + workspace.schedule_serialize(cx); cx.notify(); }); scene.push_mouse_region(mouse_region); @@ -809,8 +824,8 @@ mod element { _: RectF, _: &Self::LayoutState, _: &Self::PaintState, - view: &V, - cx: &ViewContext, + view: &Workspace, + cx: &ViewContext, ) -> Option { self.children .iter() @@ -822,8 +837,8 @@ mod element { bounds: RectF, _: &Self::LayoutState, _: &Self::PaintState, - view: &V, - cx: &ViewContext, + view: &Workspace, + cx: &ViewContext, ) -> json::Value { serde_json::json!({ "type": "PaneAxis", diff --git a/crates/workspace/src/persistence.rs b/crates/workspace/src/persistence.rs index dd2aa5a818..0d7784093a 100644 --- a/crates/workspace/src/persistence.rs +++ b/crates/workspace/src/persistence.rs @@ -45,6 +45,7 @@ define_connection! { // parent_group_id: Option, // None indicates that this is the root node // position: Optiopn, // None indicates that this is the root node // axis: Option, // 'Vertical', 'Horizontal' + // flexes: Option>, // A JSON array of floats // ) // // panes( @@ -168,7 +169,12 @@ define_connection! 
{ ALTER TABLE workspaces ADD COLUMN left_dock_zoom INTEGER; //bool ALTER TABLE workspaces ADD COLUMN right_dock_zoom INTEGER; //bool ALTER TABLE workspaces ADD COLUMN bottom_dock_zoom INTEGER; //bool - )]; + ), + // Add pane group flex data + sql!( + ALTER TABLE pane_groups ADD COLUMN flexes TEXT; + ) + ]; } impl WorkspaceDb { @@ -359,38 +365,51 @@ impl WorkspaceDb { group_id: Option, ) -> Result> { type GroupKey = (Option, WorkspaceId); - type GroupOrPane = (Option, Option, Option, Option); + type GroupOrPane = ( + Option, + Option, + Option, + Option, + Option, + ); self.select_bound::(sql!( - SELECT group_id, axis, pane_id, active + SELECT group_id, axis, pane_id, active, flexes FROM (SELECT - group_id, - axis, - NULL as pane_id, - NULL as active, - position, - parent_group_id, - workspace_id - FROM pane_groups + group_id, + axis, + NULL as pane_id, + NULL as active, + position, + parent_group_id, + workspace_id, + flexes + FROM pane_groups UNION - SELECT - NULL, - NULL, - center_panes.pane_id, - panes.active as active, - position, - parent_group_id, - panes.workspace_id as workspace_id - FROM center_panes - JOIN panes ON center_panes.pane_id = panes.pane_id) + SELECT + NULL, + NULL, + center_panes.pane_id, + panes.active as active, + position, + parent_group_id, + panes.workspace_id as workspace_id, + NULL + FROM center_panes + JOIN panes ON center_panes.pane_id = panes.pane_id) WHERE parent_group_id IS ? AND workspace_id = ? ORDER BY position ))?((group_id, workspace_id))? 
.into_iter() - .map(|(group_id, axis, pane_id, active)| { + .map(|(group_id, axis, pane_id, active, flexes)| { if let Some((group_id, axis)) = group_id.zip(axis) { + let flexes = flexes + .map(|flexes| serde_json::from_str::>(&flexes)) + .transpose()?; + Ok(SerializedPaneGroup::Group { axis, children: self.get_pane_group(workspace_id, Some(group_id))?, + flexes, }) } else if let Some((pane_id, active)) = pane_id.zip(active) { Ok(SerializedPaneGroup::Pane(SerializedPane::new( @@ -417,14 +436,31 @@ impl WorkspaceDb { parent: Option<(GroupId, usize)>, ) -> Result<()> { match pane_group { - SerializedPaneGroup::Group { axis, children } => { + SerializedPaneGroup::Group { + axis, + children, + flexes, + } => { let (parent_id, position) = unzip_option(parent); + let flex_string = serde_json::json!(flexes).to_string(); let group_id = conn.select_row_bound::<_, i64>(sql!( - INSERT INTO pane_groups(workspace_id, parent_group_id, position, axis) - VALUES (?, ?, ?, ?) + INSERT INTO pane_groups( + workspace_id, + parent_group_id, + position, + axis, + flexes + ) + VALUES (?, ?, ?, ?, ?) RETURNING group_id - ))?((workspace_id, parent_id, position, *axis))? + ))?(( + workspace_id, + parent_id, + position, + *axis, + flex_string, + ))? 
.ok_or_else(|| anyhow!("Couldn't retrieve group_id from inserted pane_group"))?; for (position, group) in children.iter().enumerate() { @@ -641,6 +677,14 @@ mod tests { assert_eq!(test_text_1, "test-text-1"); } + fn group(axis: gpui::Axis, children: Vec) -> SerializedPaneGroup { + SerializedPaneGroup::Group { + axis, + flexes: None, + children, + } + } + #[gpui::test] async fn test_full_workspace_serialization() { env_logger::try_init().ok(); @@ -652,12 +696,12 @@ mod tests { // | - - - | | // | 3,4 | | // ----------------- - let center_group = SerializedPaneGroup::Group { - axis: gpui::Axis::Horizontal, - children: vec![ - SerializedPaneGroup::Group { - axis: gpui::Axis::Vertical, - children: vec![ + let center_group = group( + gpui::Axis::Horizontal, + vec![ + group( + gpui::Axis::Vertical, + vec![ SerializedPaneGroup::Pane(SerializedPane::new( vec![ SerializedItem::new("Terminal", 5, false), @@ -673,7 +717,7 @@ mod tests { false, )), ], - }, + ), SerializedPaneGroup::Pane(SerializedPane::new( vec![ SerializedItem::new("Terminal", 9, false), @@ -682,7 +726,7 @@ mod tests { false, )), ], - }; + ); let workspace = SerializedWorkspace { id: 5, @@ -811,12 +855,12 @@ mod tests { // | - - - | | // | 3,4 | | // ----------------- - let center_pane = SerializedPaneGroup::Group { - axis: gpui::Axis::Horizontal, - children: vec![ - SerializedPaneGroup::Group { - axis: gpui::Axis::Vertical, - children: vec![ + let center_pane = group( + gpui::Axis::Horizontal, + vec![ + group( + gpui::Axis::Vertical, + vec![ SerializedPaneGroup::Pane(SerializedPane::new( vec![ SerializedItem::new("Terminal", 1, false), @@ -832,7 +876,7 @@ mod tests { true, )), ], - }, + ), SerializedPaneGroup::Pane(SerializedPane::new( vec![ SerializedItem::new("Terminal", 5, true), @@ -841,7 +885,7 @@ mod tests { false, )), ], - }; + ); let workspace = default_workspace(&["/tmp"], ¢er_pane); @@ -858,12 +902,12 @@ mod tests { let db = WorkspaceDb(open_test_db("test_cleanup_panes").await); - let center_pane = 
SerializedPaneGroup::Group { - axis: gpui::Axis::Horizontal, - children: vec![ - SerializedPaneGroup::Group { - axis: gpui::Axis::Vertical, - children: vec![ + let center_pane = group( + gpui::Axis::Horizontal, + vec![ + group( + gpui::Axis::Vertical, + vec![ SerializedPaneGroup::Pane(SerializedPane::new( vec![ SerializedItem::new("Terminal", 1, false), @@ -879,7 +923,7 @@ mod tests { true, )), ], - }, + ), SerializedPaneGroup::Pane(SerializedPane::new( vec![ SerializedItem::new("Terminal", 5, false), @@ -888,7 +932,7 @@ mod tests { false, )), ], - }; + ); let id = &["/tmp"]; @@ -896,9 +940,9 @@ mod tests { db.save_workspace(workspace.clone()).await; - workspace.center_group = SerializedPaneGroup::Group { - axis: gpui::Axis::Vertical, - children: vec![ + workspace.center_group = group( + gpui::Axis::Vertical, + vec![ SerializedPaneGroup::Pane(SerializedPane::new( vec![ SerializedItem::new("Terminal", 1, false), @@ -914,7 +958,7 @@ mod tests { true, )), ], - }; + ); db.save_workspace(workspace.clone()).await; diff --git a/crates/workspace/src/persistence/model.rs b/crates/workspace/src/persistence/model.rs index 762d7171de..aa184dbb56 100644 --- a/crates/workspace/src/persistence/model.rs +++ b/crates/workspace/src/persistence/model.rs @@ -127,10 +127,11 @@ impl Bind for DockData { } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Clone)] pub enum SerializedPaneGroup { Group { axis: Axis, + flexes: Option>, children: Vec, }, Pane(SerializedPane), @@ -149,7 +150,7 @@ impl Default for SerializedPaneGroup { impl SerializedPaneGroup { #[async_recursion(?Send)] pub(crate) async fn deserialize( - &self, + self, project: &ModelHandle, workspace_id: WorkspaceId, workspace: &WeakViewHandle, @@ -160,7 +161,7 @@ impl SerializedPaneGroup { Vec>>, )> { match self { - SerializedPaneGroup::Group { axis, children } => { + SerializedPaneGroup::Group { axis, children, flexes } => { let mut current_active_pane = None; let mut members = Vec::new(); let mut items = 
Vec::new(); @@ -184,7 +185,7 @@ impl SerializedPaneGroup { } Some(( - Member::Axis(PaneAxis::new(*axis, members)), + Member::Axis(PaneAxis::load(axis, members, flexes)), current_active_pane, items, )) diff --git a/crates/workspace/src/workspace.rs b/crates/workspace/src/workspace.rs index f91204c51a..e31e0d924f 100644 --- a/crates/workspace/src/workspace.rs +++ b/crates/workspace/src/workspace.rs @@ -504,6 +504,7 @@ pub struct Workspace { subscriptions: Vec, _apply_leader_updates: Task>, _observe_current_user: Task>, + _schedule_serialize: Option>, pane_history_timestamp: Arc, } @@ -718,6 +719,7 @@ impl Workspace { app_state, _observe_current_user, _apply_leader_updates, + _schedule_serialize: None, leader_updates_tx, subscriptions, pane_history_timestamp, @@ -2893,6 +2895,13 @@ impl Workspace { cx.notify(); } + fn schedule_serialize(&mut self, cx: &mut ViewContext) { + self._schedule_serialize = Some(cx.spawn(|this, cx| async move { + cx.background().timer(Duration::from_millis(100)).await; + this.read_with(&cx, |this, cx| this.serialize_workspace(cx)).ok(); + })); + } + fn serialize_workspace(&self, cx: &ViewContext) { fn serialize_pane_handle( pane_handle: &ViewHandle, @@ -2923,12 +2932,17 @@ impl Workspace { cx: &AppContext, ) -> SerializedPaneGroup { match pane_group { - Member::Axis(PaneAxis { axis, members, .. 
}) => SerializedPaneGroup::Group { + Member::Axis(PaneAxis { + axis, + members, + flexes, + }) => SerializedPaneGroup::Group { axis: *axis, children: members .iter() .map(|member| build_serialized_pane_group(member, cx)) .collect::>(), + flexes: Some(flexes.borrow().clone()), }, Member::Pane(pane_handle) => { SerializedPaneGroup::Pane(serialize_pane_handle(&pane_handle, cx)) From 331fd896b56e90f27c7c3296633303b0ba3685fa Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Thu, 13 Jul 2023 14:21:30 -0700 Subject: [PATCH 090/115] fmt --- crates/workspace/src/pane_group.rs | 12 +++++++----- crates/workspace/src/persistence/model.rs | 6 +++++- crates/workspace/src/workspace.rs | 3 ++- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/crates/workspace/src/pane_group.rs b/crates/workspace/src/pane_group.rs index e58b95d6b3..1edee1bc42 100644 --- a/crates/workspace/src/pane_group.rs +++ b/crates/workspace/src/pane_group.rs @@ -536,7 +536,7 @@ mod element { use crate::{ pane_group::{HANDLE_HITBOX_SIZE, HORIZONTAL_MIN_SIZE, VERTICAL_MIN_SIZE}, - WorkspaceSettings, Workspace, + Workspace, WorkspaceSettings, }; pub struct PaneAxisElement { @@ -547,7 +547,7 @@ mod element { children: Vec>, } - impl PaneAxisElement { + impl PaneAxisElement { pub fn new(axis: Axis, basis: usize, flexes: Rc>>) -> Self { Self { axis, @@ -761,8 +761,9 @@ mod element { self.basis + ix, handle_bounds, ); - mouse_region = - mouse_region.on_drag(MouseButton::Left, move |drag, workspace: &mut Workspace, cx| { + mouse_region = mouse_region.on_drag( + MouseButton::Left, + move |drag, workspace: &mut Workspace, cx| { let min_size = match axis { Axis::Horizontal => HORIZONTAL_MIN_SIZE, Axis::Vertical => VERTICAL_MIN_SIZE, @@ -805,7 +806,8 @@ mod element { workspace.schedule_serialize(cx); cx.notify(); - }); + }, + ); scene.push_mouse_region(mouse_region); scene.pop_stacking_context(); diff --git a/crates/workspace/src/persistence/model.rs b/crates/workspace/src/persistence/model.rs index 
aa184dbb56..5f4c29cd5b 100644 --- a/crates/workspace/src/persistence/model.rs +++ b/crates/workspace/src/persistence/model.rs @@ -161,7 +161,11 @@ impl SerializedPaneGroup { Vec>>, )> { match self { - SerializedPaneGroup::Group { axis, children, flexes } => { + SerializedPaneGroup::Group { + axis, + children, + flexes, + } => { let mut current_active_pane = None; let mut members = Vec::new(); let mut items = Vec::new(); diff --git a/crates/workspace/src/workspace.rs b/crates/workspace/src/workspace.rs index e31e0d924f..885c686ddc 100644 --- a/crates/workspace/src/workspace.rs +++ b/crates/workspace/src/workspace.rs @@ -2898,7 +2898,8 @@ impl Workspace { fn schedule_serialize(&mut self, cx: &mut ViewContext) { self._schedule_serialize = Some(cx.spawn(|this, cx| async move { cx.background().timer(Duration::from_millis(100)).await; - this.read_with(&cx, |this, cx| this.serialize_workspace(cx)).ok(); + this.read_with(&cx, |this, cx| this.serialize_workspace(cx)) + .ok(); })); } From 9da8f609cf71d9bc0ee9e5686d5b6caef0d50465 Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Thu, 13 Jul 2023 14:34:32 -0700 Subject: [PATCH 091/115] tidy up names --- crates/workspace/src/pane_group.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/crates/workspace/src/pane_group.rs b/crates/workspace/src/pane_group.rs index 1edee1bc42..2ece5030f3 100644 --- a/crates/workspace/src/pane_group.rs +++ b/crates/workspace/src/pane_group.rs @@ -421,14 +421,13 @@ impl PaneAxis { ) -> AnyElement { debug_assert!(self.members.len() == self.flexes.borrow().len()); - let mut flex_container = PaneAxisElement::new(self.axis, basis, self.flexes.clone()); + let mut pane_axis = PaneAxisElement::new(self.axis, basis, self.flexes.clone()); let mut active_pane_ix = None; let mut members = self.members.iter().enumerate().peekable(); while let Some((ix, member)) = members.next() { let last = members.peek().is_none(); - // TODO: Restore this if member.contains(active_pane) { 
active_pane_ix = Some(ix); } @@ -460,10 +459,10 @@ impl PaneAxis { member = member.contained().with_border(border).into_any(); } - flex_container = flex_container.with_child(member.into_any()); + pane_axis = pane_axis.with_child(member.into_any()); } - flex_container.set_active_pane(active_pane_ix); - flex_container.into_any() + pane_axis.set_active_pane(active_pane_ix); + pane_axis.into_any() } } @@ -562,7 +561,7 @@ mod element { self.active_pane_ix = active_pane_ix; } - fn layout_flex_children( + fn layout_children( &mut self, active_pane_magnification: f32, constraint: SizeConstraint, @@ -656,7 +655,7 @@ mod element { panic!("flex contains flexible children but has an infinite constraint along the flex axis"); } - self.layout_flex_children( + self.layout_children( active_pane_magnification, constraint, &mut remaining_space, @@ -846,6 +845,7 @@ mod element { "type": "PaneAxis", "bounds": bounds.to_json(), "axis": self.axis.to_json(), + "flexes": *self.flexes.borrow(), "children": self.children.iter().map(|child| child.debug(view, cx)).collect::>() }) } From 50623c018cb04b1080025f074b8bf45c1a8e7af2 Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Thu, 13 Jul 2023 14:47:37 -0700 Subject: [PATCH 092/115] Fix serialization error --- crates/workspace/src/persistence.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/workspace/src/persistence.rs b/crates/workspace/src/persistence.rs index 0d7784093a..2a4062c079 100644 --- a/crates/workspace/src/persistence.rs +++ b/crates/workspace/src/persistence.rs @@ -443,7 +443,10 @@ impl WorkspaceDb { } => { let (parent_id, position) = unzip_option(parent); - let flex_string = serde_json::json!(flexes).to_string(); + let flex_string = flexes + .as_ref() + .map(|flexes| serde_json::json!(flexes).to_string()); + let group_id = conn.select_row_bound::<_, i64>(sql!( INSERT INTO pane_groups( workspace_id, From 77c4fc98bdfc4bdef5ac5c2f63147887341407b2 Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Thu, 
13 Jul 2023 16:13:57 -0700 Subject: [PATCH 093/115] Add line height settings for the editor --- assets/settings/default.json | 12 +++++++++++- crates/editor/src/editor.rs | 5 ++++- crates/editor/src/element.rs | 2 +- crates/terminal/src/terminal.rs | 2 +- crates/theme/src/theme_settings.rs | 29 +++++++++++++++++++++++++++++ 5 files changed, 46 insertions(+), 4 deletions(-) diff --git a/assets/settings/default.json b/assets/settings/default.json index 1f8d12a3d9..2e6361ce7e 100644 --- a/assets/settings/default.json +++ b/assets/settings/default.json @@ -24,6 +24,17 @@ }, // The default font size for text in the editor "buffer_font_size": 15, + // Set the buffer's line height. + // May take 3 values: + // 1. Use a line height that's comfortable for reading (1.618) + // "line_height": "comfortable" + // 2. Use a standard line height, (1.3) + // "line_height": "standard", + // 3. Use a custom line height + // "line_height": { + // "custom": 2 + // }, + "buffer_line_height": "comfortable", // The factor to grow the active pane by. Defaults to 1.0 // which gives the same size as all other panes. "active_pane_magnification": 1.0, @@ -282,7 +293,6 @@ // "line_height": { // "custom": 2 // }, - // "line_height": "comfortable" // Set the terminal's font size. If this option is not included, // the terminal will default to matching the buffer's font size. 
diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index 85a428d801..388f1aae88 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -494,6 +494,7 @@ pub enum SoftWrap { #[derive(Clone)] pub struct EditorStyle { pub text: TextStyle, + pub line_height_scalar: f32, pub placeholder_text: Option, pub theme: theme::Editor, pub theme_id: usize, @@ -8101,7 +8102,7 @@ fn build_style( cx: &AppContext, ) -> EditorStyle { let font_cache = cx.font_cache(); - + let line_height_scalar = settings.line_height(); let theme_id = settings.theme.meta.id; let mut theme = settings.theme.editor.clone(); let mut style = if let Some(get_field_editor_theme) = get_field_editor_theme { @@ -8115,6 +8116,7 @@ fn build_style( EditorStyle { text: field_editor_theme.text, placeholder_text: field_editor_theme.placeholder_text, + line_height_scalar, theme, theme_id, } @@ -8137,6 +8139,7 @@ fn build_style( underline: Default::default(), }, placeholder_text: None, + line_height_scalar, theme, theme_id, } diff --git a/crates/editor/src/element.rs b/crates/editor/src/element.rs index 074a96dfc1..6420c56ece 100644 --- a/crates/editor/src/element.rs +++ b/crates/editor/src/element.rs @@ -1975,7 +1975,7 @@ impl Element for EditorElement { let snapshot = editor.snapshot(cx); let style = self.style.clone(); - let line_height = style.text.line_height(cx.font_cache()); + let line_height = (style.text.font_size * style.line_height_scalar).round(); let gutter_padding; let gutter_width; diff --git a/crates/terminal/src/terminal.rs b/crates/terminal/src/terminal.rs index 576719526d..d14118bb18 100644 --- a/crates/terminal/src/terminal.rs +++ b/crates/terminal/src/terminal.rs @@ -198,7 +198,7 @@ impl TerminalLineHeight { match self { TerminalLineHeight::Comfortable => 1.618, TerminalLineHeight::Standard => 1.3, - TerminalLineHeight::Custom(line_height) => *line_height, + TerminalLineHeight::Custom(line_height) => f32::max(*line_height, 1.), } } } diff --git 
a/crates/theme/src/theme_settings.rs b/crates/theme/src/theme_settings.rs index b9e6f7a133..359ed8e511 100644 --- a/crates/theme/src/theme_settings.rs +++ b/crates/theme/src/theme_settings.rs @@ -13,6 +13,7 @@ use std::sync::Arc; use util::ResultExt as _; const MIN_FONT_SIZE: f32 = 6.0; +const MIN_LINE_HEIGHT: f32 = 1.0; #[derive(Clone, JsonSchema)] pub struct ThemeSettings { @@ -20,6 +21,7 @@ pub struct ThemeSettings { pub buffer_font_features: fonts::Features, pub buffer_font_family: FamilyId, pub(crate) buffer_font_size: f32, + pub(crate) buffer_line_height: BufferLineHeight, #[serde(skip)] pub theme: Arc, } @@ -33,11 +35,32 @@ pub struct ThemeSettingsContent { #[serde(default)] pub buffer_font_size: Option, #[serde(default)] + pub buffer_line_height: Option, + #[serde(default)] pub buffer_font_features: Option, #[serde(default)] pub theme: Option, } +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, JsonSchema, Default)] +#[serde(rename_all = "snake_case")] +pub enum BufferLineHeight { + #[default] + Comfortable, + Standard, + Custom(f32), +} + +impl BufferLineHeight { + pub fn value(&self) -> f32 { + match self { + BufferLineHeight::Comfortable => 1.618, + BufferLineHeight::Standard => 1.3, + BufferLineHeight::Custom(line_height) => *line_height, + } + } +} + impl ThemeSettings { pub fn buffer_font_size(&self, cx: &AppContext) -> f32 { if cx.has_global::() { @@ -47,6 +70,10 @@ impl ThemeSettings { } .max(MIN_FONT_SIZE) } + + pub fn line_height(&self) -> f32 { + f32::max(self.buffer_line_height.value(), MIN_LINE_HEIGHT) + } } pub fn adjusted_font_size(size: f32, cx: &AppContext) -> f32 { @@ -106,6 +133,7 @@ impl settings::Setting for ThemeSettings { buffer_font_family_name: defaults.buffer_font_family.clone().unwrap(), buffer_font_features, buffer_font_size: defaults.buffer_font_size.unwrap(), + buffer_line_height: defaults.buffer_line_height.unwrap(), theme: themes.get(defaults.theme.as_ref().unwrap()).unwrap(), }; @@ -136,6 +164,7 @@ impl 
settings::Setting for ThemeSettings { } merge(&mut this.buffer_font_size, value.buffer_font_size); + merge(&mut this.buffer_line_height, value.buffer_line_height); } Ok(this) From 1424a7a56a318d3401ca20a703905492a32e4be9 Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Thu, 13 Jul 2023 21:43:53 -0700 Subject: [PATCH 094/115] Add svelte language server Add svelte tree sitter Add svelte config file Add svelte highlighting --- Cargo.lock | 10 ++ crates/zed/Cargo.toml | 1 + crates/zed/src/languages.rs | 8 +- crates/zed/src/languages/svelte.rs | 126 ++++++++++++++++++ crates/zed/src/languages/svelte/config.toml | 18 +++ crates/zed/src/languages/svelte/folds.scm | 9 ++ .../zed/src/languages/svelte/highlights.scm | 68 ++++++++++ crates/zed/src/languages/svelte/indents.scm | 20 +++ .../zed/src/languages/svelte/injections.scm | 9 ++ 9 files changed, 268 insertions(+), 1 deletion(-) create mode 100644 crates/zed/src/languages/svelte.rs create mode 100644 crates/zed/src/languages/svelte/config.toml create mode 100755 crates/zed/src/languages/svelte/folds.scm create mode 100755 crates/zed/src/languages/svelte/highlights.scm create mode 100755 crates/zed/src/languages/svelte/indents.scm create mode 100755 crates/zed/src/languages/svelte/injections.scm diff --git a/Cargo.lock b/Cargo.lock index 0ac6a2ee89..b2f05a116d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8125,6 +8125,15 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "tree-sitter-svelte" +version = "0.10.2" +source = "git+https://github.com/Himujjal/tree-sitter-svelte?rev=697bb515471871e85ff799ea57a76298a71a9cca#697bb515471871e85ff799ea57a76298a71a9cca" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "tree-sitter-toml" version = "0.5.1" @@ -9555,6 +9564,7 @@ dependencies = [ "tree-sitter-ruby", "tree-sitter-rust", "tree-sitter-scheme", + "tree-sitter-svelte", "tree-sitter-toml", "tree-sitter-typescript 0.20.2 
(git+https://github.com/tree-sitter/tree-sitter-typescript?rev=5d20856f34315b068c41edaee2ac8a100081d259)", "tree-sitter-yaml", diff --git a/crates/zed/Cargo.toml b/crates/zed/Cargo.toml index 597e40161f..bf5d3bc8be 100644 --- a/crates/zed/Cargo.toml +++ b/crates/zed/Cargo.toml @@ -122,6 +122,7 @@ tree-sitter-html = "0.19.0" tree-sitter-scheme = { git = "https://github.com/6cdh/tree-sitter-scheme", rev = "af0fd1fa452cb2562dc7b5c8a8c55551c39273b9"} tree-sitter-racket = { git = "https://github.com/zed-industries/tree-sitter-racket", rev = "eb010cf2c674c6fd9a6316a84e28ef90190fe51a"} tree-sitter-yaml = { git = "https://github.com/zed-industries/tree-sitter-yaml", rev = "f545a41f57502e1b5ddf2a6668896c1b0620f930"} +tree-sitter-svelte = { git = "https://github.com/Himujjal/tree-sitter-svelte", rev = "697bb515471871e85ff799ea57a76298a71a9cca"} tree-sitter-lua = "0.0.14" url = "2.2" urlencoding = "2.1.2" diff --git a/crates/zed/src/languages.rs b/crates/zed/src/languages.rs index 820f564151..a7590001d7 100644 --- a/crates/zed/src/languages.rs +++ b/crates/zed/src/languages.rs @@ -17,6 +17,7 @@ mod python; mod ruby; mod rust; mod typescript; +mod svelte; mod yaml; // 1. 
Add tree-sitter-{language} parser to zed crate @@ -135,7 +136,12 @@ pub fn init(languages: Arc, node_runtime: Arc) { language( "yaml", tree_sitter_yaml::language(), - vec![Arc::new(yaml::YamlLspAdapter::new(node_runtime))], + vec![Arc::new(yaml::YamlLspAdapter::new(node_runtime.clone()))], + ); + language( + "svelte", + tree_sitter_svelte::language(), + vec![Arc::new(svelte::SvelteLspAdapter::new(node_runtime.clone()))], ); } diff --git a/crates/zed/src/languages/svelte.rs b/crates/zed/src/languages/svelte.rs new file mode 100644 index 0000000000..35f6945285 --- /dev/null +++ b/crates/zed/src/languages/svelte.rs @@ -0,0 +1,126 @@ +use anyhow::{anyhow, Result}; +use async_trait::async_trait; +use futures::StreamExt; +use language::{LanguageServerName, LspAdapter, LspAdapterDelegate}; +use lsp::LanguageServerBinary; +use node_runtime::NodeRuntime; +use serde_json::json; +use smol::fs; +use std::{ + any::Any, + ffi::OsString, + path::{Path, PathBuf}, + sync::Arc, +}; +use util::ResultExt; + +const SERVER_PATH: &'static str = + "node_modules/svelte-language-server/bin/server.js"; + +fn server_binary_arguments(server_path: &Path) -> Vec { + vec![server_path.into(), "--stdio".into()] +} + +pub struct SvelteLspAdapter { + node: Arc, +} + +impl SvelteLspAdapter { + pub fn new(node: Arc) -> Self { + SvelteLspAdapter { node } + } +} + +#[async_trait] +impl LspAdapter for SvelteLspAdapter { + async fn name(&self) -> LanguageServerName { + LanguageServerName("svelte-language-server".into()) + } + + async fn fetch_latest_server_version( + &self, + _: &dyn LspAdapterDelegate, + ) -> Result> { + Ok(Box::new( + self.node + .npm_package_latest_version("svelte-language-server") + .await?, + ) as Box<_>) + } + + async fn fetch_server_binary( + &self, + version: Box, + container_dir: PathBuf, + _: &dyn LspAdapterDelegate, + ) -> Result { + let version = version.downcast::().unwrap(); + let server_path = container_dir.join(SERVER_PATH); + + if fs::metadata(&server_path).await.is_err() 
{ + self.node + .npm_install_packages( + &container_dir, + [("svelte-language-server", version.as_str())], + ) + .await?; + } + + Ok(LanguageServerBinary { + path: self.node.binary_path().await?, + arguments: server_binary_arguments(&server_path), + }) + } + + async fn cached_server_binary( + &self, + container_dir: PathBuf, + _: &dyn LspAdapterDelegate, + ) -> Option { + get_cached_server_binary(container_dir, &self.node).await + } + + async fn installation_test_binary( + &self, + container_dir: PathBuf, + ) -> Option { + get_cached_server_binary(container_dir, &self.node).await + } + + async fn initialization_options(&self) -> Option { + Some(json!({ + "provideFormatter": true + })) + } +} + +async fn get_cached_server_binary( + container_dir: PathBuf, + node: &NodeRuntime, +) -> Option { + (|| async move { + let mut last_version_dir = None; + let mut entries = fs::read_dir(&container_dir).await?; + while let Some(entry) = entries.next().await { + let entry = entry?; + if entry.file_type().await?.is_dir() { + last_version_dir = Some(entry.path()); + } + } + let last_version_dir = last_version_dir.ok_or_else(|| anyhow!("no cached binary"))?; + let server_path = last_version_dir.join(SERVER_PATH); + if server_path.exists() { + Ok(LanguageServerBinary { + path: node.binary_path().await?, + arguments: server_binary_arguments(&server_path), + }) + } else { + Err(anyhow!( + "missing executable in directory {:?}", + last_version_dir + )) + } + })() + .await + .log_err() +} diff --git a/crates/zed/src/languages/svelte/config.toml b/crates/zed/src/languages/svelte/config.toml new file mode 100644 index 0000000000..41bb21a45d --- /dev/null +++ b/crates/zed/src/languages/svelte/config.toml @@ -0,0 +1,18 @@ +name = "Svelte" +path_suffixes = ["svelte"] +line_comment = "// " +autoclose_before = ";:.,=}])>" +brackets = [ + { start = "{", end = "}", close = true, newline = true }, + { start = "[", end = "]", close = true, newline = true }, + { start = "(", end = ")", close = 
true, newline = true }, + { start = "<", end = ">", close = false, newline = true, not_in = ["string", "comment"] }, + { start = "\"", end = "\"", close = true, newline = false, not_in = ["string"] }, + { start = "'", end = "'", close = true, newline = false, not_in = ["string", "comment"] }, + { start = "`", end = "`", close = true, newline = false, not_in = ["string"] }, + { start = "/*", end = " */", close = true, newline = false, not_in = ["string", "comment"] }, +] + +[overrides.element] +line_comment = { remove = true } +block_comment = ["{/* ", " */}"] diff --git a/crates/zed/src/languages/svelte/folds.scm b/crates/zed/src/languages/svelte/folds.scm new file mode 100755 index 0000000000..795c32fc4a --- /dev/null +++ b/crates/zed/src/languages/svelte/folds.scm @@ -0,0 +1,9 @@ +[ + (style_element) + (script_element) + (element) + (if_statement) + (else_statement) + (each_statement) + (await_statement) +] @fold diff --git a/crates/zed/src/languages/svelte/highlights.scm b/crates/zed/src/languages/svelte/highlights.scm new file mode 100755 index 0000000000..0b868b7591 --- /dev/null +++ b/crates/zed/src/languages/svelte/highlights.scm @@ -0,0 +1,68 @@ +; Special identifiers +;-------------------- +[ + "<" + ">" + "" + "#" + ":" + "/" + "@" +] @tag.delimiter + +[ + "{" + "}" +] @punctuation.bracket + +[ + (special_block_keyword) + (then) + (as) +] @keyword + +[ + (text) + (raw_text_expr) +] @none + +[ + (attribute_value) + (quoted_attribute_value) +] @string + +(tag_name) @tag +(attribute_name) @property +(erroneous_end_tag_name) @error +(comment) @comment + +((attribute + (attribute_name) @_attr + (quoted_attribute_value (attribute_value) @text.uri)) + (#match? @_attr "^(href|src)$")) + +; TODO: + +((element (start_tag (tag_name) @_tag) (text) @text.uri) + (#eq? @_tag "a")) + +((element (start_tag (tag_name) @_tag) (text) @text.literal) + (#match? @_tag "^(code|kbd)$")) + +((element (start_tag (tag_name) @_tag) (text) @text.underline) + (#eq? 
@_tag "u")) + +((element (start_tag (tag_name) @_tag) (text) @text.strike) + (#match? @_tag "^(s|del)$")) + +((element (start_tag (tag_name) @_tag) (text) @text.emphasis) + (#match? @_tag "^(em|i)$")) + +((element (start_tag (tag_name) @_tag) (text) @text.strong) + (#match? @_tag "^(strong|b)$")) + +((element (start_tag (tag_name) @_tag) (text) @text.title) + (#match? @_tag "^(h[0-9]|title)$")) + +"=" @operator diff --git a/crates/zed/src/languages/svelte/indents.scm b/crates/zed/src/languages/svelte/indents.scm new file mode 100755 index 0000000000..2f6f5c32e7 --- /dev/null +++ b/crates/zed/src/languages/svelte/indents.scm @@ -0,0 +1,20 @@ +[ + (element) + (if_statement) + (each_statement) + (await_statement) + (script_element) + (style_element) +] @indent + +[ + (end_tag) + (else_statement) + (if_end_expr) + (each_end_expr) + (await_end_expr) + ">" + "/>" +] @branch + +(comment) @ignore diff --git a/crates/zed/src/languages/svelte/injections.scm b/crates/zed/src/languages/svelte/injections.scm new file mode 100755 index 0000000000..84043c0061 --- /dev/null +++ b/crates/zed/src/languages/svelte/injections.scm @@ -0,0 +1,9 @@ +; injections.scm +; -------------- +(script_element + (raw_text) @content + (#set! "language" "javascript")) + +(style_element + (raw_text) @content + (#set! 
"language" "css")) From ff8a89a0758119ba62eb64ef5120361b90c24edc Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Thu, 13 Jul 2023 22:30:41 -0700 Subject: [PATCH 095/115] Refine svelte queries to work with zed-style highlights Bump scheme dependency: --- Cargo.lock | 4 +- crates/zed/Cargo.toml | 2 +- .../zed/src/languages/svelte/highlights.scm | 86 +++++++------------ crates/zed/src/languages/svelte/indents.scm | 12 --- .../zed/src/languages/svelte/injections.scm | 19 ++++ 5 files changed, 52 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b2f05a116d..1a4562954f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8118,8 +8118,8 @@ dependencies = [ [[package]] name = "tree-sitter-scheme" -version = "0.2.0" -source = "git+https://github.com/6cdh/tree-sitter-scheme?rev=af0fd1fa452cb2562dc7b5c8a8c55551c39273b9#af0fd1fa452cb2562dc7b5c8a8c55551c39273b9" +version = "0.5.0" +source = "git+https://github.com/6cdh/tree-sitter-scheme?rev=ca8af220aaf2a80aaf609bfb0df193817e4f064b#ca8af220aaf2a80aaf609bfb0df193817e4f064b" dependencies = [ "cc", "tree-sitter", diff --git a/crates/zed/Cargo.toml b/crates/zed/Cargo.toml index bf5d3bc8be..28970b1a0f 100644 --- a/crates/zed/Cargo.toml +++ b/crates/zed/Cargo.toml @@ -119,7 +119,7 @@ tree-sitter-toml = { git = "https://github.com/tree-sitter/tree-sitter-toml", re tree-sitter-typescript = { git = "https://github.com/tree-sitter/tree-sitter-typescript", rev = "5d20856f34315b068c41edaee2ac8a100081d259" } tree-sitter-ruby = "0.20.0" tree-sitter-html = "0.19.0" -tree-sitter-scheme = { git = "https://github.com/6cdh/tree-sitter-scheme", rev = "af0fd1fa452cb2562dc7b5c8a8c55551c39273b9"} +tree-sitter-scheme = { git = "https://github.com/6cdh/tree-sitter-scheme", rev = "ca8af220aaf2a80aaf609bfb0df193817e4f064b"} tree-sitter-racket = { git = "https://github.com/zed-industries/tree-sitter-racket", rev = "eb010cf2c674c6fd9a6316a84e28ef90190fe51a"} tree-sitter-yaml = { git = "https://github.com/zed-industries/tree-sitter-yaml", 
rev = "f545a41f57502e1b5ddf2a6668896c1b0620f930"} tree-sitter-svelte = { git = "https://github.com/Himujjal/tree-sitter-svelte", rev = "697bb515471871e85ff799ea57a76298a71a9cca"} diff --git a/crates/zed/src/languages/svelte/highlights.scm b/crates/zed/src/languages/svelte/highlights.scm index 0b868b7591..de873684e4 100755 --- a/crates/zed/src/languages/svelte/highlights.scm +++ b/crates/zed/src/languages/svelte/highlights.scm @@ -1,5 +1,35 @@ ; Special identifiers ;-------------------- + +; TODO: +(tag_name) @tag +(attribute_name) @property +(erroneous_end_tag_name) @keyword +(comment) @comment + +[ + (attribute_value) + (quoted_attribute_value) +] @string + +[ + (text) + (raw_text_expr) +] @none + +[ + (special_block_keyword) + (then) + (as) +] @keyword + +[ + "{" + "}" +] @punctuation.bracket + +"=" @operator + [ "<" ">" @@ -10,59 +40,3 @@ "/" "@" ] @tag.delimiter - -[ - "{" - "}" -] @punctuation.bracket - -[ - (special_block_keyword) - (then) - (as) -] @keyword - -[ - (text) - (raw_text_expr) -] @none - -[ - (attribute_value) - (quoted_attribute_value) -] @string - -(tag_name) @tag -(attribute_name) @property -(erroneous_end_tag_name) @error -(comment) @comment - -((attribute - (attribute_name) @_attr - (quoted_attribute_value (attribute_value) @text.uri)) - (#match? @_attr "^(href|src)$")) - -; TODO: - -((element (start_tag (tag_name) @_tag) (text) @text.uri) - (#eq? @_tag "a")) - -((element (start_tag (tag_name) @_tag) (text) @text.literal) - (#match? @_tag "^(code|kbd)$")) - -((element (start_tag (tag_name) @_tag) (text) @text.underline) - (#eq? @_tag "u")) - -((element (start_tag (tag_name) @_tag) (text) @text.strike) - (#match? @_tag "^(s|del)$")) - -((element (start_tag (tag_name) @_tag) (text) @text.emphasis) - (#match? @_tag "^(em|i)$")) - -((element (start_tag (tag_name) @_tag) (text) @text.strong) - (#match? @_tag "^(strong|b)$")) - -((element (start_tag (tag_name) @_tag) (text) @text.title) - (#match? 
@_tag "^(h[0-9]|title)$")) - -"=" @operator diff --git a/crates/zed/src/languages/svelte/indents.scm b/crates/zed/src/languages/svelte/indents.scm index 2f6f5c32e7..886d8ca867 100755 --- a/crates/zed/src/languages/svelte/indents.scm +++ b/crates/zed/src/languages/svelte/indents.scm @@ -6,15 +6,3 @@ (script_element) (style_element) ] @indent - -[ - (end_tag) - (else_statement) - (if_end_expr) - (each_end_expr) - (await_end_expr) - ">" - "/>" -] @branch - -(comment) @ignore diff --git a/crates/zed/src/languages/svelte/injections.scm b/crates/zed/src/languages/svelte/injections.scm index 84043c0061..8c1ac9fcd0 100755 --- a/crates/zed/src/languages/svelte/injections.scm +++ b/crates/zed/src/languages/svelte/injections.scm @@ -4,6 +4,25 @@ (raw_text) @content (#set! "language" "javascript")) + ((script_element + (start_tag + (attribute + (quoted_attribute_value (attribute_value) @_language))) + (raw_text) @content) + (#eq? @_language "ts") + (#set! "language" "typescript")) + +((script_element + (start_tag + (attribute + (quoted_attribute_value (attribute_value) @_language))) + (raw_text) @content) + (#eq? @_language "typescript") + (#set! "language" "typescript")) + (style_element (raw_text) @content (#set! "language" "css")) + +((raw_text_expr) @content + (#set! "language" "javascript")) From f1b034d4f8963f40720007ab6538cf6c33d8691c Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Thu, 13 Jul 2023 22:32:29 -0700 Subject: [PATCH 096/115] fmt --- crates/zed/src/languages.rs | 6 ++++-- crates/zed/src/languages/svelte.rs | 3 +-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/zed/src/languages.rs b/crates/zed/src/languages.rs index a7590001d7..8e0e21faba 100644 --- a/crates/zed/src/languages.rs +++ b/crates/zed/src/languages.rs @@ -16,8 +16,8 @@ mod lua; mod python; mod ruby; mod rust; -mod typescript; mod svelte; +mod typescript; mod yaml; // 1. 
Add tree-sitter-{language} parser to zed crate @@ -141,7 +141,9 @@ pub fn init(languages: Arc, node_runtime: Arc) { language( "svelte", tree_sitter_svelte::language(), - vec![Arc::new(svelte::SvelteLspAdapter::new(node_runtime.clone()))], + vec![Arc::new(svelte::SvelteLspAdapter::new( + node_runtime.clone(), + ))], ); } diff --git a/crates/zed/src/languages/svelte.rs b/crates/zed/src/languages/svelte.rs index 35f6945285..8416859f5a 100644 --- a/crates/zed/src/languages/svelte.rs +++ b/crates/zed/src/languages/svelte.rs @@ -14,8 +14,7 @@ use std::{ }; use util::ResultExt; -const SERVER_PATH: &'static str = - "node_modules/svelte-language-server/bin/server.js"; +const SERVER_PATH: &'static str = "node_modules/svelte-language-server/bin/server.js"; fn server_binary_arguments(server_path: &Path) -> Vec { vec![server_path.into(), "--stdio".into()] From 29cbeb39bddd67b8846da75241595f809ff2c767 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Thu, 13 Jul 2023 14:29:02 +0300 Subject: [PATCH 097/115] Allow selecting all search matches in buffer --- assets/keymaps/default.json | 4 +- crates/editor/src/items.rs | 5 + crates/editor/src/selections_collection.rs | 4 +- crates/feedback/src/feedback_editor.rs | 5 + crates/language_tools/src/lsp_log.rs | 5 + crates/search/src/buffer_search.rs | 106 ++++++++++++++++++--- crates/search/src/search.rs | 3 +- crates/terminal/src/terminal.rs | 15 +++ crates/terminal_view/src/terminal_view.rs | 7 ++ crates/workspace/src/searchable.rs | 8 ++ 10 files changed, 146 insertions(+), 16 deletions(-) diff --git a/assets/keymaps/default.json b/assets/keymaps/default.json index 4726c67aea..006719e5f5 100644 --- a/assets/keymaps/default.json +++ b/assets/keymaps/default.json @@ -221,7 +221,8 @@ "escape": "buffer_search::Dismiss", "tab": "buffer_search::FocusEditor", "enter": "search::SelectNextMatch", - "shift-enter": "search::SelectPrevMatch" + "shift-enter": "search::SelectPrevMatch", + "cmd-shift-k": "search::CaretsToAllMatches" } }, { @@ 
-242,6 +243,7 @@ "cmd-f": "project_search::ToggleFocus", "cmd-g": "search::SelectNextMatch", "cmd-shift-g": "search::SelectPrevMatch", + "cmd-shift-k": "search::CaretsToAllMatches", "alt-cmd-c": "search::ToggleCaseSensitive", "alt-cmd-w": "search::ToggleWholeWord", "alt-cmd-r": "search::ToggleRegex" diff --git a/crates/editor/src/items.rs b/crates/editor/src/items.rs index 431ccf0bfe..cc24cd35da 100644 --- a/crates/editor/src/items.rs +++ b/crates/editor/src/items.rs @@ -941,6 +941,11 @@ impl SearchableItem for Editor { }); } + fn select_matches(&mut self, matches: Vec, cx: &mut ViewContext) { + self.unfold_ranges(matches.clone(), false, false, cx); + self.change_selections(None, cx, |s| s.select_ranges(matches)); + } + fn match_index_for_direction( &mut self, matches: &Vec>, diff --git a/crates/editor/src/selections_collection.rs b/crates/editor/src/selections_collection.rs index d82ce5e216..a22506f751 100644 --- a/crates/editor/src/selections_collection.rs +++ b/crates/editor/src/selections_collection.rs @@ -16,13 +16,13 @@ use crate::{ Anchor, DisplayPoint, ExcerptId, MultiBuffer, MultiBufferSnapshot, SelectMode, ToOffset, }; -#[derive(Clone)] +#[derive(Debug, Clone)] pub struct PendingSelection { pub selection: Selection, pub mode: SelectMode, } -#[derive(Clone)] +#[derive(Debug, Clone)] pub struct SelectionsCollection { display_map: ModelHandle, buffer: ModelHandle, diff --git a/crates/feedback/src/feedback_editor.rs b/crates/feedback/src/feedback_editor.rs index 5a4f912e3a..663164dd07 100644 --- a/crates/feedback/src/feedback_editor.rs +++ b/crates/feedback/src/feedback_editor.rs @@ -391,6 +391,11 @@ impl SearchableItem for FeedbackEditor { .update(cx, |editor, cx| editor.activate_match(index, matches, cx)) } + fn select_matches(&mut self, matches: Vec, cx: &mut ViewContext) { + self.editor + .update(cx, |e, cx| e.select_matches(matches, cx)) + } + fn find_matches( &mut self, query: project::search::SearchQuery, diff --git 
a/crates/language_tools/src/lsp_log.rs b/crates/language_tools/src/lsp_log.rs index 12d8c6b34d..b27349f412 100644 --- a/crates/language_tools/src/lsp_log.rs +++ b/crates/language_tools/src/lsp_log.rs @@ -494,6 +494,11 @@ impl SearchableItem for LspLogView { .update(cx, |e, cx| e.activate_match(index, matches, cx)) } + fn select_matches(&mut self, matches: Vec, cx: &mut ViewContext) { + self.editor + .update(cx, |e, cx| e.select_matches(matches, cx)) + } + fn find_matches( &mut self, query: project::search::SearchQuery, diff --git a/crates/search/src/buffer_search.rs b/crates/search/src/buffer_search.rs index 59d25c2659..22778f85e8 100644 --- a/crates/search/src/buffer_search.rs +++ b/crates/search/src/buffer_search.rs @@ -1,6 +1,6 @@ use crate::{ - SearchOption, SelectNextMatch, SelectPrevMatch, ToggleCaseSensitive, ToggleRegex, - ToggleWholeWord, + CaretsToAllMatches, SearchOption, SelectNextMatch, SelectPrevMatch, ToggleCaseSensitive, + ToggleRegex, ToggleWholeWord, }; use collections::HashMap; use editor::Editor; @@ -39,8 +39,10 @@ pub fn init(cx: &mut AppContext) { cx.add_action(BufferSearchBar::focus_editor); cx.add_action(BufferSearchBar::select_next_match); cx.add_action(BufferSearchBar::select_prev_match); + cx.add_action(BufferSearchBar::carets_to_all_matches); cx.add_action(BufferSearchBar::select_next_match_on_pane); cx.add_action(BufferSearchBar::select_prev_match_on_pane); + cx.add_action(BufferSearchBar::carets_to_all_matches_on_pane); cx.add_action(BufferSearchBar::handle_editor_cancel); add_toggle_option_action::(SearchOption::CaseSensitive, cx); add_toggle_option_action::(SearchOption::WholeWord, cx); @@ -66,7 +68,7 @@ pub struct BufferSearchBar { active_searchable_item: Option>, active_match_index: Option, active_searchable_item_subscription: Option, - seachable_items_with_matches: + searchable_items_with_matches: HashMap, Vec>>, pending_search: Option>, case_sensitive: bool, @@ -118,7 +120,7 @@ impl View for BufferSearchBar { 
.with_children(self.active_searchable_item.as_ref().and_then( |searchable_item| { let matches = self - .seachable_items_with_matches + .searchable_items_with_matches .get(&searchable_item.downgrade())?; let message = if let Some(match_ix) = self.active_match_index { format!("{}/{}", match_ix + 1, matches.len()) @@ -249,7 +251,7 @@ impl BufferSearchBar { active_searchable_item: None, active_searchable_item_subscription: None, active_match_index: None, - seachable_items_with_matches: Default::default(), + searchable_items_with_matches: Default::default(), case_sensitive: false, whole_word: false, regex: false, @@ -265,7 +267,7 @@ impl BufferSearchBar { pub fn dismiss(&mut self, _: &Dismiss, cx: &mut ViewContext) { self.dismissed = true; - for searchable_item in self.seachable_items_with_matches.keys() { + for searchable_item in self.searchable_items_with_matches.keys() { if let Some(searchable_item) = WeakSearchableItemHandle::upgrade(searchable_item.as_ref(), cx) { @@ -488,11 +490,25 @@ impl BufferSearchBar { self.select_match(Direction::Prev, cx); } + fn carets_to_all_matches(&mut self, _: &CaretsToAllMatches, cx: &mut ViewContext) { + if !self.dismissed { + if let Some(searchable_item) = self.active_searchable_item.as_ref() { + if let Some(matches) = self + .searchable_items_with_matches + .get(&searchable_item.downgrade()) + { + searchable_item.select_matches(matches, cx); + self.focus_editor(&FocusEditor, cx); + } + } + } + } + pub fn select_match(&mut self, direction: Direction, cx: &mut ViewContext) { if let Some(index) = self.active_match_index { if let Some(searchable_item) = self.active_searchable_item.as_ref() { if let Some(matches) = self - .seachable_items_with_matches + .searchable_items_with_matches .get(&searchable_item.downgrade()) { let new_match_index = @@ -524,6 +540,16 @@ impl BufferSearchBar { } } + fn carets_to_all_matches_on_pane( + pane: &mut Pane, + action: &CaretsToAllMatches, + cx: &mut ViewContext, + ) { + if let Some(search_bar) = 
pane.toolbar().read(cx).item_of_type::() { + search_bar.update(cx, |bar, cx| bar.carets_to_all_matches(action, cx)); + } + } + fn on_query_editor_event( &mut self, _: ViewHandle, @@ -547,7 +573,7 @@ impl BufferSearchBar { fn clear_matches(&mut self, cx: &mut ViewContext) { let mut active_item_matches = None; - for (searchable_item, matches) in self.seachable_items_with_matches.drain() { + for (searchable_item, matches) in self.searchable_items_with_matches.drain() { if let Some(searchable_item) = WeakSearchableItemHandle::upgrade(searchable_item.as_ref(), cx) { @@ -559,7 +585,7 @@ impl BufferSearchBar { } } - self.seachable_items_with_matches + self.searchable_items_with_matches .extend(active_item_matches); } @@ -605,13 +631,13 @@ impl BufferSearchBar { if let Some(active_searchable_item) = WeakSearchableItemHandle::upgrade(active_searchable_item.as_ref(), cx) { - this.seachable_items_with_matches + this.searchable_items_with_matches .insert(active_searchable_item.downgrade(), matches); this.update_match_index(cx); if !this.dismissed { let matches = this - .seachable_items_with_matches + .searchable_items_with_matches .get(&active_searchable_item.downgrade()) .unwrap(); active_searchable_item.update_matches(matches, cx); @@ -637,7 +663,7 @@ impl BufferSearchBar { .as_ref() .and_then(|searchable_item| { let matches = self - .seachable_items_with_matches + .searchable_items_with_matches .get(&searchable_item.downgrade())?; searchable_item.active_match_index(matches, cx) }); @@ -966,4 +992,60 @@ mod tests { assert_eq!(search_bar.active_match_index, Some(2)); }); } + + #[gpui::test] + async fn test_search_carets_to_all_matches(cx: &mut TestAppContext) { + crate::project_search::tests::init_test(cx); + + let buffer_text = r#" + A regular expression (shortened as regex or regexp;[1] also referred to as + rational expression[2][3]) is a sequence of characters that specifies a search + pattern in text. 
Usually such patterns are used by string-searching algorithms + for "find" or "find and replace" operations on strings, or for input validation. + "# + .unindent(); + let expected_query_matches_count = buffer_text + .chars() + .filter(|c| c.to_ascii_lowercase() == 'a') + .count(); + assert!( + expected_query_matches_count > 1, + "Should pick a query with multiple results" + ); + let buffer = cx.add_model(|cx| Buffer::new(0, buffer_text, cx)); + let (window_id, _root_view) = cx.add_window(|_| EmptyView); + + let editor = cx.add_view(window_id, |cx| Editor::for_buffer(buffer.clone(), None, cx)); + + let search_bar = cx.add_view(window_id, |cx| { + let mut search_bar = BufferSearchBar::new(cx); + search_bar.set_active_pane_item(Some(&editor), cx); + search_bar.show(false, true, cx); + search_bar + }); + + search_bar.update(cx, |search_bar, cx| { + search_bar.set_query("a", cx); + }); + + editor.next_notification(cx).await; + editor.update(cx, |editor, cx| { + let initial_selections = editor.selections.display_ranges(cx); + assert_eq!( + initial_selections.len(), 1, + "Expected to have only one selection before adding carets to all matches, but got: {initial_selections:?}", + ) + }); + + search_bar.update(cx, |search_bar, cx| { + search_bar.carets_to_all_matches(&CaretsToAllMatches, cx); + let all_selections = + editor.update(cx, |editor, cx| editor.selections.display_ranges(cx)); + assert_eq!( + all_selections.len(), + expected_query_matches_count, + "Should select all `a` characters in the buffer, but got: {all_selections:?}" + ); + }); + } } diff --git a/crates/search/src/search.rs b/crates/search/src/search.rs index 90ea508cc6..da679d191e 100644 --- a/crates/search/src/search.rs +++ b/crates/search/src/search.rs @@ -17,7 +17,8 @@ actions!( ToggleCaseSensitive, ToggleRegex, SelectNextMatch, - SelectPrevMatch + SelectPrevMatch, + CaretsToAllMatches, ] ); diff --git a/crates/terminal/src/terminal.rs b/crates/terminal/src/terminal.rs index d14118bb18..39e77b590b 100644 
--- a/crates/terminal/src/terminal.rs +++ b/crates/terminal/src/terminal.rs @@ -908,6 +908,21 @@ impl Terminal { } } + pub fn select_matches(&mut self, matches: Vec>) { + let matches_to_select = self + .matches + .iter() + .filter(|self_match| matches.contains(self_match)) + .cloned() + .collect::>(); + for match_to_select in matches_to_select { + self.set_selection(Some(( + make_selection(&match_to_select), + *match_to_select.end(), + ))); + } + } + fn set_selection(&mut self, selection: Option<(Selection, Point)>) { self.events .push_back(InternalEvent::SetSelection(selection)); diff --git a/crates/terminal_view/src/terminal_view.rs b/crates/terminal_view/src/terminal_view.rs index 36be6bee7f..8e1e4ad62f 100644 --- a/crates/terminal_view/src/terminal_view.rs +++ b/crates/terminal_view/src/terminal_view.rs @@ -682,6 +682,13 @@ impl SearchableItem for TerminalView { cx.notify(); } + /// Add selections for all matches given. + fn select_matches(&mut self, matches: Vec, cx: &mut ViewContext) { + self.terminal() + .update(cx, |term, _| term.select_matches(matches)); + cx.notify(); + } + /// Get all of the matches for this query, should be done on the background fn find_matches( &mut self, diff --git a/crates/workspace/src/searchable.rs b/crates/workspace/src/searchable.rs index 7e3f7227b0..4ebfe69c21 100644 --- a/crates/workspace/src/searchable.rs +++ b/crates/workspace/src/searchable.rs @@ -47,6 +47,7 @@ pub trait SearchableItem: Item { matches: Vec, cx: &mut ViewContext, ); + fn select_matches(&mut self, matches: Vec, cx: &mut ViewContext); fn match_index_for_direction( &mut self, matches: &Vec, @@ -102,6 +103,7 @@ pub trait SearchableItemHandle: ItemHandle { matches: &Vec>, cx: &mut WindowContext, ); + fn select_matches(&self, matches: &Vec>, cx: &mut WindowContext); fn match_index_for_direction( &self, matches: &Vec>, @@ -165,6 +167,12 @@ impl SearchableItemHandle for ViewHandle { let matches = downcast_matches(matches); self.update(cx, |this, cx| 
this.activate_match(index, matches, cx)); } + + fn select_matches(&self, matches: &Vec>, cx: &mut WindowContext) { + let matches = downcast_matches(matches); + self.update(cx, |this, cx| this.select_matches(matches, cx)); + } + fn match_index_for_direction( &self, matches: &Vec>, From 2053418f21b48c52f4b29be22a6c562398021ac3 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Thu, 13 Jul 2023 14:45:42 +0300 Subject: [PATCH 098/115] Use VSCode-like shortcuts by default --- assets/keymaps/default.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/assets/keymaps/default.json b/assets/keymaps/default.json index 006719e5f5..611d3633e3 100644 --- a/assets/keymaps/default.json +++ b/assets/keymaps/default.json @@ -194,8 +194,8 @@ { "context": "Editor && mode == auto_height", "bindings": { - "alt-enter": "editor::Newline", - "cmd-alt-enter": "editor::NewlineBelow" + "shift-enter": "editor::Newline", + "cmd-shift-enter": "editor::NewlineBelow" } }, { @@ -222,7 +222,7 @@ "tab": "buffer_search::FocusEditor", "enter": "search::SelectNextMatch", "shift-enter": "search::SelectPrevMatch", - "cmd-shift-k": "search::CaretsToAllMatches" + "alt-enter": "search::CaretsToAllMatches" } }, { @@ -243,7 +243,7 @@ "cmd-f": "project_search::ToggleFocus", "cmd-g": "search::SelectNextMatch", "cmd-shift-g": "search::SelectPrevMatch", - "cmd-shift-k": "search::CaretsToAllMatches", + "alt-enter": "search::CaretsToAllMatches", "alt-cmd-c": "search::ToggleCaseSensitive", "alt-cmd-w": "search::ToggleWholeWord", "alt-cmd-r": "search::ToggleRegex" From f710efca3bceef3b3373987274929b718d8c7749 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Thu, 13 Jul 2023 15:39:00 +0300 Subject: [PATCH 099/115] Use a better name --- assets/keymaps/default.json | 4 ++-- crates/search/src/buffer_search.rs | 18 +++++++++--------- crates/search/src/search.rs | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/assets/keymaps/default.json b/assets/keymaps/default.json 
index 611d3633e3..c044f4b600 100644 --- a/assets/keymaps/default.json +++ b/assets/keymaps/default.json @@ -222,7 +222,7 @@ "tab": "buffer_search::FocusEditor", "enter": "search::SelectNextMatch", "shift-enter": "search::SelectPrevMatch", - "alt-enter": "search::CaretsToAllMatches" + "alt-enter": "search::SelectAllMatches" } }, { @@ -243,7 +243,7 @@ "cmd-f": "project_search::ToggleFocus", "cmd-g": "search::SelectNextMatch", "cmd-shift-g": "search::SelectPrevMatch", - "alt-enter": "search::CaretsToAllMatches", + "alt-enter": "search::SelectAllMatches", "alt-cmd-c": "search::ToggleCaseSensitive", "alt-cmd-w": "search::ToggleWholeWord", "alt-cmd-r": "search::ToggleRegex" diff --git a/crates/search/src/buffer_search.rs b/crates/search/src/buffer_search.rs index 22778f85e8..a87587a92f 100644 --- a/crates/search/src/buffer_search.rs +++ b/crates/search/src/buffer_search.rs @@ -1,5 +1,5 @@ use crate::{ - CaretsToAllMatches, SearchOption, SelectNextMatch, SelectPrevMatch, ToggleCaseSensitive, + SearchOption, SelectAllMatches, SelectNextMatch, SelectPrevMatch, ToggleCaseSensitive, ToggleRegex, ToggleWholeWord, }; use collections::HashMap; @@ -39,10 +39,10 @@ pub fn init(cx: &mut AppContext) { cx.add_action(BufferSearchBar::focus_editor); cx.add_action(BufferSearchBar::select_next_match); cx.add_action(BufferSearchBar::select_prev_match); - cx.add_action(BufferSearchBar::carets_to_all_matches); + cx.add_action(BufferSearchBar::select_all_matches); cx.add_action(BufferSearchBar::select_next_match_on_pane); cx.add_action(BufferSearchBar::select_prev_match_on_pane); - cx.add_action(BufferSearchBar::carets_to_all_matches_on_pane); + cx.add_action(BufferSearchBar::select_all_matches_on_pane); cx.add_action(BufferSearchBar::handle_editor_cancel); add_toggle_option_action::(SearchOption::CaseSensitive, cx); add_toggle_option_action::(SearchOption::WholeWord, cx); @@ -490,7 +490,7 @@ impl BufferSearchBar { self.select_match(Direction::Prev, cx); } - fn carets_to_all_matches(&mut 
self, _: &CaretsToAllMatches, cx: &mut ViewContext) { + fn select_all_matches(&mut self, _: &SelectAllMatches, cx: &mut ViewContext) { if !self.dismissed { if let Some(searchable_item) = self.active_searchable_item.as_ref() { if let Some(matches) = self @@ -540,13 +540,13 @@ impl BufferSearchBar { } } - fn carets_to_all_matches_on_pane( + fn select_all_matches_on_pane( pane: &mut Pane, - action: &CaretsToAllMatches, + action: &SelectAllMatches, cx: &mut ViewContext, ) { if let Some(search_bar) = pane.toolbar().read(cx).item_of_type::() { - search_bar.update(cx, |bar, cx| bar.carets_to_all_matches(action, cx)); + search_bar.update(cx, |bar, cx| bar.select_all_matches(action, cx)); } } @@ -994,7 +994,7 @@ mod tests { } #[gpui::test] - async fn test_search_carets_to_all_matches(cx: &mut TestAppContext) { + async fn test_search_select_all_matches(cx: &mut TestAppContext) { crate::project_search::tests::init_test(cx); let buffer_text = r#" @@ -1038,7 +1038,7 @@ mod tests { }); search_bar.update(cx, |search_bar, cx| { - search_bar.carets_to_all_matches(&CaretsToAllMatches, cx); + search_bar.select_all_matches(&SelectAllMatches, cx); let all_selections = editor.update(cx, |editor, cx| editor.selections.display_ranges(cx)); assert_eq!( diff --git a/crates/search/src/search.rs b/crates/search/src/search.rs index da679d191e..7080b4c07e 100644 --- a/crates/search/src/search.rs +++ b/crates/search/src/search.rs @@ -18,7 +18,7 @@ actions!( ToggleRegex, SelectNextMatch, SelectPrevMatch, - CaretsToAllMatches, + SelectAllMatches, ] ); From c130dd6b47c024ec9fa9b2b7a750010744d998ac Mon Sep 17 00:00:00 2001 From: Nate Butler Date: Thu, 13 Jul 2023 09:48:27 -0400 Subject: [PATCH 100/115] Add styles for an `action_button` ahead of the "Select all matches" UI button --- styles/src/style_tree/search.ts | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/styles/src/style_tree/search.ts b/styles/src/style_tree/search.ts index 5c16d03233..28940a8367 100644 --- 
a/styles/src/style_tree/search.ts +++ b/styles/src/style_tree/search.ts @@ -83,6 +83,35 @@ export default function search(): any { }, }, }), + action_button: interactive({ + base: { + ...text(theme.highest, "mono", "on"), + background: background(theme.highest, "on"), + corner_radius: 6, + border: border(theme.highest, "on"), + margin: { + right: 4, + }, + padding: { + bottom: 2, + left: 10, + right: 10, + top: 2, + }, + }, + state: { + hovered: { + ...text(theme.highest, "mono", "on", "hovered"), + background: background(theme.highest, "on", "hovered"), + border: border(theme.highest, "on", "hovered"), + }, + clicked: { + ...text(theme.highest, "mono", "on", "pressed"), + background: background(theme.highest, "on", "pressed"), + border: border(theme.highest, "on", "pressed"), + }, + }, + }), editor, invalid_editor: { ...editor, From ccc78000bd796105b2d9cd2d7fa69b7323fefcf4 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Fri, 14 Jul 2023 14:33:26 +0300 Subject: [PATCH 101/115] Preserve serach index for multicaret selection editor events --- crates/editor/src/items.rs | 32 +++++++-- crates/feedback/src/feedback_editor.rs | 9 ++- crates/language_tools/src/lsp_log.rs | 9 ++- crates/search/src/buffer_search.rs | 79 ++++++++++++++++++++++- crates/terminal_view/src/terminal_view.rs | 6 +- crates/workspace/src/searchable.rs | 11 +++- 6 files changed, 130 insertions(+), 16 deletions(-) diff --git a/crates/editor/src/items.rs b/crates/editor/src/items.rs index cc24cd35da..9498be1844 100644 --- a/crates/editor/src/items.rs +++ b/crates/editor/src/items.rs @@ -887,10 +887,20 @@ pub(crate) enum BufferSearchHighlights {} impl SearchableItem for Editor { type Match = Range; - fn to_search_event(event: &Self::Event) -> Option { + fn to_search_event( + &mut self, + event: &Self::Event, + _: &mut ViewContext, + ) -> Option { match event { Event::BufferEdited => Some(SearchEvent::MatchesInvalidated), - Event::SelectionsChanged { .. 
} => Some(SearchEvent::ActiveMatchChanged), + Event::SelectionsChanged { .. } => { + if self.selections.disjoint_anchors().len() == 1 { + Some(SearchEvent::ActiveMatchChanged) + } else { + None + } + } _ => None, } } @@ -954,8 +964,16 @@ impl SearchableItem for Editor { cx: &mut ViewContext, ) -> usize { let buffer = self.buffer().read(cx).snapshot(cx); - let cursor = self.selections.newest_anchor().head(); - if matches[current_index].start.cmp(&cursor, &buffer).is_gt() { + let current_index_position = if self.selections.disjoint_anchors().len() == 1 { + self.selections.newest_anchor().head() + } else { + matches[current_index].start + }; + if matches[current_index] + .start + .cmp(¤t_index_position, &buffer) + .is_gt() + { if direction == Direction::Prev { if current_index == 0 { current_index = matches.len() - 1; @@ -963,7 +981,11 @@ impl SearchableItem for Editor { current_index -= 1; } } - } else if matches[current_index].end.cmp(&cursor, &buffer).is_lt() { + } else if matches[current_index] + .end + .cmp(¤t_index_position, &buffer) + .is_lt() + { if direction == Direction::Next { current_index = 0; } diff --git a/crates/feedback/src/feedback_editor.rs b/crates/feedback/src/feedback_editor.rs index 663164dd07..bea398d3eb 100644 --- a/crates/feedback/src/feedback_editor.rs +++ b/crates/feedback/src/feedback_editor.rs @@ -362,8 +362,13 @@ impl Item for FeedbackEditor { impl SearchableItem for FeedbackEditor { type Match = Range; - fn to_search_event(event: &Self::Event) -> Option { - Editor::to_search_event(event) + fn to_search_event( + &mut self, + event: &Self::Event, + cx: &mut ViewContext, + ) -> Option { + self.editor + .update(cx, |editor, cx| editor.to_search_event(event, cx)) } fn clear_matches(&mut self, cx: &mut ViewContext) { diff --git a/crates/language_tools/src/lsp_log.rs b/crates/language_tools/src/lsp_log.rs index b27349f412..0dc594a13f 100644 --- a/crates/language_tools/src/lsp_log.rs +++ b/crates/language_tools/src/lsp_log.rs @@ -467,8 +467,13 
@@ impl Item for LspLogView { impl SearchableItem for LspLogView { type Match = ::Match; - fn to_search_event(event: &Self::Event) -> Option { - Editor::to_search_event(event) + fn to_search_event( + &mut self, + event: &Self::Event, + cx: &mut ViewContext, + ) -> Option { + self.editor + .update(cx, |editor, cx| editor.to_search_event(event, cx)) } fn clear_matches(&mut self, cx: &mut ViewContext) { diff --git a/crates/search/src/buffer_search.rs b/crates/search/src/buffer_search.rs index a87587a92f..c3790116d3 100644 --- a/crates/search/src/buffer_search.rs +++ b/crates/search/src/buffer_search.rs @@ -1029,12 +1029,16 @@ mod tests { }); editor.next_notification(cx).await; - editor.update(cx, |editor, cx| { - let initial_selections = editor.selections.display_ranges(cx); + let initial_selections = editor.update(cx, |editor, cx| { + let initial_selections = editor.selections.display_ranges(cx); assert_eq!( initial_selections.len(), 1, "Expected to have only one selection before adding carets to all matches, but got: {initial_selections:?}", - ) + ); + initial_selections + }); + search_bar.update(cx, |search_bar, _| { + assert_eq!(search_bar.active_match_index, Some(0)); }); search_bar.update(cx, |search_bar, cx| { @@ -1047,5 +1051,74 @@ mod tests { "Should select all `a` characters in the buffer, but got: {all_selections:?}" ); }); + search_bar.update(cx, |search_bar, _| { + assert_eq!( + search_bar.active_match_index, + Some(0), + "Match index should not change after selecting all matches" + ); + }); + + search_bar.update(cx, |search_bar, cx| { + search_bar.select_next_match(&SelectNextMatch, cx); + let all_selections = + editor.update(cx, |editor, cx| editor.selections.display_ranges(cx)); + assert_eq!( + all_selections.len(), + 1, + "On next match, should deselect items and select the next match" + ); + assert_ne!( + all_selections, initial_selections, + "Next match should be different from the first selection" + ); + }); + search_bar.update(cx, |search_bar, _| 
{ + assert_eq!( + search_bar.active_match_index, + Some(1), + "Match index should be updated to the next one" + ); + }); + + search_bar.update(cx, |search_bar, cx| { + search_bar.select_all_matches(&SelectAllMatches, cx); + let all_selections = + editor.update(cx, |editor, cx| editor.selections.display_ranges(cx)); + assert_eq!( + all_selections.len(), + expected_query_matches_count, + "Should select all `a` characters in the buffer, but got: {all_selections:?}" + ); + }); + search_bar.update(cx, |search_bar, _| { + assert_eq!( + search_bar.active_match_index, + Some(1), + "Match index should not change after selecting all matches" + ); + }); + + search_bar.update(cx, |search_bar, cx| { + search_bar.select_prev_match(&SelectPrevMatch, cx); + let all_selections = + editor.update(cx, |editor, cx| editor.selections.display_ranges(cx)); + assert_eq!( + all_selections.len(), + 1, + "On previous match, should deselect items and select the previous item" + ); + assert_eq!( + all_selections, initial_selections, + "Previous match should be the same as the first selection" + ); + }); + search_bar.update(cx, |search_bar, _| { + assert_eq!( + search_bar.active_match_index, + Some(0), + "Match index should be updated to the previous one" + ); + }); } } diff --git a/crates/terminal_view/src/terminal_view.rs b/crates/terminal_view/src/terminal_view.rs index 8e1e4ad62f..3dd401e392 100644 --- a/crates/terminal_view/src/terminal_view.rs +++ b/crates/terminal_view/src/terminal_view.rs @@ -647,7 +647,11 @@ impl SearchableItem for TerminalView { } /// Convert events raised by this item into search-relevant events (if applicable) - fn to_search_event(event: &Self::Event) -> Option { + fn to_search_event( + &mut self, + event: &Self::Event, + _: &mut ViewContext, + ) -> Option { match event { Event::Wakeup => Some(SearchEvent::MatchesInvalidated), Event::SelectionsChanged => Some(SearchEvent::ActiveMatchChanged), diff --git a/crates/workspace/src/searchable.rs 
b/crates/workspace/src/searchable.rs index 4ebfe69c21..3a3ba02e06 100644 --- a/crates/workspace/src/searchable.rs +++ b/crates/workspace/src/searchable.rs @@ -37,7 +37,11 @@ pub trait SearchableItem: Item { regex: true, } } - fn to_search_event(event: &Self::Event) -> Option; + fn to_search_event( + &mut self, + event: &Self::Event, + cx: &mut ViewContext, + ) -> Option; fn clear_matches(&mut self, cx: &mut ViewContext); fn update_matches(&mut self, matches: Vec, cx: &mut ViewContext); fn query_suggestion(&mut self, cx: &mut ViewContext) -> String; @@ -141,8 +145,9 @@ impl SearchableItemHandle for ViewHandle { cx: &mut WindowContext, handler: Box, ) -> Subscription { - cx.subscribe(self, move |_, event, cx| { - if let Some(search_event) = T::to_search_event(event) { + cx.subscribe(self, move |handle, event, cx| { + let search_event = handle.update(cx, |handle, cx| handle.to_search_event(event, cx)); + if let Some(search_event) = search_event { handler(search_event, cx) } }) From b14cd5f56d3029f986a848a971c96cbb1f5f00e9 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Fri, 14 Jul 2023 15:50:45 +0300 Subject: [PATCH 102/115] Add a new button for the action --- crates/search/src/buffer_search.rs | 32 ++++++++++++++++++++++++++++++ crates/theme/src/theme.rs | 1 + 2 files changed, 33 insertions(+) diff --git a/crates/search/src/buffer_search.rs b/crates/search/src/buffer_search.rs index c3790116d3..f6466c85af 100644 --- a/crates/search/src/buffer_search.rs +++ b/crates/search/src/buffer_search.rs @@ -148,6 +148,7 @@ impl View for BufferSearchBar { Flex::row() .with_child(self.render_nav_button("<", Direction::Prev, cx)) .with_child(self.render_nav_button(">", Direction::Next, cx)) + .with_child(self.render_action_button("Select All", cx)) .aligned(), ) .with_child( @@ -403,6 +404,37 @@ impl BufferSearchBar { .into_any() } + fn render_action_button( + &self, + icon: &'static str, + cx: &mut ViewContext, + ) -> AnyElement { + let tooltip = "Select All Matches"; + let 
tooltip_style = theme::current(cx).tooltip.clone(); + let action_type_id = 0_usize; + + enum ActionButton {} + MouseEventHandler::::new(action_type_id, cx, |state, cx| { + let theme = theme::current(cx); + let style = theme.search.action_button.style_for(state); + Label::new(icon, style.text.clone()) + .contained() + .with_style(style.container) + }) + .on_click(MouseButton::Left, move |_, this, cx| { + this.select_all_matches(&SelectAllMatches, cx) + }) + .with_cursor_style(CursorStyle::PointingHand) + .with_tooltip::( + action_type_id, + tooltip.to_string(), + Some(Box::new(SelectAllMatches)), + tooltip_style, + cx, + ) + .into_any() + } + fn render_close_button( &self, theme: &theme::Search, diff --git a/crates/theme/src/theme.rs b/crates/theme/src/theme.rs index 4e8ece1c8f..cdf3cadf59 100644 --- a/crates/theme/src/theme.rs +++ b/crates/theme/src/theme.rs @@ -379,6 +379,7 @@ pub struct Search { pub invalid_include_exclude_editor: ContainerStyle, pub include_exclude_inputs: ContainedText, pub option_button: Toggleable>, + pub action_button: Interactive, pub match_background: Color, pub match_index: ContainedText, pub results_status: TextStyle, From 2e2333107a21aa34c983089502c2133eca4bbfdb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 11 Jul 2023 14:15:45 -0700 Subject: [PATCH 103/115] Find the layer with the smallest enclosing node in language_scope_at --- crates/language/src/buffer.rs | 34 ++--- crates/language/src/buffer_tests.rs | 154 ++++++++++++++++------ crates/language/src/syntax_map.rs | 27 ++-- crates/project/src/project.rs | 2 + crates/project/src/worktree.rs | 1 + crates/zed/src/languages/heex/config.toml | 2 +- 6 files changed, 151 insertions(+), 69 deletions(-) diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 5041ab759d..0b10432a9f 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -2145,23 +2145,27 @@ impl BufferSnapshot { pub fn language_scope_at(&self, position: D) -> Option 
{ let offset = position.to_offset(self); + let mut range = 0..self.len(); + let mut scope = self.language.clone().map(|language| LanguageScope { + language, + override_id: None, + }); - if let Some(layer_info) = self - .syntax - .layers_for_range(offset..offset, &self.text) - .filter(|l| l.node().end_byte() > offset) - .last() - { - Some(LanguageScope { - language: layer_info.language.clone(), - override_id: layer_info.override_id(offset, &self.text), - }) - } else { - self.language.clone().map(|language| LanguageScope { - language, - override_id: None, - }) + // Use the layer that has the smallest node intersecting the given point. + for layer in self.syntax.layers_for_range(offset..offset, &self.text) { + let mut cursor = layer.node().walk(); + while cursor.goto_first_child_for_byte(offset).is_some() {} + let node_range = cursor.node().byte_range(); + if node_range.to_inclusive().contains(&offset) && node_range.len() < range.len() { + range = node_range; + scope = Some(LanguageScope { + language: layer.language.clone(), + override_id: layer.override_id(offset, &self.text), + }); + } } + + scope } pub fn surrounding_word(&self, start: T) -> (Range, Option) { diff --git a/crates/language/src/buffer_tests.rs b/crates/language/src/buffer_tests.rs index 38cefbcef9..db9693a530 100644 --- a/crates/language/src/buffer_tests.rs +++ b/crates/language/src/buffer_tests.rs @@ -1533,47 +1533,9 @@ fn test_autoindent_with_injected_languages(cx: &mut AppContext) { ]) }); - let html_language = Arc::new( - Language::new( - LanguageConfig { - name: "HTML".into(), - ..Default::default() - }, - Some(tree_sitter_html::language()), - ) - .with_indents_query( - " - (element - (start_tag) @start - (end_tag)? @end) @indent - ", - ) - .unwrap() - .with_injection_query( - r#" - (script_element - (raw_text) @content - (#set! 
"language" "javascript")) - "#, - ) - .unwrap(), - ); + let html_language = Arc::new(html_lang()); - let javascript_language = Arc::new( - Language::new( - LanguageConfig { - name: "JavaScript".into(), - ..Default::default() - }, - Some(tree_sitter_javascript::language()), - ) - .with_indents_query( - r#" - (object "}" @end) @indent - "#, - ) - .unwrap(), - ); + let javascript_language = Arc::new(javascript_lang()); let language_registry = Arc::new(LanguageRegistry::test()); language_registry.add(html_language.clone()); @@ -1669,7 +1631,7 @@ fn test_autoindent_query_with_outdent_captures(cx: &mut AppContext) { } #[gpui::test] -fn test_language_config_at(cx: &mut AppContext) { +fn test_language_scope_at(cx: &mut AppContext) { init_settings(cx, |_| {}); cx.add_model(|cx| { @@ -1756,6 +1718,54 @@ fn test_language_config_at(cx: &mut AppContext) { }); } +#[gpui::test] +fn test_language_scope_at_with_combined_injections(cx: &mut AppContext) { + init_settings(cx, |_| {}); + + cx.add_model(|cx| { + let text = r#" +
    + <% people.each do |person| %> +
  1. + <%= person.name %> +
  2. + <% end %> +
+ "# + .unindent(); + + let language_registry = Arc::new(LanguageRegistry::test()); + language_registry.add(Arc::new(ruby_lang())); + language_registry.add(Arc::new(html_lang())); + language_registry.add(Arc::new(erb_lang())); + + let mut buffer = Buffer::new(0, text, cx); + buffer.set_language_registry(language_registry.clone()); + buffer.set_language( + language_registry + .language_for_name("ERB") + .now_or_never() + .unwrap() + .ok(), + cx, + ); + + let snapshot = buffer.snapshot(); + let html_config = snapshot.language_scope_at(Point::new(2, 4)).unwrap(); + assert_eq!(html_config.line_comment_prefix(), None); + assert_eq!( + html_config.block_comment_delimiters(), + Some((&"".into())) + ); + + let ruby_config = snapshot.language_scope_at(Point::new(3, 12)).unwrap(); + assert_eq!(ruby_config.line_comment_prefix().unwrap().as_ref(), "# "); + assert_eq!(ruby_config.block_comment_delimiters(), None); + + buffer + }); +} + #[gpui::test] fn test_serialization(cx: &mut gpui::AppContext) { let mut now = Instant::now(); @@ -2143,6 +2153,7 @@ fn ruby_lang() -> Language { LanguageConfig { name: "Ruby".into(), path_suffixes: vec!["rb".to_string()], + line_comment: Some("# ".into()), ..Default::default() }, Some(tree_sitter_ruby::language()), @@ -2158,6 +2169,61 @@ fn ruby_lang() -> Language { .unwrap() } +fn html_lang() -> Language { + Language::new( + LanguageConfig { + name: "HTML".into(), + block_comment: Some(("".into())), + ..Default::default() + }, + Some(tree_sitter_html::language()), + ) + .with_indents_query( + " + (element + (start_tag) @start + (end_tag)? @end) @indent + ", + ) + .unwrap() + .with_injection_query( + r#" + (script_element + (raw_text) @content + (#set! 
"language" "javascript")) + "#, + ) + .unwrap() +} + +fn erb_lang() -> Language { + Language::new( + LanguageConfig { + name: "ERB".into(), + path_suffixes: vec!["erb".to_string()], + block_comment: Some(("<%#".into(), "%>".into())), + ..Default::default() + }, + Some(tree_sitter_embedded_template::language()), + ) + .with_injection_query( + r#" + ( + (code) @content + (#set! "language" "ruby") + (#set! "combined") + ) + + ( + (content) @content + (#set! "language" "html") + (#set! "combined") + ) + "#, + ) + .unwrap() +} + fn rust_lang() -> Language { Language::new( LanguageConfig { @@ -2236,6 +2302,12 @@ fn javascript_lang() -> Language { "#, ) .unwrap() + .with_indents_query( + r#" + (object "}" @end) @indent + "#, + ) + .unwrap() } fn get_tree_sexp(buffer: &ModelHandle, cx: &gpui::TestAppContext) -> String { diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index b6431c2286..526f99ec95 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -771,8 +771,10 @@ impl SyntaxSnapshot { range: Range, buffer: &'a BufferSnapshot, ) -> impl 'a + Iterator { - let start = buffer.anchor_before(range.start.to_offset(buffer)); - let end = buffer.anchor_after(range.end.to_offset(buffer)); + let start_offset = range.start.to_offset(buffer); + let end_offset = range.end.to_offset(buffer); + let start = buffer.anchor_before(start_offset); + let end = buffer.anchor_after(end_offset); let mut cursor = self.layers.filter::<_, ()>(move |summary| { if summary.max_depth > summary.min_depth { @@ -787,20 +789,21 @@ impl SyntaxSnapshot { cursor.next(buffer); iter::from_fn(move || { while let Some(layer) = cursor.item() { + let mut info = None; if let SyntaxLayerContent::Parsed { tree, language } = &layer.content { - let info = SyntaxLayerInfo { + let layer_start_offset = layer.range.start.to_offset(buffer); + let layer_start_point = layer.range.start.to_point(buffer).to_ts_point(); + + info = Some(SyntaxLayerInfo { tree, 
language, depth: layer.depth, - offset: ( - layer.range.start.to_offset(buffer), - layer.range.start.to_point(buffer).to_ts_point(), - ), - }; - cursor.next(buffer); - return Some(info); - } else { - cursor.next(buffer); + offset: (layer_start_offset, layer_start_point), + }); + } + cursor.next(buffer); + if info.is_some() { + return info; } } None diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs index 3bb5457b1c..fded9ec309 100644 --- a/crates/project/src/project.rs +++ b/crates/project/src/project.rs @@ -3045,6 +3045,8 @@ impl Project { ) -> Task<(Option, Vec)> { let key = (worktree_id, adapter_name); if let Some(server_id) = self.language_server_ids.remove(&key) { + log::info!("stopping language server {}", key.1 .0); + // Remove other entries for this language server as well let mut orphaned_worktrees = vec![worktree_id]; let other_keys = self.language_server_ids.keys().cloned().collect::>(); diff --git a/crates/project/src/worktree.rs b/crates/project/src/worktree.rs index 2c3c9d5304..b113af34ad 100644 --- a/crates/project/src/worktree.rs +++ b/crates/project/src/worktree.rs @@ -397,6 +397,7 @@ impl Worktree { })) } + // abcdefghi pub fn remote( project_remote_id: u64, replica_id: ReplicaId, diff --git a/crates/zed/src/languages/heex/config.toml b/crates/zed/src/languages/heex/config.toml index fafd75dc8d..b5d74f80f3 100644 --- a/crates/zed/src/languages/heex/config.toml +++ b/crates/zed/src/languages/heex/config.toml @@ -4,4 +4,4 @@ autoclose_before = ">})" brackets = [ { start = "<", end = ">", close = true, newline = true }, ] -block_comment = ["<%#", "%>"] +block_comment = ["<%!--", "--%>"] From 2f2ef7c165370dce683f78aea3e9aad00705864e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Jul 2023 18:09:32 -0700 Subject: [PATCH 104/115] Use workspace dependencies for tree-sitter grammars --- Cargo.lock | 36 ++++++------------------- Cargo.toml | 21 +++++++++++++++ crates/editor/Cargo.toml | 17 ++++++------ 
crates/editor/src/editor_tests.rs | 4 +-- crates/language/Cargo.toml | 26 +++++++++--------- crates/language/src/buffer_tests.rs | 4 +-- crates/language/src/language.rs | 2 +- crates/zed/Cargo.toml | 41 +++++++++++++++-------------- 8 files changed, 77 insertions(+), 74 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1a4562954f..57604d28b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2253,9 +2253,8 @@ dependencies = [ "theme", "tree-sitter", "tree-sitter-html", - "tree-sitter-javascript", "tree-sitter-rust", - "tree-sitter-typescript 0.20.2 (git+https://github.com/tree-sitter/tree-sitter-typescript?rev=5d20856f34315b068c41edaee2ac8a100081d259)", + "tree-sitter-typescript", "unindent", "util", "workspace", @@ -3750,15 +3749,16 @@ dependencies = [ "text", "theme", "tree-sitter", + "tree-sitter-elixir", "tree-sitter-embedded-template", + "tree-sitter-heex", "tree-sitter-html", - "tree-sitter-javascript", - "tree-sitter-json 0.19.0", + "tree-sitter-json 0.20.0", "tree-sitter-markdown", "tree-sitter-python", "tree-sitter-ruby", "tree-sitter-rust", - "tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)", + "tree-sitter-typescript", "unicase", "unindent", "util", @@ -8029,16 +8029,6 @@ dependencies = [ "tree-sitter", ] -[[package]] -name = "tree-sitter-javascript" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2490fab08630b2c8943c320f7b63473cbf65511c8d83aec551beb9b4375906ed" -dependencies = [ - "cc", - "tree-sitter", -] - [[package]] name = "tree-sitter-json" version = "0.19.0" @@ -8118,8 +8108,8 @@ dependencies = [ [[package]] name = "tree-sitter-scheme" -version = "0.5.0" -source = "git+https://github.com/6cdh/tree-sitter-scheme?rev=ca8af220aaf2a80aaf609bfb0df193817e4f064b#ca8af220aaf2a80aaf609bfb0df193817e4f064b" +version = "0.2.0" +source = "git+https://github.com/6cdh/tree-sitter-scheme?rev=af0fd1fa452cb2562dc7b5c8a8c55551c39273b9#af0fd1fa452cb2562dc7b5c8a8c55551c39273b9" 
dependencies = [ "cc", "tree-sitter", @@ -8143,16 +8133,6 @@ dependencies = [ "tree-sitter", ] -[[package]] -name = "tree-sitter-typescript" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "079c695c32d39ad089101c66393aeaca30e967fba3486a91f573d2f0e12d290a" -dependencies = [ - "cc", - "tree-sitter", -] - [[package]] name = "tree-sitter-typescript" version = "0.20.2" @@ -9566,7 +9546,7 @@ dependencies = [ "tree-sitter-scheme", "tree-sitter-svelte", "tree-sitter-toml", - "tree-sitter-typescript 0.20.2 (git+https://github.com/tree-sitter/tree-sitter-typescript?rev=5d20856f34315b068c41edaee2ac8a100081d259)", + "tree-sitter-typescript", "tree-sitter-yaml", "unindent", "url", diff --git a/Cargo.toml b/Cargo.toml index 529f297f70..48a9a51cd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -107,6 +107,27 @@ tree-sitter = "0.20" unindent = { version = "0.1.7" } pretty_assertions = "1.3.0" +tree-sitter-c = "0.20.1" +tree-sitter-cpp = "0.20.0" +tree-sitter-css = { git = "https://github.com/tree-sitter/tree-sitter-css", rev = "769203d0f9abe1a9a691ac2b9fe4bb4397a73c51" } +tree-sitter-elixir = { git = "https://github.com/elixir-lang/tree-sitter-elixir", rev = "4ba9dab6e2602960d95b2b625f3386c27e08084e" } +tree-sitter-embedded-template = "0.20.0" +tree-sitter-go = { git = "https://github.com/tree-sitter/tree-sitter-go", rev = "aeb2f33b366fd78d5789ff104956ce23508b85db" } +tree-sitter-heex = { git = "https://github.com/phoenixframework/tree-sitter-heex", rev = "2e1348c3cf2c9323e87c2744796cf3f3868aa82a" } +tree-sitter-json = { git = "https://github.com/tree-sitter/tree-sitter-json", rev = "40a81c01a40ac48744e0c8ccabbaba1920441199" } +tree-sitter-rust = "0.20.3" +tree-sitter-markdown = { git = "https://github.com/MDeiml/tree-sitter-markdown", rev = "330ecab87a3e3a7211ac69bbadc19eabecdb1cca" } +tree-sitter-python = "0.20.2" +tree-sitter-toml = { git = "https://github.com/tree-sitter/tree-sitter-toml", rev = 
"342d9be207c2dba869b9967124c679b5e6fd0ebe" } +tree-sitter-typescript = { git = "https://github.com/tree-sitter/tree-sitter-typescript", rev = "5d20856f34315b068c41edaee2ac8a100081d259" } +tree-sitter-ruby = "0.20.0" +tree-sitter-html = "0.19.0" +tree-sitter-scheme = { git = "https://github.com/6cdh/tree-sitter-scheme", rev = "af0fd1fa452cb2562dc7b5c8a8c55551c39273b9"} +tree-sitter-svelte = { git = "https://github.com/Himujjal/tree-sitter-svelte", rev = "697bb515471871e85ff799ea57a76298a71a9cca"} +tree-sitter-racket = { git = "https://github.com/zed-industries/tree-sitter-racket", rev = "eb010cf2c674c6fd9a6316a84e28ef90190fe51a"} +tree-sitter-yaml = { git = "https://github.com/zed-industries/tree-sitter-yaml", rev = "f545a41f57502e1b5ddf2a6668896c1b0620f930"} +tree-sitter-lua = "0.0.14" + [patch.crates-io] tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "49226023693107fba9a1191136a4f47f38cdca73" } async-task = { git = "https://github.com/zed-industries/async-task", rev = "341b57d6de98cdfd7b418567b8de2022ca993a6e" } diff --git a/crates/editor/Cargo.toml b/crates/editor/Cargo.toml index dcc2220227..087ce81c26 100644 --- a/crates/editor/Cargo.toml +++ b/crates/editor/Cargo.toml @@ -57,16 +57,16 @@ ordered-float.workspace = true parking_lot.workspace = true postage.workspace = true pulldown-cmark = { version = "0.9.2", default-features = false } -rand = { workspace = true, optional = true } schemars.workspace = true serde.workspace = true serde_derive.workspace = true smallvec.workspace = true smol.workspace = true -tree-sitter-rust = { version = "*", optional = true } -tree-sitter-html = { version = "*", optional = true } -tree-sitter-javascript = { version = "*", optional = true } -tree-sitter-typescript = { git = "https://github.com/tree-sitter/tree-sitter-typescript", rev = "5d20856f34315b068c41edaee2ac8a100081d259", optional = true } + +rand = { workspace = true, optional = true } +tree-sitter-rust = { workspace = true, optional = true } 
+tree-sitter-html = { workspace = true, optional = true } +tree-sitter-typescript = { workspace = true, optional = true } [dev-dependencies] copilot = { path = "../copilot", features = ["test-support"] } @@ -84,7 +84,6 @@ env_logger.workspace = true rand.workspace = true unindent.workspace = true tree-sitter.workspace = true -tree-sitter-rust = "0.20" -tree-sitter-html = "0.19" -tree-sitter-typescript = { git = "https://github.com/tree-sitter/tree-sitter-typescript", rev = "5d20856f34315b068c41edaee2ac8a100081d259" } -tree-sitter-javascript = "0.20" +tree-sitter-rust.workspace = true +tree-sitter-html.workspace = true +tree-sitter-typescript.workspace = true diff --git a/crates/editor/src/editor_tests.rs b/crates/editor/src/editor_tests.rs index 260b0ccc40..247a7b021d 100644 --- a/crates/editor/src/editor_tests.rs +++ b/crates/editor/src/editor_tests.rs @@ -3836,7 +3836,7 @@ async fn test_autoclose_with_embedded_language(cx: &mut gpui::TestAppContext) { autoclose_before: "})]>".into(), ..Default::default() }, - Some(tree_sitter_javascript::language()), + Some(tree_sitter_typescript::language_tsx()), )); let registry = Arc::new(LanguageRegistry::test()); @@ -5383,7 +5383,7 @@ async fn test_toggle_block_comment(cx: &mut gpui::TestAppContext) { line_comment: Some("// ".into()), ..Default::default() }, - Some(tree_sitter_javascript::language()), + Some(tree_sitter_typescript::language_tsx()), )); let registry = Arc::new(LanguageRegistry::test()); diff --git a/crates/language/Cargo.toml b/crates/language/Cargo.toml index c1f7e79d58..4771fc7083 100644 --- a/crates/language/Cargo.toml +++ b/crates/language/Cargo.toml @@ -46,7 +46,6 @@ lazy_static.workspace = true log.workspace = true parking_lot.workspace = true postage.workspace = true -rand = { workspace = true, optional = true } regex.workspace = true schemars.workspace = true serde.workspace = true @@ -56,10 +55,12 @@ similar = "1.3" smallvec.workspace = true smol.workspace = true tree-sitter.workspace = true 
-tree-sitter-rust = { version = "*", optional = true } -tree-sitter-typescript = { version = "*", optional = true } unicase = "2.6" +rand = { workspace = true, optional = true } +tree-sitter-rust = { workspace = true, optional = true } +tree-sitter-typescript = { workspace = true, optional = true } + [dev-dependencies] client = { path = "../client", features = ["test-support"] } collections = { path = "../collections", features = ["test-support"] } @@ -74,12 +75,13 @@ indoc.workspace = true rand.workspace = true unindent.workspace = true -tree-sitter-embedded-template = "*" -tree-sitter-html = "*" -tree-sitter-javascript = "*" -tree-sitter-json = "*" -tree-sitter-markdown = { git = "https://github.com/MDeiml/tree-sitter-markdown", rev = "330ecab87a3e3a7211ac69bbadc19eabecdb1cca" } -tree-sitter-rust = "*" -tree-sitter-python = "*" -tree-sitter-typescript = "*" -tree-sitter-ruby = "*" +tree-sitter-embedded-template.workspace = true +tree-sitter-html.workspace = true +tree-sitter-json.workspace = true +tree-sitter-markdown.workspace = true +tree-sitter-rust.workspace = true +tree-sitter-python.workspace = true +tree-sitter-typescript.workspace = true +tree-sitter-ruby.workspace = true +tree-sitter-elixir.workspace = true +tree-sitter-heex.workspace = true diff --git a/crates/language/src/buffer_tests.rs b/crates/language/src/buffer_tests.rs index db9693a530..399ca85e56 100644 --- a/crates/language/src/buffer_tests.rs +++ b/crates/language/src/buffer_tests.rs @@ -1671,7 +1671,7 @@ fn test_language_scope_at(cx: &mut AppContext) { .collect(), ..Default::default() }, - Some(tree_sitter_javascript::language()), + Some(tree_sitter_typescript::language_tsx()), ) .with_override_query( r#" @@ -2293,7 +2293,7 @@ fn javascript_lang() -> Language { name: "JavaScript".into(), ..Default::default() }, - Some(tree_sitter_javascript::language()), + Some(tree_sitter_typescript::language_tsx()), ) .with_brackets_query( r#" diff --git a/crates/language/src/language.rs 
b/crates/language/src/language.rs index dbd35f0e87..af80069e15 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -1791,7 +1791,7 @@ mod tests { first_line_pattern: Some(Regex::new(r"\bnode\b").unwrap()), ..Default::default() }, - tree_sitter_javascript::language(), + tree_sitter_typescript::language_tsx(), vec![], |_| Default::default(), ); diff --git a/crates/zed/Cargo.toml b/crates/zed/Cargo.toml index 28970b1a0f..848c07d500 100644 --- a/crates/zed/Cargo.toml +++ b/crates/zed/Cargo.toml @@ -104,26 +104,27 @@ thiserror.workspace = true tiny_http = "0.8" toml.workspace = true tree-sitter.workspace = true -tree-sitter-c = "0.20.1" -tree-sitter-cpp = "0.20.0" -tree-sitter-css = { git = "https://github.com/tree-sitter/tree-sitter-css", rev = "769203d0f9abe1a9a691ac2b9fe4bb4397a73c51" } -tree-sitter-elixir = { git = "https://github.com/elixir-lang/tree-sitter-elixir", rev = "4ba9dab6e2602960d95b2b625f3386c27e08084e" } -tree-sitter-embedded-template = "0.20.0" -tree-sitter-go = { git = "https://github.com/tree-sitter/tree-sitter-go", rev = "aeb2f33b366fd78d5789ff104956ce23508b85db" } -tree-sitter-heex = { git = "https://github.com/phoenixframework/tree-sitter-heex", rev = "2e1348c3cf2c9323e87c2744796cf3f3868aa82a" } -tree-sitter-json = { git = "https://github.com/tree-sitter/tree-sitter-json", rev = "40a81c01a40ac48744e0c8ccabbaba1920441199" } -tree-sitter-rust = "0.20.3" -tree-sitter-markdown = { git = "https://github.com/MDeiml/tree-sitter-markdown", rev = "330ecab87a3e3a7211ac69bbadc19eabecdb1cca" } -tree-sitter-python = "0.20.2" -tree-sitter-toml = { git = "https://github.com/tree-sitter/tree-sitter-toml", rev = "342d9be207c2dba869b9967124c679b5e6fd0ebe" } -tree-sitter-typescript = { git = "https://github.com/tree-sitter/tree-sitter-typescript", rev = "5d20856f34315b068c41edaee2ac8a100081d259" } -tree-sitter-ruby = "0.20.0" -tree-sitter-html = "0.19.0" -tree-sitter-scheme = { git = "https://github.com/6cdh/tree-sitter-scheme", rev 
= "ca8af220aaf2a80aaf609bfb0df193817e4f064b"} -tree-sitter-racket = { git = "https://github.com/zed-industries/tree-sitter-racket", rev = "eb010cf2c674c6fd9a6316a84e28ef90190fe51a"} -tree-sitter-yaml = { git = "https://github.com/zed-industries/tree-sitter-yaml", rev = "f545a41f57502e1b5ddf2a6668896c1b0620f930"} -tree-sitter-svelte = { git = "https://github.com/Himujjal/tree-sitter-svelte", rev = "697bb515471871e85ff799ea57a76298a71a9cca"} -tree-sitter-lua = "0.0.14" +tree-sitter-c.workspace = true +tree-sitter-cpp.workspace = true +tree-sitter-css.workspace = true +tree-sitter-elixir.workspace = true +tree-sitter-embedded-template.workspace = true +tree-sitter-go.workspace = true +tree-sitter-heex.workspace = true +tree-sitter-json.workspace = true +tree-sitter-rust.workspace = true +tree-sitter-markdown.workspace = true +tree-sitter-python.workspace = true +tree-sitter-toml.workspace = true +tree-sitter-typescript.workspace = true +tree-sitter-ruby.workspace = true +tree-sitter-html.workspace = true +tree-sitter-scheme.workspace = true +tree-sitter-svelte.workspace = true +tree-sitter-racket.workspace = true +tree-sitter-yaml.workspace = true +tree-sitter-lua.workspace = true + url = "2.2" urlencoding = "2.1.2" uuid = { version = "1.1.2", features = ["v4"] } From 21e7e35e7356be6cc7a7c870c7a0bbec294d7fa3 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 13 Jul 2023 18:38:29 -0700 Subject: [PATCH 105/115] Include newlines in between combined injection ranges on different lines --- crates/language/src/syntax_map.rs | 90 ++++++++++++- .../src/syntax_map/syntax_map_tests.rs | 120 ++++++++++++++++-- crates/zed/src/languages/heex/config.toml | 2 +- crates/zed/src/languages/heex/highlights.scm | 6 +- 4 files changed, 201 insertions(+), 17 deletions(-) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 526f99ec95..1590294b1a 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -569,11 +569,19 
@@ impl SyntaxSnapshot { range.end = range.end.saturating_sub(step_start_byte); } - included_ranges = splice_included_ranges( + let changed_indices; + (included_ranges, changed_indices) = splice_included_ranges( old_tree.included_ranges(), &parent_layer_changed_ranges, &included_ranges, ); + insert_newlines_between_ranges( + changed_indices, + &mut included_ranges, + &text, + step_start_byte, + step_start_point, + ); } if included_ranges.is_empty() { @@ -586,7 +594,7 @@ impl SyntaxSnapshot { } log::trace!( - "update layer. language:{}, start:{:?}, ranges:{:?}", + "update layer. language:{}, start:{:?}, included_ranges:{:?}", language.name(), LogAnchorRange(&step.range, text), LogIncludedRanges(&included_ranges), @@ -608,6 +616,16 @@ impl SyntaxSnapshot { }), ); } else { + if matches!(step.mode, ParseMode::Combined { .. }) { + insert_newlines_between_ranges( + 0..included_ranges.len(), + &mut included_ranges, + text, + step_start_byte, + step_start_point, + ); + } + if included_ranges.is_empty() { included_ranges.push(tree_sitter::Range { start_byte: 0, @@ -1275,14 +1293,20 @@ fn get_injections( } } +/// Update the given list of included `ranges`, removing any ranges that intersect +/// `removed_ranges`, and inserting the given `new_ranges`. +/// +/// Returns a new vector of ranges, and the range of the vector that was changed, +/// from the previous `ranges` vector. 
pub(crate) fn splice_included_ranges( mut ranges: Vec, removed_ranges: &[Range], new_ranges: &[tree_sitter::Range], -) -> Vec { +) -> (Vec, Range) { let mut removed_ranges = removed_ranges.iter().cloned().peekable(); let mut new_ranges = new_ranges.into_iter().cloned().peekable(); let mut ranges_ix = 0; + let mut changed_portion = usize::MAX..0; loop { let next_new_range = new_ranges.peek(); let next_removed_range = removed_ranges.peek(); @@ -1344,11 +1368,69 @@ pub(crate) fn splice_included_ranges( } } + changed_portion.start = changed_portion.start.min(start_ix); + changed_portion.end = changed_portion.end.max(if insert.is_some() { + start_ix + 1 + } else { + start_ix + }); + ranges.splice(start_ix..end_ix, insert); ranges_ix = start_ix; } - ranges + if changed_portion.end < changed_portion.start { + changed_portion = 0..0; + } + + (ranges, changed_portion) +} + +/// Ensure there are newline ranges in between content range that appear on +/// different lines. For performance, only iterate through the given range of +/// indices. All of the ranges in the array are relative to a given start byte +/// and point. 
+fn insert_newlines_between_ranges( + indices: Range, + ranges: &mut Vec, + text: &text::BufferSnapshot, + start_byte: usize, + start_point: Point, +) { + let mut ix = indices.end + 1; + while ix > indices.start { + ix -= 1; + if 0 == ix || ix == ranges.len() { + continue; + } + + let range_b = ranges[ix].clone(); + let range_a = &mut ranges[ix - 1]; + if range_a.end_point.column == 0 { + continue; + } + + if range_a.end_point.row < range_b.start_point.row { + let end_point = start_point + Point::from_ts_point(range_a.end_point); + let line_end = Point::new(end_point.row, text.line_len(end_point.row)); + if end_point.column as u32 >= line_end.column { + range_a.end_byte += 1; + range_a.end_point.row += 1; + range_a.end_point.column = 0; + } else { + let newline_offset = text.point_to_offset(line_end); + ranges.insert( + ix, + tree_sitter::Range { + start_byte: newline_offset - start_byte, + end_byte: newline_offset - start_byte + 1, + start_point: (line_end - start_point).to_ts_point(), + end_point: ((line_end - start_point) + Point::new(1, 0)).to_ts_point(), + }, + ) + } + } + } } impl OwnedSyntaxLayerInfo { diff --git a/crates/language/src/syntax_map/syntax_map_tests.rs b/crates/language/src/syntax_map/syntax_map_tests.rs index 272501f2d0..a6d35d0e2f 100644 --- a/crates/language/src/syntax_map/syntax_map_tests.rs +++ b/crates/language/src/syntax_map/syntax_map_tests.rs @@ -11,7 +11,7 @@ use util::test::marked_text_ranges; fn test_splice_included_ranges() { let ranges = vec![ts_range(20..30), ts_range(50..60), ts_range(80..90)]; - let new_ranges = splice_included_ranges( + let (new_ranges, change) = splice_included_ranges( ranges.clone(), &[54..56, 58..68], &[ts_range(50..54), ts_range(59..67)], @@ -25,14 +25,16 @@ fn test_splice_included_ranges() { ts_range(80..90), ] ); + assert_eq!(change, 1..3); - let new_ranges = splice_included_ranges(ranges.clone(), &[70..71, 91..100], &[]); + let (new_ranges, change) = splice_included_ranges(ranges.clone(), &[70..71, 
91..100], &[]); assert_eq!( new_ranges, &[ts_range(20..30), ts_range(50..60), ts_range(80..90)] ); + assert_eq!(change, 2..3); - let new_ranges = + let (new_ranges, change) = splice_included_ranges(ranges.clone(), &[], &[ts_range(0..2), ts_range(70..75)]); assert_eq!( new_ranges, @@ -44,16 +46,21 @@ fn test_splice_included_ranges() { ts_range(80..90) ] ); + assert_eq!(change, 0..4); - let new_ranges = splice_included_ranges(ranges.clone(), &[30..50], &[ts_range(25..55)]); + let (new_ranges, change) = + splice_included_ranges(ranges.clone(), &[30..50], &[ts_range(25..55)]); assert_eq!(new_ranges, &[ts_range(25..55), ts_range(80..90)]); + assert_eq!(change, 0..1); // does not create overlapping ranges - let new_ranges = splice_included_ranges(ranges.clone(), &[0..18], &[ts_range(20..32)]); + let (new_ranges, change) = + splice_included_ranges(ranges.clone(), &[0..18], &[ts_range(20..32)]); assert_eq!( new_ranges, &[ts_range(20..32), ts_range(50..60), ts_range(80..90)] ); + assert_eq!(change, 0..1); fn ts_range(range: Range) -> tree_sitter::Range { tree_sitter::Range { @@ -511,7 +518,7 @@ fn test_removing_injection_by_replacing_across_boundary() { } #[gpui::test] -fn test_combined_injections() { +fn test_combined_injections_simple() { let (buffer, syntax_map) = test_edit_sequence( "ERB", &[ @@ -653,33 +660,78 @@ fn test_combined_injections_editing_after_last_injection() { #[gpui::test] fn test_combined_injections_inside_injections() { - let (_buffer, _syntax_map) = test_edit_sequence( + let (buffer, syntax_map) = test_edit_sequence( "Markdown", &[ r#" - here is some ERB code: + here is + some + ERB code: ```erb
    <% people.each do |person| %>
  • <%= person.name %>
  • +
  • <%= person.age %>
  • <% end %>
``` "#, r#" - here is some ERB code: + here is + some + ERB code: ```erb
    <% people«2».each do |person| %>
  • <%= person.name %>
  • +
  • <%= person.age %>
  • + <% end %> +
+ ``` + "#, + // Inserting a comment character inside one code directive + // does not cause the other code directive to become a comment, + // because newlines are included in between each injection range. + r#" + here is + some + ERB code: + + ```erb +
    + <% people2.each do |person| %> +
  • <%= «# »person.name %>
  • +
  • <%= person.age %>
  • <% end %>
``` "#, ], ); + + // Check that the code directive below the ruby comment is + // not parsed as a comment. + assert_capture_ranges( + &syntax_map, + &buffer, + &["method"], + " + here is + some + ERB code: + + ```erb +
    + <% people2.«each» do |person| %> +
  • <%= # person.name %>
  • +
  • <%= person.«age» %>
  • + <% end %> +
+ ``` + ", + ); } #[gpui::test] @@ -984,11 +1036,14 @@ fn check_interpolation( fn test_edit_sequence(language_name: &str, steps: &[&str]) -> (Buffer, SyntaxMap) { let registry = Arc::new(LanguageRegistry::test()); + registry.add(Arc::new(elixir_lang())); + registry.add(Arc::new(heex_lang())); registry.add(Arc::new(rust_lang())); registry.add(Arc::new(ruby_lang())); registry.add(Arc::new(html_lang())); registry.add(Arc::new(erb_lang())); registry.add(Arc::new(markdown_lang())); + let language = registry .language_for_name(language_name) .now_or_never() @@ -1074,6 +1129,7 @@ fn ruby_lang() -> Language { r#" ["if" "do" "else" "end"] @keyword (instance_variable) @ivar + (call method: (identifier) @method) "#, ) .unwrap() @@ -1158,6 +1214,52 @@ fn markdown_lang() -> Language { .unwrap() } +fn elixir_lang() -> Language { + Language::new( + LanguageConfig { + name: "Elixir".into(), + path_suffixes: vec!["ex".into()], + ..Default::default() + }, + Some(tree_sitter_elixir::language()), + ) + .with_highlights_query( + r#" + + "#, + ) + .unwrap() +} + +fn heex_lang() -> Language { + Language::new( + LanguageConfig { + name: "HEEx".into(), + path_suffixes: vec!["heex".into()], + ..Default::default() + }, + Some(tree_sitter_heex::language()), + ) + .with_injection_query( + r#" + ( + (directive + [ + (partial_expression_value) + (expression_value) + (ending_expression_value) + ] @content) + (#set! language "elixir") + (#set! combined) + ) + + ((expression (expression_value) @content) + (#set! 
language "elixir")) + "#, + ) + .unwrap() +} + fn range_for_text(buffer: &Buffer, text: &str) -> Range { let start = buffer.as_rope().to_string().find(text).unwrap(); start..start + text.len() diff --git a/crates/zed/src/languages/heex/config.toml b/crates/zed/src/languages/heex/config.toml index b5d74f80f3..c9f952ee3c 100644 --- a/crates/zed/src/languages/heex/config.toml +++ b/crates/zed/src/languages/heex/config.toml @@ -4,4 +4,4 @@ autoclose_before = ">})" brackets = [ { start = "<", end = ">", close = true, newline = true }, ] -block_comment = ["<%!--", "--%>"] +block_comment = ["<%!-- ", " --%>"] diff --git a/crates/zed/src/languages/heex/highlights.scm b/crates/zed/src/languages/heex/highlights.scm index 8728110d58..5252b71fac 100644 --- a/crates/zed/src/languages/heex/highlights.scm +++ b/crates/zed/src/languages/heex/highlights.scm @@ -1,10 +1,7 @@ ; HEEx delimiters [ - "--%>" - "-->" "/>" "" + "--%>" + "-->" + "