diff --git a/Cargo.lock b/Cargo.lock index 103b9c2c2a..0216bea496 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -91,13 +91,25 @@ name = "ai" version = "0.1.0" dependencies = [ "anyhow", - "ctor", + "async-trait", + "bincode", "futures 0.3.28", "gpui", "isahc", + "lazy_static", + "log", + "matrixmultiply", + "ordered-float", + "parking_lot 0.11.2", + "parse_duration", + "postage", + "rand 0.8.5", "regex", + "rusqlite", "serde", "serde_json", + "tiktoken-rs 0.5.4", + "util", ] [[package]] @@ -6725,9 +6737,9 @@ dependencies = [ name = "semantic_index" version = "0.1.0" dependencies = [ + "ai", "anyhow", "async-trait", - "bincode", "client", "collections", "ctor", @@ -6736,15 +6748,12 @@ dependencies = [ "futures 0.3.28", "globset", "gpui", - "isahc", "language", "lazy_static", "log", - "matrixmultiply", "node_runtime", "ordered-float", "parking_lot 0.11.2", - "parse_duration", "picker", "postage", "pretty_assertions", diff --git a/crates/ai/Cargo.toml b/crates/ai/Cargo.toml index c4e129c1f5..a2c70ce8c6 100644 --- a/crates/ai/Cargo.toml +++ b/crates/ai/Cargo.toml @@ -10,12 +10,25 @@ doctest = false [dependencies] gpui = { path = "../gpui" } +util = { path = "../util" } +async-trait.workspace = true anyhow.workspace = true futures.workspace = true +lazy_static.workspace = true +ordered-float.workspace = true +parking_lot.workspace = true isahc.workspace = true regex.workspace = true serde.workspace = true serde_json.workspace = true +postage.workspace = true +rand.workspace = true +log.workspace = true +parse_duration = "2.1.1" +tiktoken-rs = "0.5.0" +matrixmultiply = "0.3.7" +rusqlite = { version = "0.27.0", features = ["blob", "array", "modern_sqlite"] } +bincode = "1.3.3" [dev-dependencies] -ctor.workspace = true +gpui = { path = "../gpui", features = ["test-support"] } diff --git a/crates/ai/src/ai.rs b/crates/ai/src/ai.rs index c893d109ab..5256a6a643 100644 --- a/crates/ai/src/ai.rs +++ b/crates/ai/src/ai.rs @@ -1 +1,2 @@ pub mod completion; +pub mod embedding; diff --git a/crates/semantic_index/src/embedding.rs b/crates/ai/src/embedding.rs similarity index 92% rename from crates/semantic_index/src/embedding.rs rename to crates/ai/src/embedding.rs index 2b6e94854e..332470aa54 100644 --- a/crates/semantic_index/src/embedding.rs +++ b/crates/ai/src/embedding.rs @@ -27,8 +27,30 @@ lazy_static! { } #[derive(Debug, PartialEq, Clone)] -pub struct Embedding(Vec); +pub struct Embedding(pub Vec); +// This is needed for semantic index functionality +// Unfortunately it has to live wherever the "Embedding" struct is created. +// Keeping this in here though, introduces a 'rusqlite' dependency into AI +// which is less than ideal +impl FromSql for Embedding { + fn column_result(value: ValueRef) -> FromSqlResult { + let bytes = value.as_blob()?; + let embedding: Result, Box> = bincode::deserialize(bytes); + if embedding.is_err() { + return Err(rusqlite::types::FromSqlError::Other(embedding.unwrap_err())); + } + Ok(Embedding(embedding.unwrap())) + } +} + +impl ToSql for Embedding { + fn to_sql(&self) -> rusqlite::Result { + let bytes = bincode::serialize(&self.0) + .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?; + Ok(ToSqlOutput::Owned(rusqlite::types::Value::Blob(bytes))) + } +} impl From> for Embedding { fn from(value: Vec) -> Self { Embedding(value) @@ -63,24 +85,24 @@ impl Embedding { } } -impl FromSql for Embedding { - fn column_result(value: ValueRef) -> FromSqlResult { - let bytes = value.as_blob()?; - let embedding: Result, Box> = bincode::deserialize(bytes); - if embedding.is_err() { - return Err(rusqlite::types::FromSqlError::Other(embedding.unwrap_err())); - } - Ok(Embedding(embedding.unwrap())) - } -} +// impl FromSql for Embedding { +// fn column_result(value: ValueRef) -> FromSqlResult { +// let bytes = value.as_blob()?; +// let embedding: Result, Box> = bincode::deserialize(bytes); +// if embedding.is_err() { +// return Err(rusqlite::types::FromSqlError::Other(embedding.unwrap_err())); +// } +// Ok(Embedding(embedding.unwrap())) +// } +// } -impl ToSql for Embedding { - fn to_sql(&self) -> rusqlite::Result { - let bytes = bincode::serialize(&self.0) - .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?; - Ok(ToSqlOutput::Owned(rusqlite::types::Value::Blob(bytes))) - } -} +// impl ToSql for Embedding { +// fn to_sql(&self) -> rusqlite::Result { +// let bytes = bincode::serialize(&self.0) +// .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?; +// Ok(ToSqlOutput::Owned(rusqlite::types::Value::Blob(bytes))) +// } +// } #[derive(Clone)] pub struct OpenAIEmbeddings { diff --git a/crates/semantic_index/Cargo.toml b/crates/semantic_index/Cargo.toml index 44afecb0c2..e38ae1f06d 100644 --- a/crates/semantic_index/Cargo.toml +++ b/crates/semantic_index/Cargo.toml @@ -9,6 +9,7 @@ path = "src/semantic_index.rs" doctest = false [dependencies] +ai = { path = "../ai" } collections = { path = "../collections" } gpui = { path = "../gpui" } language = { path = "../language" } @@ -26,22 +27,18 @@ futures.workspace = true ordered-float.workspace = true smol.workspace = true rusqlite = { version = "0.27.0", features = ["blob", "array", "modern_sqlite"] } -isahc.workspace = true log.workspace = true tree-sitter.workspace = true lazy_static.workspace = true serde.workspace = true serde_json.workspace = true async-trait.workspace = true -bincode = "1.3.3" -matrixmultiply = "0.3.7" tiktoken-rs = "0.5.0" parking_lot.workspace = true rand.workspace = true schemars.workspace = true globset.workspace = true sha1 = "0.10.5" -parse_duration = "2.1.1" [dev-dependencies] collections = { path = "../collections", features = ["test-support"] } diff --git a/crates/semantic_index/examples/eval.rs b/crates/semantic_index/examples/eval.rs index 15406cf63e..a0cdbeea05 100644 --- a/crates/semantic_index/examples/eval.rs +++ b/crates/semantic_index/examples/eval.rs @@ -1,10 +1,10 @@ +use ai::embedding::OpenAIEmbeddings; use anyhow::{anyhow, Result}; use client::{self, UserStore}; use gpui::{AsyncAppContext, ModelHandle, Task}; use language::LanguageRegistry; use node_runtime::RealNodeRuntime; use project::{Project, RealFs}; -use semantic_index::embedding::OpenAIEmbeddings; use semantic_index::semantic_index_settings::SemanticIndexSettings; use semantic_index::{SearchResult, SemanticIndex}; use serde::{Deserialize, Serialize}; diff --git a/crates/semantic_index/src/db.rs b/crates/semantic_index/src/db.rs index 3e35284027..8280dc7d65 100644 --- a/crates/semantic_index/src/db.rs +++ b/crates/semantic_index/src/db.rs @@ -1,8 +1,8 @@ use crate::{ - embedding::Embedding, parsing::{Span, SpanDigest}, SEMANTIC_INDEX_VERSION, }; +use ai::embedding::Embedding; use anyhow::{anyhow, Context, Result}; use collections::HashMap; use futures::channel::oneshot; diff --git a/crates/semantic_index/src/embedding_queue.rs b/crates/semantic_index/src/embedding_queue.rs index 3026eef9ae..6ae8faa4cd 100644 --- a/crates/semantic_index/src/embedding_queue.rs +++ b/crates/semantic_index/src/embedding_queue.rs @@ -1,4 +1,5 @@ -use crate::{embedding::EmbeddingProvider, parsing::Span, JobHandle}; +use crate::{parsing::Span, JobHandle}; +use ai::embedding::EmbeddingProvider; use gpui::executor::Background; use parking_lot::Mutex; use smol::channel; diff --git a/crates/semantic_index/src/parsing.rs b/crates/semantic_index/src/parsing.rs index 9f5a339b23..498ad6187e 100644 --- a/crates/semantic_index/src/parsing.rs +++ b/crates/semantic_index/src/parsing.rs @@ -1,4 +1,4 @@ -use crate::embedding::{Embedding, EmbeddingProvider}; +use ai::embedding::{Embedding, EmbeddingProvider}; use anyhow::{anyhow, Result}; use language::{Grammar, Language}; use rusqlite::{ diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs index 1ba0001cfd..3bb6e17fb5 100644 --- a/crates/semantic_index/src/semantic_index.rs +++ b/crates/semantic_index/src/semantic_index.rs @@ -1,5 +1,5 @@ mod db; -pub mod embedding; +// pub mod embedding; mod embedding_queue; mod parsing; pub mod semantic_index_settings; @@ -11,7 +11,7 @@ use crate::semantic_index_settings::SemanticIndexSettings; use anyhow::{anyhow, Result}; use collections::{BTreeMap, HashMap, HashSet}; use db::VectorDatabase; -use embedding::{Embedding, EmbeddingProvider, OpenAIEmbeddings}; +use ai::embedding::{Embedding, EmbeddingProvider, OpenAIEmbeddings}; use embedding_queue::{EmbeddingQueue, FileToEmbed}; use futures::{future, FutureExt, StreamExt}; use gpui::{AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, WeakModelHandle}; diff --git a/crates/semantic_index/src/semantic_index_tests.rs b/crates/semantic_index/src/semantic_index_tests.rs index f386665915..f2cae8a557 100644 --- a/crates/semantic_index/src/semantic_index_tests.rs +++ b/crates/semantic_index/src/semantic_index_tests.rs @@ -1,10 +1,10 @@ use crate::{ - embedding::{DummyEmbeddings, Embedding, EmbeddingProvider}, embedding_queue::EmbeddingQueue, parsing::{subtract_ranges, CodeContextRetriever, Span, SpanDigest}, semantic_index_settings::SemanticIndexSettings, FileToEmbed, JobHandle, SearchResult, SemanticIndex, EMBEDDING_QUEUE_FLUSH_TIMEOUT, }; +use ai::embedding::{DummyEmbeddings, Embedding, EmbeddingProvider}; use anyhow::Result; use async_trait::async_trait; use gpui::{executor::Deterministic, Task, TestAppContext};