collab: Setup database for LLM service (#15882)
This PR puts the initial infrastructure for the LLM service's database in place. The LLM service will be using a separate Postgres database, with its own set of migrations. Currently we only connect to the database in development, as we don't yet have the database setup for the staging/production environments. Release Notes: - N/A
This commit is contained in:
parent
a64906779b
commit
7f6d0919c9
25 changed files with 627 additions and 74 deletions
118
crates/collab/src/llm/db.rs
Normal file
118
crates/collab/src/llm/db.rs
Normal file
|
@ -0,0 +1,118 @@
|
|||
mod ids;
|
||||
mod queries;
|
||||
mod tables;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
pub use ids::*;
|
||||
pub use tables::*;
|
||||
|
||||
#[cfg(test)]
|
||||
pub use tests::TestLlmDb;
|
||||
|
||||
use std::future::Future;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::anyhow;
|
||||
use sea_orm::prelude::*;
|
||||
pub use sea_orm::ConnectOptions;
|
||||
use sea_orm::{
|
||||
ActiveValue, DatabaseConnection, DatabaseTransaction, IsolationLevel, TransactionTrait,
|
||||
};
|
||||
|
||||
use crate::db::TransactionHandle;
|
||||
use crate::executor::Executor;
|
||||
use crate::Result;
|
||||
|
||||
/// The database for the LLM service.
|
||||
pub struct LlmDatabase {
|
||||
options: ConnectOptions,
|
||||
pool: DatabaseConnection,
|
||||
#[allow(unused)]
|
||||
executor: Executor,
|
||||
#[cfg(test)]
|
||||
runtime: Option<tokio::runtime::Runtime>,
|
||||
}
|
||||
|
||||
impl LlmDatabase {
|
||||
/// Connects to the database with the given options
|
||||
pub async fn new(options: ConnectOptions, executor: Executor) -> Result<Self> {
|
||||
sqlx::any::install_default_drivers();
|
||||
Ok(Self {
|
||||
options: options.clone(),
|
||||
pool: sea_orm::Database::connect(options).await?,
|
||||
executor,
|
||||
#[cfg(test)]
|
||||
runtime: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn options(&self) -> &ConnectOptions {
|
||||
&self.options
|
||||
}
|
||||
|
||||
pub async fn transaction<F, Fut, T>(&self, f: F) -> Result<T>
|
||||
where
|
||||
F: Send + Fn(TransactionHandle) -> Fut,
|
||||
Fut: Send + Future<Output = Result<T>>,
|
||||
{
|
||||
let body = async {
|
||||
let (tx, result) = self.with_transaction(&f).await?;
|
||||
match result {
|
||||
Ok(result) => match tx.commit().await.map_err(Into::into) {
|
||||
Ok(()) => return Ok(result),
|
||||
Err(error) => {
|
||||
return Err(error);
|
||||
}
|
||||
},
|
||||
Err(error) => {
|
||||
tx.rollback().await?;
|
||||
return Err(error);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
self.run(body).await
|
||||
}
|
||||
|
||||
async fn with_transaction<F, Fut, T>(&self, f: &F) -> Result<(DatabaseTransaction, Result<T>)>
|
||||
where
|
||||
F: Send + Fn(TransactionHandle) -> Fut,
|
||||
Fut: Send + Future<Output = Result<T>>,
|
||||
{
|
||||
let tx = self
|
||||
.pool
|
||||
.begin_with_config(Some(IsolationLevel::ReadCommitted), None)
|
||||
.await?;
|
||||
|
||||
let mut tx = Arc::new(Some(tx));
|
||||
let result = f(TransactionHandle(tx.clone())).await;
|
||||
let Some(tx) = Arc::get_mut(&mut tx).and_then(|tx| tx.take()) else {
|
||||
return Err(anyhow!(
|
||||
"couldn't complete transaction because it's still in use"
|
||||
))?;
|
||||
};
|
||||
|
||||
Ok((tx, result))
|
||||
}
|
||||
|
||||
async fn run<F, T>(&self, future: F) -> Result<T>
|
||||
where
|
||||
F: Future<Output = Result<T>>,
|
||||
{
|
||||
#[cfg(test)]
|
||||
{
|
||||
if let Executor::Deterministic(executor) = &self.executor {
|
||||
executor.simulate_random_delay().await;
|
||||
}
|
||||
|
||||
self.runtime.as_ref().unwrap().block_on(future)
|
||||
}
|
||||
|
||||
#[cfg(not(test))]
|
||||
{
|
||||
future.await
|
||||
}
|
||||
}
|
||||
}
|
7
crates/collab/src/llm/db/ids.rs
Normal file
7
crates/collab/src/llm/db/ids.rs
Normal file
|
@ -0,0 +1,7 @@
|
|||
use sea_orm::{entity::prelude::*, DbErr};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::id_type;
|
||||
|
||||
id_type!(ProviderId);
|
||||
id_type!(ModelId);
|
3
crates/collab/src/llm/db/queries.rs
Normal file
3
crates/collab/src/llm/db/queries.rs
Normal file
|
@ -0,0 +1,3 @@
|
|||
use super::*;
|
||||
|
||||
pub mod providers;
|
67
crates/collab/src/llm/db/queries/providers.rs
Normal file
67
crates/collab/src/llm/db/queries/providers.rs
Normal file
|
@ -0,0 +1,67 @@
|
|||
use sea_orm::sea_query::OnConflict;
|
||||
use sea_orm::QueryOrder;
|
||||
|
||||
use super::*;
|
||||
|
||||
impl LlmDatabase {
|
||||
pub async fn initialize_providers(&self) -> Result<()> {
|
||||
self.transaction(|tx| async move {
|
||||
let providers_and_models = vec![
|
||||
("anthropic", "claude-3-5-sonnet"),
|
||||
("anthropic", "claude-3-opus"),
|
||||
("anthropic", "claude-3-sonnet"),
|
||||
("anthropic", "claude-3-haiku"),
|
||||
];
|
||||
|
||||
for (provider_name, model_name) in providers_and_models {
|
||||
let insert_provider = provider::Entity::insert(provider::ActiveModel {
|
||||
name: ActiveValue::set(provider_name.to_owned()),
|
||||
..Default::default()
|
||||
})
|
||||
.on_conflict(
|
||||
OnConflict::columns([provider::Column::Name])
|
||||
.update_column(provider::Column::Name)
|
||||
.to_owned(),
|
||||
);
|
||||
|
||||
let provider = if tx.support_returning() {
|
||||
insert_provider.exec_with_returning(&*tx).await?
|
||||
} else {
|
||||
insert_provider.exec_without_returning(&*tx).await?;
|
||||
provider::Entity::find()
|
||||
.filter(provider::Column::Name.eq(provider_name))
|
||||
.one(&*tx)
|
||||
.await?
|
||||
.ok_or_else(|| anyhow!("failed to insert provider"))?
|
||||
};
|
||||
|
||||
model::Entity::insert(model::ActiveModel {
|
||||
provider_id: ActiveValue::set(provider.id),
|
||||
name: ActiveValue::set(model_name.to_owned()),
|
||||
..Default::default()
|
||||
})
|
||||
.on_conflict(
|
||||
OnConflict::columns([model::Column::ProviderId, model::Column::Name])
|
||||
.update_column(model::Column::Name)
|
||||
.to_owned(),
|
||||
)
|
||||
.exec_without_returning(&*tx)
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
/// Returns the list of LLM providers.
|
||||
pub async fn list_providers(&self) -> Result<Vec<provider::Model>> {
|
||||
self.transaction(|tx| async move {
|
||||
Ok(provider::Entity::find()
|
||||
.order_by_asc(provider::Column::Name)
|
||||
.all(&*tx)
|
||||
.await?)
|
||||
})
|
||||
.await
|
||||
}
|
||||
}
|
2
crates/collab/src/llm/db/tables.rs
Normal file
2
crates/collab/src/llm/db/tables.rs
Normal file
|
@ -0,0 +1,2 @@
|
|||
pub mod model;
|
||||
pub mod provider;
|
31
crates/collab/src/llm/db/tables/model.rs
Normal file
31
crates/collab/src/llm/db/tables/model.rs
Normal file
|
@ -0,0 +1,31 @@
|
|||
use sea_orm::entity::prelude::*;
|
||||
|
||||
use crate::llm::db::{ModelId, ProviderId};
|
||||
|
||||
/// An LLM model.
|
||||
#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]
|
||||
#[sea_orm(table_name = "models")]
|
||||
pub struct Model {
|
||||
#[sea_orm(primary_key)]
|
||||
pub id: ModelId,
|
||||
pub provider_id: ProviderId,
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
|
||||
pub enum Relation {
|
||||
#[sea_orm(
|
||||
belongs_to = "super::provider::Entity",
|
||||
from = "Column::ProviderId",
|
||||
to = "super::provider::Column::Id"
|
||||
)]
|
||||
Provider,
|
||||
}
|
||||
|
||||
impl Related<super::provider::Entity> for Entity {
|
||||
fn to() -> RelationDef {
|
||||
Relation::Provider.def()
|
||||
}
|
||||
}
|
||||
|
||||
impl ActiveModelBehavior for ActiveModel {}
|
26
crates/collab/src/llm/db/tables/provider.rs
Normal file
26
crates/collab/src/llm/db/tables/provider.rs
Normal file
|
@ -0,0 +1,26 @@
|
|||
use sea_orm::entity::prelude::*;
|
||||
|
||||
use crate::llm::db::ProviderId;
|
||||
|
||||
/// An LLM provider.
|
||||
#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]
|
||||
#[sea_orm(table_name = "providers")]
|
||||
pub struct Model {
|
||||
#[sea_orm(primary_key)]
|
||||
pub id: ProviderId,
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
|
||||
pub enum Relation {
|
||||
#[sea_orm(has_many = "super::model::Entity")]
|
||||
Models,
|
||||
}
|
||||
|
||||
impl Related<super::model::Entity> for Entity {
|
||||
fn to() -> RelationDef {
|
||||
Relation::Models.def()
|
||||
}
|
||||
}
|
||||
|
||||
impl ActiveModelBehavior for ActiveModel {}
|
147
crates/collab/src/llm/db/tests.rs
Normal file
147
crates/collab/src/llm/db/tests.rs
Normal file
|
@ -0,0 +1,147 @@
|
|||
mod provider_tests;
|
||||
|
||||
use gpui::BackgroundExecutor;
|
||||
use parking_lot::Mutex;
|
||||
use rand::prelude::*;
|
||||
use sea_orm::ConnectionTrait;
|
||||
use sqlx::migrate::MigrateDatabase;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::migrations::run_database_migrations;
|
||||
|
||||
use super::*;
|
||||
|
||||
pub struct TestLlmDb {
|
||||
pub db: Option<Arc<LlmDatabase>>,
|
||||
pub connection: Option<sqlx::AnyConnection>,
|
||||
}
|
||||
|
||||
impl TestLlmDb {
|
||||
pub fn sqlite(background: BackgroundExecutor) -> Self {
|
||||
let url = "sqlite::memory:";
|
||||
let runtime = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_io()
|
||||
.enable_time()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let mut db = runtime.block_on(async {
|
||||
let mut options = ConnectOptions::new(url);
|
||||
options.max_connections(5);
|
||||
let db = LlmDatabase::new(options, Executor::Deterministic(background))
|
||||
.await
|
||||
.unwrap();
|
||||
let sql = include_str!(concat!(
|
||||
env!("CARGO_MANIFEST_DIR"),
|
||||
"/migrations_llm.sqlite/20240806182921_test_schema.sql"
|
||||
));
|
||||
db.pool
|
||||
.execute(sea_orm::Statement::from_string(
|
||||
db.pool.get_database_backend(),
|
||||
sql,
|
||||
))
|
||||
.await
|
||||
.unwrap();
|
||||
db
|
||||
});
|
||||
|
||||
db.runtime = Some(runtime);
|
||||
|
||||
Self {
|
||||
db: Some(Arc::new(db)),
|
||||
connection: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn postgres(background: BackgroundExecutor) -> Self {
|
||||
static LOCK: Mutex<()> = Mutex::new(());
|
||||
|
||||
let _guard = LOCK.lock();
|
||||
let mut rng = StdRng::from_entropy();
|
||||
let url = format!(
|
||||
"postgres://postgres@localhost/zed-llm-test-{}",
|
||||
rng.gen::<u128>()
|
||||
);
|
||||
let runtime = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_io()
|
||||
.enable_time()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let mut db = runtime.block_on(async {
|
||||
sqlx::Postgres::create_database(&url)
|
||||
.await
|
||||
.expect("failed to create test db");
|
||||
let mut options = ConnectOptions::new(url);
|
||||
options
|
||||
.max_connections(5)
|
||||
.idle_timeout(Duration::from_secs(0));
|
||||
let db = LlmDatabase::new(options, Executor::Deterministic(background))
|
||||
.await
|
||||
.unwrap();
|
||||
let migrations_path = concat!(env!("CARGO_MANIFEST_DIR"), "/migrations_llm");
|
||||
run_database_migrations(db.options(), migrations_path, false)
|
||||
.await
|
||||
.unwrap();
|
||||
db
|
||||
});
|
||||
|
||||
db.runtime = Some(runtime);
|
||||
|
||||
Self {
|
||||
db: Some(Arc::new(db)),
|
||||
connection: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn db(&self) -> &Arc<LlmDatabase> {
|
||||
self.db.as_ref().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! test_both_llm_dbs {
|
||||
($test_name:ident, $postgres_test_name:ident, $sqlite_test_name:ident) => {
|
||||
#[cfg(target_os = "macos")]
|
||||
#[gpui::test]
|
||||
async fn $postgres_test_name(cx: &mut gpui::TestAppContext) {
|
||||
let test_db = $crate::llm::db::TestLlmDb::postgres(cx.executor().clone());
|
||||
$test_name(test_db.db()).await;
|
||||
}
|
||||
|
||||
#[gpui::test]
|
||||
async fn $sqlite_test_name(cx: &mut gpui::TestAppContext) {
|
||||
let test_db = $crate::llm::db::TestLlmDb::sqlite(cx.executor().clone());
|
||||
$test_name(test_db.db()).await;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl Drop for TestLlmDb {
|
||||
fn drop(&mut self) {
|
||||
let db = self.db.take().unwrap();
|
||||
if let sea_orm::DatabaseBackend::Postgres = db.pool.get_database_backend() {
|
||||
db.runtime.as_ref().unwrap().block_on(async {
|
||||
use util::ResultExt;
|
||||
let query = "
|
||||
SELECT pg_terminate_backend(pg_stat_activity.pid)
|
||||
FROM pg_stat_activity
|
||||
WHERE
|
||||
pg_stat_activity.datname = current_database() AND
|
||||
pid <> pg_backend_pid();
|
||||
";
|
||||
db.pool
|
||||
.execute(sea_orm::Statement::from_string(
|
||||
db.pool.get_database_backend(),
|
||||
query,
|
||||
))
|
||||
.await
|
||||
.log_err();
|
||||
sqlx::Postgres::drop_database(db.options.get_url())
|
||||
.await
|
||||
.log_err();
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
30
crates/collab/src/llm/db/tests/provider_tests.rs
Normal file
30
crates/collab/src/llm/db/tests/provider_tests.rs
Normal file
|
@ -0,0 +1,30 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use crate::llm::db::LlmDatabase;
|
||||
use crate::test_both_llm_dbs;
|
||||
|
||||
test_both_llm_dbs!(
|
||||
test_initialize_providers,
|
||||
test_initialize_providers_postgres,
|
||||
test_initialize_providers_sqlite
|
||||
);
|
||||
|
||||
async fn test_initialize_providers(db: &Arc<LlmDatabase>) {
|
||||
let initial_providers = db.list_providers().await.unwrap();
|
||||
assert_eq!(initial_providers, vec![]);
|
||||
|
||||
db.initialize_providers().await.unwrap();
|
||||
|
||||
// Do it twice, to make sure the operation is idempotent.
|
||||
db.initialize_providers().await.unwrap();
|
||||
|
||||
let providers = db.list_providers().await.unwrap();
|
||||
|
||||
let provider_names = providers
|
||||
.into_iter()
|
||||
.map(|provider| provider.name)
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(provider_names, vec!["anthropic".to_string()]);
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue