collab: Setup database for LLM service (#15882)

This PR puts the initial infrastructure for the LLM service's database
in place.

The LLM service will be using a separate Postgres database, with its own
set of migrations.

Currently we only connect to the database in development, as we don't
yet have the database setup for the staging/production environments.

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2024-08-06 17:18:08 -04:00 committed by GitHub
parent a64906779b
commit 7f6d0919c9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 627 additions and 74 deletions

118
crates/collab/src/llm/db.rs Normal file
View file

@ -0,0 +1,118 @@
mod ids;
mod queries;
mod tables;
#[cfg(test)]
mod tests;
pub use ids::*;
pub use tables::*;
#[cfg(test)]
pub use tests::TestLlmDb;
use std::future::Future;
use std::sync::Arc;
use anyhow::anyhow;
use sea_orm::prelude::*;
pub use sea_orm::ConnectOptions;
use sea_orm::{
ActiveValue, DatabaseConnection, DatabaseTransaction, IsolationLevel, TransactionTrait,
};
use crate::db::TransactionHandle;
use crate::executor::Executor;
use crate::Result;
/// The database for the LLM service.
pub struct LlmDatabase {
options: ConnectOptions,
pool: DatabaseConnection,
#[allow(unused)]
executor: Executor,
#[cfg(test)]
runtime: Option<tokio::runtime::Runtime>,
}
impl LlmDatabase {
/// Connects to the database with the given options
pub async fn new(options: ConnectOptions, executor: Executor) -> Result<Self> {
sqlx::any::install_default_drivers();
Ok(Self {
options: options.clone(),
pool: sea_orm::Database::connect(options).await?,
executor,
#[cfg(test)]
runtime: None,
})
}
pub fn options(&self) -> &ConnectOptions {
&self.options
}
pub async fn transaction<F, Fut, T>(&self, f: F) -> Result<T>
where
F: Send + Fn(TransactionHandle) -> Fut,
Fut: Send + Future<Output = Result<T>>,
{
let body = async {
let (tx, result) = self.with_transaction(&f).await?;
match result {
Ok(result) => match tx.commit().await.map_err(Into::into) {
Ok(()) => return Ok(result),
Err(error) => {
return Err(error);
}
},
Err(error) => {
tx.rollback().await?;
return Err(error);
}
}
};
self.run(body).await
}
async fn with_transaction<F, Fut, T>(&self, f: &F) -> Result<(DatabaseTransaction, Result<T>)>
where
F: Send + Fn(TransactionHandle) -> Fut,
Fut: Send + Future<Output = Result<T>>,
{
let tx = self
.pool
.begin_with_config(Some(IsolationLevel::ReadCommitted), None)
.await?;
let mut tx = Arc::new(Some(tx));
let result = f(TransactionHandle(tx.clone())).await;
let Some(tx) = Arc::get_mut(&mut tx).and_then(|tx| tx.take()) else {
return Err(anyhow!(
"couldn't complete transaction because it's still in use"
))?;
};
Ok((tx, result))
}
async fn run<F, T>(&self, future: F) -> Result<T>
where
F: Future<Output = Result<T>>,
{
#[cfg(test)]
{
if let Executor::Deterministic(executor) = &self.executor {
executor.simulate_random_delay().await;
}
self.runtime.as_ref().unwrap().block_on(future)
}
#[cfg(not(test))]
{
future.await
}
}
}

View file

@ -0,0 +1,7 @@
use sea_orm::{entity::prelude::*, DbErr};
use serde::{Deserialize, Serialize};
use crate::id_type;
id_type!(ProviderId);
id_type!(ModelId);

View file

@ -0,0 +1,3 @@
use super::*;
pub mod providers;

View file

@ -0,0 +1,67 @@
use sea_orm::sea_query::OnConflict;
use sea_orm::QueryOrder;
use super::*;
impl LlmDatabase {
pub async fn initialize_providers(&self) -> Result<()> {
self.transaction(|tx| async move {
let providers_and_models = vec![
("anthropic", "claude-3-5-sonnet"),
("anthropic", "claude-3-opus"),
("anthropic", "claude-3-sonnet"),
("anthropic", "claude-3-haiku"),
];
for (provider_name, model_name) in providers_and_models {
let insert_provider = provider::Entity::insert(provider::ActiveModel {
name: ActiveValue::set(provider_name.to_owned()),
..Default::default()
})
.on_conflict(
OnConflict::columns([provider::Column::Name])
.update_column(provider::Column::Name)
.to_owned(),
);
let provider = if tx.support_returning() {
insert_provider.exec_with_returning(&*tx).await?
} else {
insert_provider.exec_without_returning(&*tx).await?;
provider::Entity::find()
.filter(provider::Column::Name.eq(provider_name))
.one(&*tx)
.await?
.ok_or_else(|| anyhow!("failed to insert provider"))?
};
model::Entity::insert(model::ActiveModel {
provider_id: ActiveValue::set(provider.id),
name: ActiveValue::set(model_name.to_owned()),
..Default::default()
})
.on_conflict(
OnConflict::columns([model::Column::ProviderId, model::Column::Name])
.update_column(model::Column::Name)
.to_owned(),
)
.exec_without_returning(&*tx)
.await?;
}
Ok(())
})
.await
}
/// Returns the list of LLM providers.
pub async fn list_providers(&self) -> Result<Vec<provider::Model>> {
self.transaction(|tx| async move {
Ok(provider::Entity::find()
.order_by_asc(provider::Column::Name)
.all(&*tx)
.await?)
})
.await
}
}

View file

@ -0,0 +1,2 @@
pub mod model;
pub mod provider;

View file

@ -0,0 +1,31 @@
use sea_orm::entity::prelude::*;
use crate::llm::db::{ModelId, ProviderId};
/// An LLM model.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]
#[sea_orm(table_name = "models")]
pub struct Model {
#[sea_orm(primary_key)]
pub id: ModelId,
pub provider_id: ProviderId,
pub name: String,
}
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
#[sea_orm(
belongs_to = "super::provider::Entity",
from = "Column::ProviderId",
to = "super::provider::Column::Id"
)]
Provider,
}
impl Related<super::provider::Entity> for Entity {
fn to() -> RelationDef {
Relation::Provider.def()
}
}
impl ActiveModelBehavior for ActiveModel {}

View file

@ -0,0 +1,26 @@
use sea_orm::entity::prelude::*;
use crate::llm::db::ProviderId;
/// An LLM provider.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]
#[sea_orm(table_name = "providers")]
pub struct Model {
#[sea_orm(primary_key)]
pub id: ProviderId,
pub name: String,
}
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
#[sea_orm(has_many = "super::model::Entity")]
Models,
}
impl Related<super::model::Entity> for Entity {
fn to() -> RelationDef {
Relation::Models.def()
}
}
impl ActiveModelBehavior for ActiveModel {}

View file

@ -0,0 +1,147 @@
mod provider_tests;
use gpui::BackgroundExecutor;
use parking_lot::Mutex;
use rand::prelude::*;
use sea_orm::ConnectionTrait;
use sqlx::migrate::MigrateDatabase;
use std::sync::Arc;
use std::time::Duration;
use crate::migrations::run_database_migrations;
use super::*;
pub struct TestLlmDb {
pub db: Option<Arc<LlmDatabase>>,
pub connection: Option<sqlx::AnyConnection>,
}
impl TestLlmDb {
pub fn sqlite(background: BackgroundExecutor) -> Self {
let url = "sqlite::memory:";
let runtime = tokio::runtime::Builder::new_current_thread()
.enable_io()
.enable_time()
.build()
.unwrap();
let mut db = runtime.block_on(async {
let mut options = ConnectOptions::new(url);
options.max_connections(5);
let db = LlmDatabase::new(options, Executor::Deterministic(background))
.await
.unwrap();
let sql = include_str!(concat!(
env!("CARGO_MANIFEST_DIR"),
"/migrations_llm.sqlite/20240806182921_test_schema.sql"
));
db.pool
.execute(sea_orm::Statement::from_string(
db.pool.get_database_backend(),
sql,
))
.await
.unwrap();
db
});
db.runtime = Some(runtime);
Self {
db: Some(Arc::new(db)),
connection: None,
}
}
pub fn postgres(background: BackgroundExecutor) -> Self {
static LOCK: Mutex<()> = Mutex::new(());
let _guard = LOCK.lock();
let mut rng = StdRng::from_entropy();
let url = format!(
"postgres://postgres@localhost/zed-llm-test-{}",
rng.gen::<u128>()
);
let runtime = tokio::runtime::Builder::new_current_thread()
.enable_io()
.enable_time()
.build()
.unwrap();
let mut db = runtime.block_on(async {
sqlx::Postgres::create_database(&url)
.await
.expect("failed to create test db");
let mut options = ConnectOptions::new(url);
options
.max_connections(5)
.idle_timeout(Duration::from_secs(0));
let db = LlmDatabase::new(options, Executor::Deterministic(background))
.await
.unwrap();
let migrations_path = concat!(env!("CARGO_MANIFEST_DIR"), "/migrations_llm");
run_database_migrations(db.options(), migrations_path, false)
.await
.unwrap();
db
});
db.runtime = Some(runtime);
Self {
db: Some(Arc::new(db)),
connection: None,
}
}
pub fn db(&self) -> &Arc<LlmDatabase> {
self.db.as_ref().unwrap()
}
}
#[macro_export]
macro_rules! test_both_llm_dbs {
($test_name:ident, $postgres_test_name:ident, $sqlite_test_name:ident) => {
#[cfg(target_os = "macos")]
#[gpui::test]
async fn $postgres_test_name(cx: &mut gpui::TestAppContext) {
let test_db = $crate::llm::db::TestLlmDb::postgres(cx.executor().clone());
$test_name(test_db.db()).await;
}
#[gpui::test]
async fn $sqlite_test_name(cx: &mut gpui::TestAppContext) {
let test_db = $crate::llm::db::TestLlmDb::sqlite(cx.executor().clone());
$test_name(test_db.db()).await;
}
};
}
impl Drop for TestLlmDb {
fn drop(&mut self) {
let db = self.db.take().unwrap();
if let sea_orm::DatabaseBackend::Postgres = db.pool.get_database_backend() {
db.runtime.as_ref().unwrap().block_on(async {
use util::ResultExt;
let query = "
SELECT pg_terminate_backend(pg_stat_activity.pid)
FROM pg_stat_activity
WHERE
pg_stat_activity.datname = current_database() AND
pid <> pg_backend_pid();
";
db.pool
.execute(sea_orm::Statement::from_string(
db.pool.get_database_backend(),
query,
))
.await
.log_err();
sqlx::Postgres::drop_database(db.options.get_url())
.await
.log_err();
})
}
}
}

View file

@ -0,0 +1,30 @@
use std::sync::Arc;
use pretty_assertions::assert_eq;
use crate::llm::db::LlmDatabase;
use crate::test_both_llm_dbs;
test_both_llm_dbs!(
test_initialize_providers,
test_initialize_providers_postgres,
test_initialize_providers_sqlite
);
async fn test_initialize_providers(db: &Arc<LlmDatabase>) {
let initial_providers = db.list_providers().await.unwrap();
assert_eq!(initial_providers, vec![]);
db.initialize_providers().await.unwrap();
// Do it twice, to make sure the operation is idempotent.
db.initialize_providers().await.unwrap();
let providers = db.list_providers().await.unwrap();
let provider_names = providers
.into_iter()
.map(|provider| provider.name)
.collect::<Vec<_>>();
assert_eq!(provider_names, vec!["anthropic".to_string()]);
}