collab: Add usage-based billing for LLM interactions (#19081)

This PR adds usage-based billing for LLM interactions in the Assistant.

Release Notes:

- N/A

---------

Co-authored-by: Antonio Scandurra <me@as-cii.com>
Co-authored-by: Antonio <antonio@zed.dev>
Co-authored-by: Richard <richard@zed.dev>
Co-authored-by: Richard Feldman <oss@rtfeldman.com>
This commit is contained in:
Marshall Bowers 2024-10-11 13:36:54 -04:00 committed by GitHub
parent f1c45d988e
commit 22ea7cef7a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 918 additions and 280 deletions

View file

@ -20,7 +20,7 @@ use std::future::Future;
use std::sync::Arc;
use anyhow::anyhow;
pub use queries::usages::ActiveUserCount;
pub use queries::usages::{ActiveUserCount, TokenUsage};
use sea_orm::prelude::*;
pub use sea_orm::ConnectOptions;
use sea_orm::{

View file

@ -8,3 +8,4 @@ id_type!(ProviderId);
id_type!(UsageId);
id_type!(UsageMeasureId);
id_type!(RevokedAccessTokenId);
id_type!(BillingEventId);

View file

@ -1,5 +1,6 @@
use super::*;
pub mod billing_events;
pub mod providers;
pub mod revoked_access_tokens;
pub mod usages;

View file

@ -0,0 +1,31 @@
use super::*;
use crate::Result;
use anyhow::Context as _;
impl LlmDatabase {
    /// Loads every stored billing event paired with the model it is related to.
    ///
    /// # Errors
    ///
    /// Fails if the query fails, or if any event comes back without a related
    /// model row.
    pub async fn get_billing_events(&self) -> Result<Vec<(billing_event::Model, model::Model)>> {
        self.transaction(|tx| async move {
            let rows = billing_event::Entity::find()
                .find_also_related(model::Entity)
                .all(&*tx)
                .await?;

            // The related model arrives as an `Option`; a missing one aborts the
            // whole call on the first offending row.
            let mut events = Vec::with_capacity(rows.len());
            for (event, related_model) in rows {
                let model = related_model
                    .context("could not find model associated with billing event")?;
                events.push((event, model));
            }
            Ok(events)
        })
        .await
    }

    /// Deletes the billing event with the given `id`.
    ///
    /// The delete result is discarded; only a failure of the delete statement
    /// itself is reported as an error.
    pub async fn consume_billing_event(&self, id: BillingEventId) -> Result<()> {
        self.transaction(|tx| async move {
            let deletion = billing_event::Entity::delete_by_id(id);
            deletion.exec(&*tx).await?;
            Ok(())
        })
        .await
    }
}

View file

@ -1,5 +1,5 @@
use crate::db::UserId;
use crate::llm::Cents;
use crate::{db::UserId, llm::FREE_TIER_MONTHLY_SPENDING_LIMIT};
use chrono::{Datelike, Duration};
use futures::StreamExt as _;
use rpc::LanguageModelProvider;
@ -9,15 +9,26 @@ use strum::IntoEnumIterator as _;
use super::*;
/// Token counts split into four categories: plain input, cache-creation input,
/// cache-read input, and output.
#[derive(Debug, PartialEq, Clone, Copy, Default)]
pub struct TokenUsage {
    /// Number of input tokens.
    pub input: usize,
    /// Number of input tokens counted as cache creation.
    pub input_cache_creation: usize,
    /// Number of input tokens counted as cache reads.
    pub input_cache_read: usize,
    /// Number of output tokens.
    pub output: usize,
}

impl TokenUsage {
    /// Returns the sum of all four counters.
    pub fn total(&self) -> usize {
        let Self {
            input,
            input_cache_creation,
            input_cache_read,
            output,
        } = *self;

        [input, input_cache_creation, input_cache_read, output]
            .iter()
            .sum()
    }
}
#[derive(Debug, PartialEq, Clone, Copy)]
pub struct Usage {
pub requests_this_minute: usize,
pub tokens_this_minute: usize,
pub tokens_this_day: usize,
pub input_tokens_this_month: usize,
pub cache_creation_input_tokens_this_month: usize,
pub cache_read_input_tokens_this_month: usize,
pub output_tokens_this_month: usize,
pub tokens_this_month: TokenUsage,
pub spending_this_month: Cents,
pub lifetime_spending: Cents,
}
@ -257,18 +268,20 @@ impl LlmDatabase {
requests_this_minute,
tokens_this_minute,
tokens_this_day,
input_tokens_this_month: monthly_usage
.as_ref()
.map_or(0, |usage| usage.input_tokens as usize),
cache_creation_input_tokens_this_month: monthly_usage
.as_ref()
.map_or(0, |usage| usage.cache_creation_input_tokens as usize),
cache_read_input_tokens_this_month: monthly_usage
.as_ref()
.map_or(0, |usage| usage.cache_read_input_tokens as usize),
output_tokens_this_month: monthly_usage
.as_ref()
.map_or(0, |usage| usage.output_tokens as usize),
tokens_this_month: TokenUsage {
input: monthly_usage
.as_ref()
.map_or(0, |usage| usage.input_tokens as usize),
input_cache_creation: monthly_usage
.as_ref()
.map_or(0, |usage| usage.cache_creation_input_tokens as usize),
input_cache_read: monthly_usage
.as_ref()
.map_or(0, |usage| usage.cache_read_input_tokens as usize),
output: monthly_usage
.as_ref()
.map_or(0, |usage| usage.output_tokens as usize),
},
spending_this_month,
lifetime_spending,
})
@ -283,10 +296,9 @@ impl LlmDatabase {
is_staff: bool,
provider: LanguageModelProvider,
model_name: &str,
input_token_count: usize,
cache_creation_input_tokens: usize,
cache_read_input_tokens: usize,
output_token_count: usize,
tokens: TokenUsage,
has_llm_subscription: bool,
max_monthly_spend: Cents,
now: DateTimeUtc,
) -> Result<Usage> {
self.transaction(|tx| async move {
@ -313,10 +325,6 @@ impl LlmDatabase {
&tx,
)
.await?;
let total_token_count = input_token_count
+ cache_read_input_tokens
+ cache_creation_input_tokens
+ output_token_count;
let tokens_this_minute = self
.update_usage_for_measure(
user_id,
@ -325,7 +333,7 @@ impl LlmDatabase {
&usages,
UsageMeasure::TokensPerMinute,
now,
total_token_count,
tokens.total(),
&tx,
)
.await?;
@ -337,7 +345,7 @@ impl LlmDatabase {
&usages,
UsageMeasure::TokensPerDay,
now,
total_token_count,
tokens.total(),
&tx,
)
.await?;
@ -361,18 +369,14 @@ impl LlmDatabase {
Some(usage) => {
monthly_usage::Entity::update(monthly_usage::ActiveModel {
id: ActiveValue::unchanged(usage.id),
input_tokens: ActiveValue::set(
usage.input_tokens + input_token_count as i64,
),
input_tokens: ActiveValue::set(usage.input_tokens + tokens.input as i64),
cache_creation_input_tokens: ActiveValue::set(
usage.cache_creation_input_tokens + cache_creation_input_tokens as i64,
usage.cache_creation_input_tokens + tokens.input_cache_creation as i64,
),
cache_read_input_tokens: ActiveValue::set(
usage.cache_read_input_tokens + cache_read_input_tokens as i64,
),
output_tokens: ActiveValue::set(
usage.output_tokens + output_token_count as i64,
usage.cache_read_input_tokens + tokens.input_cache_read as i64,
),
output_tokens: ActiveValue::set(usage.output_tokens + tokens.output as i64),
..Default::default()
})
.exec(&*tx)
@ -384,12 +388,12 @@ impl LlmDatabase {
model_id: ActiveValue::set(model.id),
month: ActiveValue::set(month),
year: ActiveValue::set(year),
input_tokens: ActiveValue::set(input_token_count as i64),
input_tokens: ActiveValue::set(tokens.input as i64),
cache_creation_input_tokens: ActiveValue::set(
cache_creation_input_tokens as i64,
tokens.input_cache_creation as i64,
),
cache_read_input_tokens: ActiveValue::set(cache_read_input_tokens as i64),
output_tokens: ActiveValue::set(output_token_count as i64),
cache_read_input_tokens: ActiveValue::set(tokens.input_cache_read as i64),
output_tokens: ActiveValue::set(tokens.output as i64),
..Default::default()
}
.insert(&*tx)
@ -405,6 +409,26 @@ impl LlmDatabase {
monthly_usage.output_tokens as usize,
);
if spending_this_month > FREE_TIER_MONTHLY_SPENDING_LIMIT
&& has_llm_subscription
&& spending_this_month <= max_monthly_spend
{
billing_event::ActiveModel {
id: ActiveValue::not_set(),
idempotency_key: ActiveValue::not_set(),
user_id: ActiveValue::set(user_id),
model_id: ActiveValue::set(model.id),
input_tokens: ActiveValue::set(tokens.input as i64),
input_cache_creation_tokens: ActiveValue::set(
tokens.input_cache_creation as i64,
),
input_cache_read_tokens: ActiveValue::set(tokens.input_cache_read as i64),
output_tokens: ActiveValue::set(tokens.output as i64),
}
.insert(&*tx)
.await?;
}
// Update lifetime usage
let lifetime_usage = lifetime_usage::Entity::find()
.filter(
@ -419,18 +443,14 @@ impl LlmDatabase {
Some(usage) => {
lifetime_usage::Entity::update(lifetime_usage::ActiveModel {
id: ActiveValue::unchanged(usage.id),
input_tokens: ActiveValue::set(
usage.input_tokens + input_token_count as i64,
),
input_tokens: ActiveValue::set(usage.input_tokens + tokens.input as i64),
cache_creation_input_tokens: ActiveValue::set(
usage.cache_creation_input_tokens + cache_creation_input_tokens as i64,
usage.cache_creation_input_tokens + tokens.input_cache_creation as i64,
),
cache_read_input_tokens: ActiveValue::set(
usage.cache_read_input_tokens + cache_read_input_tokens as i64,
),
output_tokens: ActiveValue::set(
usage.output_tokens + output_token_count as i64,
usage.cache_read_input_tokens + tokens.input_cache_read as i64,
),
output_tokens: ActiveValue::set(usage.output_tokens + tokens.output as i64),
..Default::default()
})
.exec(&*tx)
@ -440,12 +460,12 @@ impl LlmDatabase {
lifetime_usage::ActiveModel {
user_id: ActiveValue::set(user_id),
model_id: ActiveValue::set(model.id),
input_tokens: ActiveValue::set(input_token_count as i64),
input_tokens: ActiveValue::set(tokens.input as i64),
cache_creation_input_tokens: ActiveValue::set(
cache_creation_input_tokens as i64,
tokens.input_cache_creation as i64,
),
cache_read_input_tokens: ActiveValue::set(cache_read_input_tokens as i64),
output_tokens: ActiveValue::set(output_token_count as i64),
cache_read_input_tokens: ActiveValue::set(tokens.input_cache_read as i64),
output_tokens: ActiveValue::set(tokens.output as i64),
..Default::default()
}
.insert(&*tx)
@ -465,11 +485,12 @@ impl LlmDatabase {
requests_this_minute,
tokens_this_minute,
tokens_this_day,
input_tokens_this_month: monthly_usage.input_tokens as usize,
cache_creation_input_tokens_this_month: monthly_usage.cache_creation_input_tokens
as usize,
cache_read_input_tokens_this_month: monthly_usage.cache_read_input_tokens as usize,
output_tokens_this_month: monthly_usage.output_tokens as usize,
tokens_this_month: TokenUsage {
input: monthly_usage.input_tokens as usize,
input_cache_creation: monthly_usage.cache_creation_input_tokens as usize,
input_cache_read: monthly_usage.cache_read_input_tokens as usize,
output: monthly_usage.output_tokens as usize,
},
spending_this_month,
lifetime_spending,
})

View file

@ -1,3 +1,4 @@
pub mod billing_event;
pub mod lifetime_usage;
pub mod model;
pub mod monthly_usage;

View file

@ -0,0 +1,37 @@
use crate::{
db::UserId,
llm::db::{BillingEventId, ModelId},
};
use sea_orm::entity::prelude::*;
/// A row of the `billing_events` table.
///
/// Each row carries per-category token counts for one user and one model.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]
#[sea_orm(table_name = "billing_events")]
pub struct Model {
/// Primary key of the event.
#[sea_orm(primary_key)]
pub id: BillingEventId,
// NOTE(review): the usage-recording insert leaves this column unset, so it is
// presumably generated by the database — confirm against the migration.
pub idempotency_key: Uuid,
/// User the event is recorded for.
pub user_id: UserId,
/// Id of the related model row (see `Relation::Model`).
pub model_id: ModelId,
/// Input token count for this event.
pub input_tokens: i64,
/// Cache-creation input token count for this event.
pub input_cache_creation_tokens: i64,
/// Cache-read input token count for this event.
pub input_cache_read_tokens: i64,
/// Output token count for this event.
pub output_tokens: i64,
}
/// Relations from a billing event to other entities.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
/// Each billing event belongs to one model: `model_id` here points at the
/// model entity's `Id` column.
#[sea_orm(
belongs_to = "super::model::Entity",
from = "Column::ModelId",
to = "super::model::Column::Id"
)]
Model,
}
/// Declares the model entity as related to billing events through
/// `Relation::Model`.
impl Related<super::model::Entity> for Entity {
fn to() -> RelationDef {
Relation::Model.def()
}
}
// Empty impl: no `ActiveModelBehavior` methods are overridden for billing events.
impl ActiveModelBehavior for ActiveModel {}

View file

@ -29,6 +29,8 @@ pub enum Relation {
Provider,
#[sea_orm(has_many = "super::usage::Entity")]
Usages,
#[sea_orm(has_many = "super::billing_event::Entity")]
BillingEvents,
}
impl Related<super::provider::Entity> for Entity {
@ -43,4 +45,10 @@ impl Related<super::usage::Entity> for Entity {
}
}
/// Declares billing events as related to this entity through
/// `Relation::BillingEvents`.
impl Related<super::billing_event::Entity> for Entity {
fn to() -> RelationDef {
Relation::BillingEvents.def()
}
}
// Empty impl: no `ActiveModelBehavior` methods are overridden for this entity.
impl ActiveModelBehavior for ActiveModel {}

View file

@ -2,7 +2,7 @@ use crate::{
db::UserId,
llm::db::{
queries::{providers::ModelParams, usages::Usage},
LlmDatabase,
LlmDatabase, TokenUsage,
},
test_llm_db, Cents,
};
@ -36,14 +36,42 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
let user_id = UserId::from_proto(123);
let now = t0;
db.record_usage(user_id, false, provider, model, 1000, 0, 0, 0, now)
.await
.unwrap();
db.record_usage(
user_id,
false,
provider,
model,
TokenUsage {
input: 1000,
input_cache_creation: 0,
input_cache_read: 0,
output: 0,
},
false,
Cents::ZERO,
now,
)
.await
.unwrap();
let now = t0 + Duration::seconds(10);
db.record_usage(user_id, false, provider, model, 2000, 0, 0, 0, now)
.await
.unwrap();
db.record_usage(
user_id,
false,
provider,
model,
TokenUsage {
input: 2000,
input_cache_creation: 0,
input_cache_read: 0,
output: 0,
},
false,
Cents::ZERO,
now,
)
.await
.unwrap();
let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
assert_eq!(
@ -52,10 +80,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 2,
tokens_this_minute: 3000,
tokens_this_day: 3000,
input_tokens_this_month: 3000,
cache_creation_input_tokens_this_month: 0,
cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
tokens_this_month: TokenUsage {
input: 3000,
input_cache_creation: 0,
input_cache_read: 0,
output: 0,
},
spending_this_month: Cents::ZERO,
lifetime_spending: Cents::ZERO,
}
@ -69,19 +99,35 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 1,
tokens_this_minute: 2000,
tokens_this_day: 3000,
input_tokens_this_month: 3000,
cache_creation_input_tokens_this_month: 0,
cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
tokens_this_month: TokenUsage {
input: 3000,
input_cache_creation: 0,
input_cache_read: 0,
output: 0,
},
spending_this_month: Cents::ZERO,
lifetime_spending: Cents::ZERO,
}
);
let now = t0 + Duration::seconds(60);
db.record_usage(user_id, false, provider, model, 3000, 0, 0, 0, now)
.await
.unwrap();
db.record_usage(
user_id,
false,
provider,
model,
TokenUsage {
input: 3000,
input_cache_creation: 0,
input_cache_read: 0,
output: 0,
},
false,
Cents::ZERO,
now,
)
.await
.unwrap();
let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
assert_eq!(
@ -90,10 +136,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 2,
tokens_this_minute: 5000,
tokens_this_day: 6000,
input_tokens_this_month: 6000,
cache_creation_input_tokens_this_month: 0,
cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
tokens_this_month: TokenUsage {
input: 6000,
input_cache_creation: 0,
input_cache_read: 0,
output: 0,
},
spending_this_month: Cents::ZERO,
lifetime_spending: Cents::ZERO,
}
@ -108,18 +156,34 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 0,
tokens_this_minute: 0,
tokens_this_day: 5000,
input_tokens_this_month: 6000,
cache_creation_input_tokens_this_month: 0,
cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
tokens_this_month: TokenUsage {
input: 6000,
input_cache_creation: 0,
input_cache_read: 0,
output: 0,
},
spending_this_month: Cents::ZERO,
lifetime_spending: Cents::ZERO,
}
);
db.record_usage(user_id, false, provider, model, 4000, 0, 0, 0, now)
.await
.unwrap();
db.record_usage(
user_id,
false,
provider,
model,
TokenUsage {
input: 4000,
input_cache_creation: 0,
input_cache_read: 0,
output: 0,
},
false,
Cents::ZERO,
now,
)
.await
.unwrap();
let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
assert_eq!(
@ -128,10 +192,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 1,
tokens_this_minute: 4000,
tokens_this_day: 9000,
input_tokens_this_month: 10000,
cache_creation_input_tokens_this_month: 0,
cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
tokens_this_month: TokenUsage {
input: 10000,
input_cache_creation: 0,
input_cache_read: 0,
output: 0,
},
spending_this_month: Cents::ZERO,
lifetime_spending: Cents::ZERO,
}
@ -143,9 +209,23 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
.with_timezone(&Utc);
// Test cache creation input tokens
db.record_usage(user_id, false, provider, model, 1000, 500, 0, 0, now)
.await
.unwrap();
db.record_usage(
user_id,
false,
provider,
model,
TokenUsage {
input: 1000,
input_cache_creation: 500,
input_cache_read: 0,
output: 0,
},
false,
Cents::ZERO,
now,
)
.await
.unwrap();
let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
assert_eq!(
@ -154,19 +234,35 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 1,
tokens_this_minute: 1500,
tokens_this_day: 1500,
input_tokens_this_month: 1000,
cache_creation_input_tokens_this_month: 500,
cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
tokens_this_month: TokenUsage {
input: 1000,
input_cache_creation: 500,
input_cache_read: 0,
output: 0,
},
spending_this_month: Cents::ZERO,
lifetime_spending: Cents::ZERO,
}
);
// Test cache read input tokens
db.record_usage(user_id, false, provider, model, 1000, 0, 300, 0, now)
.await
.unwrap();
db.record_usage(
user_id,
false,
provider,
model,
TokenUsage {
input: 1000,
input_cache_creation: 0,
input_cache_read: 300,
output: 0,
},
false,
Cents::ZERO,
now,
)
.await
.unwrap();
let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
assert_eq!(
@ -175,10 +271,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 2,
tokens_this_minute: 2800,
tokens_this_day: 2800,
input_tokens_this_month: 2000,
cache_creation_input_tokens_this_month: 500,
cache_read_input_tokens_this_month: 300,
output_tokens_this_month: 0,
tokens_this_month: TokenUsage {
input: 2000,
input_cache_creation: 500,
input_cache_read: 300,
output: 0,
},
spending_this_month: Cents::ZERO,
lifetime_spending: Cents::ZERO,
}