collab: Add usage-based billing for LLM interactions (#19081)
This PR adds usage-based billing for LLM interactions in the Assistant. Release Notes: - N/A --------- Co-authored-by: Antonio Scandurra <me@as-cii.com> Co-authored-by: Antonio <antonio@zed.dev> Co-authored-by: Richard <richard@zed.dev> Co-authored-by: Richard Feldman <oss@rtfeldman.com>
This commit is contained in:
parent
f1c45d988e
commit
22ea7cef7a
20 changed files with 918 additions and 280 deletions
31
crates/collab/src/llm/db/queries/billing_events.rs
Normal file
31
crates/collab/src/llm/db/queries/billing_events.rs
Normal file
|
@ -0,0 +1,31 @@
|
|||
use super::*;
|
||||
use crate::Result;
|
||||
use anyhow::Context as _;
|
||||
|
||||
impl LlmDatabase {
|
||||
pub async fn get_billing_events(&self) -> Result<Vec<(billing_event::Model, model::Model)>> {
|
||||
self.transaction(|tx| async move {
|
||||
let events_with_models = billing_event::Entity::find()
|
||||
.find_also_related(model::Entity)
|
||||
.all(&*tx)
|
||||
.await?;
|
||||
events_with_models
|
||||
.into_iter()
|
||||
.map(|(event, model)| {
|
||||
let model =
|
||||
model.context("could not find model associated with billing event")?;
|
||||
Ok((event, model))
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn consume_billing_event(&self, id: BillingEventId) -> Result<()> {
|
||||
self.transaction(|tx| async move {
|
||||
billing_event::Entity::delete_by_id(id).exec(&*tx).await?;
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
}
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
use crate::db::UserId;
|
||||
use crate::llm::Cents;
|
||||
use crate::{db::UserId, llm::FREE_TIER_MONTHLY_SPENDING_LIMIT};
|
||||
use chrono::{Datelike, Duration};
|
||||
use futures::StreamExt as _;
|
||||
use rpc::LanguageModelProvider;
|
||||
|
@ -9,15 +9,26 @@ use strum::IntoEnumIterator as _;
|
|||
|
||||
use super::*;
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy, Default)]
|
||||
pub struct TokenUsage {
|
||||
pub input: usize,
|
||||
pub input_cache_creation: usize,
|
||||
pub input_cache_read: usize,
|
||||
pub output: usize,
|
||||
}
|
||||
|
||||
impl TokenUsage {
|
||||
pub fn total(&self) -> usize {
|
||||
self.input + self.input_cache_creation + self.input_cache_read + self.output
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy)]
|
||||
pub struct Usage {
|
||||
pub requests_this_minute: usize,
|
||||
pub tokens_this_minute: usize,
|
||||
pub tokens_this_day: usize,
|
||||
pub input_tokens_this_month: usize,
|
||||
pub cache_creation_input_tokens_this_month: usize,
|
||||
pub cache_read_input_tokens_this_month: usize,
|
||||
pub output_tokens_this_month: usize,
|
||||
pub tokens_this_month: TokenUsage,
|
||||
pub spending_this_month: Cents,
|
||||
pub lifetime_spending: Cents,
|
||||
}
|
||||
|
@ -257,18 +268,20 @@ impl LlmDatabase {
|
|||
requests_this_minute,
|
||||
tokens_this_minute,
|
||||
tokens_this_day,
|
||||
input_tokens_this_month: monthly_usage
|
||||
.as_ref()
|
||||
.map_or(0, |usage| usage.input_tokens as usize),
|
||||
cache_creation_input_tokens_this_month: monthly_usage
|
||||
.as_ref()
|
||||
.map_or(0, |usage| usage.cache_creation_input_tokens as usize),
|
||||
cache_read_input_tokens_this_month: monthly_usage
|
||||
.as_ref()
|
||||
.map_or(0, |usage| usage.cache_read_input_tokens as usize),
|
||||
output_tokens_this_month: monthly_usage
|
||||
.as_ref()
|
||||
.map_or(0, |usage| usage.output_tokens as usize),
|
||||
tokens_this_month: TokenUsage {
|
||||
input: monthly_usage
|
||||
.as_ref()
|
||||
.map_or(0, |usage| usage.input_tokens as usize),
|
||||
input_cache_creation: monthly_usage
|
||||
.as_ref()
|
||||
.map_or(0, |usage| usage.cache_creation_input_tokens as usize),
|
||||
input_cache_read: monthly_usage
|
||||
.as_ref()
|
||||
.map_or(0, |usage| usage.cache_read_input_tokens as usize),
|
||||
output: monthly_usage
|
||||
.as_ref()
|
||||
.map_or(0, |usage| usage.output_tokens as usize),
|
||||
},
|
||||
spending_this_month,
|
||||
lifetime_spending,
|
||||
})
|
||||
|
@ -283,10 +296,9 @@ impl LlmDatabase {
|
|||
is_staff: bool,
|
||||
provider: LanguageModelProvider,
|
||||
model_name: &str,
|
||||
input_token_count: usize,
|
||||
cache_creation_input_tokens: usize,
|
||||
cache_read_input_tokens: usize,
|
||||
output_token_count: usize,
|
||||
tokens: TokenUsage,
|
||||
has_llm_subscription: bool,
|
||||
max_monthly_spend: Cents,
|
||||
now: DateTimeUtc,
|
||||
) -> Result<Usage> {
|
||||
self.transaction(|tx| async move {
|
||||
|
@ -313,10 +325,6 @@ impl LlmDatabase {
|
|||
&tx,
|
||||
)
|
||||
.await?;
|
||||
let total_token_count = input_token_count
|
||||
+ cache_read_input_tokens
|
||||
+ cache_creation_input_tokens
|
||||
+ output_token_count;
|
||||
let tokens_this_minute = self
|
||||
.update_usage_for_measure(
|
||||
user_id,
|
||||
|
@ -325,7 +333,7 @@ impl LlmDatabase {
|
|||
&usages,
|
||||
UsageMeasure::TokensPerMinute,
|
||||
now,
|
||||
total_token_count,
|
||||
tokens.total(),
|
||||
&tx,
|
||||
)
|
||||
.await?;
|
||||
|
@ -337,7 +345,7 @@ impl LlmDatabase {
|
|||
&usages,
|
||||
UsageMeasure::TokensPerDay,
|
||||
now,
|
||||
total_token_count,
|
||||
tokens.total(),
|
||||
&tx,
|
||||
)
|
||||
.await?;
|
||||
|
@ -361,18 +369,14 @@ impl LlmDatabase {
|
|||
Some(usage) => {
|
||||
monthly_usage::Entity::update(monthly_usage::ActiveModel {
|
||||
id: ActiveValue::unchanged(usage.id),
|
||||
input_tokens: ActiveValue::set(
|
||||
usage.input_tokens + input_token_count as i64,
|
||||
),
|
||||
input_tokens: ActiveValue::set(usage.input_tokens + tokens.input as i64),
|
||||
cache_creation_input_tokens: ActiveValue::set(
|
||||
usage.cache_creation_input_tokens + cache_creation_input_tokens as i64,
|
||||
usage.cache_creation_input_tokens + tokens.input_cache_creation as i64,
|
||||
),
|
||||
cache_read_input_tokens: ActiveValue::set(
|
||||
usage.cache_read_input_tokens + cache_read_input_tokens as i64,
|
||||
),
|
||||
output_tokens: ActiveValue::set(
|
||||
usage.output_tokens + output_token_count as i64,
|
||||
usage.cache_read_input_tokens + tokens.input_cache_read as i64,
|
||||
),
|
||||
output_tokens: ActiveValue::set(usage.output_tokens + tokens.output as i64),
|
||||
..Default::default()
|
||||
})
|
||||
.exec(&*tx)
|
||||
|
@ -384,12 +388,12 @@ impl LlmDatabase {
|
|||
model_id: ActiveValue::set(model.id),
|
||||
month: ActiveValue::set(month),
|
||||
year: ActiveValue::set(year),
|
||||
input_tokens: ActiveValue::set(input_token_count as i64),
|
||||
input_tokens: ActiveValue::set(tokens.input as i64),
|
||||
cache_creation_input_tokens: ActiveValue::set(
|
||||
cache_creation_input_tokens as i64,
|
||||
tokens.input_cache_creation as i64,
|
||||
),
|
||||
cache_read_input_tokens: ActiveValue::set(cache_read_input_tokens as i64),
|
||||
output_tokens: ActiveValue::set(output_token_count as i64),
|
||||
cache_read_input_tokens: ActiveValue::set(tokens.input_cache_read as i64),
|
||||
output_tokens: ActiveValue::set(tokens.output as i64),
|
||||
..Default::default()
|
||||
}
|
||||
.insert(&*tx)
|
||||
|
@ -405,6 +409,26 @@ impl LlmDatabase {
|
|||
monthly_usage.output_tokens as usize,
|
||||
);
|
||||
|
||||
if spending_this_month > FREE_TIER_MONTHLY_SPENDING_LIMIT
|
||||
&& has_llm_subscription
|
||||
&& spending_this_month <= max_monthly_spend
|
||||
{
|
||||
billing_event::ActiveModel {
|
||||
id: ActiveValue::not_set(),
|
||||
idempotency_key: ActiveValue::not_set(),
|
||||
user_id: ActiveValue::set(user_id),
|
||||
model_id: ActiveValue::set(model.id),
|
||||
input_tokens: ActiveValue::set(tokens.input as i64),
|
||||
input_cache_creation_tokens: ActiveValue::set(
|
||||
tokens.input_cache_creation as i64,
|
||||
),
|
||||
input_cache_read_tokens: ActiveValue::set(tokens.input_cache_read as i64),
|
||||
output_tokens: ActiveValue::set(tokens.output as i64),
|
||||
}
|
||||
.insert(&*tx)
|
||||
.await?;
|
||||
}
|
||||
|
||||
// Update lifetime usage
|
||||
let lifetime_usage = lifetime_usage::Entity::find()
|
||||
.filter(
|
||||
|
@ -419,18 +443,14 @@ impl LlmDatabase {
|
|||
Some(usage) => {
|
||||
lifetime_usage::Entity::update(lifetime_usage::ActiveModel {
|
||||
id: ActiveValue::unchanged(usage.id),
|
||||
input_tokens: ActiveValue::set(
|
||||
usage.input_tokens + input_token_count as i64,
|
||||
),
|
||||
input_tokens: ActiveValue::set(usage.input_tokens + tokens.input as i64),
|
||||
cache_creation_input_tokens: ActiveValue::set(
|
||||
usage.cache_creation_input_tokens + cache_creation_input_tokens as i64,
|
||||
usage.cache_creation_input_tokens + tokens.input_cache_creation as i64,
|
||||
),
|
||||
cache_read_input_tokens: ActiveValue::set(
|
||||
usage.cache_read_input_tokens + cache_read_input_tokens as i64,
|
||||
),
|
||||
output_tokens: ActiveValue::set(
|
||||
usage.output_tokens + output_token_count as i64,
|
||||
usage.cache_read_input_tokens + tokens.input_cache_read as i64,
|
||||
),
|
||||
output_tokens: ActiveValue::set(usage.output_tokens + tokens.output as i64),
|
||||
..Default::default()
|
||||
})
|
||||
.exec(&*tx)
|
||||
|
@ -440,12 +460,12 @@ impl LlmDatabase {
|
|||
lifetime_usage::ActiveModel {
|
||||
user_id: ActiveValue::set(user_id),
|
||||
model_id: ActiveValue::set(model.id),
|
||||
input_tokens: ActiveValue::set(input_token_count as i64),
|
||||
input_tokens: ActiveValue::set(tokens.input as i64),
|
||||
cache_creation_input_tokens: ActiveValue::set(
|
||||
cache_creation_input_tokens as i64,
|
||||
tokens.input_cache_creation as i64,
|
||||
),
|
||||
cache_read_input_tokens: ActiveValue::set(cache_read_input_tokens as i64),
|
||||
output_tokens: ActiveValue::set(output_token_count as i64),
|
||||
cache_read_input_tokens: ActiveValue::set(tokens.input_cache_read as i64),
|
||||
output_tokens: ActiveValue::set(tokens.output as i64),
|
||||
..Default::default()
|
||||
}
|
||||
.insert(&*tx)
|
||||
|
@ -465,11 +485,12 @@ impl LlmDatabase {
|
|||
requests_this_minute,
|
||||
tokens_this_minute,
|
||||
tokens_this_day,
|
||||
input_tokens_this_month: monthly_usage.input_tokens as usize,
|
||||
cache_creation_input_tokens_this_month: monthly_usage.cache_creation_input_tokens
|
||||
as usize,
|
||||
cache_read_input_tokens_this_month: monthly_usage.cache_read_input_tokens as usize,
|
||||
output_tokens_this_month: monthly_usage.output_tokens as usize,
|
||||
tokens_this_month: TokenUsage {
|
||||
input: monthly_usage.input_tokens as usize,
|
||||
input_cache_creation: monthly_usage.cache_creation_input_tokens as usize,
|
||||
input_cache_read: monthly_usage.cache_read_input_tokens as usize,
|
||||
output: monthly_usage.output_tokens as usize,
|
||||
},
|
||||
spending_this_month,
|
||||
lifetime_spending,
|
||||
})
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue