collab: Add usage-based billing for LLM interactions (#19081)

This PR adds usage-based billing for LLM interactions in the Assistant. Release Notes: - N/A --------- Co-authored-by: Antonio Scandurra <me@as-cii.com> Co-authored-by: Antonio <antonio@zed.dev> Co-authored-by: Richard <richard@zed.dev> Co-authored-by: Richard Feldman <oss@rtfeldman.com>
2024-10-11 13:36:54 -04:00 · 2024-10-11 13:36:54 -04:00 · 22ea7cef7a
commit 22ea7cef7a
parent f1c45d988e
20 changed files with 918 additions and 280 deletions
--- a/crates/collab/src/llm/db/queries/billing_events.rs
+++ b/crates/collab/src/llm/db/queries/billing_events.rs
@ -0,0 +1,31 @@
+use super::*;
+use crate::Result;
+use anyhow::Context as _;
+
+impl LlmDatabase {
+    /// Loads every billing event together with the model row related to it.
+    ///
+    /// The query runs through `self.transaction`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the query fails, or if any billing event has no
+    /// related model row.
+    pub async fn get_billing_events(&self) -> Result<Vec<(billing_event::Model, model::Model)>> {
+        self.transaction(|tx| async move {
+            let rows = billing_event::Entity::find()
+                .find_also_related(model::Entity)
+                .all(&*tx)
+                .await?;
+
+            // Stop at the first event whose related model is missing.
+            let mut paired = Vec::with_capacity(rows.len());
+            for (event, related_model) in rows {
+                let related_model = related_model
+                    .context("could not find model associated with billing event")?;
+                paired.push((event, related_model));
+            }
+            Ok(paired)
+        })
+        .await
+    }
+
+    /// Deletes the billing event with the given `id`.
+    ///
+    /// The delete runs through `self.transaction`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if executing the delete statement fails.
+    pub async fn consume_billing_event(&self, id: BillingEventId) -> Result<()> {
+        self.transaction(|tx| async move {
+            let deletion = billing_event::Entity::delete_by_id(id);
+            deletion.exec(&*tx).await?;
+            Ok(())
+        })
+        .await
+    }
+}
--- a/crates/collab/src/llm/db/queries/usages.rs
+++ b/crates/collab/src/llm/db/queries/usages.rs
@ -1,5 +1,5 @@
-use crate::db::UserId;
 use crate::llm::Cents;
+use crate::{db::UserId, llm::FREE_TIER_MONTHLY_SPENDING_LIMIT};
 use chrono::{Datelike, Duration};
 use futures::StreamExt as _;
 use rpc::LanguageModelProvider;
@ -9,15 +9,26 @@ use strum::IntoEnumIterator as _;

 use super::*;

+/// Token counts split into four categories.
+///
+/// The same shape is used for the tokens of a single recorded request and for
+/// the monthly totals reported in `Usage`.
+#[derive(Debug, PartialEq, Clone, Copy, Default)]
+pub struct TokenUsage {
+    /// Count stored in the `input_tokens` column of the usage tables.
+    pub input: usize,
+    /// Count stored in the `cache_creation_input_tokens` column.
+    pub input_cache_creation: usize,
+    /// Count stored in the `cache_read_input_tokens` column.
+    pub input_cache_read: usize,
+    /// Count stored in the `output_tokens` column.
+    pub output: usize,
+}
+
+impl TokenUsage {
+    /// Returns the sum of all four token counts.
+    pub fn total(&self) -> usize {
+        let Self {
+            input,
+            input_cache_creation,
+            input_cache_read,
+            output,
+        } = *self;
+        [input, input_cache_creation, input_cache_read, output]
+            .iter()
+            .sum()
+    }
+}
+
+/// Usage figures returned by the usage queries in this file.
 #[derive(Debug, PartialEq, Clone, Copy)]
 pub struct Usage {
     pub requests_this_minute: usize,
     pub tokens_this_minute: usize,
     pub tokens_this_day: usize,
-    pub input_tokens_this_month: usize,
-    pub cache_creation_input_tokens_this_month: usize,
-    pub cache_read_input_tokens_this_month: usize,
-    pub output_tokens_this_month: usize,
+    /// Per-category token counts copied from the monthly usage row; this
+    /// replaces the four separate `*_tokens_this_month` fields.
+    pub tokens_this_month: TokenUsage,
     pub spending_this_month: Cents,
     pub lifetime_spending: Cents,
 }
@ -257,18 +268,20 @@ impl LlmDatabase {
                requests_this_minute,
                tokens_this_minute,
                tokens_this_day,
-                input_tokens_this_month: monthly_usage
-                    .as_ref()
-                    .map_or(0, |usage| usage.input_tokens as usize),
-                cache_creation_input_tokens_this_month: monthly_usage
-                    .as_ref()
-                    .map_or(0, |usage| usage.cache_creation_input_tokens as usize),
-                cache_read_input_tokens_this_month: monthly_usage
-                    .as_ref()
-                    .map_or(0, |usage| usage.cache_read_input_tokens as usize),
-                output_tokens_this_month: monthly_usage
-                    .as_ref()
-                    .map_or(0, |usage| usage.output_tokens as usize),
+                tokens_this_month: TokenUsage {
+                    input: monthly_usage
+                        .as_ref()
+                        .map_or(0, |usage| usage.input_tokens as usize),
+                    input_cache_creation: monthly_usage
+                        .as_ref()
+                        .map_or(0, |usage| usage.cache_creation_input_tokens as usize),
+                    input_cache_read: monthly_usage
+                        .as_ref()
+                        .map_or(0, |usage| usage.cache_read_input_tokens as usize),
+                    output: monthly_usage
+                        .as_ref()
+                        .map_or(0, |usage| usage.output_tokens as usize),
+                },
                spending_this_month,
                lifetime_spending,
            })
@ -283,10 +296,9 @@ impl LlmDatabase {
        is_staff: bool,
        provider: LanguageModelProvider,
        model_name: &str,
-        input_token_count: usize,
-        cache_creation_input_tokens: usize,
-        cache_read_input_tokens: usize,
-        output_token_count: usize,
+        tokens: TokenUsage,
+        has_llm_subscription: bool,
+        max_monthly_spend: Cents,
        now: DateTimeUtc,
    ) -> Result<Usage> {
        self.transaction(|tx| async move {
@ -313,10 +325,6 @@ impl LlmDatabase {
                    &tx,
                )
                .await?;
-            let total_token_count = input_token_count
-                + cache_read_input_tokens
-                + cache_creation_input_tokens
-                + output_token_count;
            let tokens_this_minute = self
                .update_usage_for_measure(
                    user_id,
@ -325,7 +333,7 @@ impl LlmDatabase {
                    &usages,
                    UsageMeasure::TokensPerMinute,
                    now,
-                    total_token_count,
+                    tokens.total(),
                    &tx,
                )
                .await?;
@ -337,7 +345,7 @@ impl LlmDatabase {
                    &usages,
                    UsageMeasure::TokensPerDay,
                    now,
-                    total_token_count,
+                    tokens.total(),
                    &tx,
                )
                .await?;
@ -361,18 +369,14 @@ impl LlmDatabase {
                Some(usage) => {
                    monthly_usage::Entity::update(monthly_usage::ActiveModel {
                        id: ActiveValue::unchanged(usage.id),
-                        input_tokens: ActiveValue::set(
-                            usage.input_tokens + input_token_count as i64,
-                        ),
+                        input_tokens: ActiveValue::set(usage.input_tokens + tokens.input as i64),
                        cache_creation_input_tokens: ActiveValue::set(
-                            usage.cache_creation_input_tokens + cache_creation_input_tokens as i64,
+                            usage.cache_creation_input_tokens + tokens.input_cache_creation as i64,
                        ),
                        cache_read_input_tokens: ActiveValue::set(
-                            usage.cache_read_input_tokens + cache_read_input_tokens as i64,
-                        ),
-                        output_tokens: ActiveValue::set(
-                            usage.output_tokens + output_token_count as i64,
+                            usage.cache_read_input_tokens + tokens.input_cache_read as i64,
                        ),
+                        output_tokens: ActiveValue::set(usage.output_tokens + tokens.output as i64),
                        ..Default::default()
                    })
                    .exec(&*tx)
@ -384,12 +388,12 @@ impl LlmDatabase {
                        model_id: ActiveValue::set(model.id),
                        month: ActiveValue::set(month),
                        year: ActiveValue::set(year),
-                        input_tokens: ActiveValue::set(input_token_count as i64),
+                        input_tokens: ActiveValue::set(tokens.input as i64),
                        cache_creation_input_tokens: ActiveValue::set(
-                            cache_creation_input_tokens as i64,
+                            tokens.input_cache_creation as i64,
                        ),
-                        cache_read_input_tokens: ActiveValue::set(cache_read_input_tokens as i64),
-                        output_tokens: ActiveValue::set(output_token_count as i64),
+                        cache_read_input_tokens: ActiveValue::set(tokens.input_cache_read as i64),
+                        output_tokens: ActiveValue::set(tokens.output as i64),
                        ..Default::default()
                    }
                    .insert(&*tx)
@ -405,6 +409,26 @@ impl LlmDatabase {
                monthly_usage.output_tokens as usize,
            );

+            if spending_this_month > FREE_TIER_MONTHLY_SPENDING_LIMIT
+                && has_llm_subscription
+                && spending_this_month <= max_monthly_spend
+            {
+                billing_event::ActiveModel {
+                    id: ActiveValue::not_set(),
+                    idempotency_key: ActiveValue::not_set(),
+                    user_id: ActiveValue::set(user_id),
+                    model_id: ActiveValue::set(model.id),
+                    input_tokens: ActiveValue::set(tokens.input as i64),
+                    input_cache_creation_tokens: ActiveValue::set(
+                        tokens.input_cache_creation as i64,
+                    ),
+                    input_cache_read_tokens: ActiveValue::set(tokens.input_cache_read as i64),
+                    output_tokens: ActiveValue::set(tokens.output as i64),
+                }
+                .insert(&*tx)
+                .await?;
+            }
+
            // Update lifetime usage
            let lifetime_usage = lifetime_usage::Entity::find()
                .filter(
@ -419,18 +443,14 @@ impl LlmDatabase {
                Some(usage) => {
                    lifetime_usage::Entity::update(lifetime_usage::ActiveModel {
                        id: ActiveValue::unchanged(usage.id),
-                        input_tokens: ActiveValue::set(
-                            usage.input_tokens + input_token_count as i64,
-                        ),
+                        input_tokens: ActiveValue::set(usage.input_tokens + tokens.input as i64),
                        cache_creation_input_tokens: ActiveValue::set(
-                            usage.cache_creation_input_tokens + cache_creation_input_tokens as i64,
+                            usage.cache_creation_input_tokens + tokens.input_cache_creation as i64,
                        ),
                        cache_read_input_tokens: ActiveValue::set(
-                            usage.cache_read_input_tokens + cache_read_input_tokens as i64,
-                        ),
-                        output_tokens: ActiveValue::set(
-                            usage.output_tokens + output_token_count as i64,
+                            usage.cache_read_input_tokens + tokens.input_cache_read as i64,
                        ),
+                        output_tokens: ActiveValue::set(usage.output_tokens + tokens.output as i64),
                        ..Default::default()
                    })
                    .exec(&*tx)
@ -440,12 +460,12 @@ impl LlmDatabase {
                    lifetime_usage::ActiveModel {
                        user_id: ActiveValue::set(user_id),
                        model_id: ActiveValue::set(model.id),
-                        input_tokens: ActiveValue::set(input_token_count as i64),
+                        input_tokens: ActiveValue::set(tokens.input as i64),
                        cache_creation_input_tokens: ActiveValue::set(
-                            cache_creation_input_tokens as i64,
+                            tokens.input_cache_creation as i64,
                        ),
-                        cache_read_input_tokens: ActiveValue::set(cache_read_input_tokens as i64),
-                        output_tokens: ActiveValue::set(output_token_count as i64),
+                        cache_read_input_tokens: ActiveValue::set(tokens.input_cache_read as i64),
+                        output_tokens: ActiveValue::set(tokens.output as i64),
                        ..Default::default()
                    }
                    .insert(&*tx)
@ -465,11 +485,12 @@ impl LlmDatabase {
                requests_this_minute,
                tokens_this_minute,
                tokens_this_day,
-                input_tokens_this_month: monthly_usage.input_tokens as usize,
-                cache_creation_input_tokens_this_month: monthly_usage.cache_creation_input_tokens
-                    as usize,
-                cache_read_input_tokens_this_month: monthly_usage.cache_read_input_tokens as usize,
-                output_tokens_this_month: monthly_usage.output_tokens as usize,
+                tokens_this_month: TokenUsage {
+                    input: monthly_usage.input_tokens as usize,
+                    input_cache_creation: monthly_usage.cache_creation_input_tokens as usize,
+                    input_cache_read: monthly_usage.cache_read_input_tokens as usize,
+                    output: monthly_usage.output_tokens as usize,
+                },
                spending_this_month,
                lifetime_spending,
            })