collab: Add usage-based billing for LLM interactions (#19081)

This PR adds usage-based billing for LLM interactions in the Assistant.

Release Notes:

- N/A

---------

Co-authored-by: Antonio Scandurra <me@as-cii.com>
Co-authored-by: Antonio <antonio@zed.dev>
Co-authored-by: Richard <richard@zed.dev>
Co-authored-by: Richard Feldman <oss@rtfeldman.com>
2024-10-11 13:36:54 -04:00 · 2024-10-11 13:36:54 -04:00 · 22ea7cef7a
commit 22ea7cef7a
parent f1c45d988e
20 changed files with 918 additions and 280 deletions
--- a/crates/collab/src/llm/db.rs
+++ b/crates/collab/src/llm/db.rs
@ -20,7 +20,7 @@ use std::future::Future;
 use std::sync::Arc;

 use anyhow::anyhow;
-pub use queries::usages::ActiveUserCount;
+pub use queries::usages::{ActiveUserCount, TokenUsage};
 use sea_orm::prelude::*;
 pub use sea_orm::ConnectOptions;
 use sea_orm::{
--- a/crates/collab/src/llm/db/ids.rs
+++ b/crates/collab/src/llm/db/ids.rs
@ -8,3 +8,4 @@ id_type!(ProviderId);
 id_type!(UsageId);
 id_type!(UsageMeasureId);
 id_type!(RevokedAccessTokenId);
+id_type!(BillingEventId);
--- a/crates/collab/src/llm/db/queries.rs
+++ b/crates/collab/src/llm/db/queries.rs
@ -1,5 +1,6 @@
 use super::*;

+pub mod billing_events;
 pub mod providers;
 pub mod revoked_access_tokens;
 pub mod usages;
--- a/crates/collab/src/llm/db/queries/billing_events.rs
+++ b/crates/collab/src/llm/db/queries/billing_events.rs
@ -0,0 +1,31 @@
+use super::*;
+use crate::Result;
+use anyhow::Context as _;
+
+/// Queries over the `billing_events` table.
+impl LlmDatabase {
+    /// Loads every billing event together with its related `model` row.
+    ///
+    /// The query runs inside a single `self.transaction` call.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the query fails, or if any event comes back without
+    /// a related model.
+    pub async fn get_billing_events(&self) -> Result<Vec<(billing_event::Model, model::Model)>> {
+        self.transaction(|tx| async move {
+            let events_with_models = billing_event::Entity::find()
+                .find_also_related(model::Entity)
+                .all(&*tx)
+                .await?;
+            // `find_also_related` yields an optional model per event. A missing
+            // model fails the whole call instead of silently dropping the event.
+            events_with_models
+                .into_iter()
+                .map(|(event, model)| {
+                    let model =
+                        model.context("could not find model associated with billing event")?;
+                    Ok((event, model))
+                })
+                .collect()
+        })
+        .await
+    }
+
+    /// Deletes the billing event with the given `id`.
+    ///
+    /// The value returned by the delete is discarded, so the caller is not told
+    /// whether a row was actually removed.
+    // NOTE(review): presumably called once an event has been handed off for
+    // billing — confirm against the caller.
+    pub async fn consume_billing_event(&self, id: BillingEventId) -> Result<()> {
+        self.transaction(|tx| async move {
+            billing_event::Entity::delete_by_id(id).exec(&*tx).await?;
+            Ok(())
+        })
+        .await
+    }
+}
--- a/crates/collab/src/llm/db/queries/usages.rs
+++ b/crates/collab/src/llm/db/queries/usages.rs
@ -1,5 +1,5 @@
-use crate::db::UserId;
 use crate::llm::Cents;
+use crate::{db::UserId, llm::FREE_TIER_MONTHLY_SPENDING_LIMIT};
 use chrono::{Datelike, Duration};
 use futures::StreamExt as _;
 use rpc::LanguageModelProvider;
@ -9,15 +9,26 @@ use strum::IntoEnumIterator as _;

 use super::*;

+/// Token counts broken down by category.
+///
+/// Used both for the tokens of a single `record_usage` call and for the
+/// per-month totals reported in `Usage::tokens_this_month`.
+#[derive(Debug, PartialEq, Clone, Copy, Default)]
+pub struct TokenUsage {
+    /// Input tokens (stored in the `input_tokens` column).
+    pub input: usize,
+    /// Cache-creation input tokens (stored in `cache_creation_input_tokens`).
+    pub input_cache_creation: usize,
+    /// Cache-read input tokens (stored in `cache_read_input_tokens`).
+    pub input_cache_read: usize,
+    /// Output tokens (stored in the `output_tokens` column).
+    pub output: usize,
+}
+
+impl TokenUsage {
+    /// Returns the sum of all four token categories.
+    ///
+    /// `record_usage` feeds this value into the tokens-per-minute and
+    /// tokens-per-day measures.
+    pub fn total(&self) -> usize {
+        self.input + self.input_cache_creation + self.input_cache_read + self.output
+    }
+}
+
 #[derive(Debug, PartialEq, Clone, Copy)]
 pub struct Usage {
    pub requests_this_minute: usize,
    pub tokens_this_minute: usize,
    pub tokens_this_day: usize,
-    pub input_tokens_this_month: usize,
-    pub cache_creation_input_tokens_this_month: usize,
-    pub cache_read_input_tokens_this_month: usize,
-    pub output_tokens_this_month: usize,
+    pub tokens_this_month: TokenUsage,
    pub spending_this_month: Cents,
    pub lifetime_spending: Cents,
 }
@ -257,18 +268,20 @@ impl LlmDatabase {
                requests_this_minute,
                tokens_this_minute,
                tokens_this_day,
-                input_tokens_this_month: monthly_usage
-                    .as_ref()
-                    .map_or(0, |usage| usage.input_tokens as usize),
-                cache_creation_input_tokens_this_month: monthly_usage
-                    .as_ref()
-                    .map_or(0, |usage| usage.cache_creation_input_tokens as usize),
-                cache_read_input_tokens_this_month: monthly_usage
-                    .as_ref()
-                    .map_or(0, |usage| usage.cache_read_input_tokens as usize),
-                output_tokens_this_month: monthly_usage
-                    .as_ref()
-                    .map_or(0, |usage| usage.output_tokens as usize),
+                tokens_this_month: TokenUsage {
+                    input: monthly_usage
+                        .as_ref()
+                        .map_or(0, |usage| usage.input_tokens as usize),
+                    input_cache_creation: monthly_usage
+                        .as_ref()
+                        .map_or(0, |usage| usage.cache_creation_input_tokens as usize),
+                    input_cache_read: monthly_usage
+                        .as_ref()
+                        .map_or(0, |usage| usage.cache_read_input_tokens as usize),
+                    output: monthly_usage
+                        .as_ref()
+                        .map_or(0, |usage| usage.output_tokens as usize),
+                },
                spending_this_month,
                lifetime_spending,
            })
@ -283,10 +296,9 @@ impl LlmDatabase {
        is_staff: bool,
        provider: LanguageModelProvider,
        model_name: &str,
-        input_token_count: usize,
-        cache_creation_input_tokens: usize,
-        cache_read_input_tokens: usize,
-        output_token_count: usize,
+        tokens: TokenUsage,
+        has_llm_subscription: bool,
+        max_monthly_spend: Cents,
        now: DateTimeUtc,
    ) -> Result<Usage> {
        self.transaction(|tx| async move {
@ -313,10 +325,6 @@ impl LlmDatabase {
                    &tx,
                )
                .await?;
-            let total_token_count = input_token_count
-                + cache_read_input_tokens
-                + cache_creation_input_tokens
-                + output_token_count;
            let tokens_this_minute = self
                .update_usage_for_measure(
                    user_id,
@ -325,7 +333,7 @@ impl LlmDatabase {
                    &usages,
                    UsageMeasure::TokensPerMinute,
                    now,
-                    total_token_count,
+                    tokens.total(),
                    &tx,
                )
                .await?;
@ -337,7 +345,7 @@ impl LlmDatabase {
                    &usages,
                    UsageMeasure::TokensPerDay,
                    now,
-                    total_token_count,
+                    tokens.total(),
                    &tx,
                )
                .await?;
@ -361,18 +369,14 @@ impl LlmDatabase {
                Some(usage) => {
                    monthly_usage::Entity::update(monthly_usage::ActiveModel {
                        id: ActiveValue::unchanged(usage.id),
-                        input_tokens: ActiveValue::set(
-                            usage.input_tokens + input_token_count as i64,
-                        ),
+                        input_tokens: ActiveValue::set(usage.input_tokens + tokens.input as i64),
                        cache_creation_input_tokens: ActiveValue::set(
-                            usage.cache_creation_input_tokens + cache_creation_input_tokens as i64,
+                            usage.cache_creation_input_tokens + tokens.input_cache_creation as i64,
                        ),
                        cache_read_input_tokens: ActiveValue::set(
-                            usage.cache_read_input_tokens + cache_read_input_tokens as i64,
-                        ),
-                        output_tokens: ActiveValue::set(
-                            usage.output_tokens + output_token_count as i64,
+                            usage.cache_read_input_tokens + tokens.input_cache_read as i64,
                        ),
+                        output_tokens: ActiveValue::set(usage.output_tokens + tokens.output as i64),
                        ..Default::default()
                    })
                    .exec(&*tx)
@ -384,12 +388,12 @@ impl LlmDatabase {
                        model_id: ActiveValue::set(model.id),
                        month: ActiveValue::set(month),
                        year: ActiveValue::set(year),
-                        input_tokens: ActiveValue::set(input_token_count as i64),
+                        input_tokens: ActiveValue::set(tokens.input as i64),
                        cache_creation_input_tokens: ActiveValue::set(
-                            cache_creation_input_tokens as i64,
+                            tokens.input_cache_creation as i64,
                        ),
-                        cache_read_input_tokens: ActiveValue::set(cache_read_input_tokens as i64),
-                        output_tokens: ActiveValue::set(output_token_count as i64),
+                        cache_read_input_tokens: ActiveValue::set(tokens.input_cache_read as i64),
+                        output_tokens: ActiveValue::set(tokens.output as i64),
                        ..Default::default()
                    }
                    .insert(&*tx)
@ -405,6 +409,26 @@ impl LlmDatabase {
                monthly_usage.output_tokens as usize,
            );

+            if spending_this_month > FREE_TIER_MONTHLY_SPENDING_LIMIT
+                && has_llm_subscription
+                && spending_this_month <= max_monthly_spend
+            {
+                billing_event::ActiveModel {
+                    id: ActiveValue::not_set(),
+                    idempotency_key: ActiveValue::not_set(),
+                    user_id: ActiveValue::set(user_id),
+                    model_id: ActiveValue::set(model.id),
+                    input_tokens: ActiveValue::set(tokens.input as i64),
+                    input_cache_creation_tokens: ActiveValue::set(
+                        tokens.input_cache_creation as i64,
+                    ),
+                    input_cache_read_tokens: ActiveValue::set(tokens.input_cache_read as i64),
+                    output_tokens: ActiveValue::set(tokens.output as i64),
+                }
+                .insert(&*tx)
+                .await?;
+            }
+
            // Update lifetime usage
            let lifetime_usage = lifetime_usage::Entity::find()
                .filter(
@ -419,18 +443,14 @@ impl LlmDatabase {
                Some(usage) => {
                    lifetime_usage::Entity::update(lifetime_usage::ActiveModel {
                        id: ActiveValue::unchanged(usage.id),
-                        input_tokens: ActiveValue::set(
-                            usage.input_tokens + input_token_count as i64,
-                        ),
+                        input_tokens: ActiveValue::set(usage.input_tokens + tokens.input as i64),
                        cache_creation_input_tokens: ActiveValue::set(
-                            usage.cache_creation_input_tokens + cache_creation_input_tokens as i64,
+                            usage.cache_creation_input_tokens + tokens.input_cache_creation as i64,
                        ),
                        cache_read_input_tokens: ActiveValue::set(
-                            usage.cache_read_input_tokens + cache_read_input_tokens as i64,
-                        ),
-                        output_tokens: ActiveValue::set(
-                            usage.output_tokens + output_token_count as i64,
+                            usage.cache_read_input_tokens + tokens.input_cache_read as i64,
                        ),
+                        output_tokens: ActiveValue::set(usage.output_tokens + tokens.output as i64),
                        ..Default::default()
                    })
                    .exec(&*tx)
@ -440,12 +460,12 @@ impl LlmDatabase {
                    lifetime_usage::ActiveModel {
                        user_id: ActiveValue::set(user_id),
                        model_id: ActiveValue::set(model.id),
-                        input_tokens: ActiveValue::set(input_token_count as i64),
+                        input_tokens: ActiveValue::set(tokens.input as i64),
                        cache_creation_input_tokens: ActiveValue::set(
-                            cache_creation_input_tokens as i64,
+                            tokens.input_cache_creation as i64,
                        ),
-                        cache_read_input_tokens: ActiveValue::set(cache_read_input_tokens as i64),
-                        output_tokens: ActiveValue::set(output_token_count as i64),
+                        cache_read_input_tokens: ActiveValue::set(tokens.input_cache_read as i64),
+                        output_tokens: ActiveValue::set(tokens.output as i64),
                        ..Default::default()
                    }
                    .insert(&*tx)
@ -465,11 +485,12 @@ impl LlmDatabase {
                requests_this_minute,
                tokens_this_minute,
                tokens_this_day,
-                input_tokens_this_month: monthly_usage.input_tokens as usize,
-                cache_creation_input_tokens_this_month: monthly_usage.cache_creation_input_tokens
-                    as usize,
-                cache_read_input_tokens_this_month: monthly_usage.cache_read_input_tokens as usize,
-                output_tokens_this_month: monthly_usage.output_tokens as usize,
+                tokens_this_month: TokenUsage {
+                    input: monthly_usage.input_tokens as usize,
+                    input_cache_creation: monthly_usage.cache_creation_input_tokens as usize,
+                    input_cache_read: monthly_usage.cache_read_input_tokens as usize,
+                    output: monthly_usage.output_tokens as usize,
+                },
                spending_this_month,
                lifetime_spending,
            })
--- a/crates/collab/src/llm/db/tables.rs
+++ b/crates/collab/src/llm/db/tables.rs
@ -1,3 +1,4 @@
+pub mod billing_event;
 pub mod lifetime_usage;
 pub mod model;
 pub mod monthly_usage;
--- a/crates/collab/src/llm/db/tables/billing_event.rs
+++ b/crates/collab/src/llm/db/tables/billing_event.rs
@ -0,0 +1,37 @@
+use crate::{
+    db::UserId,
+    llm::db::{BillingEventId, ModelId},
+};
+use sea_orm::entity::prelude::*;
+
+/// A row of the `billing_events` table: the token counts of one recorded
+/// usage, attributed to a user and a model.
+#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]
+#[sea_orm(table_name = "billing_events")]
+pub struct Model {
+    /// Primary key of the event.
+    #[sea_orm(primary_key)]
+    pub id: BillingEventId,
+    // NOTE(review): `record_usage` inserts this column as `not_set`, so it is
+    // presumably filled in by a database default — confirm against the migration.
+    pub idempotency_key: Uuid,
+    /// User the usage is attributed to.
+    pub user_id: UserId,
+    /// Model the tokens were used with; joined to `model::Column::Id` by
+    /// `Relation::Model`.
+    pub model_id: ModelId,
+    /// Input tokens of the recorded usage.
+    pub input_tokens: i64,
+    /// Cache-creation input tokens of the recorded usage.
+    pub input_cache_creation_tokens: i64,
+    /// Cache-read input tokens of the recorded usage.
+    pub input_cache_read_tokens: i64,
+    /// Output tokens of the recorded usage.
+    pub output_tokens: i64,
+}
+
+/// Relations of `billing_events` to other tables.
+#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
+pub enum Relation {
+    /// Each billing event belongs to one model, joined on
+    /// `billing_events.model_id` = `model.id`.
+    #[sea_orm(
+        belongs_to = "super::model::Entity",
+        from = "Column::ModelId",
+        to = "super::model::Column::Id"
+    )]
+    Model,
+}
+
+/// Lets queries on billing events join the related model (as
+/// `find_also_related(model::Entity)` does in `get_billing_events`).
+impl Related<super::model::Entity> for Entity {
+    /// Returns the definition of the `Relation::Model` join.
+    fn to() -> RelationDef {
+        Relation::Model.def()
+    }
+}
+
+// No hooks are overridden; the trait's default behavior is used as-is.
+impl ActiveModelBehavior for ActiveModel {}
--- a/crates/collab/src/llm/db/tables/model.rs
+++ b/crates/collab/src/llm/db/tables/model.rs
@ -29,6 +29,8 @@ pub enum Relation {
    Provider,
    #[sea_orm(has_many = "super::usage::Entity")]
    Usages,
+    #[sea_orm(has_many = "super::billing_event::Entity")]
+    BillingEvents,
 }

 impl Related<super::provider::Entity> for Entity {
@ -43,4 +45,10 @@ impl Related<super::usage::Entity> for Entity {
    }
 }

+/// Relates a model to its billing events through `Relation::BillingEvents`
+/// (declared as `has_many` on this entity).
+impl Related<super::billing_event::Entity> for Entity {
+    /// Returns the definition of the `Relation::BillingEvents` join.
+    fn to() -> RelationDef {
+        Relation::BillingEvents.def()
+    }
+}
+
 impl ActiveModelBehavior for ActiveModel {}
--- a/crates/collab/src/llm/db/tests/usage_tests.rs
+++ b/crates/collab/src/llm/db/tests/usage_tests.rs
@ -2,7 +2,7 @@ use crate::{
    db::UserId,
    llm::db::{
        queries::{providers::ModelParams, usages::Usage},
-        LlmDatabase,
+        LlmDatabase, TokenUsage,
    },
    test_llm_db, Cents,
 };
@ -36,14 +36,42 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
    let user_id = UserId::from_proto(123);

    let now = t0;
-    db.record_usage(user_id, false, provider, model, 1000, 0, 0, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 1000,
+            input_cache_creation: 0,
+            input_cache_read: 0,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();

    let now = t0 + Duration::seconds(10);
-    db.record_usage(user_id, false, provider, model, 2000, 0, 0, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 2000,
+            input_cache_creation: 0,
+            input_cache_read: 0,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();

    let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
    assert_eq!(
@ -52,10 +80,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
            requests_this_minute: 2,
            tokens_this_minute: 3000,
            tokens_this_day: 3000,
-            input_tokens_this_month: 3000,
-            cache_creation_input_tokens_this_month: 0,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 3000,
+                input_cache_creation: 0,
+                input_cache_read: 0,
+                output: 0,
+            },
            spending_this_month: Cents::ZERO,
            lifetime_spending: Cents::ZERO,
        }
@ -69,19 +99,35 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
            requests_this_minute: 1,
            tokens_this_minute: 2000,
            tokens_this_day: 3000,
-            input_tokens_this_month: 3000,
-            cache_creation_input_tokens_this_month: 0,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 3000,
+                input_cache_creation: 0,
+                input_cache_read: 0,
+                output: 0,
+            },
            spending_this_month: Cents::ZERO,
            lifetime_spending: Cents::ZERO,
        }
    );

    let now = t0 + Duration::seconds(60);
-    db.record_usage(user_id, false, provider, model, 3000, 0, 0, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 3000,
+            input_cache_creation: 0,
+            input_cache_read: 0,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();

    let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
    assert_eq!(
@ -90,10 +136,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
            requests_this_minute: 2,
            tokens_this_minute: 5000,
            tokens_this_day: 6000,
-            input_tokens_this_month: 6000,
-            cache_creation_input_tokens_this_month: 0,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 6000,
+                input_cache_creation: 0,
+                input_cache_read: 0,
+                output: 0,
+            },
            spending_this_month: Cents::ZERO,
            lifetime_spending: Cents::ZERO,
        }
@ -108,18 +156,34 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
            requests_this_minute: 0,
            tokens_this_minute: 0,
            tokens_this_day: 5000,
-            input_tokens_this_month: 6000,
-            cache_creation_input_tokens_this_month: 0,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 6000,
+                input_cache_creation: 0,
+                input_cache_read: 0,
+                output: 0,
+            },
            spending_this_month: Cents::ZERO,
            lifetime_spending: Cents::ZERO,
        }
    );

-    db.record_usage(user_id, false, provider, model, 4000, 0, 0, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 4000,
+            input_cache_creation: 0,
+            input_cache_read: 0,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();

    let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
    assert_eq!(
@ -128,10 +192,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
            requests_this_minute: 1,
            tokens_this_minute: 4000,
            tokens_this_day: 9000,
-            input_tokens_this_month: 10000,
-            cache_creation_input_tokens_this_month: 0,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 10000,
+                input_cache_creation: 0,
+                input_cache_read: 0,
+                output: 0,
+            },
            spending_this_month: Cents::ZERO,
            lifetime_spending: Cents::ZERO,
        }
@ -143,9 +209,23 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
        .with_timezone(&Utc);

    // Test cache creation input tokens
-    db.record_usage(user_id, false, provider, model, 1000, 500, 0, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 1000,
+            input_cache_creation: 500,
+            input_cache_read: 0,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();

    let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
    assert_eq!(
@ -154,19 +234,35 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
            requests_this_minute: 1,
            tokens_this_minute: 1500,
            tokens_this_day: 1500,
-            input_tokens_this_month: 1000,
-            cache_creation_input_tokens_this_month: 500,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 1000,
+                input_cache_creation: 500,
+                input_cache_read: 0,
+                output: 0,
+            },
            spending_this_month: Cents::ZERO,
            lifetime_spending: Cents::ZERO,
        }
    );

    // Test cache read input tokens
-    db.record_usage(user_id, false, provider, model, 1000, 0, 300, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 1000,
+            input_cache_creation: 0,
+            input_cache_read: 300,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();

    let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
    assert_eq!(
@ -175,10 +271,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
            requests_this_minute: 2,
            tokens_this_minute: 2800,
            tokens_this_day: 2800,
-            input_tokens_this_month: 2000,
-            cache_creation_input_tokens_this_month: 500,
-            cache_read_input_tokens_this_month: 300,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 2000,
+                input_cache_creation: 500,
+                input_cache_read: 300,
+                output: 0,
+            },
            spending_this_month: Cents::ZERO,
            lifetime_spending: Cents::ZERO,
        }