collab: Add usage-based billing for LLM interactions (#19081)

This PR adds usage-based billing for LLM interactions in the Assistant.
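
The test diff below shows the shape of the change: `db.record_usage` now takes a grouped `TokenUsage` value instead of four positional token counts, and the `Usage` snapshot returned by `get_usage` carries a nested `tokens_this_month: TokenUsage` in place of the four flat per-month fields. A minimal sketch of the types this implies follows; the field names come straight from the test, while the derives, integer types, and the placeholder `Cents` definition are assumptions for illustration only:

```rust
// Sketch only: field names taken from the test diff; types and derives assumed.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct TokenUsage {
    pub input: usize,
    pub input_cache_creation: usize,
    pub input_cache_read: usize,
    pub output: usize,
}

impl TokenUsage {
    // The tests sum all four counts into `tokens_this_minute` / `tokens_this_day`
    // (e.g. input 1000 + cache creation 500 => 1500).
    pub fn total(&self) -> usize {
        self.input + self.input_cache_creation + self.input_cache_read + self.output
    }
}

// Placeholder standing in for the existing `Cents` type the tests reference.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct Cents(pub u64);

impl Cents {
    pub const ZERO: Self = Self(0);
}

// Sketch only: the per-user usage snapshot asserted against `get_usage` in the tests.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct Usage {
    pub requests_this_minute: usize,
    pub tokens_this_minute: usize,
    pub tokens_this_day: usize,
    pub tokens_this_month: TokenUsage,
    pub spending_this_month: Cents,
    pub lifetime_spending: Cents,
}
```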

Release Notes:

- N/A

---------

Co-authored-by: Antonio Scandurra <me@as-cii.com>
Co-authored-by: Antonio <antonio@zed.dev>
Co-authored-by: Richard <richard@zed.dev>
Co-authored-by: Richard Feldman <oss@rtfeldman.com>
Commit 22ea7cef7a (parent f1c45d988e)
Marshall Bowers, 2024-10-11 13:36:54 -04:00, committed by GitHub
20 changed files with 918 additions and 280 deletions

@@ -2,7 +2,7 @@ use crate::{
     db::UserId,
     llm::db::{
         queries::{providers::ModelParams, usages::Usage},
-        LlmDatabase,
+        LlmDatabase, TokenUsage,
     },
     test_llm_db, Cents,
 };
@@ -36,14 +36,42 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
     let user_id = UserId::from_proto(123);
     let now = t0;
-    db.record_usage(user_id, false, provider, model, 1000, 0, 0, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 1000,
+            input_cache_creation: 0,
+            input_cache_read: 0,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();
     let now = t0 + Duration::seconds(10);
-    db.record_usage(user_id, false, provider, model, 2000, 0, 0, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 2000,
+            input_cache_creation: 0,
+            input_cache_read: 0,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();
     let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
     assert_eq!(
@@ -52,10 +80,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 2,
             tokens_this_minute: 3000,
             tokens_this_day: 3000,
-            input_tokens_this_month: 3000,
-            cache_creation_input_tokens_this_month: 0,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 3000,
+                input_cache_creation: 0,
+                input_cache_read: 0,
+                output: 0,
+            },
             spending_this_month: Cents::ZERO,
             lifetime_spending: Cents::ZERO,
         }
@@ -69,19 +99,35 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 1,
             tokens_this_minute: 2000,
             tokens_this_day: 3000,
-            input_tokens_this_month: 3000,
-            cache_creation_input_tokens_this_month: 0,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 3000,
+                input_cache_creation: 0,
+                input_cache_read: 0,
+                output: 0,
+            },
             spending_this_month: Cents::ZERO,
             lifetime_spending: Cents::ZERO,
         }
     );
     let now = t0 + Duration::seconds(60);
-    db.record_usage(user_id, false, provider, model, 3000, 0, 0, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 3000,
+            input_cache_creation: 0,
+            input_cache_read: 0,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();
     let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
     assert_eq!(
@@ -90,10 +136,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 2,
             tokens_this_minute: 5000,
             tokens_this_day: 6000,
-            input_tokens_this_month: 6000,
-            cache_creation_input_tokens_this_month: 0,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 6000,
+                input_cache_creation: 0,
+                input_cache_read: 0,
+                output: 0,
+            },
             spending_this_month: Cents::ZERO,
             lifetime_spending: Cents::ZERO,
         }
@@ -108,18 +156,34 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 0,
             tokens_this_minute: 0,
             tokens_this_day: 5000,
-            input_tokens_this_month: 6000,
-            cache_creation_input_tokens_this_month: 0,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 6000,
+                input_cache_creation: 0,
+                input_cache_read: 0,
+                output: 0,
+            },
             spending_this_month: Cents::ZERO,
             lifetime_spending: Cents::ZERO,
         }
     );
-    db.record_usage(user_id, false, provider, model, 4000, 0, 0, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 4000,
+            input_cache_creation: 0,
+            input_cache_read: 0,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();
     let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
     assert_eq!(
@@ -128,10 +192,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 1,
             tokens_this_minute: 4000,
             tokens_this_day: 9000,
-            input_tokens_this_month: 10000,
-            cache_creation_input_tokens_this_month: 0,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 10000,
+                input_cache_creation: 0,
+                input_cache_read: 0,
+                output: 0,
+            },
             spending_this_month: Cents::ZERO,
             lifetime_spending: Cents::ZERO,
         }
@@ -143,9 +209,23 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
         .with_timezone(&Utc);
     // Test cache creation input tokens
-    db.record_usage(user_id, false, provider, model, 1000, 500, 0, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 1000,
+            input_cache_creation: 500,
+            input_cache_read: 0,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();
     let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
     assert_eq!(
@@ -154,19 +234,35 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 1,
             tokens_this_minute: 1500,
             tokens_this_day: 1500,
-            input_tokens_this_month: 1000,
-            cache_creation_input_tokens_this_month: 500,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 1000,
+                input_cache_creation: 500,
+                input_cache_read: 0,
+                output: 0,
+            },
             spending_this_month: Cents::ZERO,
             lifetime_spending: Cents::ZERO,
         }
     );
     // Test cache read input tokens
-    db.record_usage(user_id, false, provider, model, 1000, 0, 300, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 1000,
+            input_cache_creation: 0,
+            input_cache_read: 300,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();
     let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
     assert_eq!(
@@ -175,10 +271,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 2,
             tokens_this_minute: 2800,
             tokens_this_day: 2800,
-            input_tokens_this_month: 2000,
-            cache_creation_input_tokens_this_month: 500,
-            cache_read_input_tokens_this_month: 300,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 2000,
+                input_cache_creation: 500,
+                input_cache_read: 300,
+                output: 0,
+            },
             spending_this_month: Cents::ZERO,
             lifetime_spending: Cents::ZERO,
         }