collab: Track cache writes/reads in LLM usage (#18834)

This PR extends LLM usage tracking to record usage for cache writes and cache reads for Anthropic models.
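
As a reviewer reference, here is a minimal sketch of the extended `record_usage` call as the updated tests exercise it. The argument order and the two new cache-token counts are taken from the diff; the names in the comments are inferred and may not match the actual parameter names:

```rust
// Sketch only: positional arguments as they appear in the updated tests.
// Names in the comments are assumptions, not the real parameter names.
db.record_usage(
    user_id,  // UserId of the requester
    false,    // assumed: a boolean flag such as is_staff
    provider, // LLM provider (Anthropic here)
    model,    // model name
    1000,     // input tokens
    500,      // cache creation (write) input tokens -- new
    300,      // cache read input tokens -- new
    0,        // output tokens
    now,      // timestamp the usage is attributed to
)
.await
.unwrap();
```

The existing input and output token arguments are unchanged; the two new cache counts slot in between them.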

Release Notes:

- N/A

---------

Co-authored-by: Antonio Scandurra <me@as-cii.com>
Co-authored-by: Antonio <antonio@zed.dev>
Marshall Bowers 2024-10-07 17:32:49 -04:00 committed by GitHub
parent c5d252b837
commit d55f025906
9 changed files with 241 additions and 39 deletions
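
For orientation before the diff: the `Usage` value returned by `get_usage` gains two monthly counters. A rough sketch of its shape follows; the field names come from the assertions in the diff, while the concrete types are assumptions, not the actual definition:

```rust
// Rough sketch of the Usage shape asserted in the tests below.
// Field names are taken from the diff; the types are assumptions.
pub struct Usage {
    pub requests_this_minute: usize,
    pub tokens_this_minute: usize, // appears to include cache write/read tokens
    pub tokens_this_day: usize,
    pub input_tokens_this_month: usize,
    pub cache_creation_input_tokens_this_month: usize, // new: prompt-cache writes
    pub cache_read_input_tokens_this_month: usize,     // new: prompt-cache reads
    pub output_tokens_this_month: usize,
    pub spending_this_month: usize, // unit assumed (e.g. cents)
    pub lifetime_spending: usize,   // unit assumed (e.g. cents)
}
```

In the new assertions, recording 1000 input tokens plus 500 cache-creation tokens brings `tokens_this_minute` to 1500, and a second request with 1000 input plus 300 cache-read tokens brings it to 2800, so the rolled-up per-minute and per-day totals include cache tokens while the cache counts are also tracked separately per month.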


@@ -33,12 +33,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
let user_id = UserId::from_proto(123);
let now = t0;
- db.record_usage(user_id, false, provider, model, 1000, 0, now)
+ db.record_usage(user_id, false, provider, model, 1000, 0, 0, 0, now)
.await
.unwrap();
let now = t0 + Duration::seconds(10);
- db.record_usage(user_id, false, provider, model, 2000, 0, now)
+ db.record_usage(user_id, false, provider, model, 2000, 0, 0, 0, now)
.await
.unwrap();
@@ -50,6 +50,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
tokens_this_minute: 3000,
tokens_this_day: 3000,
input_tokens_this_month: 3000,
+ cache_creation_input_tokens_this_month: 0,
+ cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
spending_this_month: 0,
lifetime_spending: 0,
@@ -65,6 +67,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
tokens_this_minute: 2000,
tokens_this_day: 3000,
input_tokens_this_month: 3000,
+ cache_creation_input_tokens_this_month: 0,
+ cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
spending_this_month: 0,
lifetime_spending: 0,
@@ -72,7 +76,7 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
);
let now = t0 + Duration::seconds(60);
- db.record_usage(user_id, false, provider, model, 3000, 0, now)
+ db.record_usage(user_id, false, provider, model, 3000, 0, 0, 0, now)
.await
.unwrap();
@@ -84,6 +88,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
tokens_this_minute: 5000,
tokens_this_day: 6000,
input_tokens_this_month: 6000,
+ cache_creation_input_tokens_this_month: 0,
+ cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
spending_this_month: 0,
lifetime_spending: 0,
@@ -100,13 +106,15 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
tokens_this_minute: 0,
tokens_this_day: 5000,
input_tokens_this_month: 6000,
+ cache_creation_input_tokens_this_month: 0,
+ cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
spending_this_month: 0,
lifetime_spending: 0,
}
);
- db.record_usage(user_id, false, provider, model, 4000, 0, now)
+ db.record_usage(user_id, false, provider, model, 4000, 0, 0, 0, now)
.await
.unwrap();
@@ -118,6 +126,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
tokens_this_minute: 4000,
tokens_this_day: 9000,
input_tokens_this_month: 10000,
+ cache_creation_input_tokens_this_month: 0,
+ cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
spending_this_month: 0,
lifetime_spending: 0,
@@ -134,6 +144,50 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
tokens_this_minute: 0,
tokens_this_day: 0,
input_tokens_this_month: 9000,
+ cache_creation_input_tokens_this_month: 0,
+ cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
spending_this_month: 0,
lifetime_spending: 0,
}
);
+ // Test cache creation input tokens
+ db.record_usage(user_id, false, provider, model, 1000, 500, 0, 0, now)
+ .await
+ .unwrap();
+ let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
+ assert_eq!(
+ usage,
+ Usage {
+ requests_this_minute: 1,
+ tokens_this_minute: 1500,
+ tokens_this_day: 1500,
+ input_tokens_this_month: 10000,
+ cache_creation_input_tokens_this_month: 500,
+ cache_read_input_tokens_this_month: 0,
+ output_tokens_this_month: 0,
+ spending_this_month: 0,
+ lifetime_spending: 0,
+ }
+ );
+ // Test cache read input tokens
+ db.record_usage(user_id, false, provider, model, 1000, 0, 300, 0, now)
+ .await
+ .unwrap();
+ let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
+ assert_eq!(
+ usage,
+ Usage {
+ requests_this_minute: 2,
+ tokens_this_minute: 2800,
+ tokens_this_day: 2800,
+ input_tokens_this_month: 11000,
+ cache_creation_input_tokens_this_month: 500,
+ cache_read_input_tokens_this_month: 300,
+ output_tokens_this_month: 0,
+ spending_this_month: 0,
+ lifetime_spending: 0