collab: Add usage-based billing for LLM interactions (#19081)

This PR adds usage-based billing for LLM interactions in the Assistant.
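
The test diff below shows the shape of the change: `db.record_usage` now takes a grouped `TokenUsage` value instead of four positional token counts, and the `Usage` snapshot returned by `get_usage` carries a nested `tokens_this_month: TokenUsage` in place of the four flat per-month fields. A minimal sketch of the types this implies follows; the field names come straight from the test, while the derives, integer types, and the placeholder `Cents` definition are assumptions for illustration only:

```rust
// Sketch only: field names taken from the test diff; types and derives assumed.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct TokenUsage {
    pub input: usize,
    pub input_cache_creation: usize,
    pub input_cache_read: usize,
    pub output: usize,
}

impl TokenUsage {
    // The tests sum all four counts into `tokens_this_minute` / `tokens_this_day`
    // (e.g. input 1000 + cache creation 500 => 1500).
    pub fn total(&self) -> usize {
        self.input + self.input_cache_creation + self.input_cache_read + self.output
    }
}

// Placeholder standing in for the existing `Cents` type the tests reference.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct Cents(pub u64);

impl Cents {
    pub const ZERO: Self = Self(0);
}

// Sketch only: the per-user usage snapshot asserted against `get_usage` in the tests.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct Usage {
    pub requests_this_minute: usize,
    pub tokens_this_minute: usize,
    pub tokens_this_day: usize,
    pub tokens_this_month: TokenUsage,
    pub spending_this_month: Cents,
    pub lifetime_spending: Cents,
}
```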

Release Notes:

- N/A

---------

Co-authored-by: Antonio Scandurra <me@as-cii.com>
Co-authored-by: Antonio <antonio@zed.dev>
Co-authored-by: Richard <richard@zed.dev>
Co-authored-by: Richard Feldman <oss@rtfeldman.com>
Commit 22ea7cef7a (parent f1c45d988e)
Marshall Bowers, 2024-10-11 13:36:54 -04:00, committed by GitHub
20 changed files with 918 additions and 280 deletions

@@ -2,7 +2,7 @@ use crate::{
     db::UserId,
     llm::db::{
         queries::{providers::ModelParams, usages::Usage},
-        LlmDatabase,
+        LlmDatabase, TokenUsage,
     },
     test_llm_db, Cents,
 };
@@ -36,14 +36,42 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
     let user_id = UserId::from_proto(123);
     let now = t0;
-    db.record_usage(user_id, false, provider, model, 1000, 0, 0, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 1000,
+            input_cache_creation: 0,
+            input_cache_read: 0,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();
     let now = t0 + Duration::seconds(10);
-    db.record_usage(user_id, false, provider, model, 2000, 0, 0, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 2000,
+            input_cache_creation: 0,
+            input_cache_read: 0,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();
     let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
     assert_eq!(
@@ -52,10 +80,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 2,
             tokens_this_minute: 3000,
             tokens_this_day: 3000,
-            input_tokens_this_month: 3000,
-            cache_creation_input_tokens_this_month: 0,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 3000,
+                input_cache_creation: 0,
+                input_cache_read: 0,
+                output: 0,
+            },
             spending_this_month: Cents::ZERO,
             lifetime_spending: Cents::ZERO,
         }
@@ -69,19 +99,35 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 1,
             tokens_this_minute: 2000,
             tokens_this_day: 3000,
-            input_tokens_this_month: 3000,
-            cache_creation_input_tokens_this_month: 0,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 3000,
+                input_cache_creation: 0,
+                input_cache_read: 0,
+                output: 0,
+            },
             spending_this_month: Cents::ZERO,
             lifetime_spending: Cents::ZERO,
         }
     );
     let now = t0 + Duration::seconds(60);
-    db.record_usage(user_id, false, provider, model, 3000, 0, 0, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 3000,
+            input_cache_creation: 0,
+            input_cache_read: 0,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();
     let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
     assert_eq!(
@@ -90,10 +136,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 2,
             tokens_this_minute: 5000,
             tokens_this_day: 6000,
-            input_tokens_this_month: 6000,
-            cache_creation_input_tokens_this_month: 0,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 6000,
+                input_cache_creation: 0,
+                input_cache_read: 0,
+                output: 0,
+            },
             spending_this_month: Cents::ZERO,
             lifetime_spending: Cents::ZERO,
         }
@@ -108,18 +156,34 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 0,
             tokens_this_minute: 0,
             tokens_this_day: 5000,
-            input_tokens_this_month: 6000,
-            cache_creation_input_tokens_this_month: 0,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 6000,
+                input_cache_creation: 0,
+                input_cache_read: 0,
+                output: 0,
+            },
             spending_this_month: Cents::ZERO,
             lifetime_spending: Cents::ZERO,
         }
     );
-    db.record_usage(user_id, false, provider, model, 4000, 0, 0, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 4000,
+            input_cache_creation: 0,
+            input_cache_read: 0,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();
     let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
     assert_eq!(
@@ -128,10 +192,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 1,
             tokens_this_minute: 4000,
             tokens_this_day: 9000,
-            input_tokens_this_month: 10000,
-            cache_creation_input_tokens_this_month: 0,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 10000,
+                input_cache_creation: 0,
+                input_cache_read: 0,
+                output: 0,
+            },
             spending_this_month: Cents::ZERO,
             lifetime_spending: Cents::ZERO,
         }
@@ -143,9 +209,23 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
         .with_timezone(&Utc);
     // Test cache creation input tokens
-    db.record_usage(user_id, false, provider, model, 1000, 500, 0, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 1000,
+            input_cache_creation: 500,
+            input_cache_read: 0,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();
     let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
     assert_eq!(
@@ -154,19 +234,35 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 1,
             tokens_this_minute: 1500,
             tokens_this_day: 1500,
-            input_tokens_this_month: 1000,
-            cache_creation_input_tokens_this_month: 500,
-            cache_read_input_tokens_this_month: 0,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 1000,
+                input_cache_creation: 500,
+                input_cache_read: 0,
+                output: 0,
+            },
             spending_this_month: Cents::ZERO,
             lifetime_spending: Cents::ZERO,
         }
     );
     // Test cache read input tokens
-    db.record_usage(user_id, false, provider, model, 1000, 0, 300, 0, now)
-        .await
-        .unwrap();
+    db.record_usage(
+        user_id,
+        false,
+        provider,
+        model,
+        TokenUsage {
+            input: 1000,
+            input_cache_creation: 0,
+            input_cache_read: 300,
+            output: 0,
+        },
+        false,
+        Cents::ZERO,
+        now,
+    )
+    .await
+    .unwrap();
     let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
     assert_eq!(
@@ -175,10 +271,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 2,
             tokens_this_minute: 2800,
             tokens_this_day: 2800,
-            input_tokens_this_month: 2000,
-            cache_creation_input_tokens_this_month: 500,
-            cache_read_input_tokens_this_month: 300,
-            output_tokens_this_month: 0,
+            tokens_this_month: TokenUsage {
+                input: 2000,
+                input_cache_creation: 500,
+                input_cache_read: 300,
+                output: 0,
+            },
             spending_this_month: Cents::ZERO,
             lifetime_spending: Cents::ZERO,
         }