collab: Track cache writes/reads in LLM usage (#18834)

This PR extends LLM usage tracking to record usage for cache writes and cache reads for Anthropic models.
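
As a reviewer reference, here is a minimal sketch of the extended `record_usage` call as the updated tests exercise it. The argument order and the two new cache-token counts are taken from the diff; the names in the comments are inferred and may not match the actual parameter names:

```rust
// Sketch only: positional arguments as they appear in the updated tests.
// Names in the comments are assumptions, not the real parameter names.
db.record_usage(
    user_id,  // UserId of the requester
    false,    // assumed: a boolean flag such as is_staff
    provider, // LLM provider (Anthropic here)
    model,    // model name
    1000,     // input tokens
    500,      // cache creation (write) input tokens -- new
    300,      // cache read input tokens -- new
    0,        // output tokens
    now,      // timestamp the usage is attributed to
)
.await
.unwrap();
```

The existing input and output token arguments are unchanged; the two new cache counts slot in between them.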

Release Notes:

- N/A

---------

Co-authored-by: Antonio Scandurra <me@as-cii.com>
Co-authored-by: Antonio <antonio@zed.dev>
Marshall Bowers 2024-10-07 17:32:49 -04:00 committed by GitHub
parent c5d252b837
commit d55f025906
9 changed files with 241 additions and 39 deletions
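
For orientation before the diff: the `Usage` value returned by `get_usage` gains two monthly counters. A rough sketch of its shape follows; the field names come from the assertions in the diff, while the concrete types are assumptions, not the actual definition:

```rust
// Rough sketch of the Usage shape asserted in the tests below.
// Field names are taken from the diff; the types are assumptions.
pub struct Usage {
    pub requests_this_minute: usize,
    pub tokens_this_minute: usize, // appears to include cache write/read tokens
    pub tokens_this_day: usize,
    pub input_tokens_this_month: usize,
    pub cache_creation_input_tokens_this_month: usize, // new: prompt-cache writes
    pub cache_read_input_tokens_this_month: usize,     // new: prompt-cache reads
    pub output_tokens_this_month: usize,
    pub spending_this_month: usize, // unit assumed (e.g. cents)
    pub lifetime_spending: usize,   // unit assumed (e.g. cents)
}
```

In the new assertions, recording 1000 input tokens plus 500 cache-creation tokens brings `tokens_this_minute` to 1500, and a second request with 1000 input plus 300 cache-read tokens brings it to 2800, so the rolled-up per-minute and per-day totals include cache tokens while the cache counts are also tracked separately per month.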


@@ -33,12 +33,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
let user_id = UserId::from_proto(123);
let now = t0;
- db.record_usage(user_id, false, provider, model, 1000, 0, now)
+ db.record_usage(user_id, false, provider, model, 1000, 0, 0, 0, now)
.await
.unwrap();
let now = t0 + Duration::seconds(10);
- db.record_usage(user_id, false, provider, model, 2000, 0, now)
+ db.record_usage(user_id, false, provider, model, 2000, 0, 0, 0, now)
.await
.unwrap();
@@ -50,6 +50,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
tokens_this_minute: 3000,
tokens_this_day: 3000,
input_tokens_this_month: 3000,
+ cache_creation_input_tokens_this_month: 0,
+ cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
spending_this_month: 0,
lifetime_spending: 0,
@@ -65,6 +67,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
tokens_this_minute: 2000,
tokens_this_day: 3000,
input_tokens_this_month: 3000,
+ cache_creation_input_tokens_this_month: 0,
+ cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
spending_this_month: 0,
lifetime_spending: 0,
@@ -72,7 +76,7 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
);
let now = t0 + Duration::seconds(60);
- db.record_usage(user_id, false, provider, model, 3000, 0, now)
+ db.record_usage(user_id, false, provider, model, 3000, 0, 0, 0, now)
.await
.unwrap();
@@ -84,6 +88,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
tokens_this_minute: 5000,
tokens_this_day: 6000,
input_tokens_this_month: 6000,
+ cache_creation_input_tokens_this_month: 0,
+ cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
spending_this_month: 0,
lifetime_spending: 0,
@@ -100,13 +106,15 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
tokens_this_minute: 0,
tokens_this_day: 5000,
input_tokens_this_month: 6000,
+ cache_creation_input_tokens_this_month: 0,
+ cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
spending_this_month: 0,
lifetime_spending: 0,
}
);
- db.record_usage(user_id, false, provider, model, 4000, 0, now)
+ db.record_usage(user_id, false, provider, model, 4000, 0, 0, 0, now)
.await
.unwrap();
@@ -118,6 +126,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
tokens_this_minute: 4000,
tokens_this_day: 9000,
input_tokens_this_month: 10000,
+ cache_creation_input_tokens_this_month: 0,
+ cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
spending_this_month: 0,
lifetime_spending: 0,
@@ -134,6 +144,50 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
tokens_this_minute: 0,
tokens_this_day: 0,
input_tokens_this_month: 9000,
+ cache_creation_input_tokens_this_month: 0,
+ cache_read_input_tokens_this_month: 0,
output_tokens_this_month: 0,
spending_this_month: 0,
lifetime_spending: 0,
}
);
+ // Test cache creation input tokens
+ db.record_usage(user_id, false, provider, model, 1000, 500, 0, 0, now)
+ .await
+ .unwrap();
+ let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
+ assert_eq!(
+ usage,
+ Usage {
+ requests_this_minute: 1,
+ tokens_this_minute: 1500,
+ tokens_this_day: 1500,
+ input_tokens_this_month: 10000,
+ cache_creation_input_tokens_this_month: 500,
+ cache_read_input_tokens_this_month: 0,
+ output_tokens_this_month: 0,
+ spending_this_month: 0,
+ lifetime_spending: 0,
+ }
+ );
+ // Test cache read input tokens
+ db.record_usage(user_id, false, provider, model, 1000, 0, 300, 0, now)
+ .await
+ .unwrap();
+ let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
+ assert_eq!(
+ usage,
+ Usage {
+ requests_this_minute: 2,
+ tokens_this_minute: 2800,
+ tokens_this_day: 2800,
+ input_tokens_this_month: 11000,
+ cache_creation_input_tokens_this_month: 500,
+ cache_read_input_tokens_this_month: 300,
+ output_tokens_this_month: 0,
+ spending_this_month: 0,
+ lifetime_spending: 0