collab: Track cache writes/reads in LLM usage (#18834)
This PR extends the LLM usage tracking to support tracking usage for cache writes and reads for Anthropic models. Release Notes: - N/A --------- Co-authored-by: Antonio Scandurra <me@as-cii.com> Co-authored-by: Antonio <antonio@zed.dev>
This commit is contained in:
parent
c5d252b837
commit
d55f025906
9 changed files with 241 additions and 39 deletions
|
@ -33,12 +33,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
let user_id = UserId::from_proto(123);
|
||||
|
||||
let now = t0;
|
||||
db.record_usage(user_id, false, provider, model, 1000, 0, now)
|
||||
db.record_usage(user_id, false, provider, model, 1000, 0, 0, 0, now)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let now = t0 + Duration::seconds(10);
|
||||
db.record_usage(user_id, false, provider, model, 2000, 0, now)
|
||||
db.record_usage(user_id, false, provider, model, 2000, 0, 0, 0, now)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
@ -50,6 +50,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
tokens_this_minute: 3000,
|
||||
tokens_this_day: 3000,
|
||||
input_tokens_this_month: 3000,
|
||||
cache_creation_input_tokens_this_month: 0,
|
||||
cache_read_input_tokens_this_month: 0,
|
||||
output_tokens_this_month: 0,
|
||||
spending_this_month: 0,
|
||||
lifetime_spending: 0,
|
||||
|
@ -65,6 +67,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
tokens_this_minute: 2000,
|
||||
tokens_this_day: 3000,
|
||||
input_tokens_this_month: 3000,
|
||||
cache_creation_input_tokens_this_month: 0,
|
||||
cache_read_input_tokens_this_month: 0,
|
||||
output_tokens_this_month: 0,
|
||||
spending_this_month: 0,
|
||||
lifetime_spending: 0,
|
||||
|
@ -72,7 +76,7 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
);
|
||||
|
||||
let now = t0 + Duration::seconds(60);
|
||||
db.record_usage(user_id, false, provider, model, 3000, 0, now)
|
||||
db.record_usage(user_id, false, provider, model, 3000, 0, 0, 0, now)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
@ -84,6 +88,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
tokens_this_minute: 5000,
|
||||
tokens_this_day: 6000,
|
||||
input_tokens_this_month: 6000,
|
||||
cache_creation_input_tokens_this_month: 0,
|
||||
cache_read_input_tokens_this_month: 0,
|
||||
output_tokens_this_month: 0,
|
||||
spending_this_month: 0,
|
||||
lifetime_spending: 0,
|
||||
|
@ -100,13 +106,15 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
tokens_this_minute: 0,
|
||||
tokens_this_day: 5000,
|
||||
input_tokens_this_month: 6000,
|
||||
cache_creation_input_tokens_this_month: 0,
|
||||
cache_read_input_tokens_this_month: 0,
|
||||
output_tokens_this_month: 0,
|
||||
spending_this_month: 0,
|
||||
lifetime_spending: 0,
|
||||
}
|
||||
);
|
||||
|
||||
db.record_usage(user_id, false, provider, model, 4000, 0, now)
|
||||
db.record_usage(user_id, false, provider, model, 4000, 0, 0, 0, now)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
@ -118,6 +126,8 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
tokens_this_minute: 4000,
|
||||
tokens_this_day: 9000,
|
||||
input_tokens_this_month: 10000,
|
||||
cache_creation_input_tokens_this_month: 0,
|
||||
cache_read_input_tokens_this_month: 0,
|
||||
output_tokens_this_month: 0,
|
||||
spending_this_month: 0,
|
||||
lifetime_spending: 0,
|
||||
|
@ -134,6 +144,50 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
tokens_this_minute: 0,
|
||||
tokens_this_day: 0,
|
||||
input_tokens_this_month: 9000,
|
||||
cache_creation_input_tokens_this_month: 0,
|
||||
cache_read_input_tokens_this_month: 0,
|
||||
output_tokens_this_month: 0,
|
||||
spending_this_month: 0,
|
||||
lifetime_spending: 0,
|
||||
}
|
||||
);
|
||||
|
||||
// Test cache creation input tokens
|
||||
db.record_usage(user_id, false, provider, model, 1000, 500, 0, 0, now)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
|
||||
assert_eq!(
|
||||
usage,
|
||||
Usage {
|
||||
requests_this_minute: 1,
|
||||
tokens_this_minute: 1500,
|
||||
tokens_this_day: 1500,
|
||||
input_tokens_this_month: 10000,
|
||||
cache_creation_input_tokens_this_month: 500,
|
||||
cache_read_input_tokens_this_month: 0,
|
||||
output_tokens_this_month: 0,
|
||||
spending_this_month: 0,
|
||||
lifetime_spending: 0,
|
||||
}
|
||||
);
|
||||
|
||||
// Test cache read input tokens
|
||||
db.record_usage(user_id, false, provider, model, 1000, 0, 300, 0, now)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let usage = db.get_usage(user_id, provider, model, now).await.unwrap();
|
||||
assert_eq!(
|
||||
usage,
|
||||
Usage {
|
||||
requests_this_minute: 2,
|
||||
tokens_this_minute: 2800,
|
||||
tokens_this_day: 2800,
|
||||
input_tokens_this_month: 11000,
|
||||
cache_creation_input_tokens_this_month: 500,
|
||||
cache_read_input_tokens_this_month: 300,
|
||||
output_tokens_this_month: 0,
|
||||
spending_this_month: 0,
|
||||
lifetime_spending: 0,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue