From 33e120d9645afd7fe51b8b2cdae9d9427b13f989 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 9 Aug 2024 16:38:37 -0700 Subject: [PATCH] Capture telemetry data on per-user monthly LLM spending (#16050) Release Notes: - N/A --------- Co-authored-by: Marshall --- crates/anthropic/src/anthropic.rs | 2 +- ...09160000_add_pricing_columns_to_models.sql | 3 + crates/collab/src/llm.rs | 7 +- crates/collab/src/llm/db/queries/providers.rs | 29 +++++--- crates/collab/src/llm/db/queries/usages.rs | 64 ++++++++++++---- crates/collab/src/llm/db/seed.rs | 74 +++++++++---------- crates/collab/src/llm/db/tables/model.rs | 2 + .../collab/src/llm/db/tables/usage_measure.rs | 3 +- crates/collab/src/llm/db/tests/usage_tests.rs | 50 ++++++++----- crates/collab/src/llm/telemetry.rs | 4 +- 10 files changed, 153 insertions(+), 85 deletions(-) create mode 100644 crates/collab/migrations_llm/20240809160000_add_pricing_columns_to_models.sql diff --git a/crates/anthropic/src/anthropic.rs b/crates/anthropic/src/anthropic.rs index 536cadf173..5bb16eeb2c 100644 --- a/crates/anthropic/src/anthropic.rs +++ b/crates/anthropic/src/anthropic.rs @@ -53,7 +53,7 @@ impl Model { Model::Claude3_5Sonnet => "claude-3-5-sonnet-20240620", Model::Claude3Opus => "claude-3-opus-20240229", Model::Claude3Sonnet => "claude-3-sonnet-20240229", - Model::Claude3Haiku => "claude-3-opus-20240307", + Model::Claude3Haiku => "claude-3-haiku-20240307", Self::Custom { name, .. } => name, } } diff --git a/crates/collab/migrations_llm/20240809160000_add_pricing_columns_to_models.sql b/crates/collab/migrations_llm/20240809160000_add_pricing_columns_to_models.sql new file mode 100644 index 0000000000..d9ffe2f9f2 --- /dev/null +++ b/crates/collab/migrations_llm/20240809160000_add_pricing_columns_to_models.sql @@ -0,0 +1,3 @@ +ALTER TABLE models + ADD COLUMN price_per_million_input_tokens integer NOT NULL DEFAULT 0, + ADD COLUMN price_per_million_output_tokens integer NOT NULL DEFAULT 0; diff --git a/crates/collab/src/llm.rs b/crates/collab/src/llm.rs index 7d21d07058..43bbca1df0 100644 --- a/crates/collab/src/llm.rs +++ b/crates/collab/src/llm.rs @@ -457,7 +457,8 @@ impl Drop for TokenCountingStream { claims.user_id as i32, provider, &model, - input_token_count + output_token_count, + input_token_count, + output_token_count, Utc::now(), ) .await @@ -481,7 +482,9 @@ impl Drop for TokenCountingStream { requests_this_minute: usage.requests_this_minute as u64, tokens_this_minute: usage.tokens_this_minute as u64, tokens_this_day: usage.tokens_this_day as u64, - tokens_this_month: usage.tokens_this_month as u64, + input_tokens_this_month: usage.input_tokens_this_month as u64, + output_tokens_this_month: usage.output_tokens_this_month as u64, + spending_this_month: usage.spending_this_month as u64, }, ) .await diff --git a/crates/collab/src/llm/db/queries/providers.rs b/crates/collab/src/llm/db/queries/providers.rs index c53b6aff7f..fef2e3d37b 100644 --- a/crates/collab/src/llm/db/queries/providers.rs +++ b/crates/collab/src/llm/db/queries/providers.rs @@ -3,10 +3,14 @@ use sea_orm::QueryOrder; use std::str::FromStr; use strum::IntoEnumIterator as _; -pub struct ModelRateLimits { +pub struct ModelParams { + pub provider: LanguageModelProvider, + pub name: String, pub max_requests_per_minute: i64, pub max_tokens_per_minute: i64, pub max_tokens_per_day: i64, + pub price_per_million_input_tokens: i32, + pub price_per_million_output_tokens: i32, } impl LlmDatabase { @@ -75,20 +79,23 @@ impl LlmDatabase { Ok(()) } - pub async fn insert_models( - &mut self, - models: &[(LanguageModelProvider, String, ModelRateLimits)], - ) -> Result<()> { + pub async fn insert_models(&mut self, models: &[ModelParams]) -> Result<()> { let all_provider_ids = &self.provider_ids; self.transaction(|tx| async move { - model::Entity::insert_many(models.into_iter().map(|(provider, name, rate_limits)| { - let provider_id = all_provider_ids[&provider]; + model::Entity::insert_many(models.into_iter().map(|model_params| { + let provider_id = all_provider_ids[&model_params.provider]; model::ActiveModel { provider_id: ActiveValue::set(provider_id), - name: ActiveValue::set(name.clone()), - max_requests_per_minute: ActiveValue::set(rate_limits.max_requests_per_minute), - max_tokens_per_minute: ActiveValue::set(rate_limits.max_tokens_per_minute), - max_tokens_per_day: ActiveValue::set(rate_limits.max_tokens_per_day), + name: ActiveValue::set(model_params.name.clone()), + max_requests_per_minute: ActiveValue::set(model_params.max_requests_per_minute), + max_tokens_per_minute: ActiveValue::set(model_params.max_tokens_per_minute), + max_tokens_per_day: ActiveValue::set(model_params.max_tokens_per_day), + price_per_million_input_tokens: ActiveValue::set( + model_params.price_per_million_input_tokens, + ), + price_per_million_output_tokens: ActiveValue::set( + model_params.price_per_million_output_tokens, + ), ..Default::default() } })) diff --git a/crates/collab/src/llm/db/queries/usages.rs b/crates/collab/src/llm/db/queries/usages.rs index 108d0e4111..3dc247ca12 100644 --- a/crates/collab/src/llm/db/queries/usages.rs +++ b/crates/collab/src/llm/db/queries/usages.rs @@ -11,7 +11,9 @@ pub struct Usage { pub requests_this_minute: usize, pub tokens_this_minute: usize, pub tokens_this_day: usize, - pub tokens_this_month: usize, + pub input_tokens_this_month: usize, + pub output_tokens_this_month: usize, + pub spending_this_month: usize, } #[derive(Clone, Copy, Debug, Default)] @@ -87,14 +89,20 @@ impl LlmDatabase { self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerMinute)?; let tokens_this_day = self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerDay)?; - let tokens_this_month = - self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerMonth)?; + let input_tokens_this_month = + self.get_usage_for_measure(&usages, now, UsageMeasure::InputTokensPerMonth)?; + let output_tokens_this_month = + self.get_usage_for_measure(&usages, now, UsageMeasure::OutputTokensPerMonth)?; + let spending_this_month = + calculate_spending(model, input_tokens_this_month, output_tokens_this_month); Ok(Usage { requests_this_minute, tokens_this_minute, tokens_this_day, - tokens_this_month, + input_tokens_this_month, + output_tokens_this_month, + spending_this_month, }) }) .await @@ -105,7 +113,8 @@ impl LlmDatabase { user_id: i32, provider: LanguageModelProvider, model_name: &str, - token_count: usize, + input_token_count: usize, + output_token_count: usize, now: DateTimeUtc, ) -> Result { self.transaction(|tx| async move { @@ -138,7 +147,7 @@ impl LlmDatabase { &usages, UsageMeasure::TokensPerMinute, now, - token_count, + input_token_count + output_token_count, &tx, ) .await?; @@ -149,27 +158,42 @@ impl LlmDatabase { &usages, UsageMeasure::TokensPerDay, now, - token_count, + input_token_count + output_token_count, &tx, ) .await?; - let tokens_this_month = self + let input_tokens_this_month = self .update_usage_for_measure( user_id, model.id, &usages, - UsageMeasure::TokensPerMonth, + UsageMeasure::InputTokensPerMonth, now, - token_count, + input_token_count, &tx, ) .await?; + let output_tokens_this_month = self + .update_usage_for_measure( + user_id, + model.id, + &usages, + UsageMeasure::OutputTokensPerMonth, + now, + output_token_count, + &tx, + ) + .await?; + let spending_this_month = + calculate_spending(model, input_tokens_this_month, output_tokens_this_month); Ok(Usage { requests_this_minute, tokens_this_minute, tokens_this_day, - tokens_this_month, + input_tokens_this_month, + output_tokens_this_month, + spending_this_month, }) }) .await @@ -303,6 +327,18 @@ impl LlmDatabase { } } +fn calculate_spending( + model: &model::Model, + input_tokens_this_month: usize, + output_tokens_this_month: usize, +) -> usize { + let input_token_cost = + input_tokens_this_month * model.price_per_million_input_tokens as usize / 1_000_000; + let output_token_cost = + output_tokens_this_month * model.price_per_million_output_tokens as usize / 1_000_000; + input_token_cost + output_token_cost +} + const MINUTE_BUCKET_COUNT: usize = 12; const DAY_BUCKET_COUNT: usize = 48; const MONTH_BUCKET_COUNT: usize = 30; @@ -313,7 +349,8 @@ impl UsageMeasure { UsageMeasure::RequestsPerMinute => MINUTE_BUCKET_COUNT, UsageMeasure::TokensPerMinute => MINUTE_BUCKET_COUNT, UsageMeasure::TokensPerDay => DAY_BUCKET_COUNT, - UsageMeasure::TokensPerMonth => MONTH_BUCKET_COUNT, + UsageMeasure::InputTokensPerMonth => MONTH_BUCKET_COUNT, + UsageMeasure::OutputTokensPerMonth => MONTH_BUCKET_COUNT, } } @@ -322,7 +359,8 @@ impl UsageMeasure { UsageMeasure::RequestsPerMinute => Duration::minutes(1), UsageMeasure::TokensPerMinute => Duration::minutes(1), UsageMeasure::TokensPerDay => Duration::hours(24), - UsageMeasure::TokensPerMonth => Duration::days(30), + UsageMeasure::InputTokensPerMonth => Duration::days(30), + UsageMeasure::OutputTokensPerMonth => Duration::days(30), } } diff --git a/crates/collab/src/llm/db/seed.rs b/crates/collab/src/llm/db/seed.rs index fe1a073b15..55c6c30cd5 100644 --- a/crates/collab/src/llm/db/seed.rs +++ b/crates/collab/src/llm/db/seed.rs @@ -1,45 +1,45 @@ use super::*; use crate::{Config, Result}; -use queries::providers::ModelRateLimits; +use queries::providers::ModelParams; pub async fn seed_database(_config: &Config, db: &mut LlmDatabase, _force: bool) -> Result<()> { db.insert_models(&[ - ( - LanguageModelProvider::Anthropic, - "claude-3-5-sonnet".into(), - ModelRateLimits { - max_requests_per_minute: 5, - max_tokens_per_minute: 20_000, - max_tokens_per_day: 300_000, - }, - ), - ( - LanguageModelProvider::Anthropic, - "claude-3-opus".into(), - ModelRateLimits { - max_requests_per_minute: 5, - max_tokens_per_minute: 10_000, - max_tokens_per_day: 300_000, - }, - ), - ( - LanguageModelProvider::Anthropic, - "claude-3-sonnet".into(), - ModelRateLimits { - max_requests_per_minute: 5, - max_tokens_per_minute: 20_000, - max_tokens_per_day: 300_000, - }, - ), - ( - LanguageModelProvider::Anthropic, - "claude-3-haiku".into(), - ModelRateLimits { - max_requests_per_minute: 5, - max_tokens_per_minute: 25_000, - max_tokens_per_day: 300_000, - }, - ), + ModelParams { + provider: LanguageModelProvider::Anthropic, + name: "claude-3-5-sonnet".into(), + max_requests_per_minute: 5, + max_tokens_per_minute: 20_000, + max_tokens_per_day: 300_000, + price_per_million_input_tokens: 300, // $3.00/MTok + price_per_million_output_tokens: 1500, // $15.00/MTok + }, + ModelParams { + provider: LanguageModelProvider::Anthropic, + name: "claude-3-opus".into(), + max_requests_per_minute: 5, + max_tokens_per_minute: 10_000, + max_tokens_per_day: 300_000, + price_per_million_input_tokens: 1500, // $15.00/MTok + price_per_million_output_tokens: 7500, // $75.00/MTok + }, + ModelParams { + provider: LanguageModelProvider::Anthropic, + name: "claude-3-sonnet".into(), + max_requests_per_minute: 5, + max_tokens_per_minute: 20_000, + max_tokens_per_day: 300_000, + price_per_million_input_tokens: 1500, // $15.00/MTok + price_per_million_output_tokens: 7500, // $75.00/MTok + }, + ModelParams { + provider: LanguageModelProvider::Anthropic, + name: "claude-3-haiku".into(), + max_requests_per_minute: 5, + max_tokens_per_minute: 25_000, + max_tokens_per_day: 300_000, + price_per_million_input_tokens: 25, // $0.25/MTok + price_per_million_output_tokens: 125, // $1.25/MTok + }, ]) .await } diff --git a/crates/collab/src/llm/db/tables/model.rs b/crates/collab/src/llm/db/tables/model.rs index ea8bd5d456..c87789f27e 100644 --- a/crates/collab/src/llm/db/tables/model.rs +++ b/crates/collab/src/llm/db/tables/model.rs @@ -13,6 +13,8 @@ pub struct Model { pub max_requests_per_minute: i64, pub max_tokens_per_minute: i64, pub max_tokens_per_day: i64, + pub price_per_million_input_tokens: i32, + pub price_per_million_output_tokens: i32, } #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] diff --git a/crates/collab/src/llm/db/tables/usage_measure.rs b/crates/collab/src/llm/db/tables/usage_measure.rs index 6462f24907..1105d997c2 100644 --- a/crates/collab/src/llm/db/tables/usage_measure.rs +++ b/crates/collab/src/llm/db/tables/usage_measure.rs @@ -9,7 +9,8 @@ pub enum UsageMeasure { RequestsPerMinute, TokensPerMinute, TokensPerDay, - TokensPerMonth, + InputTokensPerMonth, + OutputTokensPerMonth, } #[derive(Clone, Debug, PartialEq, DeriveEntityModel)] diff --git a/crates/collab/src/llm/db/tests/usage_tests.rs b/crates/collab/src/llm/db/tests/usage_tests.rs index 081b333afc..8faad1caaf 100644 --- a/crates/collab/src/llm/db/tests/usage_tests.rs +++ b/crates/collab/src/llm/db/tests/usage_tests.rs @@ -1,5 +1,5 @@ use crate::{ - llm::db::{queries::providers::ModelRateLimits, queries::usages::Usage, LlmDatabase}, + llm::db::{queries::providers::ModelParams, queries::usages::Usage, LlmDatabase}, test_llm_db, }; use chrono::{Duration, Utc}; @@ -13,15 +13,15 @@ async fn test_tracking_usage(db: &mut LlmDatabase) { let model = "claude-3-5-sonnet"; db.initialize().await.unwrap(); - db.insert_models(&[( + db.insert_models(&[ModelParams { provider, - model.to_string(), - ModelRateLimits { - max_requests_per_minute: 5, - max_tokens_per_minute: 10_000, - max_tokens_per_day: 50_000, - }, - )]) + name: model.to_string(), + max_requests_per_minute: 5, + max_tokens_per_minute: 10_000, + max_tokens_per_day: 50_000, + price_per_million_input_tokens: 50, + price_per_million_output_tokens: 50, + }]) .await .unwrap(); @@ -29,12 +29,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) { let user_id = 123; let now = t0; - db.record_usage(user_id, provider, model, 1000, now) + db.record_usage(user_id, provider, model, 1000, 0, now) .await .unwrap(); let now = t0 + Duration::seconds(10); - db.record_usage(user_id, provider, model, 2000, now) + db.record_usage(user_id, provider, model, 2000, 0, now) .await .unwrap(); @@ -45,7 +45,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) { requests_this_minute: 2, tokens_this_minute: 3000, tokens_this_day: 3000, - tokens_this_month: 3000, + input_tokens_this_month: 3000, + output_tokens_this_month: 0, + spending_this_month: 0, } ); @@ -57,12 +59,14 @@ async fn test_tracking_usage(db: &mut LlmDatabase) { requests_this_minute: 1, tokens_this_minute: 2000, tokens_this_day: 3000, - tokens_this_month: 3000, + input_tokens_this_month: 3000, + output_tokens_this_month: 0, + spending_this_month: 0, } ); let now = t0 + Duration::seconds(60); - db.record_usage(user_id, provider, model, 3000, now) + db.record_usage(user_id, provider, model, 3000, 0, now) .await .unwrap(); @@ -73,7 +77,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) { requests_this_minute: 2, tokens_this_minute: 5000, tokens_this_day: 6000, - tokens_this_month: 6000, + input_tokens_this_month: 6000, + output_tokens_this_month: 0, + spending_this_month: 0, } ); @@ -86,11 +92,13 @@ async fn test_tracking_usage(db: &mut LlmDatabase) { requests_this_minute: 0, tokens_this_minute: 0, tokens_this_day: 5000, - tokens_this_month: 6000, + input_tokens_this_month: 6000, + output_tokens_this_month: 0, + spending_this_month: 0, } ); - db.record_usage(user_id, provider, model, 4000, now) + db.record_usage(user_id, provider, model, 4000, 0, now) .await .unwrap(); @@ -101,7 +109,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) { requests_this_minute: 1, tokens_this_minute: 4000, tokens_this_day: 9000, - tokens_this_month: 10000, + input_tokens_this_month: 10000, + output_tokens_this_month: 0, + spending_this_month: 0, } ); @@ -114,7 +124,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) { requests_this_minute: 0, tokens_this_minute: 0, tokens_this_day: 0, - tokens_this_month: 9000, + input_tokens_this_month: 9000, + output_tokens_this_month: 0, + spending_this_month: 0, } ); } diff --git a/crates/collab/src/llm/telemetry.rs b/crates/collab/src/llm/telemetry.rs index 941fe9a16d..f8d0cf4aac 100644 --- a/crates/collab/src/llm/telemetry.rs +++ b/crates/collab/src/llm/telemetry.rs @@ -14,7 +14,9 @@ pub struct LlmUsageEventRow { pub requests_this_minute: u64, pub tokens_this_minute: u64, pub tokens_this_day: u64, - pub tokens_this_month: u64, + pub input_tokens_this_month: u64, + pub output_tokens_this_month: u64, + pub spending_this_month: u64, } pub async fn report_llm_usage(client: &clickhouse::Client, row: LlmUsageEventRow) -> Result<()> {