Capture telemetry data on per-user monthly LLM spending (#16050)

Release Notes:

- N/A

---------

Co-authored-by: Marshall <marshall@zed.dev>
This commit is contained in:
Max Brunsfeld 2024-08-09 16:38:37 -07:00 committed by GitHub
parent 8688b2ad19
commit 33e120d964
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 153 additions and 85 deletions

View file

@ -53,7 +53,7 @@ impl Model {
Model::Claude3_5Sonnet => "claude-3-5-sonnet-20240620", Model::Claude3_5Sonnet => "claude-3-5-sonnet-20240620",
Model::Claude3Opus => "claude-3-opus-20240229", Model::Claude3Opus => "claude-3-opus-20240229",
Model::Claude3Sonnet => "claude-3-sonnet-20240229", Model::Claude3Sonnet => "claude-3-sonnet-20240229",
Model::Claude3Haiku => "claude-3-opus-20240307", Model::Claude3Haiku => "claude-3-haiku-20240307",
Self::Custom { name, .. } => name, Self::Custom { name, .. } => name,
} }
} }

View file

@ -0,0 +1,3 @@
-- Adds per-model pricing to the `models` table: one price per million input
-- tokens and one per million output tokens. Both columns are NOT NULL and
-- default to 0.
-- NOTE(review): the unit appears to be cents (the seed data annotates 300 as
-- "$3.00/MTok") -- confirm before relying on it.
ALTER TABLE models
ADD COLUMN price_per_million_input_tokens integer NOT NULL DEFAULT 0,
ADD COLUMN price_per_million_output_tokens integer NOT NULL DEFAULT 0;

View file

@ -457,7 +457,8 @@ impl<S> Drop for TokenCountingStream<S> {
claims.user_id as i32, claims.user_id as i32,
provider, provider,
&model, &model,
input_token_count + output_token_count, input_token_count,
output_token_count,
Utc::now(), Utc::now(),
) )
.await .await
@ -481,7 +482,9 @@ impl<S> Drop for TokenCountingStream<S> {
requests_this_minute: usage.requests_this_minute as u64, requests_this_minute: usage.requests_this_minute as u64,
tokens_this_minute: usage.tokens_this_minute as u64, tokens_this_minute: usage.tokens_this_minute as u64,
tokens_this_day: usage.tokens_this_day as u64, tokens_this_day: usage.tokens_this_day as u64,
tokens_this_month: usage.tokens_this_month as u64, input_tokens_this_month: usage.input_tokens_this_month as u64,
output_tokens_this_month: usage.output_tokens_this_month as u64,
spending_this_month: usage.spending_this_month as u64,
}, },
) )
.await .await

View file

@ -3,10 +3,14 @@ use sea_orm::QueryOrder;
use std::str::FromStr; use std::str::FromStr;
use strum::IntoEnumIterator as _; use strum::IntoEnumIterator as _;
pub struct ModelRateLimits { pub struct ModelParams {
pub provider: LanguageModelProvider,
pub name: String,
pub max_requests_per_minute: i64, pub max_requests_per_minute: i64,
pub max_tokens_per_minute: i64, pub max_tokens_per_minute: i64,
pub max_tokens_per_day: i64, pub max_tokens_per_day: i64,
pub price_per_million_input_tokens: i32,
pub price_per_million_output_tokens: i32,
} }
impl LlmDatabase { impl LlmDatabase {
@ -75,20 +79,23 @@ impl LlmDatabase {
Ok(()) Ok(())
} }
pub async fn insert_models( pub async fn insert_models(&mut self, models: &[ModelParams]) -> Result<()> {
&mut self,
models: &[(LanguageModelProvider, String, ModelRateLimits)],
) -> Result<()> {
let all_provider_ids = &self.provider_ids; let all_provider_ids = &self.provider_ids;
self.transaction(|tx| async move { self.transaction(|tx| async move {
model::Entity::insert_many(models.into_iter().map(|(provider, name, rate_limits)| { model::Entity::insert_many(models.into_iter().map(|model_params| {
let provider_id = all_provider_ids[&provider]; let provider_id = all_provider_ids[&model_params.provider];
model::ActiveModel { model::ActiveModel {
provider_id: ActiveValue::set(provider_id), provider_id: ActiveValue::set(provider_id),
name: ActiveValue::set(name.clone()), name: ActiveValue::set(model_params.name.clone()),
max_requests_per_minute: ActiveValue::set(rate_limits.max_requests_per_minute), max_requests_per_minute: ActiveValue::set(model_params.max_requests_per_minute),
max_tokens_per_minute: ActiveValue::set(rate_limits.max_tokens_per_minute), max_tokens_per_minute: ActiveValue::set(model_params.max_tokens_per_minute),
max_tokens_per_day: ActiveValue::set(rate_limits.max_tokens_per_day), max_tokens_per_day: ActiveValue::set(model_params.max_tokens_per_day),
price_per_million_input_tokens: ActiveValue::set(
model_params.price_per_million_input_tokens,
),
price_per_million_output_tokens: ActiveValue::set(
model_params.price_per_million_output_tokens,
),
..Default::default() ..Default::default()
} }
})) }))

View file

@ -11,7 +11,9 @@ pub struct Usage {
pub requests_this_minute: usize, pub requests_this_minute: usize,
pub tokens_this_minute: usize, pub tokens_this_minute: usize,
pub tokens_this_day: usize, pub tokens_this_day: usize,
pub tokens_this_month: usize, pub input_tokens_this_month: usize,
pub output_tokens_this_month: usize,
pub spending_this_month: usize,
} }
#[derive(Clone, Copy, Debug, Default)] #[derive(Clone, Copy, Debug, Default)]
@ -87,14 +89,20 @@ impl LlmDatabase {
self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerMinute)?; self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerMinute)?;
let tokens_this_day = let tokens_this_day =
self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerDay)?; self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerDay)?;
let tokens_this_month = let input_tokens_this_month =
self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerMonth)?; self.get_usage_for_measure(&usages, now, UsageMeasure::InputTokensPerMonth)?;
let output_tokens_this_month =
self.get_usage_for_measure(&usages, now, UsageMeasure::OutputTokensPerMonth)?;
let spending_this_month =
calculate_spending(model, input_tokens_this_month, output_tokens_this_month);
Ok(Usage { Ok(Usage {
requests_this_minute, requests_this_minute,
tokens_this_minute, tokens_this_minute,
tokens_this_day, tokens_this_day,
tokens_this_month, input_tokens_this_month,
output_tokens_this_month,
spending_this_month,
}) })
}) })
.await .await
@ -105,7 +113,8 @@ impl LlmDatabase {
user_id: i32, user_id: i32,
provider: LanguageModelProvider, provider: LanguageModelProvider,
model_name: &str, model_name: &str,
token_count: usize, input_token_count: usize,
output_token_count: usize,
now: DateTimeUtc, now: DateTimeUtc,
) -> Result<Usage> { ) -> Result<Usage> {
self.transaction(|tx| async move { self.transaction(|tx| async move {
@ -138,7 +147,7 @@ impl LlmDatabase {
&usages, &usages,
UsageMeasure::TokensPerMinute, UsageMeasure::TokensPerMinute,
now, now,
token_count, input_token_count + output_token_count,
&tx, &tx,
) )
.await?; .await?;
@ -149,27 +158,42 @@ impl LlmDatabase {
&usages, &usages,
UsageMeasure::TokensPerDay, UsageMeasure::TokensPerDay,
now, now,
token_count, input_token_count + output_token_count,
&tx, &tx,
) )
.await?; .await?;
let tokens_this_month = self let input_tokens_this_month = self
.update_usage_for_measure( .update_usage_for_measure(
user_id, user_id,
model.id, model.id,
&usages, &usages,
UsageMeasure::TokensPerMonth, UsageMeasure::InputTokensPerMonth,
now, now,
token_count, input_token_count,
&tx, &tx,
) )
.await?; .await?;
let output_tokens_this_month = self
.update_usage_for_measure(
user_id,
model.id,
&usages,
UsageMeasure::OutputTokensPerMonth,
now,
output_token_count,
&tx,
)
.await?;
let spending_this_month =
calculate_spending(model, input_tokens_this_month, output_tokens_this_month);
Ok(Usage { Ok(Usage {
requests_this_minute, requests_this_minute,
tokens_this_minute, tokens_this_minute,
tokens_this_day, tokens_this_day,
tokens_this_month, input_tokens_this_month,
output_tokens_this_month,
spending_this_month,
}) })
}) })
.await .await
@ -303,6 +327,18 @@ impl LlmDatabase {
} }
} }
/// Computes a user's spending on `model` for the current month.
///
/// The result is expressed in the same currency unit as the model's
/// `price_per_million_*_tokens` fields (the seed data expresses those in
/// cents), rounded down to a whole unit.
///
/// * `model` - supplies the per-million-token input and output prices.
/// * `input_tokens_this_month` - input tokens consumed so far this month.
/// * `output_tokens_this_month` - output tokens consumed so far this month.
///
/// This function never panics: a negative price is counted as zero, and a
/// total too large for `usize` saturates at `usize::MAX`.
fn calculate_spending(
    model: &model::Model,
    input_tokens_this_month: usize,
    output_tokens_this_month: usize,
) -> usize {
    const TOKENS_PER_PRICE_UNIT: u128 = 1_000_000;

    // Prices are stored as signed integers. A bare `as usize` cast would turn a
    // negative value into an enormous multiplier, so clamp to zero first.
    let price = |price_per_million: i32| u128::from(price_per_million.max(0).unsigned_abs());

    // Widen to u128 so the products cannot overflow, and add the two costs
    // before dividing so that only one truncation is applied to the total.
    let total_cost = input_tokens_this_month as u128
        * price(model.price_per_million_input_tokens)
        + output_tokens_this_month as u128 * price(model.price_per_million_output_tokens);

    usize::try_from(total_cost / TOKENS_PER_PRICE_UNIT).unwrap_or(usize::MAX)
}
const MINUTE_BUCKET_COUNT: usize = 12; const MINUTE_BUCKET_COUNT: usize = 12;
const DAY_BUCKET_COUNT: usize = 48; const DAY_BUCKET_COUNT: usize = 48;
const MONTH_BUCKET_COUNT: usize = 30; const MONTH_BUCKET_COUNT: usize = 30;
@ -313,7 +349,8 @@ impl UsageMeasure {
UsageMeasure::RequestsPerMinute => MINUTE_BUCKET_COUNT, UsageMeasure::RequestsPerMinute => MINUTE_BUCKET_COUNT,
UsageMeasure::TokensPerMinute => MINUTE_BUCKET_COUNT, UsageMeasure::TokensPerMinute => MINUTE_BUCKET_COUNT,
UsageMeasure::TokensPerDay => DAY_BUCKET_COUNT, UsageMeasure::TokensPerDay => DAY_BUCKET_COUNT,
UsageMeasure::TokensPerMonth => MONTH_BUCKET_COUNT, UsageMeasure::InputTokensPerMonth => MONTH_BUCKET_COUNT,
UsageMeasure::OutputTokensPerMonth => MONTH_BUCKET_COUNT,
} }
} }
@ -322,7 +359,8 @@ impl UsageMeasure {
UsageMeasure::RequestsPerMinute => Duration::minutes(1), UsageMeasure::RequestsPerMinute => Duration::minutes(1),
UsageMeasure::TokensPerMinute => Duration::minutes(1), UsageMeasure::TokensPerMinute => Duration::minutes(1),
UsageMeasure::TokensPerDay => Duration::hours(24), UsageMeasure::TokensPerDay => Duration::hours(24),
UsageMeasure::TokensPerMonth => Duration::days(30), UsageMeasure::InputTokensPerMonth => Duration::days(30),
UsageMeasure::OutputTokensPerMonth => Duration::days(30),
} }
} }

View file

@ -1,45 +1,45 @@
use super::*; use super::*;
use crate::{Config, Result}; use crate::{Config, Result};
use queries::providers::ModelRateLimits; use queries::providers::ModelParams;
pub async fn seed_database(_config: &Config, db: &mut LlmDatabase, _force: bool) -> Result<()> { pub async fn seed_database(_config: &Config, db: &mut LlmDatabase, _force: bool) -> Result<()> {
db.insert_models(&[ db.insert_models(&[
( ModelParams {
LanguageModelProvider::Anthropic, provider: LanguageModelProvider::Anthropic,
"claude-3-5-sonnet".into(), name: "claude-3-5-sonnet".into(),
ModelRateLimits { max_requests_per_minute: 5,
max_requests_per_minute: 5, max_tokens_per_minute: 20_000,
max_tokens_per_minute: 20_000, max_tokens_per_day: 300_000,
max_tokens_per_day: 300_000, price_per_million_input_tokens: 300, // $3.00/MTok
}, price_per_million_output_tokens: 1500, // $15.00/MTok
), },
( ModelParams {
LanguageModelProvider::Anthropic, provider: LanguageModelProvider::Anthropic,
"claude-3-opus".into(), name: "claude-3-opus".into(),
ModelRateLimits { max_requests_per_minute: 5,
max_requests_per_minute: 5, max_tokens_per_minute: 10_000,
max_tokens_per_minute: 10_000, max_tokens_per_day: 300_000,
max_tokens_per_day: 300_000, price_per_million_input_tokens: 1500, // $15.00/MTok
}, price_per_million_output_tokens: 7500, // $75.00/MTok
), },
( ModelParams {
LanguageModelProvider::Anthropic, provider: LanguageModelProvider::Anthropic,
"claude-3-sonnet".into(), name: "claude-3-sonnet".into(),
ModelRateLimits { max_requests_per_minute: 5,
max_requests_per_minute: 5, max_tokens_per_minute: 20_000,
max_tokens_per_minute: 20_000, max_tokens_per_day: 300_000,
max_tokens_per_day: 300_000, price_per_million_input_tokens: 1500, // $15.00/MTok
}, price_per_million_output_tokens: 7500, // $75.00/MTok
), },
( ModelParams {
LanguageModelProvider::Anthropic, provider: LanguageModelProvider::Anthropic,
"claude-3-haiku".into(), name: "claude-3-haiku".into(),
ModelRateLimits { max_requests_per_minute: 5,
max_requests_per_minute: 5, max_tokens_per_minute: 25_000,
max_tokens_per_minute: 25_000, max_tokens_per_day: 300_000,
max_tokens_per_day: 300_000, price_per_million_input_tokens: 25, // $0.25/MTok
}, price_per_million_output_tokens: 125, // $1.25/MTok
), },
]) ])
.await .await
} }

View file

@ -13,6 +13,8 @@ pub struct Model {
pub max_requests_per_minute: i64, pub max_requests_per_minute: i64,
pub max_tokens_per_minute: i64, pub max_tokens_per_minute: i64,
pub max_tokens_per_day: i64, pub max_tokens_per_day: i64,
pub price_per_million_input_tokens: i32,
pub price_per_million_output_tokens: i32,
} }
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]

View file

@ -9,7 +9,8 @@ pub enum UsageMeasure {
RequestsPerMinute, RequestsPerMinute,
TokensPerMinute, TokensPerMinute,
TokensPerDay, TokensPerDay,
TokensPerMonth, InputTokensPerMonth,
OutputTokensPerMonth,
} }
#[derive(Clone, Debug, PartialEq, DeriveEntityModel)] #[derive(Clone, Debug, PartialEq, DeriveEntityModel)]

View file

@ -1,5 +1,5 @@
use crate::{ use crate::{
llm::db::{queries::providers::ModelRateLimits, queries::usages::Usage, LlmDatabase}, llm::db::{queries::providers::ModelParams, queries::usages::Usage, LlmDatabase},
test_llm_db, test_llm_db,
}; };
use chrono::{Duration, Utc}; use chrono::{Duration, Utc};
@ -13,15 +13,15 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
let model = "claude-3-5-sonnet"; let model = "claude-3-5-sonnet";
db.initialize().await.unwrap(); db.initialize().await.unwrap();
db.insert_models(&[( db.insert_models(&[ModelParams {
provider, provider,
model.to_string(), name: model.to_string(),
ModelRateLimits { max_requests_per_minute: 5,
max_requests_per_minute: 5, max_tokens_per_minute: 10_000,
max_tokens_per_minute: 10_000, max_tokens_per_day: 50_000,
max_tokens_per_day: 50_000, price_per_million_input_tokens: 50,
}, price_per_million_output_tokens: 50,
)]) }])
.await .await
.unwrap(); .unwrap();
@ -29,12 +29,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
let user_id = 123; let user_id = 123;
let now = t0; let now = t0;
db.record_usage(user_id, provider, model, 1000, now) db.record_usage(user_id, provider, model, 1000, 0, now)
.await .await
.unwrap(); .unwrap();
let now = t0 + Duration::seconds(10); let now = t0 + Duration::seconds(10);
db.record_usage(user_id, provider, model, 2000, now) db.record_usage(user_id, provider, model, 2000, 0, now)
.await .await
.unwrap(); .unwrap();
@ -45,7 +45,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 2, requests_this_minute: 2,
tokens_this_minute: 3000, tokens_this_minute: 3000,
tokens_this_day: 3000, tokens_this_day: 3000,
tokens_this_month: 3000, input_tokens_this_month: 3000,
output_tokens_this_month: 0,
spending_this_month: 0,
} }
); );
@ -57,12 +59,14 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 1, requests_this_minute: 1,
tokens_this_minute: 2000, tokens_this_minute: 2000,
tokens_this_day: 3000, tokens_this_day: 3000,
tokens_this_month: 3000, input_tokens_this_month: 3000,
output_tokens_this_month: 0,
spending_this_month: 0,
} }
); );
let now = t0 + Duration::seconds(60); let now = t0 + Duration::seconds(60);
db.record_usage(user_id, provider, model, 3000, now) db.record_usage(user_id, provider, model, 3000, 0, now)
.await .await
.unwrap(); .unwrap();
@ -73,7 +77,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 2, requests_this_minute: 2,
tokens_this_minute: 5000, tokens_this_minute: 5000,
tokens_this_day: 6000, tokens_this_day: 6000,
tokens_this_month: 6000, input_tokens_this_month: 6000,
output_tokens_this_month: 0,
spending_this_month: 0,
} }
); );
@ -86,11 +92,13 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 0, requests_this_minute: 0,
tokens_this_minute: 0, tokens_this_minute: 0,
tokens_this_day: 5000, tokens_this_day: 5000,
tokens_this_month: 6000, input_tokens_this_month: 6000,
output_tokens_this_month: 0,
spending_this_month: 0,
} }
); );
db.record_usage(user_id, provider, model, 4000, now) db.record_usage(user_id, provider, model, 4000, 0, now)
.await .await
.unwrap(); .unwrap();
@ -101,7 +109,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 1, requests_this_minute: 1,
tokens_this_minute: 4000, tokens_this_minute: 4000,
tokens_this_day: 9000, tokens_this_day: 9000,
tokens_this_month: 10000, input_tokens_this_month: 10000,
output_tokens_this_month: 0,
spending_this_month: 0,
} }
); );
@ -114,7 +124,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 0, requests_this_minute: 0,
tokens_this_minute: 0, tokens_this_minute: 0,
tokens_this_day: 0, tokens_this_day: 0,
tokens_this_month: 9000, input_tokens_this_month: 9000,
output_tokens_this_month: 0,
spending_this_month: 0,
} }
); );
} }

View file

@ -14,7 +14,9 @@ pub struct LlmUsageEventRow {
pub requests_this_minute: u64, pub requests_this_minute: u64,
pub tokens_this_minute: u64, pub tokens_this_minute: u64,
pub tokens_this_day: u64, pub tokens_this_day: u64,
pub tokens_this_month: u64, pub input_tokens_this_month: u64,
pub output_tokens_this_month: u64,
pub spending_this_month: u64,
} }
pub async fn report_llm_usage(client: &clickhouse::Client, row: LlmUsageEventRow) -> Result<()> { pub async fn report_llm_usage(client: &clickhouse::Client, row: LlmUsageEventRow) -> Result<()> {