Capture telemetry data on per-user monthly LLM spending (#16050)

Release Notes:

- N/A

---------

Co-authored-by: Marshall <marshall@zed.dev>
This commit is contained in:
Max Brunsfeld 2024-08-09 16:38:37 -07:00 committed by GitHub
parent 8688b2ad19
commit 33e120d964
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 153 additions and 85 deletions

View file

@ -53,7 +53,7 @@ impl Model {
Model::Claude3_5Sonnet => "claude-3-5-sonnet-20240620",
Model::Claude3Opus => "claude-3-opus-20240229",
Model::Claude3Sonnet => "claude-3-sonnet-20240229",
Model::Claude3Haiku => "claude-3-opus-20240307",
Model::Claude3Haiku => "claude-3-haiku-20240307",
Self::Custom { name, .. } => name,
}
}

View file

@ -0,0 +1,3 @@
-- Add per-model token pricing to the `models` table. Both columns are
-- non-null integers and default to 0, so existing rows are priced at zero
-- until they are updated.
-- NOTE(review): the seed data in this change annotates a value of 300 as
-- "$3.00/MTok", so the unit appears to be cents per million tokens - confirm.
ALTER TABLE models
ADD COLUMN price_per_million_input_tokens integer NOT NULL DEFAULT 0,
ADD COLUMN price_per_million_output_tokens integer NOT NULL DEFAULT 0;

View file

@ -457,7 +457,8 @@ impl<S> Drop for TokenCountingStream<S> {
claims.user_id as i32,
provider,
&model,
input_token_count + output_token_count,
input_token_count,
output_token_count,
Utc::now(),
)
.await
@ -481,7 +482,9 @@ impl<S> Drop for TokenCountingStream<S> {
requests_this_minute: usage.requests_this_minute as u64,
tokens_this_minute: usage.tokens_this_minute as u64,
tokens_this_day: usage.tokens_this_day as u64,
tokens_this_month: usage.tokens_this_month as u64,
input_tokens_this_month: usage.input_tokens_this_month as u64,
output_tokens_this_month: usage.output_tokens_this_month as u64,
spending_this_month: usage.spending_this_month as u64,
},
)
.await

View file

@ -3,10 +3,14 @@ use sea_orm::QueryOrder;
use std::str::FromStr;
use strum::IntoEnumIterator as _;
pub struct ModelRateLimits {
/// Parameters describing one language model row, as accepted by
/// `LlmDatabase::insert_models`.
pub struct ModelParams {
/// Provider that serves this model; used to look up the provider id on insert.
pub provider: LanguageModelProvider,
/// Model name stored in the `models` table (e.g. "claude-3-5-sonnet").
pub name: String,
/// Value stored in the model's `max_requests_per_minute` column.
pub max_requests_per_minute: i64,
/// Value stored in the model's `max_tokens_per_minute` column.
pub max_tokens_per_minute: i64,
/// Value stored in the model's `max_tokens_per_day` column.
pub max_tokens_per_day: i64,
/// Price charged per one million input tokens.
/// NOTE(review): seed data annotates 300 as "$3.00/MTok", so this looks like cents - confirm.
pub price_per_million_input_tokens: i32,
/// Price charged per one million output tokens (presumably the same unit as the input price).
pub price_per_million_output_tokens: i32,
}
impl LlmDatabase {
@ -75,20 +79,23 @@ impl LlmDatabase {
Ok(())
}
pub async fn insert_models(
&mut self,
models: &[(LanguageModelProvider, String, ModelRateLimits)],
) -> Result<()> {
pub async fn insert_models(&mut self, models: &[ModelParams]) -> Result<()> {
let all_provider_ids = &self.provider_ids;
self.transaction(|tx| async move {
model::Entity::insert_many(models.into_iter().map(|(provider, name, rate_limits)| {
let provider_id = all_provider_ids[&provider];
model::Entity::insert_many(models.into_iter().map(|model_params| {
let provider_id = all_provider_ids[&model_params.provider];
model::ActiveModel {
provider_id: ActiveValue::set(provider_id),
name: ActiveValue::set(name.clone()),
max_requests_per_minute: ActiveValue::set(rate_limits.max_requests_per_minute),
max_tokens_per_minute: ActiveValue::set(rate_limits.max_tokens_per_minute),
max_tokens_per_day: ActiveValue::set(rate_limits.max_tokens_per_day),
name: ActiveValue::set(model_params.name.clone()),
max_requests_per_minute: ActiveValue::set(model_params.max_requests_per_minute),
max_tokens_per_minute: ActiveValue::set(model_params.max_tokens_per_minute),
max_tokens_per_day: ActiveValue::set(model_params.max_tokens_per_day),
price_per_million_input_tokens: ActiveValue::set(
model_params.price_per_million_input_tokens,
),
price_per_million_output_tokens: ActiveValue::set(
model_params.price_per_million_output_tokens,
),
..Default::default()
}
}))

View file

@ -11,7 +11,9 @@ pub struct Usage {
pub requests_this_minute: usize,
pub tokens_this_minute: usize,
pub tokens_this_day: usize,
pub tokens_this_month: usize,
pub input_tokens_this_month: usize,
pub output_tokens_this_month: usize,
pub spending_this_month: usize,
}
#[derive(Clone, Copy, Debug, Default)]
@ -87,14 +89,20 @@ impl LlmDatabase {
self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerMinute)?;
let tokens_this_day =
self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerDay)?;
let tokens_this_month =
self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerMonth)?;
let input_tokens_this_month =
self.get_usage_for_measure(&usages, now, UsageMeasure::InputTokensPerMonth)?;
let output_tokens_this_month =
self.get_usage_for_measure(&usages, now, UsageMeasure::OutputTokensPerMonth)?;
let spending_this_month =
calculate_spending(model, input_tokens_this_month, output_tokens_this_month);
Ok(Usage {
requests_this_minute,
tokens_this_minute,
tokens_this_day,
tokens_this_month,
input_tokens_this_month,
output_tokens_this_month,
spending_this_month,
})
})
.await
@ -105,7 +113,8 @@ impl LlmDatabase {
user_id: i32,
provider: LanguageModelProvider,
model_name: &str,
token_count: usize,
input_token_count: usize,
output_token_count: usize,
now: DateTimeUtc,
) -> Result<Usage> {
self.transaction(|tx| async move {
@ -138,7 +147,7 @@ impl LlmDatabase {
&usages,
UsageMeasure::TokensPerMinute,
now,
token_count,
input_token_count + output_token_count,
&tx,
)
.await?;
@ -149,27 +158,42 @@ impl LlmDatabase {
&usages,
UsageMeasure::TokensPerDay,
now,
token_count,
input_token_count + output_token_count,
&tx,
)
.await?;
let tokens_this_month = self
let input_tokens_this_month = self
.update_usage_for_measure(
user_id,
model.id,
&usages,
UsageMeasure::TokensPerMonth,
UsageMeasure::InputTokensPerMonth,
now,
token_count,
input_token_count,
&tx,
)
.await?;
let output_tokens_this_month = self
.update_usage_for_measure(
user_id,
model.id,
&usages,
UsageMeasure::OutputTokensPerMonth,
now,
output_token_count,
&tx,
)
.await?;
let spending_this_month =
calculate_spending(model, input_tokens_this_month, output_tokens_this_month);
Ok(Usage {
requests_this_minute,
tokens_this_minute,
tokens_this_day,
tokens_this_month,
input_tokens_this_month,
output_tokens_this_month,
spending_this_month,
})
})
.await
@ -303,6 +327,18 @@ impl LlmDatabase {
}
}
/// Computes a user's spending for the month on `model` from the monthly
/// input and output token counts.
///
/// The result is expressed in the same unit as the model's
/// `price_per_million_input_tokens` / `price_per_million_output_tokens`
/// fields, rounded down to a whole unit.
///
/// Implementation notes:
/// - The input and output costs are summed *before* dividing by one million,
///   so the result is truncated once instead of once per term (which could
///   under-report by up to two units).
/// - Arithmetic is performed in `u128`, so `tokens * price` cannot overflow
///   even where `usize` is 32 bits wide.
/// - Prices are stored as `i32`; a negative price is treated as zero rather
///   than being reinterpreted as a huge unsigned value by an `as` cast.
fn calculate_spending(
    model: &model::Model,
    input_tokens_this_month: usize,
    output_tokens_this_month: usize,
) -> usize {
    const TOKENS_PER_PRICE_UNIT: u128 = 1_000_000;

    // `try_from` fails only for negative prices; those contribute no cost.
    let input_price = u128::try_from(model.price_per_million_input_tokens).unwrap_or(0);
    let output_price = u128::try_from(model.price_per_million_output_tokens).unwrap_or(0);

    let total_cost_millionths = input_tokens_this_month as u128 * input_price
        + output_tokens_this_month as u128 * output_price;

    // Saturate instead of wrapping if the quotient does not fit in `usize`.
    usize::try_from(total_cost_millionths / TOKENS_PER_PRICE_UNIT).unwrap_or(usize::MAX)
}
const MINUTE_BUCKET_COUNT: usize = 12;
const DAY_BUCKET_COUNT: usize = 48;
const MONTH_BUCKET_COUNT: usize = 30;
@ -313,7 +349,8 @@ impl UsageMeasure {
UsageMeasure::RequestsPerMinute => MINUTE_BUCKET_COUNT,
UsageMeasure::TokensPerMinute => MINUTE_BUCKET_COUNT,
UsageMeasure::TokensPerDay => DAY_BUCKET_COUNT,
UsageMeasure::TokensPerMonth => MONTH_BUCKET_COUNT,
UsageMeasure::InputTokensPerMonth => MONTH_BUCKET_COUNT,
UsageMeasure::OutputTokensPerMonth => MONTH_BUCKET_COUNT,
}
}
@ -322,7 +359,8 @@ impl UsageMeasure {
UsageMeasure::RequestsPerMinute => Duration::minutes(1),
UsageMeasure::TokensPerMinute => Duration::minutes(1),
UsageMeasure::TokensPerDay => Duration::hours(24),
UsageMeasure::TokensPerMonth => Duration::days(30),
UsageMeasure::InputTokensPerMonth => Duration::days(30),
UsageMeasure::OutputTokensPerMonth => Duration::days(30),
}
}

View file

@ -1,45 +1,45 @@
use super::*;
use crate::{Config, Result};
use queries::providers::ModelRateLimits;
use queries::providers::ModelParams;
pub async fn seed_database(_config: &Config, db: &mut LlmDatabase, _force: bool) -> Result<()> {
db.insert_models(&[
(
LanguageModelProvider::Anthropic,
"claude-3-5-sonnet".into(),
ModelRateLimits {
max_requests_per_minute: 5,
max_tokens_per_minute: 20_000,
max_tokens_per_day: 300_000,
},
),
(
LanguageModelProvider::Anthropic,
"claude-3-opus".into(),
ModelRateLimits {
max_requests_per_minute: 5,
max_tokens_per_minute: 10_000,
max_tokens_per_day: 300_000,
},
),
(
LanguageModelProvider::Anthropic,
"claude-3-sonnet".into(),
ModelRateLimits {
max_requests_per_minute: 5,
max_tokens_per_minute: 20_000,
max_tokens_per_day: 300_000,
},
),
(
LanguageModelProvider::Anthropic,
"claude-3-haiku".into(),
ModelRateLimits {
max_requests_per_minute: 5,
max_tokens_per_minute: 25_000,
max_tokens_per_day: 300_000,
},
),
ModelParams {
provider: LanguageModelProvider::Anthropic,
name: "claude-3-5-sonnet".into(),
max_requests_per_minute: 5,
max_tokens_per_minute: 20_000,
max_tokens_per_day: 300_000,
price_per_million_input_tokens: 300, // $3.00/MTok
price_per_million_output_tokens: 1500, // $15.00/MTok
},
ModelParams {
provider: LanguageModelProvider::Anthropic,
name: "claude-3-opus".into(),
max_requests_per_minute: 5,
max_tokens_per_minute: 10_000,
max_tokens_per_day: 300_000,
price_per_million_input_tokens: 1500, // $15.00/MTok
price_per_million_output_tokens: 7500, // $75.00/MTok
},
ModelParams {
    provider: LanguageModelProvider::Anthropic,
    name: "claude-3-sonnet".into(),
    max_requests_per_minute: 5,
    max_tokens_per_minute: 20_000,
    max_tokens_per_day: 300_000,
    // Claude 3 Sonnet is priced at $3/$15 per MTok; the previous values
    // (1500/7500) duplicated the Claude 3 Opus pricing.
    price_per_million_input_tokens: 300, // $3.00/MTok
    price_per_million_output_tokens: 1500, // $15.00/MTok
},
ModelParams {
provider: LanguageModelProvider::Anthropic,
name: "claude-3-haiku".into(),
max_requests_per_minute: 5,
max_tokens_per_minute: 25_000,
max_tokens_per_day: 300_000,
price_per_million_input_tokens: 25, // $0.25/MTok
price_per_million_output_tokens: 125, // $1.25/MTok
},
])
.await
}

View file

@ -13,6 +13,8 @@ pub struct Model {
pub max_requests_per_minute: i64,
pub max_tokens_per_minute: i64,
pub max_tokens_per_day: i64,
pub price_per_million_input_tokens: i32,
pub price_per_million_output_tokens: i32,
}
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]

View file

@ -9,7 +9,8 @@ pub enum UsageMeasure {
RequestsPerMinute,
TokensPerMinute,
TokensPerDay,
TokensPerMonth,
InputTokensPerMonth,
OutputTokensPerMonth,
}
#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]

View file

@ -1,5 +1,5 @@
use crate::{
llm::db::{queries::providers::ModelRateLimits, queries::usages::Usage, LlmDatabase},
llm::db::{queries::providers::ModelParams, queries::usages::Usage, LlmDatabase},
test_llm_db,
};
use chrono::{Duration, Utc};
@ -13,15 +13,15 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
let model = "claude-3-5-sonnet";
db.initialize().await.unwrap();
db.insert_models(&[(
db.insert_models(&[ModelParams {
provider,
model.to_string(),
ModelRateLimits {
max_requests_per_minute: 5,
max_tokens_per_minute: 10_000,
max_tokens_per_day: 50_000,
},
)])
name: model.to_string(),
max_requests_per_minute: 5,
max_tokens_per_minute: 10_000,
max_tokens_per_day: 50_000,
price_per_million_input_tokens: 50,
price_per_million_output_tokens: 50,
}])
.await
.unwrap();
@ -29,12 +29,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
let user_id = 123;
let now = t0;
db.record_usage(user_id, provider, model, 1000, now)
db.record_usage(user_id, provider, model, 1000, 0, now)
.await
.unwrap();
let now = t0 + Duration::seconds(10);
db.record_usage(user_id, provider, model, 2000, now)
db.record_usage(user_id, provider, model, 2000, 0, now)
.await
.unwrap();
@ -45,7 +45,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 2,
tokens_this_minute: 3000,
tokens_this_day: 3000,
tokens_this_month: 3000,
input_tokens_this_month: 3000,
output_tokens_this_month: 0,
spending_this_month: 0,
}
);
@ -57,12 +59,14 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 1,
tokens_this_minute: 2000,
tokens_this_day: 3000,
tokens_this_month: 3000,
input_tokens_this_month: 3000,
output_tokens_this_month: 0,
spending_this_month: 0,
}
);
let now = t0 + Duration::seconds(60);
db.record_usage(user_id, provider, model, 3000, now)
db.record_usage(user_id, provider, model, 3000, 0, now)
.await
.unwrap();
@ -73,7 +77,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 2,
tokens_this_minute: 5000,
tokens_this_day: 6000,
tokens_this_month: 6000,
input_tokens_this_month: 6000,
output_tokens_this_month: 0,
spending_this_month: 0,
}
);
@ -86,11 +92,13 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 0,
tokens_this_minute: 0,
tokens_this_day: 5000,
tokens_this_month: 6000,
input_tokens_this_month: 6000,
output_tokens_this_month: 0,
spending_this_month: 0,
}
);
db.record_usage(user_id, provider, model, 4000, now)
db.record_usage(user_id, provider, model, 4000, 0, now)
.await
.unwrap();
@ -101,7 +109,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 1,
tokens_this_minute: 4000,
tokens_this_day: 9000,
tokens_this_month: 10000,
input_tokens_this_month: 10000,
output_tokens_this_month: 0,
spending_this_month: 0,
}
);
@ -114,7 +124,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 0,
tokens_this_minute: 0,
tokens_this_day: 0,
tokens_this_month: 9000,
input_tokens_this_month: 9000,
output_tokens_this_month: 0,
spending_this_month: 0,
}
);
}

View file

@ -14,7 +14,9 @@ pub struct LlmUsageEventRow {
pub requests_this_minute: u64,
pub tokens_this_minute: u64,
pub tokens_this_day: u64,
pub tokens_this_month: u64,
pub input_tokens_this_month: u64,
pub output_tokens_this_month: u64,
pub spending_this_month: u64,
}
pub async fn report_llm_usage(client: &clickhouse::Client, row: LlmUsageEventRow) -> Result<()> {