Capture telemetry data on per-user monthly LLM spending (#16050)
Release Notes:

- N/A

---------

Co-authored-by: Marshall <marshall@zed.dev>
This commit is contained in:
parent
8688b2ad19
commit
33e120d964
10 changed files with 153 additions and 85 deletions
|
@ -53,7 +53,7 @@ impl Model {
|
|||
Model::Claude3_5Sonnet => "claude-3-5-sonnet-20240620",
|
||||
Model::Claude3Opus => "claude-3-opus-20240229",
|
||||
Model::Claude3Sonnet => "claude-3-sonnet-20240229",
|
||||
Model::Claude3Haiku => "claude-3-opus-20240307",
|
||||
Model::Claude3Haiku => "claude-3-haiku-20240307",
|
||||
Self::Custom { name, .. } => name,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
ALTER TABLE models
|
||||
ADD COLUMN price_per_million_input_tokens integer NOT NULL DEFAULT 0,
|
||||
ADD COLUMN price_per_million_output_tokens integer NOT NULL DEFAULT 0;
|
|
@ -457,7 +457,8 @@ impl<S> Drop for TokenCountingStream<S> {
|
|||
claims.user_id as i32,
|
||||
provider,
|
||||
&model,
|
||||
input_token_count + output_token_count,
|
||||
input_token_count,
|
||||
output_token_count,
|
||||
Utc::now(),
|
||||
)
|
||||
.await
|
||||
|
@ -481,7 +482,9 @@ impl<S> Drop for TokenCountingStream<S> {
|
|||
requests_this_minute: usage.requests_this_minute as u64,
|
||||
tokens_this_minute: usage.tokens_this_minute as u64,
|
||||
tokens_this_day: usage.tokens_this_day as u64,
|
||||
tokens_this_month: usage.tokens_this_month as u64,
|
||||
input_tokens_this_month: usage.input_tokens_this_month as u64,
|
||||
output_tokens_this_month: usage.output_tokens_this_month as u64,
|
||||
spending_this_month: usage.spending_this_month as u64,
|
||||
},
|
||||
)
|
||||
.await
|
||||
|
|
|
@ -3,10 +3,14 @@ use sea_orm::QueryOrder;
|
|||
use std::str::FromStr;
|
||||
use strum::IntoEnumIterator as _;
|
||||
|
||||
pub struct ModelRateLimits {
|
||||
pub struct ModelParams {
|
||||
pub provider: LanguageModelProvider,
|
||||
pub name: String,
|
||||
pub max_requests_per_minute: i64,
|
||||
pub max_tokens_per_minute: i64,
|
||||
pub max_tokens_per_day: i64,
|
||||
pub price_per_million_input_tokens: i32,
|
||||
pub price_per_million_output_tokens: i32,
|
||||
}
|
||||
|
||||
impl LlmDatabase {
|
||||
|
@ -75,20 +79,23 @@ impl LlmDatabase {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn insert_models(
|
||||
&mut self,
|
||||
models: &[(LanguageModelProvider, String, ModelRateLimits)],
|
||||
) -> Result<()> {
|
||||
pub async fn insert_models(&mut self, models: &[ModelParams]) -> Result<()> {
|
||||
let all_provider_ids = &self.provider_ids;
|
||||
self.transaction(|tx| async move {
|
||||
model::Entity::insert_many(models.into_iter().map(|(provider, name, rate_limits)| {
|
||||
let provider_id = all_provider_ids[&provider];
|
||||
model::Entity::insert_many(models.into_iter().map(|model_params| {
|
||||
let provider_id = all_provider_ids[&model_params.provider];
|
||||
model::ActiveModel {
|
||||
provider_id: ActiveValue::set(provider_id),
|
||||
name: ActiveValue::set(name.clone()),
|
||||
max_requests_per_minute: ActiveValue::set(rate_limits.max_requests_per_minute),
|
||||
max_tokens_per_minute: ActiveValue::set(rate_limits.max_tokens_per_minute),
|
||||
max_tokens_per_day: ActiveValue::set(rate_limits.max_tokens_per_day),
|
||||
name: ActiveValue::set(model_params.name.clone()),
|
||||
max_requests_per_minute: ActiveValue::set(model_params.max_requests_per_minute),
|
||||
max_tokens_per_minute: ActiveValue::set(model_params.max_tokens_per_minute),
|
||||
max_tokens_per_day: ActiveValue::set(model_params.max_tokens_per_day),
|
||||
price_per_million_input_tokens: ActiveValue::set(
|
||||
model_params.price_per_million_input_tokens,
|
||||
),
|
||||
price_per_million_output_tokens: ActiveValue::set(
|
||||
model_params.price_per_million_output_tokens,
|
||||
),
|
||||
..Default::default()
|
||||
}
|
||||
}))
|
||||
|
|
|
@ -11,7 +11,9 @@ pub struct Usage {
|
|||
pub requests_this_minute: usize,
|
||||
pub tokens_this_minute: usize,
|
||||
pub tokens_this_day: usize,
|
||||
pub tokens_this_month: usize,
|
||||
pub input_tokens_this_month: usize,
|
||||
pub output_tokens_this_month: usize,
|
||||
pub spending_this_month: usize,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default)]
|
||||
|
@ -87,14 +89,20 @@ impl LlmDatabase {
|
|||
self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerMinute)?;
|
||||
let tokens_this_day =
|
||||
self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerDay)?;
|
||||
let tokens_this_month =
|
||||
self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerMonth)?;
|
||||
let input_tokens_this_month =
|
||||
self.get_usage_for_measure(&usages, now, UsageMeasure::InputTokensPerMonth)?;
|
||||
let output_tokens_this_month =
|
||||
self.get_usage_for_measure(&usages, now, UsageMeasure::OutputTokensPerMonth)?;
|
||||
let spending_this_month =
|
||||
calculate_spending(model, input_tokens_this_month, output_tokens_this_month);
|
||||
|
||||
Ok(Usage {
|
||||
requests_this_minute,
|
||||
tokens_this_minute,
|
||||
tokens_this_day,
|
||||
tokens_this_month,
|
||||
input_tokens_this_month,
|
||||
output_tokens_this_month,
|
||||
spending_this_month,
|
||||
})
|
||||
})
|
||||
.await
|
||||
|
@ -105,7 +113,8 @@ impl LlmDatabase {
|
|||
user_id: i32,
|
||||
provider: LanguageModelProvider,
|
||||
model_name: &str,
|
||||
token_count: usize,
|
||||
input_token_count: usize,
|
||||
output_token_count: usize,
|
||||
now: DateTimeUtc,
|
||||
) -> Result<Usage> {
|
||||
self.transaction(|tx| async move {
|
||||
|
@ -138,7 +147,7 @@ impl LlmDatabase {
|
|||
&usages,
|
||||
UsageMeasure::TokensPerMinute,
|
||||
now,
|
||||
token_count,
|
||||
input_token_count + output_token_count,
|
||||
&tx,
|
||||
)
|
||||
.await?;
|
||||
|
@ -149,27 +158,42 @@ impl LlmDatabase {
|
|||
&usages,
|
||||
UsageMeasure::TokensPerDay,
|
||||
now,
|
||||
token_count,
|
||||
input_token_count + output_token_count,
|
||||
&tx,
|
||||
)
|
||||
.await?;
|
||||
let tokens_this_month = self
|
||||
let input_tokens_this_month = self
|
||||
.update_usage_for_measure(
|
||||
user_id,
|
||||
model.id,
|
||||
&usages,
|
||||
UsageMeasure::TokensPerMonth,
|
||||
UsageMeasure::InputTokensPerMonth,
|
||||
now,
|
||||
token_count,
|
||||
input_token_count,
|
||||
&tx,
|
||||
)
|
||||
.await?;
|
||||
let output_tokens_this_month = self
|
||||
.update_usage_for_measure(
|
||||
user_id,
|
||||
model.id,
|
||||
&usages,
|
||||
UsageMeasure::OutputTokensPerMonth,
|
||||
now,
|
||||
output_token_count,
|
||||
&tx,
|
||||
)
|
||||
.await?;
|
||||
let spending_this_month =
|
||||
calculate_spending(model, input_tokens_this_month, output_tokens_this_month);
|
||||
|
||||
Ok(Usage {
|
||||
requests_this_minute,
|
||||
tokens_this_minute,
|
||||
tokens_this_day,
|
||||
tokens_this_month,
|
||||
input_tokens_this_month,
|
||||
output_tokens_this_month,
|
||||
spending_this_month,
|
||||
})
|
||||
})
|
||||
.await
|
||||
|
@ -303,6 +327,18 @@ impl LlmDatabase {
|
|||
}
|
||||
}
|
||||
|
||||
fn calculate_spending(
|
||||
model: &model::Model,
|
||||
input_tokens_this_month: usize,
|
||||
output_tokens_this_month: usize,
|
||||
) -> usize {
|
||||
let input_token_cost =
|
||||
input_tokens_this_month * model.price_per_million_input_tokens as usize / 1_000_000;
|
||||
let output_token_cost =
|
||||
output_tokens_this_month * model.price_per_million_output_tokens as usize / 1_000_000;
|
||||
input_token_cost + output_token_cost
|
||||
}
|
||||
|
||||
const MINUTE_BUCKET_COUNT: usize = 12;
|
||||
const DAY_BUCKET_COUNT: usize = 48;
|
||||
const MONTH_BUCKET_COUNT: usize = 30;
|
||||
|
@ -313,7 +349,8 @@ impl UsageMeasure {
|
|||
UsageMeasure::RequestsPerMinute => MINUTE_BUCKET_COUNT,
|
||||
UsageMeasure::TokensPerMinute => MINUTE_BUCKET_COUNT,
|
||||
UsageMeasure::TokensPerDay => DAY_BUCKET_COUNT,
|
||||
UsageMeasure::TokensPerMonth => MONTH_BUCKET_COUNT,
|
||||
UsageMeasure::InputTokensPerMonth => MONTH_BUCKET_COUNT,
|
||||
UsageMeasure::OutputTokensPerMonth => MONTH_BUCKET_COUNT,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -322,7 +359,8 @@ impl UsageMeasure {
|
|||
UsageMeasure::RequestsPerMinute => Duration::minutes(1),
|
||||
UsageMeasure::TokensPerMinute => Duration::minutes(1),
|
||||
UsageMeasure::TokensPerDay => Duration::hours(24),
|
||||
UsageMeasure::TokensPerMonth => Duration::days(30),
|
||||
UsageMeasure::InputTokensPerMonth => Duration::days(30),
|
||||
UsageMeasure::OutputTokensPerMonth => Duration::days(30),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,45 +1,45 @@
|
|||
use super::*;
|
||||
use crate::{Config, Result};
|
||||
use queries::providers::ModelRateLimits;
|
||||
use queries::providers::ModelParams;
|
||||
|
||||
pub async fn seed_database(_config: &Config, db: &mut LlmDatabase, _force: bool) -> Result<()> {
|
||||
db.insert_models(&[
|
||||
(
|
||||
LanguageModelProvider::Anthropic,
|
||||
"claude-3-5-sonnet".into(),
|
||||
ModelRateLimits {
|
||||
max_requests_per_minute: 5,
|
||||
max_tokens_per_minute: 20_000,
|
||||
max_tokens_per_day: 300_000,
|
||||
},
|
||||
),
|
||||
(
|
||||
LanguageModelProvider::Anthropic,
|
||||
"claude-3-opus".into(),
|
||||
ModelRateLimits {
|
||||
max_requests_per_minute: 5,
|
||||
max_tokens_per_minute: 10_000,
|
||||
max_tokens_per_day: 300_000,
|
||||
},
|
||||
),
|
||||
(
|
||||
LanguageModelProvider::Anthropic,
|
||||
"claude-3-sonnet".into(),
|
||||
ModelRateLimits {
|
||||
max_requests_per_minute: 5,
|
||||
max_tokens_per_minute: 20_000,
|
||||
max_tokens_per_day: 300_000,
|
||||
},
|
||||
),
|
||||
(
|
||||
LanguageModelProvider::Anthropic,
|
||||
"claude-3-haiku".into(),
|
||||
ModelRateLimits {
|
||||
max_requests_per_minute: 5,
|
||||
max_tokens_per_minute: 25_000,
|
||||
max_tokens_per_day: 300_000,
|
||||
},
|
||||
),
|
||||
ModelParams {
|
||||
provider: LanguageModelProvider::Anthropic,
|
||||
name: "claude-3-5-sonnet".into(),
|
||||
max_requests_per_minute: 5,
|
||||
max_tokens_per_minute: 20_000,
|
||||
max_tokens_per_day: 300_000,
|
||||
price_per_million_input_tokens: 300, // $3.00/MTok
|
||||
price_per_million_output_tokens: 1500, // $15.00/MTok
|
||||
},
|
||||
ModelParams {
|
||||
provider: LanguageModelProvider::Anthropic,
|
||||
name: "claude-3-opus".into(),
|
||||
max_requests_per_minute: 5,
|
||||
max_tokens_per_minute: 10_000,
|
||||
max_tokens_per_day: 300_000,
|
||||
price_per_million_input_tokens: 1500, // $15.00/MTok
|
||||
price_per_million_output_tokens: 7500, // $75.00/MTok
|
||||
},
|
||||
ModelParams {
|
||||
provider: LanguageModelProvider::Anthropic,
|
||||
name: "claude-3-sonnet".into(),
|
||||
max_requests_per_minute: 5,
|
||||
max_tokens_per_minute: 20_000,
|
||||
max_tokens_per_day: 300_000,
|
||||
price_per_million_input_tokens: 1500, // $15.00/MTok
|
||||
price_per_million_output_tokens: 7500, // $75.00/MTok
|
||||
},
|
||||
ModelParams {
|
||||
provider: LanguageModelProvider::Anthropic,
|
||||
name: "claude-3-haiku".into(),
|
||||
max_requests_per_minute: 5,
|
||||
max_tokens_per_minute: 25_000,
|
||||
max_tokens_per_day: 300_000,
|
||||
price_per_million_input_tokens: 25, // $0.25/MTok
|
||||
price_per_million_output_tokens: 125, // $1.25/MTok
|
||||
},
|
||||
])
|
||||
.await
|
||||
}
|
||||
|
|
|
@ -13,6 +13,8 @@ pub struct Model {
|
|||
pub max_requests_per_minute: i64,
|
||||
pub max_tokens_per_minute: i64,
|
||||
pub max_tokens_per_day: i64,
|
||||
pub price_per_million_input_tokens: i32,
|
||||
pub price_per_million_output_tokens: i32,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
|
||||
|
|
|
@ -9,7 +9,8 @@ pub enum UsageMeasure {
|
|||
RequestsPerMinute,
|
||||
TokensPerMinute,
|
||||
TokensPerDay,
|
||||
TokensPerMonth,
|
||||
InputTokensPerMonth,
|
||||
OutputTokensPerMonth,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::{
|
||||
llm::db::{queries::providers::ModelRateLimits, queries::usages::Usage, LlmDatabase},
|
||||
llm::db::{queries::providers::ModelParams, queries::usages::Usage, LlmDatabase},
|
||||
test_llm_db,
|
||||
};
|
||||
use chrono::{Duration, Utc};
|
||||
|
@ -13,15 +13,15 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
let model = "claude-3-5-sonnet";
|
||||
|
||||
db.initialize().await.unwrap();
|
||||
db.insert_models(&[(
|
||||
db.insert_models(&[ModelParams {
|
||||
provider,
|
||||
model.to_string(),
|
||||
ModelRateLimits {
|
||||
max_requests_per_minute: 5,
|
||||
max_tokens_per_minute: 10_000,
|
||||
max_tokens_per_day: 50_000,
|
||||
},
|
||||
)])
|
||||
name: model.to_string(),
|
||||
max_requests_per_minute: 5,
|
||||
max_tokens_per_minute: 10_000,
|
||||
max_tokens_per_day: 50_000,
|
||||
price_per_million_input_tokens: 50,
|
||||
price_per_million_output_tokens: 50,
|
||||
}])
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
@ -29,12 +29,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
let user_id = 123;
|
||||
|
||||
let now = t0;
|
||||
db.record_usage(user_id, provider, model, 1000, now)
|
||||
db.record_usage(user_id, provider, model, 1000, 0, now)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let now = t0 + Duration::seconds(10);
|
||||
db.record_usage(user_id, provider, model, 2000, now)
|
||||
db.record_usage(user_id, provider, model, 2000, 0, now)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
@ -45,7 +45,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
requests_this_minute: 2,
|
||||
tokens_this_minute: 3000,
|
||||
tokens_this_day: 3000,
|
||||
tokens_this_month: 3000,
|
||||
input_tokens_this_month: 3000,
|
||||
output_tokens_this_month: 0,
|
||||
spending_this_month: 0,
|
||||
}
|
||||
);
|
||||
|
||||
|
@ -57,12 +59,14 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
requests_this_minute: 1,
|
||||
tokens_this_minute: 2000,
|
||||
tokens_this_day: 3000,
|
||||
tokens_this_month: 3000,
|
||||
input_tokens_this_month: 3000,
|
||||
output_tokens_this_month: 0,
|
||||
spending_this_month: 0,
|
||||
}
|
||||
);
|
||||
|
||||
let now = t0 + Duration::seconds(60);
|
||||
db.record_usage(user_id, provider, model, 3000, now)
|
||||
db.record_usage(user_id, provider, model, 3000, 0, now)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
@ -73,7 +77,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
requests_this_minute: 2,
|
||||
tokens_this_minute: 5000,
|
||||
tokens_this_day: 6000,
|
||||
tokens_this_month: 6000,
|
||||
input_tokens_this_month: 6000,
|
||||
output_tokens_this_month: 0,
|
||||
spending_this_month: 0,
|
||||
}
|
||||
);
|
||||
|
||||
|
@ -86,11 +92,13 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
requests_this_minute: 0,
|
||||
tokens_this_minute: 0,
|
||||
tokens_this_day: 5000,
|
||||
tokens_this_month: 6000,
|
||||
input_tokens_this_month: 6000,
|
||||
output_tokens_this_month: 0,
|
||||
spending_this_month: 0,
|
||||
}
|
||||
);
|
||||
|
||||
db.record_usage(user_id, provider, model, 4000, now)
|
||||
db.record_usage(user_id, provider, model, 4000, 0, now)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
@ -101,7 +109,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
requests_this_minute: 1,
|
||||
tokens_this_minute: 4000,
|
||||
tokens_this_day: 9000,
|
||||
tokens_this_month: 10000,
|
||||
input_tokens_this_month: 10000,
|
||||
output_tokens_this_month: 0,
|
||||
spending_this_month: 0,
|
||||
}
|
||||
);
|
||||
|
||||
|
@ -114,7 +124,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
|
|||
requests_this_minute: 0,
|
||||
tokens_this_minute: 0,
|
||||
tokens_this_day: 0,
|
||||
tokens_this_month: 9000,
|
||||
input_tokens_this_month: 9000,
|
||||
output_tokens_this_month: 0,
|
||||
spending_this_month: 0,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
|
|
@ -14,7 +14,9 @@ pub struct LlmUsageEventRow {
|
|||
pub requests_this_minute: u64,
|
||||
pub tokens_this_minute: u64,
|
||||
pub tokens_this_day: u64,
|
||||
pub tokens_this_month: u64,
|
||||
pub input_tokens_this_month: u64,
|
||||
pub output_tokens_this_month: u64,
|
||||
pub spending_this_month: u64,
|
||||
}
|
||||
|
||||
pub async fn report_llm_usage(client: &clickhouse::Client, row: LlmUsageEventRow) -> Result<()> {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue