Report telemetry events for rate limit errors (#16130)
ClickHouse telemetry schema:

```
CREATE TABLE default.llm_rate_limit_events
(
    `time` DateTime64(3),
    `user_id` Int32,
    `is_staff` Bool,
    `plan` LowCardinality(String),
    `model` String,
    `provider` LowCardinality(String),
    `usage_measure` LowCardinality(String),
    `requests_this_minute` UInt64,
    `tokens_this_minute` UInt64,
    `tokens_this_day` UInt64,
    `max_requests_per_minute` UInt64,
    `max_tokens_per_minute` UInt64,
    `max_tokens_per_day` UInt64,
    `users_in_recent_minutes` UInt64,
    `users_in_recent_days` UInt64
)
ORDER BY tuple()
```

Release Notes:

- N/A

Co-authored-by: Marshall <marshall@zed.dev>
This commit is contained in:
parent
1674e12ccb
commit
a3c79218c4
2 changed files with 77 additions and 11 deletions
|
@ -18,7 +18,7 @@ use axum::{
|
||||||
Extension, Json, Router, TypedHeader,
|
Extension, Json, Router, TypedHeader,
|
||||||
};
|
};
|
||||||
use chrono::{DateTime, Duration, Utc};
|
use chrono::{DateTime, Duration, Utc};
|
||||||
use db::{ActiveUserCount, LlmDatabase};
|
use db::{usage_measure::UsageMeasure, ActiveUserCount, LlmDatabase};
|
||||||
use futures::{Stream, StreamExt as _};
|
use futures::{Stream, StreamExt as _};
|
||||||
use http_client::IsahcHttpClient;
|
use http_client::IsahcHttpClient;
|
||||||
use rpc::{
|
use rpc::{
|
||||||
|
@ -29,7 +29,7 @@ use std::{
|
||||||
sync::Arc,
|
sync::Arc,
|
||||||
task::{Context, Poll},
|
task::{Context, Poll},
|
||||||
};
|
};
|
||||||
use telemetry::{report_llm_usage, LlmUsageEventRow};
|
use telemetry::{report_llm_rate_limit, report_llm_usage, LlmRateLimitEventRow, LlmUsageEventRow};
|
||||||
use tokio::sync::RwLock;
|
use tokio::sync::RwLock;
|
||||||
use util::ResultExt;
|
use util::ResultExt;
|
||||||
|
|
||||||
|
@ -401,38 +401,75 @@ async fn check_usage_limit(
|
||||||
|
|
||||||
let active_users = state.get_active_user_count().await?;
|
let active_users = state.get_active_user_count().await?;
|
||||||
|
|
||||||
|
let users_in_recent_minutes = active_users.users_in_recent_minutes.max(1);
|
||||||
|
let users_in_recent_days = active_users.users_in_recent_days.max(1);
|
||||||
|
|
||||||
let per_user_max_requests_per_minute =
|
let per_user_max_requests_per_minute =
|
||||||
model.max_requests_per_minute as usize / active_users.users_in_recent_minutes.max(1);
|
model.max_requests_per_minute as usize / users_in_recent_minutes;
|
||||||
let per_user_max_tokens_per_minute =
|
let per_user_max_tokens_per_minute =
|
||||||
model.max_tokens_per_minute as usize / active_users.users_in_recent_minutes.max(1);
|
model.max_tokens_per_minute as usize / users_in_recent_minutes;
|
||||||
let per_user_max_tokens_per_day =
|
let per_user_max_tokens_per_day = model.max_tokens_per_day as usize / users_in_recent_days;
|
||||||
model.max_tokens_per_day as usize / active_users.users_in_recent_days.max(1);
|
|
||||||
|
|
||||||
let checks = [
|
let checks = [
|
||||||
(
|
(
|
||||||
usage.requests_this_minute,
|
usage.requests_this_minute,
|
||||||
per_user_max_requests_per_minute,
|
per_user_max_requests_per_minute,
|
||||||
"requests per minute",
|
UsageMeasure::RequestsPerMinute,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
usage.tokens_this_minute,
|
usage.tokens_this_minute,
|
||||||
per_user_max_tokens_per_minute,
|
per_user_max_tokens_per_minute,
|
||||||
"tokens per minute",
|
UsageMeasure::TokensPerMinute,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
usage.tokens_this_day,
|
usage.tokens_this_day,
|
||||||
per_user_max_tokens_per_day,
|
per_user_max_tokens_per_day,
|
||||||
"tokens per day",
|
UsageMeasure::TokensPerDay,
|
||||||
),
|
),
|
||||||
];
|
];
|
||||||
|
|
||||||
for (usage, limit, resource) in checks {
|
for (used, limit, usage_measure) in checks {
|
||||||
// Temporarily bypass rate-limiting for staff members.
|
// Temporarily bypass rate-limiting for staff members.
|
||||||
if claims.is_staff {
|
if claims.is_staff {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if usage > limit {
|
if used > limit {
|
||||||
|
let resource = match usage_measure {
|
||||||
|
UsageMeasure::RequestsPerMinute => "requests_per_minute",
|
||||||
|
UsageMeasure::TokensPerMinute => "tokens_per_minute",
|
||||||
|
UsageMeasure::TokensPerDay => "tokens_per_day",
|
||||||
|
_ => "",
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(client) = state.clickhouse_client.as_ref() {
|
||||||
|
report_llm_rate_limit(
|
||||||
|
client,
|
||||||
|
LlmRateLimitEventRow {
|
||||||
|
time: Utc::now().timestamp_millis(),
|
||||||
|
user_id: claims.user_id as i32,
|
||||||
|
is_staff: claims.is_staff,
|
||||||
|
plan: match claims.plan {
|
||||||
|
Plan::Free => "free".to_string(),
|
||||||
|
Plan::ZedPro => "zed_pro".to_string(),
|
||||||
|
},
|
||||||
|
model: model.name.clone(),
|
||||||
|
provider: provider.to_string(),
|
||||||
|
usage_measure: resource.to_string(),
|
||||||
|
requests_this_minute: usage.requests_this_minute as u64,
|
||||||
|
tokens_this_minute: usage.tokens_this_minute as u64,
|
||||||
|
tokens_this_day: usage.tokens_this_day as u64,
|
||||||
|
users_in_recent_minutes: users_in_recent_minutes as u64,
|
||||||
|
users_in_recent_days: users_in_recent_days as u64,
|
||||||
|
max_requests_per_minute: per_user_max_requests_per_minute as u64,
|
||||||
|
max_tokens_per_minute: per_user_max_tokens_per_minute as u64,
|
||||||
|
max_tokens_per_day: per_user_max_tokens_per_day as u64,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.log_err();
|
||||||
|
}
|
||||||
|
|
||||||
return Err(Error::http(
|
return Err(Error::http(
|
||||||
StatusCode::TOO_MANY_REQUESTS,
|
StatusCode::TOO_MANY_REQUESTS,
|
||||||
format!("Rate limit exceeded. Maximum {} reached.", resource),
|
format!("Rate limit exceeded. Maximum {} reached.", resource),
|
||||||
|
|
|
@ -19,9 +19,38 @@ pub struct LlmUsageEventRow {
|
||||||
pub spending_this_month: u64,
|
pub spending_this_month: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug, clickhouse::Row)]
|
||||||
|
pub struct LlmRateLimitEventRow {
|
||||||
|
pub time: i64,
|
||||||
|
pub user_id: i32,
|
||||||
|
pub is_staff: bool,
|
||||||
|
pub plan: String,
|
||||||
|
pub model: String,
|
||||||
|
pub provider: String,
|
||||||
|
pub usage_measure: String,
|
||||||
|
pub requests_this_minute: u64,
|
||||||
|
pub tokens_this_minute: u64,
|
||||||
|
pub tokens_this_day: u64,
|
||||||
|
pub users_in_recent_minutes: u64,
|
||||||
|
pub users_in_recent_days: u64,
|
||||||
|
pub max_requests_per_minute: u64,
|
||||||
|
pub max_tokens_per_minute: u64,
|
||||||
|
pub max_tokens_per_day: u64,
|
||||||
|
}
|
||||||
|
|
||||||
/// Writes one LLM usage event to the `llm_usage_events` ClickHouse table.
///
/// The insert is ended before returning.
///
/// # Errors
///
/// Returns an error if the insert cannot be created, the row cannot be
/// written, or the insert cannot be ended.
pub async fn report_llm_usage(client: &clickhouse::Client, row: LlmUsageEventRow) -> Result<()> {
    let mut insert = client.insert("llm_usage_events")?;
    insert.write(&row).await?;
    insert.end().await?;
    Ok(())
}
|
||||||
|
|
||||||
|
pub async fn report_llm_rate_limit(
|
||||||
|
client: &clickhouse::Client,
|
||||||
|
row: LlmRateLimitEventRow,
|
||||||
|
) -> Result<()> {
|
||||||
|
let mut insert = client.insert("llm_rate_limits")?;
|
||||||
|
insert.write(&row).await?;
|
||||||
|
insert.end().await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue