parent
43ba0c9fa6
commit
03efd0d1d9
14 changed files with 18 additions and 1280 deletions
|
@ -1,14 +1,11 @@
|
|||
mod authorization;
|
||||
pub mod db;
|
||||
mod telemetry;
|
||||
mod token;
|
||||
|
||||
use crate::api::events::SnowflakeRow;
|
||||
use crate::api::CloudflareIpCountryHeader;
|
||||
use crate::build_kinesis_client;
|
||||
use crate::{
|
||||
build_clickhouse_client, db::UserId, executor::Executor, Cents, Config, Error, Result,
|
||||
};
|
||||
use crate::{db::UserId, executor::Executor, Cents, Config, Error, Result};
|
||||
use anyhow::{anyhow, Context as _};
|
||||
use authorization::authorize_access_to_language_model;
|
||||
use axum::routing::get;
|
||||
|
@ -40,7 +37,6 @@ use std::{
|
|||
task::{Context, Poll},
|
||||
};
|
||||
use strum::IntoEnumIterator;
|
||||
use telemetry::{report_llm_rate_limit, report_llm_usage, LlmRateLimitEventRow, LlmUsageEventRow};
|
||||
use tokio::sync::RwLock;
|
||||
use util::ResultExt;
|
||||
|
||||
|
@ -52,7 +48,6 @@ pub struct LlmState {
|
|||
pub db: Arc<LlmDatabase>,
|
||||
pub http_client: ReqwestClient,
|
||||
pub kinesis_client: Option<aws_sdk_kinesis::Client>,
|
||||
pub clickhouse_client: Option<clickhouse::Client>,
|
||||
active_user_count_by_model:
|
||||
RwLock<HashMap<(LanguageModelProvider, String), (DateTime<Utc>, ActiveUserCount)>>,
|
||||
}
|
||||
|
@ -89,10 +84,6 @@ impl LlmState {
|
|||
} else {
|
||||
None
|
||||
},
|
||||
clickhouse_client: config
|
||||
.clickhouse_url
|
||||
.as_ref()
|
||||
.and_then(|_| build_clickhouse_client(&config).log_err()),
|
||||
active_user_count_by_model: RwLock::new(HashMap::default()),
|
||||
config,
|
||||
};
|
||||
|
@ -630,34 +621,6 @@ async fn check_usage_limit(
|
|||
.await
|
||||
.log_err();
|
||||
|
||||
if let Some(client) = state.clickhouse_client.as_ref() {
|
||||
report_llm_rate_limit(
|
||||
client,
|
||||
LlmRateLimitEventRow {
|
||||
time: Utc::now().timestamp_millis(),
|
||||
user_id: claims.user_id as i32,
|
||||
is_staff: claims.is_staff,
|
||||
plan: match claims.plan {
|
||||
Plan::Free => "free".to_string(),
|
||||
Plan::ZedPro => "zed_pro".to_string(),
|
||||
},
|
||||
model: model.name.clone(),
|
||||
provider: provider.to_string(),
|
||||
usage_measure: resource.to_string(),
|
||||
requests_this_minute: usage.requests_this_minute as u64,
|
||||
tokens_this_minute: usage.tokens_this_minute as u64,
|
||||
tokens_this_day: usage.tokens_this_day as u64,
|
||||
users_in_recent_minutes: users_in_recent_minutes as u64,
|
||||
users_in_recent_days: users_in_recent_days as u64,
|
||||
max_requests_per_minute: per_user_max_requests_per_minute as u64,
|
||||
max_tokens_per_minute: per_user_max_tokens_per_minute as u64,
|
||||
max_tokens_per_day: per_user_max_tokens_per_day as u64,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.log_err();
|
||||
}
|
||||
|
||||
return Err(Error::http(
|
||||
StatusCode::TOO_MANY_REQUESTS,
|
||||
format!("Rate limit exceeded. Maximum {} reached.", resource),
|
||||
|
@ -765,44 +728,6 @@ impl<S> Drop for TokenCountingStream<S> {
|
|||
.write(&state.kinesis_client, &state.config.kinesis_stream)
|
||||
.await
|
||||
.log_err();
|
||||
|
||||
if let Some(clickhouse_client) = state.clickhouse_client.as_ref() {
|
||||
report_llm_usage(
|
||||
clickhouse_client,
|
||||
LlmUsageEventRow {
|
||||
time: Utc::now().timestamp_millis(),
|
||||
user_id: claims.user_id as i32,
|
||||
is_staff: claims.is_staff,
|
||||
plan: match claims.plan {
|
||||
Plan::Free => "free".to_string(),
|
||||
Plan::ZedPro => "zed_pro".to_string(),
|
||||
},
|
||||
model,
|
||||
provider: provider.to_string(),
|
||||
input_token_count: tokens.input as u64,
|
||||
cache_creation_input_token_count: tokens.input_cache_creation as u64,
|
||||
cache_read_input_token_count: tokens.input_cache_read as u64,
|
||||
output_token_count: tokens.output as u64,
|
||||
requests_this_minute: usage.requests_this_minute as u64,
|
||||
tokens_this_minute: usage.tokens_this_minute as u64,
|
||||
tokens_this_day: usage.tokens_this_day as u64,
|
||||
input_tokens_this_month: usage.tokens_this_month.input as u64,
|
||||
cache_creation_input_tokens_this_month: usage
|
||||
.tokens_this_month
|
||||
.input_cache_creation
|
||||
as u64,
|
||||
cache_read_input_tokens_this_month: usage
|
||||
.tokens_this_month
|
||||
.input_cache_read
|
||||
as u64,
|
||||
output_tokens_this_month: usage.tokens_this_month.output as u64,
|
||||
spending_this_month: usage.spending_this_month.0 as u64,
|
||||
lifetime_spending: usage.lifetime_spending.0 as u64,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.log_err();
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue