Stop sending data to Clickhouse (#21763)

Release Notes:

- N/A
This commit is contained in:
Conrad Irwin 2024-12-10 08:47:29 -07:00 committed by GitHub
parent 43ba0c9fa6
commit 03efd0d1d9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 18 additions and 1280 deletions

View file

@ -19,11 +19,6 @@ LLM_DATABASE_URL = "postgres://postgres@localhost/zed_llm"
LLM_DATABASE_MAX_CONNECTIONS = 5
LLM_API_SECRET = "llm-secret"
# CLICKHOUSE_URL = ""
# CLICKHOUSE_USER = "default"
# CLICKHOUSE_PASSWORD = ""
# CLICKHOUSE_DATABASE = "default"
# SLACK_PANICS_WEBHOOK = ""
# RUST_LOG=info

View file

@ -29,7 +29,6 @@ axum = { version = "0.6", features = ["json", "headers", "ws"] }
axum-extra = { version = "0.4", features = ["erased-json"] }
base64.workspace = true
chrono.workspace = true
clickhouse.workspace = true
clock.workspace = true
collections.workspace = true
dashmap.workspace = true

View file

@ -214,26 +214,6 @@ spec:
secretKeyRef:
name: blob-store
key: bucket
- name: CLICKHOUSE_URL
valueFrom:
secretKeyRef:
name: clickhouse
key: url
- name: CLICKHOUSE_USER
valueFrom:
secretKeyRef:
name: clickhouse
key: user
- name: CLICKHOUSE_PASSWORD
valueFrom:
secretKeyRef:
name: clickhouse
key: password
- name: CLICKHOUSE_DATABASE
valueFrom:
secretKeyRef:
name: clickhouse
key: database
- name: SLACK_PANICS_WEBHOOK
valueFrom:
secretKeyRef:

File diff suppressed because it is too large Load diff

View file

@ -1,28 +0,0 @@
use serde::Serialize;
/// Bulk-inserts `rows` into the Clickhouse table named `table`.
///
/// Does nothing when `rows` is empty. Otherwise opens a single insert,
/// streams every row through it, finalizes the insert, and logs how many
/// events were written.
pub async fn write_to_table<T: clickhouse::Row + Serialize + std::fmt::Debug>(
    table: &str,
    rows: &[T],
    clickhouse_client: &clickhouse::Client,
) -> anyhow::Result<()> {
    // Skip early rather than opening an empty insert against the server.
    if rows.is_empty() {
        return Ok(());
    }

    let mut batch = clickhouse_client.insert(table)?;
    for row in rows {
        batch.write(row).await?;
    }
    batch.end().await?;

    let event_count = rows.len();
    log::info!(
        "wrote {event_count} {event_specifier} to '{table}'",
        event_specifier = if event_count == 1 { "event" } else { "events" }
    );
    Ok(())
}

View file

@ -1,7 +1,6 @@
pub mod api;
pub mod auth;
mod cents;
pub mod clickhouse;
pub mod db;
pub mod env;
pub mod executor;
@ -151,10 +150,6 @@ pub struct Config {
pub seed_path: Option<PathBuf>,
pub database_max_connections: u32,
pub api_token: String,
pub clickhouse_url: Option<String>,
pub clickhouse_user: Option<String>,
pub clickhouse_password: Option<String>,
pub clickhouse_database: Option<String>,
pub invite_link_prefix: String,
pub livekit_server: Option<String>,
pub livekit_key: Option<String>,
@ -236,10 +231,6 @@ impl Config {
prediction_api_url: None,
prediction_api_key: None,
prediction_model: None,
clickhouse_url: None,
clickhouse_user: None,
clickhouse_password: None,
clickhouse_database: None,
zed_client_checksum_seed: None,
slack_panics_webhook: None,
auto_join_channel_id: None,
@ -289,7 +280,6 @@ pub struct AppState {
pub stripe_billing: Option<Arc<StripeBilling>>,
pub rate_limiter: Arc<RateLimiter>,
pub executor: Executor,
pub clickhouse_client: Option<::clickhouse::Client>,
pub kinesis_client: Option<::aws_sdk_kinesis::Client>,
pub config: Config,
}
@ -343,10 +333,6 @@ impl AppState {
stripe_client,
rate_limiter: Arc::new(RateLimiter::new(db)),
executor,
clickhouse_client: config
.clickhouse_url
.as_ref()
.and_then(|_| build_clickhouse_client(&config).log_err()),
kinesis_client: if config.kinesis_access_key.is_some() {
build_kinesis_client(&config).await.log_err()
} else {
@ -429,31 +415,3 @@ async fn build_kinesis_client(config: &Config) -> anyhow::Result<aws_sdk_kinesis
Ok(aws_sdk_kinesis::Client::new(&kinesis_config))
}
/// Constructs a Clickhouse client from the four `clickhouse_*` settings in
/// `config`.
///
/// Every setting is mandatory; returns an error naming the first one that is
/// absent (checked in url, user, password, database order).
fn build_clickhouse_client(config: &Config) -> anyhow::Result<::clickhouse::Client> {
    let url = config
        .clickhouse_url
        .as_ref()
        .ok_or_else(|| anyhow!("missing clickhouse_url"))?;
    let user = config
        .clickhouse_user
        .as_ref()
        .ok_or_else(|| anyhow!("missing clickhouse_user"))?;
    let password = config
        .clickhouse_password
        .as_ref()
        .ok_or_else(|| anyhow!("missing clickhouse_password"))?;
    let database = config
        .clickhouse_database
        .as_ref()
        .ok_or_else(|| anyhow!("missing clickhouse_database"))?;

    Ok(::clickhouse::Client::default()
        .with_url(url)
        .with_user(user)
        .with_password(password)
        .with_database(database))
}

View file

@ -1,14 +1,11 @@
mod authorization;
pub mod db;
mod telemetry;
mod token;
use crate::api::events::SnowflakeRow;
use crate::api::CloudflareIpCountryHeader;
use crate::build_kinesis_client;
use crate::{
build_clickhouse_client, db::UserId, executor::Executor, Cents, Config, Error, Result,
};
use crate::{db::UserId, executor::Executor, Cents, Config, Error, Result};
use anyhow::{anyhow, Context as _};
use authorization::authorize_access_to_language_model;
use axum::routing::get;
@ -40,7 +37,6 @@ use std::{
task::{Context, Poll},
};
use strum::IntoEnumIterator;
use telemetry::{report_llm_rate_limit, report_llm_usage, LlmRateLimitEventRow, LlmUsageEventRow};
use tokio::sync::RwLock;
use util::ResultExt;
@ -52,7 +48,6 @@ pub struct LlmState {
pub db: Arc<LlmDatabase>,
pub http_client: ReqwestClient,
pub kinesis_client: Option<aws_sdk_kinesis::Client>,
pub clickhouse_client: Option<clickhouse::Client>,
active_user_count_by_model:
RwLock<HashMap<(LanguageModelProvider, String), (DateTime<Utc>, ActiveUserCount)>>,
}
@ -89,10 +84,6 @@ impl LlmState {
} else {
None
},
clickhouse_client: config
.clickhouse_url
.as_ref()
.and_then(|_| build_clickhouse_client(&config).log_err()),
active_user_count_by_model: RwLock::new(HashMap::default()),
config,
};
@ -630,34 +621,6 @@ async fn check_usage_limit(
.await
.log_err();
if let Some(client) = state.clickhouse_client.as_ref() {
report_llm_rate_limit(
client,
LlmRateLimitEventRow {
time: Utc::now().timestamp_millis(),
user_id: claims.user_id as i32,
is_staff: claims.is_staff,
plan: match claims.plan {
Plan::Free => "free".to_string(),
Plan::ZedPro => "zed_pro".to_string(),
},
model: model.name.clone(),
provider: provider.to_string(),
usage_measure: resource.to_string(),
requests_this_minute: usage.requests_this_minute as u64,
tokens_this_minute: usage.tokens_this_minute as u64,
tokens_this_day: usage.tokens_this_day as u64,
users_in_recent_minutes: users_in_recent_minutes as u64,
users_in_recent_days: users_in_recent_days as u64,
max_requests_per_minute: per_user_max_requests_per_minute as u64,
max_tokens_per_minute: per_user_max_tokens_per_minute as u64,
max_tokens_per_day: per_user_max_tokens_per_day as u64,
},
)
.await
.log_err();
}
return Err(Error::http(
StatusCode::TOO_MANY_REQUESTS,
format!("Rate limit exceeded. Maximum {} reached.", resource),
@ -765,44 +728,6 @@ impl<S> Drop for TokenCountingStream<S> {
.write(&state.kinesis_client, &state.config.kinesis_stream)
.await
.log_err();
if let Some(clickhouse_client) = state.clickhouse_client.as_ref() {
report_llm_usage(
clickhouse_client,
LlmUsageEventRow {
time: Utc::now().timestamp_millis(),
user_id: claims.user_id as i32,
is_staff: claims.is_staff,
plan: match claims.plan {
Plan::Free => "free".to_string(),
Plan::ZedPro => "zed_pro".to_string(),
},
model,
provider: provider.to_string(),
input_token_count: tokens.input as u64,
cache_creation_input_token_count: tokens.input_cache_creation as u64,
cache_read_input_token_count: tokens.input_cache_read as u64,
output_token_count: tokens.output as u64,
requests_this_minute: usage.requests_this_minute as u64,
tokens_this_minute: usage.tokens_this_minute as u64,
tokens_this_day: usage.tokens_this_day as u64,
input_tokens_this_month: usage.tokens_this_month.input as u64,
cache_creation_input_tokens_this_month: usage
.tokens_this_month
.input_cache_creation
as u64,
cache_read_input_tokens_this_month: usage
.tokens_this_month
.input_cache_read
as u64,
output_tokens_this_month: usage.tokens_this_month.output as u64,
spending_this_month: usage.spending_this_month.0 as u64,
lifetime_spending: usage.lifetime_spending.0 as u64,
},
)
.await
.log_err();
}
}
})
}

View file

@ -1,65 +0,0 @@
use anyhow::{Context, Result};
use serde::Serialize;
use crate::clickhouse::write_to_table;
/// One row in the `llm_usage_events` Clickhouse table, describing a single
/// completed LLM request and the requester's running usage totals.
///
/// NOTE(review): `clickhouse::Row` serializes fields positionally, so the
/// declaration order here must match the table's column order — do not
/// reorder fields.
#[derive(Serialize, Debug, clickhouse::Row)]
pub struct LlmUsageEventRow {
    // Event timestamp in milliseconds since the Unix epoch
    // (populated from `Utc::now().timestamp_millis()` at the call site).
    pub time: i64,
    pub user_id: i32,
    pub is_staff: bool,
    // Serialized plan name: "free" or "zed_pro".
    pub plan: String,
    pub model: String,
    pub provider: String,
    // Token counts for this single request.
    pub input_token_count: u64,
    pub cache_creation_input_token_count: u64,
    pub cache_read_input_token_count: u64,
    pub output_token_count: u64,
    // Rolling usage totals for the requesting user at event time.
    pub requests_this_minute: u64,
    pub tokens_this_minute: u64,
    pub tokens_this_day: u64,
    pub input_tokens_this_month: u64,
    pub cache_creation_input_tokens_this_month: u64,
    pub cache_read_input_tokens_this_month: u64,
    pub output_tokens_this_month: u64,
    // Monetary totals; populated from `Cents` values at the call site,
    // so presumably denominated in cents — verify against callers.
    pub spending_this_month: u64,
    pub lifetime_spending: u64,
}
/// One row in the `llm_rate_limit_events` Clickhouse table, recorded when a
/// user hits an LLM usage limit.
///
/// NOTE(review): `clickhouse::Row` serializes fields positionally, so the
/// declaration order here must match the table's column order — do not
/// reorder fields.
#[derive(Serialize, Debug, clickhouse::Row)]
pub struct LlmRateLimitEventRow {
    // Event timestamp in milliseconds since the Unix epoch.
    pub time: i64,
    pub user_id: i32,
    pub is_staff: bool,
    // Serialized plan name: "free" or "zed_pro".
    pub plan: String,
    pub model: String,
    pub provider: String,
    // Which measure was exceeded (stringified resource name,
    // e.g. requests/tokens per interval).
    pub usage_measure: String,
    // The user's usage at the moment the limit was hit.
    pub requests_this_minute: u64,
    pub tokens_this_minute: u64,
    pub tokens_this_day: u64,
    // Recent active-user counts used to scale per-user limits.
    pub users_in_recent_minutes: u64,
    pub users_in_recent_days: u64,
    // The effective per-user limits in force at event time.
    pub max_requests_per_minute: u64,
    pub max_tokens_per_minute: u64,
    pub max_tokens_per_day: u64,
}
/// Uploads a single usage event row to the `llm_usage_events` table,
/// wrapping any failure with the table name for context.
pub async fn report_llm_usage(client: &clickhouse::Client, row: LlmUsageEventRow) -> Result<()> {
    const TABLE: &str = "llm_usage_events";
    let outcome = write_to_table(TABLE, &[row], client).await;
    outcome.with_context(|| format!("failed to upload to table '{TABLE}'"))?;
    Ok(())
}
/// Uploads a single rate-limit event row to the `llm_rate_limit_events`
/// table, wrapping any failure with the table name for context.
pub async fn report_llm_rate_limit(
    client: &clickhouse::Client,
    row: LlmRateLimitEventRow,
) -> Result<()> {
    const TABLE: &str = "llm_rate_limit_events";
    let outcome = write_to_table(TABLE, &[row], client).await;
    outcome.with_context(|| format!("failed to upload to table '{TABLE}'"))?;
    Ok(())
}

View file

@ -518,7 +518,6 @@ impl TestServer {
stripe_billing: None,
rate_limiter: Arc::new(RateLimiter::new(test_db.db().clone())),
executor,
clickhouse_client: None,
kinesis_client: None,
config: Config {
http_port: 0,
@ -549,10 +548,6 @@ impl TestServer {
prediction_api_url: None,
prediction_api_key: None,
prediction_model: None,
clickhouse_url: None,
clickhouse_user: None,
clickhouse_password: None,
clickhouse_database: None,
zed_client_checksum_seed: None,
slack_panics_webhook: None,
auto_join_channel_id: None,