Start to send data to Snowflake too (#20698)

This PR adds support for sending telemetry events to AWS Kinesis.

In our AWS account we now have three new things:
* The [Kinesis data
stream](https://us-east-1.console.aws.amazon.com/kinesis/home?region=us-east-1#/streams/details/zed-telemetry/monitoring)
that we will actually write to.
* A [Firehose for
Axiom](https://us-east-1.console.aws.amazon.com/firehose/home?region=us-east-1#/details/telemetry-to-axiom/monitoring)
that sends events from that stream to Axiom for ad-hoc queries over
recent data.
* A [Firehose for
Snowflake](https://us-east-1.console.aws.amazon.com/firehose/home?region=us-east-1#/details/telemetry-to-snowflake/monitoring)
that sends events from that stream to Snowflake for long-term retention.
This Firehose also backs up data into an S3 bucket in case we want to
change how the system works in the future.
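
Under the hood, events are written to the stream with the Kinesis `PutRecord`
API. As a rough sketch of that write path (the helper name and partition key
below are illustrative, not code from this PR):

```rust
use aws_sdk_kinesis::{primitives::Blob, Client};

// Hypothetical helper: write one JSON-encoded telemetry event to the stream.
// `stream_name` would come from the new `kinesis_stream` config value below.
async fn send_telemetry_event(
    client: &Client,
    stream_name: &str,
    event_json: String,
) -> anyhow::Result<()> {
    client
        .put_record()
        .stream_name(stream_name)
        // The partition key only controls shard placement; any reasonably
        // well-distributed value (e.g. a session id) works.
        .partition_key("session-1234")
        .data(Blob::new(event_json.into_bytes()))
        .send()
        .await?;
    Ok(())
}
```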

In a follow-up PR, we'll add support for ad-hoc telemetry events and
gradually move away from the current Clickhouse-defined schemas, though we
won't move off Clickhouse until we have what we need in Snowflake.

Co-Authored-By: Nathan <nathan@zed.dev>

Release Notes:

- N/A

@@ -170,6 +170,10 @@ pub struct Config {
pub blob_store_access_key: Option<String>,
pub blob_store_secret_key: Option<String>,
pub blob_store_bucket: Option<String>,
pub kinesis_region: Option<String>,
pub kinesis_stream: Option<String>,
pub kinesis_access_key: Option<String>,
pub kinesis_secret_key: Option<String>,
pub zed_environment: Arc<str>,
pub openai_api_key: Option<Arc<str>>,
pub google_ai_api_key: Option<Arc<str>>,
@@ -238,6 +242,10 @@ impl Config {
stripe_api_key: None,
supermaven_admin_api_key: None,
user_backfiller_github_access_token: None,
kinesis_region: None,
kinesis_access_key: None,
kinesis_secret_key: None,
kinesis_stream: None,
}
}
}
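
Assuming collab populates `Config` by deserializing environment variables
(for example with the `envy` crate — an assumption, not something this diff
confirms), the new fields would map to `KINESIS_*` variables:

```rust
// Sketch, assuming envy-style config loading:
//   KINESIS_REGION=us-east-1
//   KINESIS_STREAM=zed-telemetry
//   KINESIS_ACCESS_KEY=...   (keep real keys out of source control)
//   KINESIS_SECRET_KEY=...
let config: Config = envy::from_env()?;
```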
@@ -276,6 +284,7 @@ pub struct AppState {
pub rate_limiter: Arc<RateLimiter>,
pub executor: Executor,
pub clickhouse_client: Option<::clickhouse::Client>,
pub kinesis_client: Option<::aws_sdk_kinesis::Client>,
pub config: Config,
}
@@ -332,6 +341,11 @@ impl AppState {
.clickhouse_url
.as_ref()
.and_then(|_| build_clickhouse_client(&config).log_err()),
kinesis_client: if config.kinesis_access_key.is_some() {
build_kinesis_client(&config).await.log_err()
} else {
None
},
config,
};
Ok(Arc::new(this))
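
Because the client is optional, deployments without Kinesis credentials
(local development, self-hosting) simply build no client, and callers can
degrade gracefully. A sketch of guarded usage, assuming an already-serialized
`event_json: String` and the hypothetical `send_telemetry_event` helper from
above:

```rust
// Illustrative only: skip telemetry forwarding when no client is configured.
if let Some(kinesis_client) = app_state.kinesis_client.clone() {
    let stream = app_state.config.kinesis_stream.clone().unwrap_or_default();
    // Spawn so event delivery never blocks the request path.
    tokio::spawn(async move {
        send_telemetry_event(&kinesis_client, &stream, event_json)
            .await
            .log_err(); // util::ResultExt, as used elsewhere in this file
    });
}
```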
@@ -381,6 +395,35 @@ async fn build_blob_store_client(config: &Config) -> anyhow::Result<aws_sdk_s3::Client> {
Ok(aws_sdk_s3::Client::new(&s3_config))
}
async fn build_kinesis_client(config: &Config) -> anyhow::Result<aws_sdk_kinesis::Client> {
    let keys = aws_sdk_kinesis::config::Credentials::new(
config
.kinesis_access_key
.clone()
.ok_or_else(|| anyhow!("missing kinesis_access_key"))?,
config
.kinesis_secret_key
.clone()
.ok_or_else(|| anyhow!("missing kinesis_secret_key"))?,
None,
None,
"env",
);
let kinesis_config = aws_config::defaults(BehaviorVersion::latest())
.region(Region::new(
config
.kinesis_region
.clone()
.ok_or_else(|| anyhow!("missing blob_store_region"))?,
))
.credentials_provider(keys)
.load()
.await;
Ok(aws_sdk_kinesis::Client::new(&kinesis_config))
}
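
A small test could pin down the failure mode when credentials are missing.
This is hypothetical (it assumes a `Config::test()`-style constructor like
the one whose defaults appear in the `impl Config` hunk above):

```rust
#[tokio::test]
async fn building_kinesis_client_without_credentials_fails() {
    // Hypothetical: all kinesis_* fields default to None, so the builder
    // should return an error rather than an unauthenticated client.
    let config = Config::test();
    assert!(build_kinesis_client(&config).await.is_err());
}
```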
fn build_clickhouse_client(config: &Config) -> anyhow::Result<::clickhouse::Client> {
Ok(::clickhouse::Client::default()
.with_url(