Upload crashes to collab directly (#8649)

This lets us run rustc_demangle on the backtrace, which helps the Slack
view significantly.

We're also now uploading files to digital ocean's S3 equivalent (with a
1 month expiry) instead of to Slack.

This PR paves the way for (but does not yet implement) sending this data
to clickhouse too.

Release Notes:

- N/A
This commit is contained in:
Conrad Irwin 2024-03-01 13:23:44 -07:00 committed by GitHub
parent cdf702aeff
commit 64460e492a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 657 additions and 27 deletions

View file

@ -1,21 +1,27 @@
use std::sync::{Arc, OnceLock};
use anyhow::{anyhow, Context};
use aws_sdk_s3::primitives::ByteStream;
use axum::{
body::Bytes, headers::Header, http::HeaderName, routing::post, Extension, Router, TypedHeader,
};
use hyper::StatusCode;
use hyper::{HeaderMap, StatusCode};
use serde::{Serialize, Serializer};
use sha2::{Digest, Sha256};
use telemetry_events::{
ActionEvent, AppEvent, AssistantEvent, CallEvent, CopilotEvent, CpuEvent, EditEvent,
EditorEvent, Event, EventRequestBody, EventWrapper, MemoryEvent, SettingEvent,
};
use util::SemanticVersion;
use crate::{AppState, Error, Result};
use crate::{api::slack, AppState, Error, Result};
use super::ips_file::IpsFile;
pub fn router() -> Router {
Router::new().route("/telemetry/events", post(post_events))
Router::new()
.route("/telemetry/events", post(post_events))
.route("/telemetry/crashes", post(post_crash))
}
pub struct ZedChecksumHeader(Vec<u8>);
@ -73,6 +79,140 @@ impl Header for CloudflareIpCountryHeader {
}
}
pub async fn post_crash(
Extension(app): Extension<Arc<AppState>>,
body: Bytes,
headers: HeaderMap,
) -> Result<()> {
static CRASH_REPORTS_BUCKET: &str = "zed-crash-reports";
let report = IpsFile::parse(&body)?;
let version_threshold = SemanticVersion::new(0, 123, 0);
let bundle_id = &report.header.bundle_id;
let app_version = &report.app_version();
if bundle_id == "dev.zed.Zed-Dev" {
log::error!("Crash uploads from {} are ignored.", bundle_id);
return Ok(());
}
if app_version.is_none() || app_version.unwrap() < version_threshold {
log::error!(
"Crash uploads from {} are ignored.",
report.header.app_version
);
return Ok(());
}
let app_version = app_version.unwrap();
if let Some(blob_store_client) = app.blob_store_client.as_ref() {
let response = blob_store_client
.head_object()
.bucket(CRASH_REPORTS_BUCKET)
.key(report.header.incident_id.clone() + ".ips")
.send()
.await;
if response.is_ok() {
log::info!("We've already uploaded this crash");
return Ok(());
}
blob_store_client
.put_object()
.bucket(CRASH_REPORTS_BUCKET)
.key(report.header.incident_id.clone() + ".ips")
.acl(aws_sdk_s3::types::ObjectCannedAcl::PublicRead)
.body(ByteStream::from(body.to_vec()))
.send()
.await
.map_err(|e| log::error!("Failed to upload crash: {}", e))
.ok();
}
let recent_panic_on: Option<i64> = headers
.get("x-zed-panicked-on")
.and_then(|h| h.to_str().ok())
.and_then(|s| s.parse().ok());
let mut recent_panic = None;
if let Some(recent_panic_on) = recent_panic_on {
let crashed_at = match report.timestamp() {
Ok(t) => Some(t),
Err(e) => {
log::error!("Can't parse {}: {}", report.header.timestamp, e);
None
}
};
if crashed_at.is_some_and(|t| (t.timestamp_millis() - recent_panic_on).abs() <= 30000) {
recent_panic = headers.get("x-zed-panic").and_then(|h| h.to_str().ok());
}
}
let description = report.description(recent_panic);
let summary = report.backtrace_summary();
tracing::error!(
service = "client",
version = %report.header.app_version,
os_version = %report.header.os_version,
bundle_id = %report.header.bundle_id,
incident_id = %report.header.incident_id,
description = %description,
backtrace = %summary,
"crash report");
if let Some(slack_panics_webhook) = app.config.slack_panics_webhook.clone() {
let payload = slack::WebhookBody::new(|w| {
w.add_section(|s| s.text(slack::Text::markdown(description)))
.add_section(|s| {
s.add_field(slack::Text::markdown(format!(
"*Version:*\n{} ({})",
bundle_id, app_version
)))
.add_field({
let hostname = app.config.blob_store_url.clone().unwrap_or_default();
let hostname = hostname.strip_prefix("https://").unwrap_or_else(|| {
hostname.strip_prefix("http://").unwrap_or_default()
});
slack::Text::markdown(format!(
"*Incident:*\n<https://{}.{}/{}.ips|{}…>",
CRASH_REPORTS_BUCKET,
hostname,
report.header.incident_id,
report
.header
.incident_id
.chars()
.take(8)
.collect::<String>(),
))
})
})
.add_rich_text(|r| r.add_preformatted(|p| p.add_text(summary)))
});
let payload_json = serde_json::to_string(&payload).map_err(|err| {
log::error!("Failed to serialize payload to JSON: {err}");
Error::Internal(anyhow!(err))
})?;
reqwest::Client::new()
.post(slack_panics_webhook)
.header("Content-Type", "application/json")
.body(payload_json)
.send()
.await
.map_err(|err| {
log::error!("Failed to send payload to Slack: {err}");
Error::Internal(anyhow!(err))
})?;
}
Ok(())
}
pub async fn post_events(
Extension(app): Extension<Arc<AppState>>,
TypedHeader(ZedChecksumHeader(checksum)): TypedHeader<ZedChecksumHeader>,
@ -98,7 +238,7 @@ pub async fn post_events(
summer.update(&body);
summer.update(checksum_seed);
if &checksum[..] != &summer.finalize()[..] {
if &checksum != &summer.finalize()[..] {
return Err(Error::Http(
StatusCode::BAD_REQUEST,
"invalid checksum".into(),