From b3f0ba14304f7aa8d780dbbdf8bfe7f20dd08bda Mon Sep 17 00:00:00 2001 From: Mikayla Maki Date: Tue, 29 Oct 2024 23:54:00 -0700 Subject: [PATCH] Implement panic reporting saving and uploads (#19932) TODO: - [x] check that the app version is well formatted for zed.dev Release Notes: - N/A --------- Co-authored-by: Trace --- Cargo.lock | 5 + crates/gpui/src/app.rs | 44 +++++- crates/proto/proto/zed.proto | 13 +- crates/proto/src/proto.rs | 9 +- crates/remote_server/Cargo.toml | 8 +- crates/remote_server/src/unix.rs | 98 ++++++++++++- .../telemetry_events/src/telemetry_events.rs | 4 +- crates/zed/Cargo.toml | 1 + crates/zed/src/main.rs | 4 +- crates/zed/src/reliability.rs | 137 ++++++++++++++---- 10 files changed, 278 insertions(+), 45 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9c73baad36..f473a0f74d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9553,6 +9553,7 @@ dependencies = [ "async-watch", "backtrace", "cargo_toml", + "chrono", "clap", "client", "clock", @@ -9572,6 +9573,8 @@ dependencies = [ "node_runtime", "paths", "project", + "proto", + "release_channel", "remote", "reqwest_client", "rpc", @@ -9581,6 +9584,7 @@ dependencies = [ "settings", "shellexpand 2.1.2", "smol", + "telemetry_events", "toml 0.8.19", "util", "worktree", @@ -15092,6 +15096,7 @@ dependencies = [ "project", "project_panel", "project_symbols", + "proto", "quick_action_bar", "recent_projects", "release_channel", diff --git a/crates/gpui/src/app.rs b/crates/gpui/src/app.rs index 096f495a88..ffbc757369 100644 --- a/crates/gpui/src/app.rs +++ b/crates/gpui/src/app.rs @@ -217,6 +217,7 @@ pub(crate) type KeystrokeObserver = type QuitHandler = Box LocalBoxFuture<'static, ()> + 'static>; type ReleaseListener = Box; type NewViewListener = Box; +type NewModelListener = Box; /// Contains the state of the full application, and passed as a reference to a variety of callbacks. /// Other contexts such as [ModelContext], [WindowContext], and [ViewContext] deref to this type, making it the most general context type. @@ -237,6 +238,7 @@ pub struct AppContext { http_client: Arc, pub(crate) globals_by_type: FxHashMap>, pub(crate) entities: EntityMap, + pub(crate) new_model_observers: SubscriberSet, pub(crate) new_view_observers: SubscriberSet, pub(crate) windows: SlotMap>, pub(crate) window_handles: FxHashMap, @@ -296,6 +298,7 @@ impl AppContext { globals_by_type: FxHashMap::default(), entities, new_view_observers: SubscriberSet::new(), + new_model_observers: SubscriberSet::new(), window_handles: FxHashMap::default(), windows: SlotMap::with_key(), keymap: Rc::new(RefCell::new(Keymap::default())), @@ -1016,6 +1019,7 @@ impl AppContext { activate(); subscription } + /// Arrange for the given function to be invoked whenever a view of the specified type is created. /// The function will be passed a mutable reference to the view along with an appropriate context. pub fn observe_new_views( @@ -1035,6 +1039,31 @@ impl AppContext { ) } + pub(crate) fn new_model_observer(&self, key: TypeId, value: NewModelListener) -> Subscription { + let (subscription, activate) = self.new_model_observers.insert(key, value); + activate(); + subscription + } + + /// Arrange for the given function to be invoked whenever a view of the specified type is created. + /// The function will be passed a mutable reference to the view along with an appropriate context. + pub fn observe_new_models( + &self, + on_new: impl 'static + Fn(&mut T, &mut ModelContext), + ) -> Subscription { + self.new_model_observer( + TypeId::of::(), + Box::new(move |any_model: AnyModel, cx: &mut AppContext| { + any_model + .downcast::() + .unwrap() + .update(cx, |model_state, cx| { + on_new(model_state, cx); + }) + }), + ) + } + /// Observe the release of a model or view. The callback is invoked after the model or view /// has no more strong references but before it has been dropped. pub fn observe_release( @@ -1346,8 +1375,21 @@ impl Context for AppContext { ) -> Model { self.update(|cx| { let slot = cx.entities.reserve(); + let model = slot.clone(); let entity = build_model(&mut ModelContext::new(cx, slot.downgrade())); - cx.entities.insert(slot, entity) + cx.entities.insert(slot, entity); + + // Non-generic part to avoid leaking SubscriberSet to invokers of `new_view`. + fn notify_observers(cx: &mut AppContext, tid: TypeId, model: AnyModel) { + cx.new_model_observers.clone().retain(&tid, |observer| { + let any_model = model.clone(); + (observer)(any_model, cx); + true + }); + } + notify_observers(cx, TypeId::of::(), AnyModel::from(model.clone())); + + model }) } diff --git a/crates/proto/proto/zed.proto b/crates/proto/proto/zed.proto index 439531ccb3..5dce1fbda6 100644 --- a/crates/proto/proto/zed.proto +++ b/crates/proto/proto/zed.proto @@ -289,7 +289,10 @@ message Envelope { ActiveToolchainResponse active_toolchain_response = 277; GetPathMetadata get_path_metadata = 278; - GetPathMetadataResponse get_path_metadata_response = 279; // current max + GetPathMetadataResponse get_path_metadata_response = 279; + + GetPanicFiles get_panic_files = 280; + GetPanicFilesResponse get_panic_files_response = 281; // current max } reserved 87 to 88; @@ -2489,5 +2492,11 @@ message UpdateGitBranch { uint64 project_id = 1; string branch_name = 2; ProjectPath repository = 3; - +} + +message GetPanicFiles { +} + +message GetPanicFilesResponse { + repeated string file_contents = 2; } diff --git a/crates/proto/src/proto.rs b/crates/proto/src/proto.rs index 4bae2d9931..8ff10a6056 100644 --- a/crates/proto/src/proto.rs +++ b/crates/proto/src/proto.rs @@ -363,7 +363,9 @@ messages!( (ActiveToolchain, Foreground), (ActiveToolchainResponse, Foreground), (GetPathMetadata, Background), - (GetPathMetadataResponse, Background) + (GetPathMetadataResponse, Background), + (GetPanicFiles, Background), + (GetPanicFilesResponse, Background), ); request_messages!( @@ -483,7 +485,8 @@ request_messages!( (ListToolchains, ListToolchainsResponse), (ActivateToolchain, Ack), (ActiveToolchain, ActiveToolchainResponse), - (GetPathMetadata, GetPathMetadataResponse) + (GetPathMetadata, GetPathMetadataResponse), + (GetPanicFiles, GetPanicFilesResponse) ); entity_messages!( @@ -566,7 +569,7 @@ entity_messages!( ListToolchains, ActivateToolchain, ActiveToolchain, - GetPathMetadata + GetPathMetadata, ); entity_messages!( diff --git a/crates/remote_server/Cargo.toml b/crates/remote_server/Cargo.toml index d2623d5f47..92ddbee094 100644 --- a/crates/remote_server/Cargo.toml +++ b/crates/remote_server/Cargo.toml @@ -22,9 +22,10 @@ debug-embed = ["dep:rust-embed"] test-support = ["fs/test-support"] [dependencies] -async-watch.workspace = true anyhow.workspace = true +async-watch.workspace = true backtrace = "0.3" +chrono.workspace = true clap.workspace = true client.workspace = true env_logger.workspace = true @@ -39,8 +40,10 @@ languages.workspace = true log.workspace = true lsp.workspace = true node_runtime.workspace = true -project.workspace = true paths = { workspace = true } +project.workspace = true +proto.workspace = true +release_channel.workspace = true remote.workspace = true reqwest_client.workspace = true rpc.workspace = true @@ -50,6 +53,7 @@ serde_json.workspace = true settings.workspace = true shellexpand.workspace = true smol.workspace = true +telemetry_events.workspace = true util.workspace = true worktree.workspace = true diff --git a/crates/remote_server/src/unix.rs b/crates/remote_server/src/unix.rs index f6f98a41c1..a4add3354e 100644 --- a/crates/remote_server/src/unix.rs +++ b/crates/remote_server/src/unix.rs @@ -1,12 +1,13 @@ use crate::headless_project::HeadlessAppState; use crate::HeadlessProject; use anyhow::{anyhow, Context, Result}; -use client::ProxySettings; +use chrono::Utc; +use client::{telemetry, ProxySettings}; use fs::{Fs, RealFs}; use futures::channel::mpsc; use futures::{select, select_biased, AsyncRead, AsyncWrite, AsyncWriteExt, FutureExt, SinkExt}; use git::GitHostingProviderRegistry; -use gpui::{AppContext, Context as _, ModelContext, UpdateGlobal as _}; +use gpui::{AppContext, Context as _, Model, ModelContext, UpdateGlobal as _}; use http_client::{read_proxy_from_env, Uri}; use language::LanguageRegistry; use node_runtime::{NodeBinaryOptions, NodeRuntime}; @@ -21,19 +22,23 @@ use remote::{ }; use reqwest_client::ReqwestClient; use rpc::proto::{self, Envelope, SSH_PROJECT_ID}; +use rpc::{AnyProtoClient, TypedEnvelope}; use settings::{watch_config_file, Settings, SettingsStore}; use smol::channel::{Receiver, Sender}; use smol::io::AsyncReadExt; use smol::Async; use smol::{net::unix::UnixListener, stream::StreamExt as _}; +use std::ffi::OsStr; use std::ops::ControlFlow; +use std::{env, thread}; use std::{ io::Write, mem, path::{Path, PathBuf}, sync::Arc, }; +use telemetry_events::LocationData; use util::ResultExt; fn init_logging_proxy() { @@ -131,16 +136,97 @@ fn init_panic_hook() { backtrace.drain(0..=ix); } + let thread = thread::current(); + let thread_name = thread.name().unwrap_or(""); + log::error!( "panic occurred: {}\nBacktrace:\n{}", - payload, - backtrace.join("\n") + &payload, + (&backtrace).join("\n") ); + let panic_data = telemetry_events::Panic { + thread: thread_name.into(), + payload: payload.clone(), + location_data: info.location().map(|location| LocationData { + file: location.file().into(), + line: location.line(), + }), + app_version: format!( + "remote-server-{}", + option_env!("ZED_COMMIT_SHA").unwrap_or(&env!("ZED_PKG_VERSION")) + ), + release_channel: release_channel::RELEASE_CHANNEL.display_name().into(), + os_name: telemetry::os_name(), + os_version: Some(telemetry::os_version()), + architecture: env::consts::ARCH.into(), + panicked_on: Utc::now().timestamp_millis(), + backtrace, + system_id: None, // Set on SSH client + installation_id: None, // Set on SSH client + session_id: "".to_string(), // Set on SSH client + }; + + if let Some(panic_data_json) = serde_json::to_string(&panic_data).log_err() { + let timestamp = chrono::Utc::now().format("%Y_%m_%d %H_%M_%S").to_string(); + let panic_file_path = paths::logs_dir().join(format!("zed-{timestamp}.panic")); + let panic_file = std::fs::OpenOptions::new() + .append(true) + .create(true) + .open(&panic_file_path) + .log_err(); + if let Some(mut panic_file) = panic_file { + writeln!(&mut panic_file, "{panic_data_json}").log_err(); + panic_file.flush().log_err(); + } + } + std::process::abort(); })); } +fn handle_panic_requests(project: &Model, client: &Arc) { + let client: AnyProtoClient = client.clone().into(); + client.add_request_handler( + project.downgrade(), + |_, _: TypedEnvelope, _cx| async move { + let mut children = smol::fs::read_dir(paths::logs_dir()).await?; + let mut panic_files = Vec::new(); + while let Some(child) = children.next().await { + let child = child?; + let child_path = child.path(); + + if child_path.extension() != Some(OsStr::new("panic")) { + continue; + } + let filename = if let Some(filename) = child_path.file_name() { + filename.to_string_lossy() + } else { + continue; + }; + + if !filename.starts_with("zed") { + continue; + } + + let file_contents = smol::fs::read_to_string(&child_path) + .await + .context("error reading panic file")?; + + panic_files.push(file_contents); + + // We've done what we can, delete the file + std::fs::remove_file(child_path) + .context("error removing panic") + .log_err(); + } + anyhow::Ok(proto::GetPanicFilesResponse { + file_contents: panic_files, + }) + }, + ); +} + struct ServerListeners { stdin: UnixListener, stdout: UnixListener, @@ -368,7 +454,7 @@ pub fn execute_run( HeadlessProject::new( HeadlessAppState { - session, + session: session.clone(), fs, http_client, node_runtime, @@ -378,6 +464,8 @@ pub fn execute_run( ) }); + handle_panic_requests(&project, &session); + mem::forget(project); }); log::info!("gpui app is shut down. quitting."); diff --git a/crates/telemetry_events/src/telemetry_events.rs b/crates/telemetry_events/src/telemetry_events.rs index 47e66a46a7..26db3cf8d8 100644 --- a/crates/telemetry_events/src/telemetry_events.rs +++ b/crates/telemetry_events/src/telemetry_events.rs @@ -222,13 +222,13 @@ pub struct HangReport { pub installation_id: Option, } -#[derive(Serialize, Deserialize)] +#[derive(Serialize, Deserialize, Clone, Debug)] pub struct LocationData { pub file: String, pub line: u32, } -#[derive(Serialize, Deserialize)] +#[derive(Serialize, Deserialize, Clone, Debug)] pub struct Panic { /// The name of the thread that panicked pub thread: String, diff --git a/crates/zed/Cargo.toml b/crates/zed/Cargo.toml index e2a3f2be36..272d423f24 100644 --- a/crates/zed/Cargo.toml +++ b/crates/zed/Cargo.toml @@ -77,6 +77,7 @@ profiling.workspace = true project.workspace = true project_panel.workspace = true project_symbols.workspace = true +proto.workspace = true quick_action_bar.workspace = true recent_projects.workspace = true release_channel.workspace = true diff --git a/crates/zed/src/main.rs b/crates/zed/src/main.rs index 998289f920..f366323ff5 100644 --- a/crates/zed/src/main.rs +++ b/crates/zed/src/main.rs @@ -329,7 +329,7 @@ fn main() { telemetry.start( system_id.as_ref().map(|id| id.to_string()), installation_id.as_ref().map(|id| id.to_string()), - session_id, + session_id.clone(), cx, ); @@ -365,7 +365,9 @@ fn main() { auto_update::init(client.http_client(), cx); reliability::init( client.http_client(), + system_id.as_ref().map(|id| id.to_string()), installation_id.clone().map(|id| id.to_string()), + session_id.clone(), cx, ); diff --git a/crates/zed/src/reliability.rs b/crates/zed/src/reliability.rs index 9d76a6c47f..b02afb8c0d 100644 --- a/crates/zed/src/reliability.rs +++ b/crates/zed/src/reliability.rs @@ -1,13 +1,14 @@ use anyhow::{Context, Result}; use backtrace::{self, Backtrace}; use chrono::Utc; -use client::telemetry; +use client::{telemetry, TelemetrySettings}; use db::kvp::KEY_VALUE_STORE; use gpui::{AppContext, SemanticVersion}; use http_client::{HttpRequestExt, Method}; use http_client::{self, HttpClient, HttpClientWithUrl}; use paths::{crashes_dir, crashes_retired_dir}; +use project::Project; use release_channel::ReleaseChannel; use release_channel::RELEASE_CHANNEL; use settings::Settings; @@ -21,6 +22,7 @@ use std::{io::Write, panic, sync::atomic::AtomicU32, thread}; use telemetry_events::LocationData; use telemetry_events::Panic; use telemetry_events::PanicRequest; +use url::Url; use util::ResultExt; use crate::stdout_is_a_pty; @@ -133,13 +135,73 @@ pub fn init_panic_hook( pub fn init( http_client: Arc, + system_id: Option, installation_id: Option, + session_id: String, cx: &mut AppContext, ) { #[cfg(target_os = "macos")] monitor_main_thread_hangs(http_client.clone(), installation_id.clone(), cx); - upload_panics_and_crashes(http_client, installation_id, cx) + let Some(panic_report_url) = http_client + .build_zed_api_url("/telemetry/panics", &[]) + .log_err() + else { + return; + }; + + upload_panics_and_crashes( + http_client.clone(), + panic_report_url.clone(), + installation_id.clone(), + cx, + ); + + cx.observe_new_models(move |project: &mut Project, cx| { + let http_client = http_client.clone(); + let panic_report_url = panic_report_url.clone(); + let session_id = session_id.clone(); + let installation_id = installation_id.clone(); + let system_id = system_id.clone(); + + if let Some(ssh_client) = project.ssh_client() { + ssh_client.update(cx, |client, cx| { + if TelemetrySettings::get_global(cx).diagnostics { + let request = client.proto_client().request(proto::GetPanicFiles {}); + cx.background_executor() + .spawn(async move { + let panic_files = request.await?; + for file in panic_files.file_contents { + let panic: Option = serde_json::from_str(&file) + .log_err() + .or_else(|| { + file.lines() + .next() + .and_then(|line| serde_json::from_str(line).ok()) + }) + .unwrap_or_else(|| { + log::error!("failed to deserialize panic file {:?}", file); + None + }); + + if let Some(mut panic) = panic { + panic.session_id = session_id.clone(); + panic.system_id = system_id.clone(); + panic.installation_id = installation_id.clone(); + + upload_panic(&http_client, &panic_report_url, panic, &mut None) + .await?; + } + } + + anyhow::Ok(()) + }) + .detach_and_log_err(cx); + } + }) + } + }) + .detach(); } #[cfg(target_os = "macos")] @@ -346,16 +408,18 @@ pub fn monitor_main_thread_hangs( fn upload_panics_and_crashes( http: Arc, + panic_report_url: Url, installation_id: Option, cx: &AppContext, ) { let telemetry_settings = *client::TelemetrySettings::get_global(cx); cx.background_executor() .spawn(async move { - let most_recent_panic = upload_previous_panics(http.clone(), telemetry_settings) - .await - .log_err() - .flatten(); + let most_recent_panic = + upload_previous_panics(http.clone(), &panic_report_url, telemetry_settings) + .await + .log_err() + .flatten(); upload_previous_crashes(http, most_recent_panic, installation_id, telemetry_settings) .await .log_err() @@ -366,9 +430,9 @@ fn upload_panics_and_crashes( /// Uploads panics via `zed.dev`. async fn upload_previous_panics( http: Arc, + panic_report_url: &Url, telemetry_settings: client::TelemetrySettings, -) -> Result> { - let panic_report_url = http.build_zed_api_url("/telemetry/panics", &[])?; +) -> anyhow::Result> { let mut children = smol::fs::read_dir(paths::logs_dir()).await?; let mut most_recent_panic = None; @@ -396,7 +460,7 @@ async fn upload_previous_panics( .context("error reading panic file")?; let panic: Option = serde_json::from_str(&panic_file_content) - .ok() + .log_err() .or_else(|| { panic_file_content .lines() @@ -409,26 +473,8 @@ async fn upload_previous_panics( }); if let Some(panic) = panic { - most_recent_panic = Some((panic.panicked_on, panic.payload.clone())); - - let json_bytes = serde_json::to_vec(&PanicRequest { panic }).unwrap(); - - let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes) else { + if !upload_panic(&http, &panic_report_url, panic, &mut most_recent_panic).await? { continue; - }; - - let Ok(request) = http_client::Request::builder() - .method(Method::POST) - .uri(panic_report_url.as_ref()) - .header("x-zed-checksum", checksum) - .body(json_bytes.into()) - else { - continue; - }; - - let response = http.send(request).await.context("error sending panic")?; - if !response.status().is_success() { - log::error!("Error uploading panic to server: {}", response.status()); } } } @@ -438,9 +484,42 @@ async fn upload_previous_panics( .context("error removing panic") .log_err(); } - Ok::<_, anyhow::Error>(most_recent_panic) + Ok(most_recent_panic) } +async fn upload_panic( + http: &Arc, + panic_report_url: &Url, + panic: telemetry_events::Panic, + most_recent_panic: &mut Option<(i64, String)>, +) -> Result { + *most_recent_panic = Some((panic.panicked_on, panic.payload.clone())); + + let json_bytes = serde_json::to_vec(&PanicRequest { + panic: panic.clone(), + }) + .unwrap(); + + let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes) else { + return Ok(false); + }; + + let Ok(request) = http_client::Request::builder() + .method(Method::POST) + .uri(panic_report_url.as_ref()) + .header("x-zed-checksum", checksum) + .body(json_bytes.into()) + else { + return Ok(false); + }; + + let response = http.send(request).await.context("error sending panic")?; + if !response.status().is_success() { + log::error!("Error uploading panic to server: {}", response.status()); + } + + Ok(true) +} const LAST_CRASH_UPLOADED: &str = "LAST_CRASH_UPLOADED"; /// upload crashes from apple's diagnostic reports to our server.