Add minidump crash reporting (#35263)
- [x] Handle uploading minidumps from the remote_server
- [x] Associate minidumps with panics with some sort of ID (we don't use session_id on the remote)
- [x] Update the protobufs and client/server code to request panics
- [x] Upload minidumps with no corresponding panic
- [x] Fill in panic info when there _is_ a corresponding panic
- [x] Use an env var for the sentry endpoint instead of hardcoding it

Release Notes:

- Zed now generates minidumps for crash reporting

---------

Co-authored-by: Max Brunsfeld <maxbrunsfeld@gmail.com>
This commit is contained in:
parent
07e3d53d58
commit
669c57b45f
25 changed files with 709 additions and 135 deletions
20
crates/crashes/Cargo.toml
Normal file
20
crates/crashes/Cargo.toml
Normal file
|
@ -0,0 +1,20 @@
|
|||
[package]
|
||||
name = "crashes"
|
||||
version = "0.1.0"
|
||||
publish.workspace = true
|
||||
edition.workspace = true
|
||||
license = "GPL-3.0-or-later"
|
||||
|
||||
[dependencies]
|
||||
crash-handler.workspace = true
|
||||
log.workspace = true
|
||||
minidumper.workspace = true
|
||||
paths.workspace = true
|
||||
smol.workspace = true
|
||||
workspace-hack.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[lib]
|
||||
path = "src/crashes.rs"
|
1
crates/crashes/LICENSE-GPL
Symbolic link
1
crates/crashes/LICENSE-GPL
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../LICENSE-GPL
|
172
crates/crashes/src/crashes.rs
Normal file
172
crates/crashes/src/crashes.rs
Normal file
|
@ -0,0 +1,172 @@
|
|||
use crash_handler::CrashHandler;
|
||||
use log::info;
|
||||
use minidumper::{Client, LoopAction, MinidumpBinary};
|
||||
|
||||
use std::{
|
||||
env,
|
||||
fs::File,
|
||||
io,
|
||||
path::{Path, PathBuf},
|
||||
process::{self, Command},
|
||||
sync::{
|
||||
OnceLock,
|
||||
atomic::{AtomicBool, Ordering},
|
||||
},
|
||||
thread,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
// set once the crash handler has initialized and the client has connected to it
|
||||
pub static CRASH_HANDLER: AtomicBool = AtomicBool::new(false);
|
||||
// set when the first minidump request is made to avoid generating duplicate crash reports
|
||||
pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
|
||||
const CRASH_HANDLER_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
|
||||
pub async fn init(id: String) {
|
||||
let exe = env::current_exe().expect("unable to find ourselves");
|
||||
let zed_pid = process::id();
|
||||
// TODO: we should be able to get away with using 1 crash-handler process per machine,
|
||||
// but for now we append the PID of the current process which makes it unique per remote
|
||||
// server or interactive zed instance. This solves an issue where occasionally the socket
|
||||
// used by the crash handler isn't destroyed correctly which causes it to stay on the file
|
||||
// system and block further attempts to initialize crash handlers with that socket path.
|
||||
let socket_name = paths::temp_dir().join(format!("zed-crash-handler-{zed_pid}"));
|
||||
#[allow(unused)]
|
||||
let server_pid = Command::new(exe)
|
||||
.arg("--crash-handler")
|
||||
.arg(&socket_name)
|
||||
.spawn()
|
||||
.expect("unable to spawn server process")
|
||||
.id();
|
||||
info!("spawning crash handler process");
|
||||
|
||||
let mut elapsed = Duration::ZERO;
|
||||
let retry_frequency = Duration::from_millis(100);
|
||||
let mut maybe_client = None;
|
||||
while maybe_client.is_none() {
|
||||
if let Ok(client) = Client::with_name(socket_name.as_path()) {
|
||||
maybe_client = Some(client);
|
||||
info!("connected to crash handler process after {elapsed:?}");
|
||||
break;
|
||||
}
|
||||
elapsed += retry_frequency;
|
||||
smol::Timer::after(retry_frequency).await;
|
||||
}
|
||||
let client = maybe_client.unwrap();
|
||||
client.send_message(1, id).unwrap(); // set session id on the server
|
||||
|
||||
let client = std::sync::Arc::new(client);
|
||||
let handler = crash_handler::CrashHandler::attach(unsafe {
|
||||
let client = client.clone();
|
||||
crash_handler::make_crash_event(move |crash_context: &crash_handler::CrashContext| {
|
||||
// only request a minidump once
|
||||
let res = if REQUESTED_MINIDUMP
|
||||
.compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
|
||||
.is_ok()
|
||||
{
|
||||
client.send_message(2, "mistakes were made").unwrap();
|
||||
client.ping().unwrap();
|
||||
client.request_dump(crash_context).is_ok()
|
||||
} else {
|
||||
true
|
||||
};
|
||||
crash_handler::CrashEventResult::Handled(res)
|
||||
})
|
||||
})
|
||||
.expect("failed to attach signal handler");
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
handler.set_ptracer(Some(server_pid));
|
||||
}
|
||||
CRASH_HANDLER.store(true, Ordering::Release);
|
||||
std::mem::forget(handler);
|
||||
info!("crash handler registered");
|
||||
|
||||
loop {
|
||||
client.ping().ok();
|
||||
smol::Timer::after(Duration::from_secs(10)).await;
|
||||
}
|
||||
}
|
||||
|
||||
pub struct CrashServer {
|
||||
session_id: OnceLock<String>,
|
||||
}
|
||||
|
||||
impl minidumper::ServerHandler for CrashServer {
|
||||
fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
|
||||
let err_message = "Need to send a message with the ID upon starting the crash handler";
|
||||
let dump_path = paths::logs_dir()
|
||||
.join(self.session_id.get().expect(err_message))
|
||||
.with_extension("dmp");
|
||||
let file = File::create(&dump_path)?;
|
||||
Ok((file, dump_path))
|
||||
}
|
||||
|
||||
fn on_minidump_created(&self, result: Result<MinidumpBinary, minidumper::Error>) -> LoopAction {
|
||||
match result {
|
||||
Ok(mut md_bin) => {
|
||||
use io::Write;
|
||||
let _ = md_bin.file.flush();
|
||||
info!("wrote minidump to disk {:?}", md_bin.path);
|
||||
}
|
||||
Err(e) => {
|
||||
info!("failed to write minidump: {:#}", e);
|
||||
}
|
||||
}
|
||||
LoopAction::Exit
|
||||
}
|
||||
|
||||
fn on_message(&self, kind: u32, buffer: Vec<u8>) {
|
||||
let message = String::from_utf8(buffer).expect("invalid utf-8");
|
||||
info!("kind: {kind}, message: {message}",);
|
||||
if kind == 1 {
|
||||
self.session_id
|
||||
.set(message)
|
||||
.expect("session id already initialized");
|
||||
}
|
||||
}
|
||||
|
||||
fn on_client_disconnected(&self, clients: usize) -> LoopAction {
|
||||
info!("client disconnected, {clients} remaining");
|
||||
if clients == 0 {
|
||||
LoopAction::Exit
|
||||
} else {
|
||||
LoopAction::Continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn handle_panic() {
|
||||
// wait 500ms for the crash handler process to start up
|
||||
// if it's still not there just write panic info and no minidump
|
||||
let retry_frequency = Duration::from_millis(100);
|
||||
for _ in 0..5 {
|
||||
if CRASH_HANDLER.load(Ordering::Acquire) {
|
||||
log::error!("triggering a crash to generate a minidump...");
|
||||
#[cfg(target_os = "linux")]
|
||||
CrashHandler.simulate_signal(crash_handler::Signal::Trap as u32);
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
CrashHandler.simulate_exception(None);
|
||||
break;
|
||||
}
|
||||
thread::sleep(retry_frequency);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn crash_server(socket: &Path) {
|
||||
let Ok(mut server) = minidumper::Server::with_name(socket) else {
|
||||
log::info!("Couldn't create socket, there may already be a running crash server");
|
||||
return;
|
||||
};
|
||||
let ab = AtomicBool::new(false);
|
||||
server
|
||||
.run(
|
||||
Box::new(CrashServer {
|
||||
session_id: OnceLock::new(),
|
||||
}),
|
||||
&ab,
|
||||
Some(CRASH_HANDLER_TIMEOUT),
|
||||
)
|
||||
.expect("failed to run server");
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue