Skip to main content

zng_app/
crash_handler.rs

1#![cfg(all(
2    feature = "crash_handler",
3    not(any(target_arch = "wasm32", target_os = "android", target_os = "ios"))
4))]
5
6//! App-process crash handler.
7//!
8//! See the `zng::app::crash_handler` documentation for more details.
9
10use std::{
11    fmt,
12    path::{Path, PathBuf},
13    sync::{Arc, atomic::AtomicBool},
14    time::SystemTime,
15};
16use zng_clone_move::clmv;
17use zng_layout::unit::TimeUnits as _;
18use zng_task::{parking_lot::Mutex, process::tap};
19
20// TODO(breaking) remove this
21use tap::contains_ansi_csi;
22pub use tap::{BacktraceFrame, PanicInfo as CrashPanic, remove_ansi_csi};
23
24use zng_txt::Txt;
25
/// Environment variable that causes the crash handler to not start if set.
///
/// This is particularly useful to set in debugger launch configs. The crash handler spawns
/// a different process for the app, so breakpoints will not work.
pub const NO_CRASH_HANDLER: &str = "ZNG_NO_CRASH_HANDLER";
31
32zng_env::on_process_start!(|process_start_args| {
33    if std::env::var(NO_CRASH_HANDLER).is_ok() {
34        return;
35    }
36    if zng_env::about().is_test {
37        tracing::debug!("ignoring crash_handler because is test process");
38        return;
39    }
40
41    let mut config = CrashConfig::new();
42    for ext in CRASH_CONFIG {
43        ext(&mut config);
44        if config.no_crash_handler {
45            return;
46        }
47    }
48
49    if process_start_args.next_handlers_count > 0 && process_start_args.yield_count < zng_env::ProcessStartArgs::MAX_YIELD_COUNT - 10 {
50        // extra sure that this is the app-process
51        return process_start_args.yield_once();
52    }
53
54    if std::env::var(APP_PROCESS) != Err(std::env::VarError::NotPresent) {
55        return crash_handler_app_process(config.dump_dir.is_some());
56    }
57
58    match std::env::var(DIALOG_PROCESS) {
59        Ok(args_file) => crash_handler_dialog_process(
60            config.dump_dir.is_some(),
61            config
62                .dialog
63                .or(config.default_dialog)
64                .expect("dialog-process spawned without dialog handler"),
65            args_file,
66        ),
67        Err(e) => match e {
68            std::env::VarError::NotPresent => {}
69            e => panic!("invalid dialog env args, {e:?}"),
70        },
71    }
72
73    crash_handler_monitor_process(
74        config.dump_dir,
75        config.app_process,
76        config.dialog_process,
77        config.default_dialog.is_some() || config.dialog.is_some(),
78    );
79});
80
81/// Gets the number of crash restarts in the app-process.
82///
83/// Always returns zero if called in other processes.
84pub fn restart_count() -> usize {
85    match std::env::var(APP_PROCESS) {
86        Ok(c) => c.strip_prefix("restart-").unwrap_or("0").parse().unwrap_or(0),
87        Err(_) => 0,
88    }
89}
90
// Env var set by the monitor-process on the app-process it spawns, the value is `restart-{count}`.
const APP_PROCESS: &str = "ZNG_CRASH_HANDLER_APP";
// Env var set by the monitor-process on the dialog-process, the value is the path of the
// temp JSON file that contains the serialized `CrashArgs`.
const DIALOG_PROCESS: &str = "ZNG_CRASH_HANDLER_DIALOG";
// Env var set on spawned processes when the minidump server is running, the value is the
// server socket path.
const DUMP_CHANNEL: &str = "ZNG_MINIDUMP_CHANNEL";
// Prefix of the stdout line the dialog-process prints to respond to the monitor-process.
const RESPONSE_PREFIX: &str = "zng_crash_response: ";
95
// Config functions registered by `crash_handler_config!`.
//
// Every function in this slice is called by the process start handler to build the `CrashConfig`.
#[doc(hidden)]
#[linkme::distributed_slice]
pub static CRASH_CONFIG: [fn(&mut CrashConfig)];
99
100#[doc(hidden)]
101pub use linkme as __linkme;
102
/// <span data-del-macro-root></span> Register a `FnOnce(&mut CrashConfig)` closure to be
/// called on process init to configure the crash handler.
///
/// See [`CrashConfig`] for more details.
#[macro_export]
macro_rules! crash_handler_config {
    ($closure:expr) => {
        // register `_crash_config` in the `CRASH_CONFIG` distributed slice
        #[$crate::crash_handler::__linkme::distributed_slice($crate::crash_handler::CRASH_CONFIG)]
        #[linkme(crate = $crate::crash_handler::__linkme)]
        #[doc(hidden)]
        static _CRASH_CONFIG: fn(&mut $crate::crash_handler::CrashConfig) = _crash_config;
        #[doc(hidden)]
        fn _crash_config(cfg: &mut $crate::crash_handler::CrashConfig) {
            // the inner function gives `$closure` an expected type of `impl FnOnce(&mut CrashConfig)`
            fn crash_config(cfg: &mut $crate::crash_handler::CrashConfig, handler: impl FnOnce(&mut $crate::crash_handler::CrashConfig)) {
                handler(cfg)
            }
            crash_config(cfg, $closure)
        }
    };
}
124pub use crate::crash_handler_config;
125
// Closures that customize a `Command` just before it is spawned, each closure receives the
// command and the crash args collected so far.
type ConfigProcess = Vec<Box<dyn for<'a, 'b> FnMut(&'a mut std::process::Command, &'b CrashArgs) -> &'a mut std::process::Command>>;
// Boxed handler that runs in the dialog-process.
type CrashDialogHandler = Box<dyn FnOnce(CrashArgs)>;
128
129/// Crash handler config.
130///
131/// Use [`crash_handler_config!`] to set config.
132///
133/// [`crash_handler_config!`]: crate::crash_handler_config!
134pub struct CrashConfig {
135    default_dialog: Option<CrashDialogHandler>,
136    dialog: Option<CrashDialogHandler>,
137    app_process: ConfigProcess,
138    dialog_process: ConfigProcess,
139    dump_dir: Option<PathBuf>,
140    no_crash_handler: bool,
141}
142impl CrashConfig {
143    fn new() -> Self {
144        Self {
145            default_dialog: None,
146            dialog: None,
147            app_process: vec![],
148            dialog_process: vec![],
149            dump_dir: Some(zng_env::cache("zng_minidump")),
150            no_crash_handler: false,
151        }
152    }
153
154    /// Set the crash dialog process handler.
155    ///
156    /// The dialog `handler` can run an app or show a native dialog, it must use the [`CrashArgs`] process
157    /// terminating methods to respond, if it returns [`CrashArgs::exit`] will run.
158    ///
159    /// Note that the handler does not need to actually show any dialog, it can just save crash info and
160    /// restart the app for example.
161    pub fn dialog(&mut self, handler: impl FnOnce(CrashArgs) + 'static) {
162        if self.dialog.is_none() {
163            self.dialog = Some(Box::new(handler));
164        }
165    }
166
167    /// Set the crash dialog-handler used if `crash_dialog` is not set.
168    ///
169    /// This is used by app libraries or themes to provide a default dialog.
170    pub fn default_dialog(&mut self, handler: impl FnOnce(CrashArgs) + 'static) {
171        self.default_dialog = Some(Box::new(handler));
172    }
173
174    /// Add a closure that is called just before the app-process is spawned.
175    pub fn app_process(
176        &mut self,
177        cfg: impl for<'a, 'b> FnMut(&'a mut std::process::Command, &'b CrashArgs) -> &'a mut std::process::Command + 'static,
178    ) {
179        self.app_process.push(Box::new(cfg));
180    }
181
182    /// Add a closure that is called just before the dialog-process is spawned.
183    pub fn dialog_process(
184        &mut self,
185        cfg: impl for<'a, 'b> FnMut(&'a mut std::process::Command, &'b CrashArgs) -> &'a mut std::process::Command + 'static,
186    ) {
187        self.dialog_process.push(Box::new(cfg));
188    }
189
190    /// Change the minidump directory.
191    ///
192    /// Is `zng::env::cache("zng_minidump")` by default.
193    pub fn minidump_dir(&mut self, dir: impl Into<PathBuf>) {
194        self.dump_dir = Some(dir.into());
195    }
196
197    /// Do not collect a minidump.
198    pub fn no_minidump(&mut self) {
199        self.dump_dir = None;
200    }
201
202    /// Does not run with crash handler.
203    ///
204    /// This is equivalent of running with `NO_ZNG_CRASH_HANDLER` env var.
205    pub fn no_crash_handler(&mut self) {
206        self.no_crash_handler = true;
207    }
208}
209
210/// Arguments for the crash handler dialog function.
211#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
212#[non_exhaustive]
213pub struct CrashArgs {
214    /// Info about the app-process crashes.
215    ///
216    /// Has at least one entry, latest is last. Includes all crashes since the start of the monitor-process.
217    pub app_crashes: Vec<CrashError>,
218
219    /// Info about a crash in the dialog-process spawned to handle the latest app-process crash.
220    ///
221    /// If set this is the last chance to show something to the end user, if the current dialog crashes too
222    /// the monitor-process will give up. If you started an `APP` to show a crash dialog try using a native
223    /// dialog directly now, or just give up, clearly things are far from ok.
224    pub dialog_crash: Option<CrashError>,
225}
226impl CrashArgs {
227    /// Latest crash.
228    pub fn latest(&self) -> &CrashError {
229        self.app_crashes.last().unwrap()
230    }
231
232    /// Restart the app-process with same argument as the latest crash.
233    pub fn restart(&self) -> ! {
234        let json_args = serde_json::to_string(&self.latest().args[..]).unwrap();
235        println!("{RESPONSE_PREFIX}restart {json_args}");
236        zng_env::exit(0)
237    }
238
239    /// Restart the app-process with custom arguments.
240    pub fn restart_with(&self, args: &[Txt]) -> ! {
241        let json_args = serde_json::to_string(&args).unwrap();
242        println!("{RESPONSE_PREFIX}restart {json_args}");
243        zng_env::exit(0)
244    }
245
246    /// Exit the monitor-process (application) with code.
247    pub fn exit(&self, code: i32) -> ! {
248        println!("{RESPONSE_PREFIX}exit {code}");
249        zng_env::exit(0)
250    }
251}
252impl fmt::Display for CrashArgs {
253    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
254        writeln!(f, "APP CRASHES:\n")?;
255
256        for c in self.app_crashes.iter() {
257            writeln!(f, "{c}")?;
258        }
259
260        if let Some(c) = &self.dialog_crash {
261            writeln!(f, "\nDIALOG CRASH:\n")?;
262            writeln!(f, "{c}")?;
263        }
264
265        Ok(())
266    }
267}
268
/// Info about an app-process crash.
///
/// The same type is also used to describe a crash of the dialog-process.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct CrashError {
    /// Crash moment.
    ///
    /// This is the time the monitor-process recorded the crash, after the crashed process exited.
    pub timestamp: SystemTime,
    /// Process exit code.
    pub code: Option<i32>,
    /// Unix signal that terminated the process.
    pub signal: Option<i32>,
    /// Full capture of the app stdout.
    pub stdout: Txt,
    /// Full capture of the app stderr.
    pub stderr: Txt,
    /// Arguments used.
    ///
    /// Does not include the executable path. Is empty for dialog-process crashes.
    pub args: Box<[Txt]>,
    /// Minidump file.
    pub minidump: Option<PathBuf>,
    /// Operating system.
    ///
    /// See [`std::env::consts::OS`] for details.
    pub os: Txt,
}
292/// Alternate mode `{:#}` prints plain stdout and stderr (no ANSI escape sequences).
293impl fmt::Display for CrashError {
294    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
295        writeln!(f, "timestamp: {}", self.unix_time())?;
296        if let Some(c) = self.code {
297            writeln!(f, "exit code: {c:#X}")?
298        }
299        if let Some(c) = self.signal {
300            writeln!(f, "exit signal: {c}")?
301        }
302        if let Some(p) = self.minidump.as_ref() {
303            writeln!(f, "minidump: {}", p.display())?
304        }
305        if f.alternate() {
306            write!(f, "\nSTDOUT:\n{}\nSTDERR:\n{}\n", self.stdout_plain(), self.stderr_plain())
307        } else {
308            write!(f, "\nSTDOUT:\n{}\nSTDERR:\n{}\n", self.stdout, self.stderr)
309        }
310    }
311}
312impl CrashError {
313    fn new(
314        timestamp: SystemTime,
315        code: Option<i32>,
316        signal: Option<i32>,
317        stdout: Txt,
318        stderr: Txt,
319        minidump: Option<PathBuf>,
320        args: Box<[Txt]>,
321    ) -> Self {
322        Self {
323            timestamp,
324            code,
325            signal,
326            stdout,
327            stderr,
328            args,
329            minidump,
330            os: std::env::consts::OS.into(),
331        }
332    }
333
334    /// Seconds since Unix epoch.
335    pub fn unix_time(&self) -> u64 {
336        self.timestamp.duration_since(SystemTime::UNIX_EPOCH).unwrap_or_default().as_secs()
337    }
338
339    /// Gets if `stdout` does not contain any ANSI scape sequences.
340    pub fn is_stdout_plain(&self) -> bool {
341        !contains_ansi_csi(&self.stdout)
342    }
343
344    /// Gets if `stderr` does not contain any ANSI scape sequences.
345    pub fn is_stderr_plain(&self) -> bool {
346        !contains_ansi_csi(&self.stderr)
347    }
348
349    /// Get `stdout` without any ANSI escape sequences (CSI).
350    pub fn stdout_plain(&self) -> Txt {
351        if self.is_stdout_plain() {
352            self.stdout.clone()
353        } else {
354            remove_ansi_csi(&self.stdout)
355        }
356    }
357
358    /// Get `stderr` without any ANSI escape sequences (CSI).
359    pub fn stderr_plain(&self) -> Txt {
360        if self.is_stderr_plain() {
361            self.stderr.clone()
362        } else {
363            remove_ansi_csi(&self.stderr)
364        }
365    }
366
367    /// Gets if `stderr` contains a crash panic.
368    pub fn has_panic(&self) -> bool {
369        if self.code == Some(101) {
370            CrashPanic::contains(&self.stderr)
371        } else {
372            false
373        }
374    }
375
376    /// Gets if `stderr` contains a crash panic that traced widget/window path.
377    pub fn has_panic_widget(&self) -> bool {
378        if self.code == Some(101) {
379            CrashPanic::contains_widget(&self.stderr)
380        } else {
381            false
382        }
383    }
384
385    /// Try parse `stderr` for the crash panic.
386    ///
387    /// Only reliably works if the panic fully printed correctly and was formatted by the panic
388    /// hook installed by `crash_handler` or by the display print of [`CrashPanic`].
389    pub fn find_panic(&self) -> Option<CrashPanic> {
390        if self.code == Some(101) {
391            CrashPanic::find(&self.stderr)
392        } else {
393            None
394        }
395    }
396
397    /// Best attempt at generating a readable error message.
398    ///
399    /// Is the panic message, or the minidump exception, with the exit code and signal.
400    pub fn message(&self) -> Txt {
401        let mut msg = if let Some(msg) = self.find_panic().map(|p| p.message) {
402            msg
403        } else if let Some(msg) = self.minidump_message() {
404            msg
405        } else {
406            "".into()
407        };
408        use std::fmt::Write as _;
409
410        if let Some(c) = self.code {
411            let sep = if msg.is_empty() { "" } else { "\n" };
412            write!(&mut msg, "{sep}Code: {c:#X}").unwrap();
413        }
414        if let Some(c) = self.signal {
415            let sep = if msg.is_empty() { "" } else { "\n" };
416            write!(&mut msg, "{sep}Signal: {c}").unwrap();
417        }
418        msg.end_mut();
419        msg
420    }
421
422    fn minidump_message(&self) -> Option<Txt> {
423        use minidump::*;
424
425        let dump = match Minidump::read_path(self.minidump.as_ref()?) {
426            Ok(d) => d,
427            Err(e) => {
428                tracing::error!("error reading minidump, {e}");
429                return None;
430            }
431        };
432
433        let exception = match dump.get_stream::<MinidumpException>() {
434            Ok(s) => s,
435            Err(e) => {
436                tracing::error!("error reading minidump exception, {e}");
437                return None;
438            }
439        };
440
441        #[cfg(debug_assertions)]
442        {
443            // nice error messages, but adds >1MB of binary code
444            let system_info = match dump.get_stream::<MinidumpSystemInfo>() {
445                Ok(s) => s,
446                Err(e) => {
447                    tracing::error!("error reading minidump system info, {e}");
448                    return None;
449                }
450            };
451            let crash_reason = exception.get_crash_reason(system_info.os, system_info.cpu);
452            Some(zng_txt::formatx!("{crash_reason}"))
453        }
454
455        #[cfg(not(debug_assertions))]
456        {
457            // raw error code, only common names
458            let raw = exception.raw;
459
460            let code = raw.exception_record.exception_code;
461            let addr = raw.exception_record.exception_address;
462
463            cfg_select! {
464                windows => {
465                    let name = match code {
466                        0xC0000005 => "ACCESS_VIOLATION",
467                        0xC0000409 => "STACK_BUFFER_OVERRUN",
468                        0x80000003 => "BREAKPOINT",
469                        0xC000001D => "ILLEGAL_INSTRUCTION",
470                        0xC0000094 => "INTEGER_DIVIDE_BY_ZERO",
471                        0xC00000FD => "STACK_OVERFLOW",
472                        0xC0000096 => "PRIVILEGED_INSTRUCTION",
473                        0xC0000008 => "INVALID_HANDLE",
474                        0xC0000135 => "DLL_NOT_FOUND",
475                        _ => "",
476                    };
477                }
478                any(target_os = "linux", target_os = "android") => {
479                    let name = match code as i32 {
480                        4 => "SIGILL",
481                        5 => "SIGTRAP",
482                        6 => "SIGABRT",
483                        7 => "SIGBUS",
484                        8 => "SIGFPE",
485                        9 => "SIGKILL",
486                        11 => "SIGSEGV",
487                        13 => "SIGPIPE",
488                        _ => "",
489                    };
490                }
491                any(target_os = "macos", target_os = "ios") => {
492                    let name = match code as i32 {
493                        4 => "SIGILL",
494                        5 => "SIGTRAP",
495                        6 => "SIGABRT",
496                        8 => "SIGFPE",
497                        10 => "SIGBUS",
498                        11 => "SIGSEGV",
499                        _ => "",
500                    };
501                }
502                _ => {
503                    let name = "";
504                }
505            }
506            if name.is_empty() {
507                Some(zng_txt::formatx!("exception 0x{code:08X} at 0x{addr:X}"))
508            } else {
509                Some(zng_txt::formatx!("exception 0x{code:08X} ({name}) at 0x{addr:X}"))
510            }
511        }
512    }
513}
514
515fn crash_handler_monitor_process(
516    dump_dir: Option<PathBuf>,
517    mut cfg_app: ConfigProcess,
518    mut cfg_dialog: ConfigProcess,
519    has_dialog_handler: bool,
520) -> ! {
521    zng_env::set_process_name("crash-handler-process");
522
523    let exe = std::env::current_exe()
524        .and_then(dunce::canonicalize)
525        .expect("failed to get the current executable");
526
527    let mut args: Box<[_]> = std::env::args().skip(1).map(Txt::from).collect();
528
529    let mut dialog_args = CrashArgs {
530        app_crashes: vec![],
531        dialog_crash: None,
532    };
533    loop {
534        let mut app_process = std::process::Command::new(&exe);
535        for cfg in &mut cfg_app {
536            cfg(&mut app_process, &dialog_args);
537        }
538
539        match run_process(
540            dump_dir.as_deref(),
541            app_process
542                .env(APP_PROCESS, format!("restart-{}", dialog_args.app_crashes.len()))
543                .args(args.iter()),
544        ) {
545            Ok((status, stdout, stderr, dump_file)) => {
546                if status.success() {
547                    let code = status.code().unwrap_or(0);
548                    tracing::info!(
549                        "crash monitor-process exiting with success code ({code}), {} crashes",
550                        dialog_args.app_crashes.len()
551                    );
552                    zng_env::exit(code);
553                } else {
554                    let code = status.code();
555                    #[allow(unused_mut)] // Windows has no signal
556                    let mut signal = None::<i32>;
557
558                    #[cfg(windows)]
559                    if code == Some(1) {
560                        tracing::warn!(
561                            "app-process exit code (1), probably killed by the system, \
562                                        will exit monitor-process with the same code"
563                        );
564                        zng_env::exit(1);
565                    }
566                    #[cfg(unix)]
567                    if code.is_none() {
568                        use std::os::unix::process::ExitStatusExt as _;
569                        signal = status.signal();
570
571                        if let Some(sig) = signal
572                            && [2, 9, 17, 19, 23].contains(&sig)
573                        {
574                            tracing::warn!(
575                                "app-process exited by signal ({sig}), \
576                                                will exit monitor-process with code 1"
577                            );
578                            zng_env::exit(1);
579                        }
580                    }
581
582                    tracing::error!(
583                        "app-process crashed with exit code ({:#X}), signal ({:#?}), {} crashes previously",
584                        code.unwrap_or(0),
585                        signal.unwrap_or(0),
586                        dialog_args.app_crashes.len()
587                    );
588
589                    let timestamp = SystemTime::now();
590
591                    dialog_args.app_crashes.push(CrashError::new(
592                        timestamp,
593                        code,
594                        signal,
595                        stdout.into_txt_blocking(false),
596                        stderr.into_txt_blocking(false),
597                        dump_file,
598                        args.clone(),
599                    ));
600
601                    // show dialog, retries once if dialog crashes too.
602                    for _ in 0..2 {
603                        // serialize app-crashes to a temp JSON file
604                        let timestamp_nanos = timestamp.duration_since(SystemTime::UNIX_EPOCH).map(|d| d.as_nanos()).unwrap_or(0);
605                        let mut timestamp = timestamp_nanos;
606                        let mut retries = 0;
607                        let crash_file = loop {
608                            let path = std::env::temp_dir().join(format!("zng-crash-errors-{timestamp:#x}"));
609                            match std::fs::File::create_new(&path) {
610                                Ok(f) => match serde_json::to_writer(std::io::BufWriter::new(f), &dialog_args) {
611                                    Ok(_) => break path,
612                                    Err(e) => {
613                                        if e.is_io() {
614                                            if retries > 20 {
615                                                panic!("error writing crash errors, {e}");
616                                            } else if retries > 5 {
617                                                timestamp += 1;
618                                            }
619                                            std::thread::sleep(100.ms());
620                                        } else {
621                                            panic!("error serializing crash errors, {e}");
622                                        }
623                                    }
624                                },
625                                Err(e) => {
626                                    if e.kind() == std::io::ErrorKind::AlreadyExists {
627                                        timestamp += 1;
628                                    } else {
629                                        if retries > 20 {
630                                            panic!("error creating crash errors file, {e}");
631                                        } else if retries > 5 {
632                                            timestamp += 1;
633                                        }
634                                        std::thread::sleep(100.ms());
635                                    }
636                                }
637                            }
638                            retries += 1;
639                        };
640
641                        let dialog_result = if has_dialog_handler {
642                            let mut dialog_process = std::process::Command::new(&exe);
643                            for cfg in &mut cfg_dialog {
644                                cfg(&mut dialog_process, &dialog_args);
645                            }
646                            run_process(dump_dir.as_deref(), dialog_process.env(DIALOG_PROCESS, &crash_file))
647                        } else {
648                            Ok((
649                                std::process::ExitStatus::default(),
650                                tap::StdoutTap::dummy(),
651                                tap::StderrTap::dummy(),
652                                None,
653                            ))
654                        };
655
656                        for _ in 0..5 {
657                            if !crash_file.exists() || std::fs::remove_file(&crash_file).is_ok() {
658                                break;
659                            }
660                            std::thread::sleep(100.ms());
661                        }
662
663                        let response = match dialog_result {
664                            Ok((dlg_status, dlg_stdout, dlg_stderr, dlg_dump_file)) => {
665                                if dlg_status.success() {
666                                    let dlg_stdout = dlg_stdout.into_string_blocking(false);
667                                    dlg_stdout
668                                        .lines()
669                                        .filter_map(|l| l.trim().strip_prefix(RESPONSE_PREFIX))
670                                        .next_back()
671                                        .unwrap_or("exit 0")
672                                        .to_owned()
673                                } else {
674                                    let code = dlg_status.code();
675                                    #[allow(unused_mut)] // Windows has no signal
676                                    let mut signal = None::<i32>;
677
678                                    #[cfg(windows)]
679                                    if code == Some(1) {
680                                        tracing::warn!(
681                                            "dialog-process exit code (1), probably killed by the system, \
682                                                        will exit monitor-process with the same code"
683                                        );
684                                        zng_env::exit(1);
685                                    }
686                                    #[cfg(unix)]
687                                    if code.is_none() {
688                                        use std::os::unix::process::ExitStatusExt as _;
689                                        signal = status.signal();
690
691                                        if let Some(sig) = signal
692                                            && [2, 9, 17, 19, 23].contains(&sig)
693                                        {
694                                            tracing::warn!(
695                                                "dialog-process exited by signal ({sig}), \
696                                                                will exit monitor-process with code 1"
697                                            );
698                                            zng_env::exit(1);
699                                        }
700                                    }
701
702                                    let dialog_crash = CrashError::new(
703                                        SystemTime::now(),
704                                        code,
705                                        signal,
706                                        dlg_stdout.into_txt_blocking(false),
707                                        dlg_stderr.into_txt_blocking(false),
708                                        dlg_dump_file,
709                                        Box::new([]),
710                                    );
711                                    tracing::error!("crash dialog-process crashed, {dialog_crash}");
712
713                                    if dialog_args.dialog_crash.is_none() {
714                                        dialog_args.dialog_crash = Some(dialog_crash);
715                                        continue;
716                                    } else {
717                                        let latest = dialog_args.latest();
718                                        eprintln!("{latest}");
719                                        zng_env::exit(latest.code.unwrap_or(1));
720                                    }
721                                }
722                            }
723                            Err(e) => panic!("error running dialog-process, {e}"),
724                        };
725
726                        if let Some(args_json) = response.strip_prefix("restart ") {
727                            args = serde_json::from_str(args_json).expect("crash dialog-process did not respond 'restart' correctly");
728                            break;
729                        } else if let Some(code) = response.strip_prefix("exit ") {
730                            let code: i32 = code.parse().expect("crash dialog-process did not respond 'code' correctly");
731                            zng_env::exit(code);
732                        } else {
733                            panic!("crash dialog-process did not respond correctly")
734                        }
735                    }
736                }
737            }
738            Err(e) => panic!("error running app-process, {e}"),
739        }
740    }
741}
/// Spawns the `command` with piped stdout and stderr and blocks until the process exits.
///
/// If `dump_dir` is set a minidump server is started before the process is spawned and the server
/// socket path is set in the `DUMP_CHANNEL` env var of the process. Errors starting the server are
/// only logged, the process still runs, without minidump collection.
///
/// Returns the exit status, the stdout and stderr taps and the minidump file path reported by the
/// server thread, if any.
///
/// # Errors
///
/// Returns an error if the process cannot be spawned or waited.
fn run_process(
    dump_dir: Option<&Path>,
    command: &mut std::process::Command,
) -> std::io::Result<(std::process::ExitStatus, tap::StdoutTap, tap::StderrTap, Option<PathBuf>)> {
    // running minidump server thread and the flag used to stop it
    struct DumpServer {
        shutdown: Arc<AtomicBool>,
        runner: std::thread::JoinHandle<Option<PathBuf>>,
    }
    let mut dump_server = None;
    if let Some(dump_dir) = dump_dir {
        match std::fs::create_dir_all(dump_dir) {
            Ok(_) => {
                // the same id names the minidump file and the server socket
                let uuid = uuid::Uuid::new_v4();
                let dump_file = dump_dir.join(format!("{}.dmp", uuid.simple()));
                let dump_channel = std::env::temp_dir().join(format!("zng-crash-{}", uuid.simple()));
                match minidumper::Server::with_name(minidumper::SocketName::Path(&dump_channel)) {
                    Ok(mut s) => {
                        command.env(DUMP_CHANNEL, &dump_channel);
                        let shutdown = Arc::new(AtomicBool::new(false));
                        let runner = std::thread::Builder::new()
                            .name("minidumper-server".into())
                            .stack_size(512 * 1024)
                            .spawn(clmv!(shutdown, || {
                                // shared with the handler, read after the server stops
                                let created_file = Arc::new(Mutex::new(None));
                                if let Err(e) = s.run(
                                    Box::new(MinidumpServerHandler {
                                        dump_file,
                                        created_file: created_file.clone(),
                                    }),
                                    &shutdown,
                                    None,
                                ) {
                                    tracing::error!("minidump server exited with error, {e}");
                                }
                                created_file.lock().take()
                            }))
                            .expect("failed to spawn thread");
                        dump_server = Some(DumpServer { shutdown, runner });
                    }
                    Err(e) => tracing::error!("failed to spawn minidump server, will not enable crash handling, {e}"),
                }
            }
            Err(e) => tracing::error!("cannot create minidump dir, will not enable crash handling, {e}"),
        }
    }

    // NOTE(review): `CLICOLOR_FORCE` presumably keeps colored output enabled in the piped streams, confirm
    let mut app_process = command
        .env("RUST_BACKTRACE", "full")
        .env("CLICOLOR_FORCE", "1")
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .spawn()?;

    // taps are created before the wait
    let stdout = tap::StdoutTap::new_blocking(app_process.stdout.take().unwrap());
    let stderr = tap::StderrTap::new_blocking(app_process.stderr.take().unwrap());

    let status = app_process.wait()?;

    // stop the minidump server and collect the dump file path, a panic in the server thread is propagated
    let mut dump_file = None;
    if let Some(s) = dump_server {
        s.shutdown.store(true, atomic::Ordering::Relaxed);
        match s.runner.join() {
            Ok(r) => dump_file = r,
            Err(p) => std::panic::resume_unwind(p),
        };
    }

    Ok((status, stdout, stderr, dump_file))
}
811struct MinidumpServerHandler {
812    dump_file: PathBuf,
813    created_file: Arc<Mutex<Option<PathBuf>>>,
814}
815impl minidumper::ServerHandler for MinidumpServerHandler {
816    fn create_minidump_file(&self) -> Result<(std::fs::File, PathBuf), std::io::Error> {
817        let file = std::fs::File::create_new(&self.dump_file)?;
818        Ok((file, self.dump_file.clone()))
819    }
820
821    fn on_minidump_created(&self, result: Result<minidumper::MinidumpBinary, minidumper::Error>) -> minidumper::LoopAction {
822        match result {
823            Ok(b) => *self.created_file.lock() = Some(b.path),
824            Err(e) => tracing::error!("failed to write minidump file, {e}"),
825        }
826        minidumper::LoopAction::Exit
827    }
828
829    fn on_message(&self, _: u32, _: Vec<u8>) {}
830
831    fn on_client_connected(&self, num_clients: usize) -> minidumper::LoopAction {
832        if num_clients > 1 {
833            tracing::error!("expected only one minidump client, {num_clients} connected, exiting server");
834            minidumper::LoopAction::Exit
835        } else {
836            minidumper::LoopAction::Continue
837        }
838    }
839
840    fn on_client_disconnected(&self, num_clients: usize) -> minidumper::LoopAction {
841        if num_clients != 0 {
842            tracing::error!("expected only one minidump client disconnect, {num_clients} still connected");
843        }
844        minidumper::LoopAction::Exit
845    }
846}
847
848fn crash_handler_app_process(dump_enabled: bool) {
849    CrashPanic::set_hook(|| crate::widget::WIDGET.trace_path());
850    if dump_enabled {
851        minidump_attach();
852    }
853
854    // app-process execution happens after this.
855}
856
857fn crash_handler_dialog_process(dump_enabled: bool, dialog: CrashDialogHandler, args_file: String) -> ! {
858    zng_env::set_process_name("crash-dialog-process");
859
860    CrashPanic::set_hook(|| crate::widget::WIDGET.trace_path());
861    if dump_enabled {
862        minidump_attach();
863    }
864
865    let mut retries = 0;
866    let args = loop {
867        match std::fs::read_to_string(&args_file) {
868            Ok(args) => break args,
869            Err(e) => {
870                if e.kind() != std::io::ErrorKind::NotFound && retries < 10 {
871                    retries += 1;
872                    continue;
873                }
874                panic!("error reading args file, {e}");
875            }
876        }
877    };
878
879    dialog(serde_json::from_str(&args).expect("error deserializing args"));
880    CrashArgs {
881        app_crashes: vec![],
882        dialog_crash: None,
883    }
884    .exit(0)
885}
886
887fn minidump_attach() {
888    let channel_name = match std::env::var(DUMP_CHANNEL) {
889        Ok(n) if !n.is_empty() => PathBuf::from(n),
890        _ => {
891            eprintln!("expected minidump channel name, this instance will not handle crashes");
892            return;
893        }
894    };
895    let client = match minidumper::Client::with_name(minidumper::SocketName::Path(&channel_name)) {
896        Ok(c) => c,
897        Err(e) => {
898            eprintln!("failed to connect minidump client, this instance will not handle crashes, {e}");
899            return;
900        }
901    };
902    struct Handler(minidumper::Client);
903    // SAFETY: on_crash does the minimal possible work
904    unsafe impl crash_handler::CrashEvent for Handler {
905        fn on_crash(&self, context: &crash_handler::CrashContext) -> crash_handler::CrashEventResult {
906            crash_handler::CrashEventResult::Handled(self.0.request_dump(context).is_ok())
907        }
908    }
909    let handler = match crash_handler::CrashHandler::attach(Box::new(Handler(client))) {
910        Ok(h) => h,
911        Err(e) => {
912            eprintln!("failed attach minidump crash handler, this instance will not handle crashes, {e}");
913            return;
914        }
915    };
916
917    *CRASH_HANDLER.lock() = Some(handler);
918}
919static CRASH_HANDLER: Mutex<Option<crash_handler::CrashHandler>> = Mutex::new(None);