zng_app/
crash_handler.rs

1#![cfg(all(
2    feature = "crash_handler",
3    not(any(target_arch = "wasm32", target_os = "android", target_os = "ios"))
4))]
5
6//! App-process crash handler.
7//!
8//! See the `zng::app::crash_handler` documentation for more details.
9
10use parking_lot::Mutex;
11use std::{
12    fmt,
13    io::{BufRead, Write},
14    path::{Path, PathBuf},
15    sync::{Arc, atomic::AtomicBool},
16    time::SystemTime,
17};
18use zng_clone_move::clmv;
19use zng_layout::unit::TimeUnits as _;
20
21use zng_txt::{ToTxt as _, Txt};
22
/// Name of the environment variable that causes the crash handler to not start if set.
///
/// The start-up code only checks that the variable can be read, the value itself is not inspected.
///
/// This is particularly useful to set in debugger launch configs. The crash handler spawns
/// a different process for the app, so breakpoints will not work.
pub const NO_CRASH_HANDLER: &str = "ZNG_NO_CRASH_HANDLER";
28
29zng_env::on_process_start!(|process_start_args| {
30    if std::env::var(NO_CRASH_HANDLER).is_ok() {
31        return;
32    }
33
34    let mut config = CrashConfig::new();
35    for ext in CRASH_CONFIG {
36        ext(&mut config);
37        if config.no_crash_handler {
38            return;
39        }
40    }
41
42    if process_start_args.next_handlers_count > 0 && process_start_args.yield_count < zng_env::ProcessStartArgs::MAX_YIELD_COUNT - 10 {
43        // extra sure that this is the app-process
44        return process_start_args.yield_once();
45    }
46
47    if std::env::var(APP_PROCESS) != Err(std::env::VarError::NotPresent) {
48        return crash_handler_app_process(config.dump_dir.is_some());
49    }
50
51    match std::env::var(DIALOG_PROCESS) {
52        Ok(args_file) => crash_handler_dialog_process(
53            config.dump_dir.is_some(),
54            config
55                .dialog
56                .or(config.default_dialog)
57                .expect("dialog-process spawned without dialog handler"),
58            args_file,
59        ),
60        Err(e) => match e {
61            std::env::VarError::NotPresent => {}
62            e => panic!("invalid dialog env args, {e:?}"),
63        },
64    }
65
66    crash_handler_monitor_process(
67        config.dump_dir,
68        config.app_process,
69        config.dialog_process,
70        config.default_dialog.is_some() || config.dialog.is_some(),
71    );
72});
73
74/// Gets the number of crash restarts in the app-process.
75///
76/// Always returns zero if called in other processes.
77pub fn restart_count() -> usize {
78    match std::env::var(APP_PROCESS) {
79        Ok(c) => c.strip_prefix("restart-").unwrap_or("0").parse().unwrap_or(0),
80        Err(_) => 0,
81    }
82}
83
// Environment variable set by the monitor-process on the spawned app-process,
// the value is `restart-{count}`, see `restart_count`.
const APP_PROCESS: &str = "ZNG_CRASH_HANDLER_APP";
// Environment variable that identifies the dialog-process, the start-up code passes
// the value to the dialog-process runner as the args file.
const DIALOG_PROCESS: &str = "ZNG_CRASH_HANDLER_DIALOG";
// NOTE(review): not used in the visible part of this file, presumably the environment
// variable that carries the minidump channel name to the app-process — confirm.
const DUMP_CHANNEL: &str = "ZNG_MINIDUMP_CHANNEL";
// Prefix of the response line printed to stdout by the `CrashArgs` process terminating methods.
const RESPONSE_PREFIX: &str = "zng_crash_response: ";
88
// Config functions registered by `crash_handler_config!`, the process start-up code calls
// each one with the `CrashConfig` being built.
#[linkme::distributed_slice]
static CRASH_CONFIG: [fn(&mut CrashConfig)];
91
/// <span data-del-macro-root></span> Register a `FnOnce(&mut CrashConfig)` closure to be
/// called on process init to configure the crash handler.
///
/// The closure is registered in a link-time collection, the crash handler start-up code calls every
/// registered closure before deciding what kind of process is running.
///
/// See [`CrashConfig`] for more details.
#[macro_export]
macro_rules! crash_handler_config {
    ($closure:expr) => {
        // expanded from:
        // #[linkme::distributed_slice(CRASH_CONFIG)]
        // static _CRASH_CONFIG: fn(&mut CrashConfig) = _crash_config;
        // so that users don't need to depend on linkme just to call this macro.
        //
        // NOTE(review): the expansion declares items with fixed names (`_CRASH_CONFIG`, `_crash_config`),
        // presumably the macro can only be called once per module — confirm.
        #[used]
        #[cfg_attr(
            any(
                target_os = "none",
                target_os = "linux",
                target_os = "android",
                target_os = "fuchsia",
                target_os = "psp"
            ),
            unsafe(link_section = "linkme_CRASH_CONFIG")
        )]
        #[cfg_attr(
            any(target_os = "macos", target_os = "ios", target_os = "tvos"),
            unsafe(link_section = "__DATA,__linkmeK3uV0Fq0,regular,no_dead_strip")
        )]
        #[cfg_attr(
            any(target_os = "uefi", target_os = "windows"),
            unsafe(link_section = ".linkme_CRASH_CONFIG$b")
        )]
        #[cfg_attr(target_os = "illumos", unsafe(link_section = "set_linkme_CRASH_CONFIG"))]
        #[cfg_attr(
            any(target_os = "freebsd", target_os = "openbsd"),
            unsafe(link_section = "linkme_CRASH_CONFIG")
        )]
        #[doc(hidden)]
        static _CRASH_CONFIG: fn(&mut $crate::crash_handler::CrashConfig) = _crash_config;
        #[doc(hidden)]
        fn _crash_config(cfg: &mut $crate::crash_handler::CrashConfig) {
            // the closure expression is passed to a function that requires `FnOnce(&mut CrashConfig)`,
            // presumably so that the closure input type does not need to be annotated by the caller.
            fn crash_config(cfg: &mut $crate::crash_handler::CrashConfig, handler: impl FnOnce(&mut $crate::crash_handler::CrashConfig)) {
                handler(cfg)
            }
            crash_config(cfg, $closure)
        }
    };
}
pub use crate::crash_handler_config;
139
// List of closures that configure a process command before it is spawned, each closure
// receives the command and the current crash args and returns the same command reference.
type ConfigProcess = Vec<Box<dyn for<'a, 'b> FnMut(&'a mut std::process::Command, &'b CrashArgs) -> &'a mut std::process::Command>>;
// Boxed dialog handler, it is called at most once and takes ownership of the crash args.
type CrashDialogHandler = Box<dyn FnOnce(CrashArgs)>;
142
143/// Crash handler config.
144///
145/// Use [`crash_handler_config!`] to set config.
146///
147/// [`crash_handler_config!`]: crate::crash_handler_config!
148pub struct CrashConfig {
149    default_dialog: Option<CrashDialogHandler>,
150    dialog: Option<CrashDialogHandler>,
151    app_process: ConfigProcess,
152    dialog_process: ConfigProcess,
153    dump_dir: Option<PathBuf>,
154    no_crash_handler: bool,
155}
156impl CrashConfig {
157    fn new() -> Self {
158        Self {
159            default_dialog: None,
160            dialog: None,
161            app_process: vec![],
162            dialog_process: vec![],
163            dump_dir: Some(zng_env::cache("zng_minidump")),
164            no_crash_handler: false,
165        }
166    }
167
168    /// Set the crash dialog process handler.
169    ///
170    /// The dialog `handler` can run an app or show a native dialog, it must use the [`CrashArgs`] process
171    /// terminating methods to respond, if it returns [`CrashArgs::exit`] will run.
172    ///
173    /// Note that the handler does not need to actually show any dialog, it can just save crash info and
174    /// restart the app for example.
175    pub fn dialog(&mut self, handler: impl FnOnce(CrashArgs) + 'static) {
176        if self.dialog.is_none() {
177            self.dialog = Some(Box::new(handler));
178        }
179    }
180
181    /// Set the crash dialog-handler used if `crash_dialog` is not set.
182    ///
183    /// This is used by app libraries or themes to provide a default dialog.
184    pub fn default_dialog(&mut self, handler: impl FnOnce(CrashArgs) + 'static) {
185        self.default_dialog = Some(Box::new(handler));
186    }
187
188    /// Add a closure that is called just before the app-process is spawned.
189    pub fn app_process(
190        &mut self,
191        cfg: impl for<'a, 'b> FnMut(&'a mut std::process::Command, &'b CrashArgs) -> &'a mut std::process::Command + 'static,
192    ) {
193        self.app_process.push(Box::new(cfg));
194    }
195
196    /// Add a closure that is called just before the dialog-process is spawned.
197    pub fn dialog_process(
198        &mut self,
199        cfg: impl for<'a, 'b> FnMut(&'a mut std::process::Command, &'b CrashArgs) -> &'a mut std::process::Command + 'static,
200    ) {
201        self.dialog_process.push(Box::new(cfg));
202    }
203
204    /// Change the minidump directory.
205    ///
206    /// Is `zng::env::cache("zng_minidump")` by default.
207    pub fn minidump_dir(&mut self, dir: impl Into<PathBuf>) {
208        self.dump_dir = Some(dir.into());
209    }
210
211    /// Do not collect a minidump.
212    pub fn no_minidump(&mut self) {
213        self.dump_dir = None;
214    }
215
216    /// Does not run with crash handler.
217    ///
218    /// This is equivalent of running with `NO_ZNG_CRASH_HANDLER` env var.
219    pub fn no_crash_handler(&mut self) {
220        self.no_crash_handler = true;
221    }
222}
223
/// Arguments for the crash handler dialog function.
///
/// The dialog handler responds by calling one of the process terminating methods,
/// `restart`, `restart_with` or `exit`.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct CrashArgs {
    /// Info about the app-process crashes.
    ///
    /// Has at least one entry, latest is last. Includes all crashes since the start of the monitor-process.
    pub app_crashes: Vec<CrashError>,

    /// Info about a crash in the dialog-process spawned to handle the latest app-process crash.
    ///
    /// If set this is the last chance to show something to the end user, if the current dialog crashes too
    /// the monitor-process will give up. If you started an `APP` to show a crash dialog try using a native
    /// dialog directly now, or just give up, clearly things are far from ok.
    pub dialog_crash: Option<CrashError>,
}
240impl CrashArgs {
241    /// Latest crash.
242    pub fn latest(&self) -> &CrashError {
243        self.app_crashes.last().unwrap()
244    }
245
246    /// Restart the app-process with same argument as the latest crash.
247    pub fn restart(&self) -> ! {
248        let json_args = serde_json::to_string(&self.latest().args[..]).unwrap();
249        println!("{RESPONSE_PREFIX}restart {json_args}");
250        zng_env::exit(0)
251    }
252
253    /// Restart the app-process with custom arguments.
254    pub fn restart_with(&self, args: &[Txt]) -> ! {
255        let json_args = serde_json::to_string(&args).unwrap();
256        println!("{RESPONSE_PREFIX}restart {json_args}");
257        zng_env::exit(0)
258    }
259
260    /// Exit the monitor-process (application) with code.
261    pub fn exit(&self, code: i32) -> ! {
262        println!("{RESPONSE_PREFIX}exit {code}");
263        zng_env::exit(0)
264    }
265}
266impl fmt::Display for CrashArgs {
267    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
268        writeln!(f, "APP CRASHES:\n")?;
269
270        for c in self.app_crashes.iter() {
271            writeln!(f, "{c}")?;
272        }
273
274        if let Some(c) = &self.dialog_crash {
275            writeln!(f, "\nDIALOG CRASH:\n")?;
276            writeln!(f, "{c}")?;
277        }
278
279        Ok(())
280    }
281}
282
/// Info about an app-process crash.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct CrashError {
    /// Crash moment.
    pub timestamp: SystemTime,
    /// Process exit code.
    pub code: Option<i32>,
    /// Unix signal that terminated the process.
    ///
    /// The monitor-process only queries the signal in Unix builds, when the process exited without a code.
    pub signal: Option<i32>,
    /// Full capture of the app stdout.
    ///
    /// Can contain ANSI escape sequences, see `stdout_plain`.
    pub stdout: Txt,
    /// Full capture of the app stderr.
    ///
    /// Can contain ANSI escape sequences, see `stderr_plain`.
    pub stderr: Txt,
    /// Arguments used.
    pub args: Box<[Txt]>,
    /// Minidump file.
    pub minidump: Option<PathBuf>,
    /// Operating system.
    ///
    /// See [`std::env::consts::OS`] for details.
    pub os: Txt,
}
306/// Alternate mode `{:#}` prints plain stdout and stderr (no ANSI escape sequences).
307impl fmt::Display for CrashError {
308    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
309        writeln!(f, "timestamp: {}", self.unix_time())?;
310        if let Some(c) = self.code {
311            writeln!(f, "exit code: {c:#X}")?
312        }
313        if let Some(c) = self.signal {
314            writeln!(f, "exit signal: {c}")?
315        }
316        if let Some(p) = self.minidump.as_ref() {
317            writeln!(f, "minidump: {}", p.display())?
318        }
319        if f.alternate() {
320            write!(f, "\nSTDOUT:\n{}\nSTDERR:\n{}\n", self.stdout_plain(), self.stderr_plain())
321        } else {
322            write!(f, "\nSTDOUT:\n{}\nSTDERR:\n{}\n", self.stdout, self.stderr)
323        }
324    }
325}
326impl CrashError {
327    fn new(
328        timestamp: SystemTime,
329        code: Option<i32>,
330        signal: Option<i32>,
331        stdout: Txt,
332        stderr: Txt,
333        minidump: Option<PathBuf>,
334        args: Box<[Txt]>,
335    ) -> Self {
336        Self {
337            timestamp,
338            code,
339            signal,
340            stdout,
341            stderr,
342            args,
343            minidump,
344            os: std::env::consts::OS.into(),
345        }
346    }
347
348    /// Seconds since Unix epoch.
349    pub fn unix_time(&self) -> u64 {
350        self.timestamp.duration_since(SystemTime::UNIX_EPOCH).unwrap_or_default().as_secs()
351    }
352
353    /// Gets if `stdout` does not contain any ANSI scape sequences.
354    pub fn is_stdout_plain(&self) -> bool {
355        !self.stdout.contains(CSI)
356    }
357
358    /// Gets if `stderr` does not contain any ANSI scape sequences.
359    pub fn is_stderr_plain(&self) -> bool {
360        !self.stderr.contains(CSI)
361    }
362
363    /// Get `stdout` without any ANSI escape sequences (CSI).
364    pub fn stdout_plain(&self) -> Txt {
365        remove_ansi_csi(&self.stdout)
366    }
367
368    /// Get `stderr` without any ANSI escape sequences (CSI).
369    pub fn stderr_plain(&self) -> Txt {
370        remove_ansi_csi(&self.stderr)
371    }
372
373    /// Gets if `stderr` contains a crash panic.
374    pub fn has_panic(&self) -> bool {
375        if self.code == Some(101) {
376            CrashPanic::contains(&self.stderr_plain())
377        } else {
378            false
379        }
380    }
381
382    /// Gets if `stderr` contains a crash panic that traced widget/window path.
383    pub fn has_panic_widget(&self) -> bool {
384        if self.code == Some(101) {
385            CrashPanic::contains_widget(&self.stderr_plain())
386        } else {
387            false
388        }
389    }
390
391    /// Try parse `stderr` for the crash panic.
392    ///
393    /// Only reliably works if the panic fully printed correctly and was formatted by the panic
394    /// hook installed by `crash_handler` or by the display print of [`CrashPanic`].
395    pub fn find_panic(&self) -> Option<CrashPanic> {
396        if self.code == Some(101) {
397            CrashPanic::find(&self.stderr_plain())
398        } else {
399            None
400        }
401    }
402
403    /// Best attempt at generating a readable error message.
404    ///
405    /// Is the panic message, or the minidump exception, with the exit code and signal.
406    pub fn message(&self) -> Txt {
407        let mut msg = if let Some(msg) = self.find_panic().map(|p| p.message) {
408            msg
409        } else if let Some(msg) = self.minidump_message() {
410            msg
411        } else {
412            "".into()
413        };
414        use std::fmt::Write as _;
415
416        if let Some(c) = self.code {
417            let sep = if msg.is_empty() { "" } else { "\n" };
418            write!(&mut msg, "{sep}Code: {c:#X}").unwrap();
419        }
420        if let Some(c) = self.signal {
421            let sep = if msg.is_empty() { "" } else { "\n" };
422            write!(&mut msg, "{sep}Signal: {c}").unwrap();
423        }
424        msg.end_mut();
425        msg
426    }
427
428    fn minidump_message(&self) -> Option<Txt> {
429        use minidump::*;
430
431        let dump = match Minidump::read_path(self.minidump.as_ref()?) {
432            Ok(d) => d,
433            Err(e) => {
434                tracing::error!("error reading minidump, {e}");
435                return None;
436            }
437        };
438
439        let system_info = match dump.get_stream::<MinidumpSystemInfo>() {
440            Ok(s) => s,
441            Err(e) => {
442                tracing::error!("error reading minidump system info, {e}");
443                return None;
444            }
445        };
446        let exception = match dump.get_stream::<MinidumpException>() {
447            Ok(s) => s,
448            Err(e) => {
449                tracing::error!("error reading minidump exception, {e}");
450                return None;
451            }
452        };
453
454        let crash_reason = exception.get_crash_reason(system_info.os, system_info.cpu);
455
456        Some(zng_txt::formatx!("{crash_reason}"))
457    }
458}
459
460const CSI: &str = "\x1b[";
461
462/// Remove ANSI escape sequences (CSI) from `s`.
463pub fn remove_ansi_csi(mut s: &str) -> Txt {
464    fn is_esc_end(byte: u8) -> bool {
465        (0x40..=0x7e).contains(&byte)
466    }
467
468    let mut r = String::new();
469    while let Some(i) = s.find(CSI) {
470        r.push_str(&s[..i]);
471        s = &s[i + CSI.len()..];
472        let mut esc_end = 0;
473        while esc_end < s.len() && !is_esc_end(s.as_bytes()[esc_end]) {
474            esc_end += 1;
475        }
476        esc_end += 1;
477        s = &s[esc_end..];
478    }
479    r.push_str(s);
480    r.into()
481}
482
/// Panic parsed from a `stderr` dump.
///
/// The display print of this type uses the same format that is parsed by `find`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct CrashPanic {
    /// Name of thread that panicked.
    pub thread: Txt,
    /// Panic message.
    pub message: Txt,
    /// Path to file that defines the panic.
    pub file: Txt,
    /// Line of code that defines the panic.
    ///
    /// Is zero if the line could not be parsed.
    pub line: u32,
    /// Column in the line of code that defines the panic.
    ///
    /// Is zero if the column could not be parsed.
    pub column: u32,
    /// Widget where the panic happened.
    ///
    /// Is empty if no widget path was found in the panic print.
    pub widget_path: Txt,
    /// Stack backtrace.
    ///
    /// Is empty if no backtrace was found in the panic print.
    pub backtrace: Txt,
}
502
503/// Alternate mode `{:#}` prints full backtrace.
504impl fmt::Display for CrashPanic {
505    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
506        writeln!(
507            f,
508            "thread '{}' panicked at {}:{}:{}:",
509            self.thread, self.file, self.line, self.column
510        )?;
511        for line in self.message.lines() {
512            writeln!(f, "   {line}")?;
513        }
514        writeln!(f, "widget path:\n   {}", self.widget_path)?;
515
516        if f.alternate() {
517            writeln!(f, "stack backtrace:\n{}", self.backtrace)
518        } else {
519            writeln!(f, "stack backtrace:")?;
520            let mut snippet = 9;
521            for frame in self.backtrace_frames().skip_while(|f| f.is_after_panic) {
522                write!(f, "{frame}")?;
523                if snippet > 0 {
524                    let code = frame.code_snippet();
525                    if !code.is_empty() {
526                        snippet -= 1;
527                        writeln!(f, "{code}")?;
528                    }
529                }
530            }
531            Ok(())
532        }
533    }
534}
535impl CrashPanic {
536    /// Gets if `stderr` contains a panic that can be parsed by [`find`].
537    ///
538    /// [`find`]: Self::find
539    pub fn contains(stderr: &str) -> bool {
540        Self::find_impl(stderr, false).is_some()
541    }
542
543    /// Gets if `stderr` contains a panic that can be parsed by [`find`] and traced a widget/window path.
544    ///
545    /// [`find`]: Self::find
546    pub fn contains_widget(stderr: &str) -> bool {
547        match Self::find_impl(stderr, false) {
548            Some(p) => !p.widget_path.is_empty(),
549            None => false,
550        }
551    }
552
553    /// Try parse `stderr` for the crash panic.
554    ///
555    /// Only reliably works if the panic fully printed correctly and was formatted by the panic
556    /// hook installed by `crash_handler` or by the display print of this type.
557    pub fn find(stderr: &str) -> Option<Self> {
558        Self::find_impl(stderr, true)
559    }
560
561    fn find_impl(stderr: &str, parse: bool) -> Option<Self> {
562        let mut panic_at = usize::MAX;
563        let mut widget_path = usize::MAX;
564        let mut stack_backtrace = usize::MAX;
565        let mut i = 0;
566        for line in stderr.lines() {
567            if line.starts_with("thread '") && line.contains("' panicked at ") && line.ends_with(':') {
568                panic_at = i;
569                widget_path = usize::MAX;
570                stack_backtrace = usize::MAX;
571            } else if line == "widget path:" {
572                widget_path = i + "widget path:\n".len();
573            } else if line == "stack backtrace:" {
574                stack_backtrace = i + "stack backtrace:\n".len();
575            }
576            i += line.len() + "\n".len();
577        }
578
579        if panic_at == usize::MAX {
580            return None;
581        }
582
583        if !parse {
584            return Some(Self {
585                thread: Txt::from(""),
586                message: Txt::from(""),
587                file: Txt::from(""),
588                line: 0,
589                column: 0,
590                widget_path: if widget_path < stderr.len() {
591                    Txt::from("true")
592                } else {
593                    Txt::from("")
594                },
595                backtrace: Txt::from(""),
596            });
597        }
598
599        let panic_str = stderr[panic_at..].lines().next().unwrap();
600        let (thread, location) = panic_str.strip_prefix("thread '").unwrap().split_once("' panicked at ").unwrap();
601        let mut location = location.split(':');
602        let file = location.next().unwrap_or("");
603        let line: u32 = location.next().unwrap_or("0").parse().unwrap_or(0);
604        let column: u32 = location.next().unwrap_or("0").parse().unwrap_or(0);
605
606        let mut message = String::new();
607        let mut sep = "";
608        for line in stderr[panic_at + panic_str.len() + "\n".len()..].lines() {
609            if let Some(line) = line.strip_prefix("   ") {
610                message.push_str(sep);
611                message.push_str(line);
612                sep = "\n";
613            } else {
614                if message.is_empty() && line != "widget path:" && line != "stack backtrace:" {
615                    // not formatted by us, probably by Rust
616                    line.clone_into(&mut message);
617                }
618                break;
619            }
620        }
621
622        let widget_path = if widget_path < stderr.len() {
623            stderr[widget_path..].lines().next().unwrap().trim()
624        } else {
625            ""
626        };
627
628        let backtrace = if stack_backtrace < stderr.len() {
629            let mut i = stack_backtrace;
630            'backtrace_seek: for line in stderr[stack_backtrace..].lines() {
631                if !line.starts_with(' ') {
632                    'digit_check: for c in line.chars() {
633                        if !c.is_ascii_digit() {
634                            if c == ':' {
635                                break 'digit_check;
636                            } else {
637                                break 'backtrace_seek;
638                            }
639                        }
640                    }
641                }
642                i += line.len() + "\n".len();
643            }
644            &stderr[stack_backtrace..i]
645        } else {
646            ""
647        };
648
649        Some(Self {
650            thread: thread.to_txt(),
651            message: message.into(),
652            file: file.to_txt(),
653            line,
654            column,
655            widget_path: widget_path.to_txt(),
656            backtrace: backtrace.to_txt(),
657        })
658    }
659
660    /// Iterate over frames parsed from the `backtrace`.
661    pub fn backtrace_frames(&self) -> impl Iterator<Item = BacktraceFrame> + '_ {
662        BacktraceFrame::parse(&self.backtrace)
663    }
664}
665
/// Represents a frame parsed from a stack backtrace.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct BacktraceFrame {
    /// Position on the backtrace.
    pub n: usize,

    /// Function name.
    pub name: Txt,
    /// Source code file.
    ///
    /// Is empty if the frame was printed without a source location.
    pub file: Txt,
    /// Source code line.
    ///
    /// Is zero if the frame was printed without a source location.
    pub line: u32,

    /// If this frame is inside the Rust panic code.
    ///
    /// The parser sets this for all frames up to and including the `core::panicking::panic_fmt` frame.
    pub is_after_panic: bool,
}
683impl fmt::Display for BacktraceFrame {
684    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
685        writeln!(f, "{:>4}: {}", self.n, self.name)?;
686        if !self.file.is_empty() {
687            writeln!(f, "      at {}:{}", self.file, self.line)?;
688        }
689        Ok(())
690    }
691}
692impl BacktraceFrame {
693    /// Iterate over frames parsed from the `backtrace`.
694    pub fn parse(mut backtrace: &str) -> impl Iterator<Item = BacktraceFrame> + '_ {
695        let mut is_after_panic = backtrace.lines().any(|l| l.ends_with("core::panicking::panic_fmt"));
696        std::iter::from_fn(move || {
697            if backtrace.is_empty() {
698                None
699            } else {
700                let n_name = backtrace.lines().next().unwrap();
701                let (n, name) = if let Some((n, name)) = n_name.split_once(':') {
702                    let n = match n.trim_start().parse() {
703                        Ok(n) => n,
704                        Err(_) => {
705                            backtrace = "";
706                            return None;
707                        }
708                    };
709                    let name = name.trim();
710                    if name.is_empty() {
711                        backtrace = "";
712                        return None;
713                    }
714                    (n, name)
715                } else {
716                    backtrace = "";
717                    return None;
718                };
719
720                backtrace = &backtrace[n_name.len() + 1..];
721                let r = if backtrace.trim_start().starts_with("at ") {
722                    let file_line = backtrace.lines().next().unwrap();
723                    let (file, line) = if let Some((file, line)) = file_line.rsplit_once(':') {
724                        let file = file.trim_start().strip_prefix("at ").unwrap();
725                        let line = match line.trim_end().parse() {
726                            Ok(l) => l,
727                            Err(_) => {
728                                backtrace = "";
729                                return None;
730                            }
731                        };
732                        (file, line)
733                    } else {
734                        backtrace = "";
735                        return None;
736                    };
737
738                    backtrace = &backtrace[file_line.len() + 1..];
739
740                    BacktraceFrame {
741                        n,
742                        name: name.to_txt(),
743                        file: file.to_txt(),
744                        line,
745                        is_after_panic,
746                    }
747                } else {
748                    BacktraceFrame {
749                        n,
750                        name: name.to_txt(),
751                        file: Txt::from(""),
752                        line: 0,
753                        is_after_panic,
754                    }
755                };
756
757                if is_after_panic && name == "core::panicking::panic_fmt" {
758                    is_after_panic = false;
759                }
760
761                Some(r)
762            }
763        })
764    }
765
766    /// Reads the code line + four surrounding lines if the code file can be found.
767    pub fn code_snippet(&self) -> Txt {
768        if !self.file.is_empty()
769            && self.line > 0
770            && let Ok(file) = std::fs::File::open(&self.file)
771        {
772            use std::fmt::Write as _;
773            let mut r = String::new();
774
775            let reader = std::io::BufReader::new(file);
776
777            let line_s = self.line - 2.min(self.line - 1);
778            let lines = reader.lines().skip(line_s as usize - 1).take(5);
779            for (line, line_n) in lines.zip(line_s..) {
780                let line = match line {
781                    Ok(l) => l,
782                    Err(_) => return Txt::from(""),
783                };
784
785                if line_n == self.line {
786                    writeln!(&mut r, "      {line_n:>4} > {line}").unwrap();
787                } else {
788                    writeln!(&mut r, "      {line_n:>4} │ {line}").unwrap();
789                }
790            }
791
792            return r.into();
793        }
794        Txt::from("")
795    }
796}
797
798fn crash_handler_monitor_process(
799    dump_dir: Option<PathBuf>,
800    mut cfg_app: ConfigProcess,
801    mut cfg_dialog: ConfigProcess,
802    has_dialog_handler: bool,
803) -> ! {
804    zng_env::set_process_name("crash-handler-process");
805
806    let exe = std::env::current_exe()
807        .and_then(dunce::canonicalize)
808        .expect("failed to get the current executable");
809
810    let mut args: Box<[_]> = std::env::args().skip(1).map(Txt::from).collect();
811
812    let mut dialog_args = CrashArgs {
813        app_crashes: vec![],
814        dialog_crash: None,
815    };
816    loop {
817        let mut app_process = std::process::Command::new(&exe);
818        for cfg in &mut cfg_app {
819            cfg(&mut app_process, &dialog_args);
820        }
821
822        match run_process(
823            dump_dir.as_deref(),
824            app_process
825                .env(APP_PROCESS, format!("restart-{}", dialog_args.app_crashes.len()))
826                .args(args.iter()),
827        ) {
828            Ok((status, [stdout, stderr], dump_file)) => {
829                if status.success() {
830                    let code = status.code().unwrap_or(0);
831                    tracing::info!(
832                        "crash monitor-process exiting with success code ({code}), {} crashes",
833                        dialog_args.app_crashes.len()
834                    );
835                    zng_env::exit(code);
836                } else {
837                    let code = status.code();
838                    #[allow(unused_mut)] // Windows has no signal
839                    let mut signal = None::<i32>;
840
841                    #[cfg(windows)]
842                    if code == Some(1) {
843                        tracing::warn!(
844                            "app-process exit code (1), probably killed by the system, \
845                                        will exit monitor-process with the same code"
846                        );
847                        zng_env::exit(1);
848                    }
849                    #[cfg(unix)]
850                    if code.is_none() {
851                        use std::os::unix::process::ExitStatusExt as _;
852                        signal = status.signal();
853
854                        if let Some(sig) = signal
855                            && [2, 9, 17, 19, 23].contains(&sig)
856                        {
857                            tracing::warn!(
858                                "app-process exited by signal ({sig}), \
859                                                will exit monitor-process with code 1"
860                            );
861                            zng_env::exit(1);
862                        }
863                    }
864
865                    tracing::error!(
866                        "app-process crashed with exit code ({:#X}), signal ({:#?}), {} crashes previously",
867                        code.unwrap_or(0),
868                        signal.unwrap_or(0),
869                        dialog_args.app_crashes.len()
870                    );
871
872                    let timestamp = SystemTime::now();
873
874                    dialog_args.app_crashes.push(CrashError::new(
875                        timestamp,
876                        code,
877                        signal,
878                        stdout.into(),
879                        stderr.into(),
880                        dump_file,
881                        args.clone(),
882                    ));
883
884                    // show dialog, retries once if dialog crashes too.
885                    for _ in 0..2 {
886                        // serialize app-crashes to a temp JSON file
887                        let timestamp_nanos = timestamp.duration_since(SystemTime::UNIX_EPOCH).map(|d| d.as_nanos()).unwrap_or(0);
888                        let mut timestamp = timestamp_nanos;
889                        let mut retries = 0;
890                        let crash_file = loop {
891                            let path = std::env::temp_dir().join(format!("zng-crash-errors-{timestamp:#x}"));
892                            match std::fs::File::create_new(&path) {
893                                Ok(f) => match serde_json::to_writer(std::io::BufWriter::new(f), &dialog_args) {
894                                    Ok(_) => break path,
895                                    Err(e) => {
896                                        if e.is_io() {
897                                            if retries > 20 {
898                                                panic!("error writing crash errors, {e}");
899                                            } else if retries > 5 {
900                                                timestamp += 1;
901                                            }
902                                            std::thread::sleep(100.ms());
903                                        } else {
904                                            panic!("error serializing crash errors, {e}");
905                                        }
906                                    }
907                                },
908                                Err(e) => {
909                                    if e.kind() == std::io::ErrorKind::AlreadyExists {
910                                        timestamp += 1;
911                                    } else {
912                                        if retries > 20 {
913                                            panic!("error creating crash errors file, {e}");
914                                        } else if retries > 5 {
915                                            timestamp += 1;
916                                        }
917                                        std::thread::sleep(100.ms());
918                                    }
919                                }
920                            }
921                            retries += 1;
922                        };
923
924                        let dialog_result = if has_dialog_handler {
925                            let mut dialog_process = std::process::Command::new(&exe);
926                            for cfg in &mut cfg_dialog {
927                                cfg(&mut dialog_process, &dialog_args);
928                            }
929                            run_process(dump_dir.as_deref(), dialog_process.env(DIALOG_PROCESS, &crash_file))
930                        } else {
931                            Ok((std::process::ExitStatus::default(), [String::new(), String::new()], None))
932                        };
933
934                        for _ in 0..5 {
935                            if !crash_file.exists() || std::fs::remove_file(&crash_file).is_ok() {
936                                break;
937                            }
938                            std::thread::sleep(100.ms());
939                        }
940
941                        let response = match dialog_result {
942                            Ok((dlg_status, [dlg_stdout, dlg_stderr], dlg_dump_file)) => {
943                                if dlg_status.success() {
944                                    dlg_stdout
945                                        .lines()
946                                        .filter_map(|l| l.trim().strip_prefix(RESPONSE_PREFIX))
947                                        .next_back()
948                                        .unwrap_or("exit 0")
949                                        .to_owned()
950                                } else {
951                                    let code = dlg_status.code();
952                                    #[allow(unused_mut)] // Windows has no signal
953                                    let mut signal = None::<i32>;
954
955                                    #[cfg(windows)]
956                                    if code == Some(1) {
957                                        tracing::warn!(
958                                            "dialog-process exit code (1), probably killed by the system, \
959                                                        will exit monitor-process with the same code"
960                                        );
961                                        zng_env::exit(1);
962                                    }
963                                    #[cfg(unix)]
964                                    if code.is_none() {
965                                        use std::os::unix::process::ExitStatusExt as _;
966                                        signal = status.signal();
967
968                                        if let Some(sig) = signal
969                                            && [2, 9, 17, 19, 23].contains(&sig)
970                                        {
971                                            tracing::warn!(
972                                                "dialog-process exited by signal ({sig}), \
973                                                                will exit monitor-process with code 1"
974                                            );
975                                            zng_env::exit(1);
976                                        }
977                                    }
978
979                                    let dialog_crash = CrashError::new(
980                                        SystemTime::now(),
981                                        code,
982                                        signal,
983                                        dlg_stdout.into(),
984                                        dlg_stderr.into(),
985                                        dlg_dump_file,
986                                        Box::new([]),
987                                    );
988                                    tracing::error!("crash dialog-process crashed, {dialog_crash}");
989
990                                    if dialog_args.dialog_crash.is_none() {
991                                        dialog_args.dialog_crash = Some(dialog_crash);
992                                        continue;
993                                    } else {
994                                        let latest = dialog_args.latest();
995                                        eprintln!("{latest}");
996                                        zng_env::exit(latest.code.unwrap_or(1));
997                                    }
998                                }
999                            }
1000                            Err(e) => panic!("error running dialog-process, {e}"),
1001                        };
1002
1003                        if let Some(args_json) = response.strip_prefix("restart ") {
1004                            args = serde_json::from_str(args_json).expect("crash dialog-process did not respond 'restart' correctly");
1005                            break;
1006                        } else if let Some(code) = response.strip_prefix("exit ") {
1007                            let code: i32 = code.parse().expect("crash dialog-process did not respond 'code' correctly");
1008                            zng_env::exit(code);
1009                        } else {
1010                            panic!("crash dialog-process did not respond correctly")
1011                        }
1012                    }
1013                }
1014            }
1015            Err(e) => panic!("error running app-process, {e}"),
1016        }
1017    }
1018}
1019fn run_process(
1020    dump_dir: Option<&Path>,
1021    command: &mut std::process::Command,
1022) -> std::io::Result<(std::process::ExitStatus, [String; 2], Option<PathBuf>)> {
1023    struct DumpServer {
1024        shutdown: Arc<AtomicBool>,
1025        runner: std::thread::JoinHandle<Option<PathBuf>>,
1026    }
1027    let mut dump_server = None;
1028    if let Some(dump_dir) = dump_dir {
1029        match std::fs::create_dir_all(dump_dir) {
1030            Ok(_) => {
1031                let uuid = uuid::Uuid::new_v4();
1032                let dump_file = dump_dir.join(format!("{}.dmp", uuid.simple()));
1033                let dump_channel = std::env::temp_dir().join(format!("zng-crash-{}", uuid.simple()));
1034                match minidumper::Server::with_name(dump_channel.as_path()) {
1035                    Ok(mut s) => {
1036                        command.env(DUMP_CHANNEL, &dump_channel);
1037                        let shutdown = Arc::new(AtomicBool::new(false));
1038                        let runner = std::thread::Builder::new()
1039                            .name("minidumper-server".into())
1040                            .stack_size(512 * 1024)
1041                            .spawn(clmv!(shutdown, || {
1042                                let created_file = Arc::new(Mutex::new(None));
1043                                if let Err(e) = s.run(
1044                                    Box::new(MinidumpServerHandler {
1045                                        dump_file,
1046                                        created_file: created_file.clone(),
1047                                    }),
1048                                    &shutdown,
1049                                    None,
1050                                ) {
1051                                    tracing::error!("minidump server exited with error, {e}");
1052                                }
1053                                created_file.lock().take()
1054                            }))
1055                            .expect("failed to spawn thread");
1056                        dump_server = Some(DumpServer { shutdown, runner });
1057                    }
1058                    Err(e) => tracing::error!("failed to spawn minidump server, will not enable crash handling, {e}"),
1059                }
1060            }
1061            Err(e) => tracing::error!("cannot create minidump dir, will not enable crash handling, {e}"),
1062        }
1063    }
1064
1065    let mut app_process = command
1066        .env("RUST_BACKTRACE", "full")
1067        .env("CLICOLOR_FORCE", "1")
1068        .stdout(std::process::Stdio::piped())
1069        .stderr(std::process::Stdio::piped())
1070        .spawn()?;
1071
1072    let stdout = capture_and_print(app_process.stdout.take().unwrap(), false);
1073    let stderr = capture_and_print(app_process.stderr.take().unwrap(), true);
1074
1075    let status = app_process.wait()?;
1076
1077    let stdout = match stdout.join() {
1078        Ok(r) => r,
1079        Err(p) => std::panic::resume_unwind(p),
1080    };
1081    let stderr = match stderr.join() {
1082        Ok(r) => r,
1083        Err(p) => std::panic::resume_unwind(p),
1084    };
1085
1086    let mut dump_file = None;
1087    if let Some(s) = dump_server {
1088        s.shutdown.store(true, atomic::Ordering::Relaxed);
1089        match s.runner.join() {
1090            Ok(r) => dump_file = r,
1091            Err(p) => std::panic::resume_unwind(p),
1092        };
1093    }
1094
1095    Ok((status, [stdout, stderr], dump_file))
1096}
1097struct MinidumpServerHandler {
1098    dump_file: PathBuf,
1099    created_file: Arc<Mutex<Option<PathBuf>>>,
1100}
1101impl minidumper::ServerHandler for MinidumpServerHandler {
1102    fn create_minidump_file(&self) -> Result<(std::fs::File, PathBuf), std::io::Error> {
1103        let file = std::fs::File::create_new(&self.dump_file)?;
1104        Ok((file, self.dump_file.clone()))
1105    }
1106
1107    fn on_minidump_created(&self, result: Result<minidumper::MinidumpBinary, minidumper::Error>) -> minidumper::LoopAction {
1108        match result {
1109            Ok(b) => *self.created_file.lock() = Some(b.path),
1110            Err(e) => tracing::error!("failed to write minidump file, {e}"),
1111        }
1112        minidumper::LoopAction::Exit
1113    }
1114
1115    fn on_message(&self, _: u32, _: Vec<u8>) {}
1116
1117    fn on_client_connected(&self, num_clients: usize) -> minidumper::LoopAction {
1118        if num_clients > 1 {
1119            tracing::error!("expected only one minidump client, {num_clients} connected, exiting server");
1120            minidumper::LoopAction::Exit
1121        } else {
1122            minidumper::LoopAction::Continue
1123        }
1124    }
1125
1126    fn on_client_disconnected(&self, num_clients: usize) -> minidumper::LoopAction {
1127        if num_clients != 0 {
1128            tracing::error!("expected only one minidump client disconnect, {num_clients} still connected");
1129        }
1130        minidumper::LoopAction::Exit
1131    }
1132}
/// Spawns a thread that reads `stream` until end-of-file, echoing every chunk read to the
/// stdout of this process (or stderr if `is_err`) and also capturing it.
///
/// The join handle yields all captured bytes converted to a string, invalid UTF-8 sequences
/// are replaced.
///
/// # Panics
///
/// Panics if the thread cannot be spawned. The spawned thread panics on read errors, except
/// interrupted reads that are retried, and on errors writing or flushing the echo.
fn capture_and_print(mut stream: impl std::io::Read + Send + 'static, is_err: bool) -> std::thread::JoinHandle<String> {
    let name = if is_err { "stderr" } else { "stdout" };
    std::thread::Builder::new()
        .name(format!("{name}-reader"))
        .stack_size(256 * 1024)
        .spawn(move || {
            let mut capture = vec![];
            // `read` returns as soon as any data is available, a larger buffer only reduces
            // the number of read/write/flush calls for large outputs (backtraces).
            let mut buffer = [0u8; 1024];
            loop {
                let n = match stream.read(&mut buffer) {
                    Ok(0) => break,
                    Ok(n) => n,
                    // interrupted reads are non-fatal, `Read::read` documents that they should be retried
                    Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                    Err(e) => panic!("{name} read error, {e}"),
                };

                let new = &buffer[..n];
                capture.extend_from_slice(new);

                // echo immediately so the output is visible in real time
                let r = if is_err {
                    let mut s = std::io::stderr();
                    s.write_all(new).and_then(|_| s.flush())
                } else {
                    let mut s = std::io::stdout();
                    s.write_all(new).and_then(|_| s.flush())
                };
                if let Err(e) = r {
                    panic!("{name} write error, {e}")
                }
            }
            String::from_utf8_lossy(&capture).into_owned()
        })
        .expect("failed to spawn thread")
}
1167
1168fn crash_handler_app_process(dump_enabled: bool) {
1169    std::panic::set_hook(Box::new(panic_handler));
1170    if dump_enabled {
1171        minidump_attach();
1172    }
1173
1174    // app-process execution happens after this.
1175}
1176
1177fn crash_handler_dialog_process(dump_enabled: bool, dialog: CrashDialogHandler, args_file: String) -> ! {
1178    zng_env::set_process_name("crash-dialog-process");
1179
1180    std::panic::set_hook(Box::new(panic_handler));
1181    if dump_enabled {
1182        minidump_attach();
1183    }
1184
1185    let mut retries = 0;
1186    let args = loop {
1187        match std::fs::read_to_string(&args_file) {
1188            Ok(args) => break args,
1189            Err(e) => {
1190                if e.kind() != std::io::ErrorKind::NotFound && retries < 10 {
1191                    retries += 1;
1192                    continue;
1193                }
1194                panic!("error reading args file, {e}");
1195            }
1196        }
1197    };
1198
1199    dialog(serde_json::from_str(&args).expect("error deserializing args"));
1200    CrashArgs {
1201        app_crashes: vec![],
1202        dialog_crash: None,
1203    }
1204    .exit(0)
1205}
1206
1207fn panic_handler(info: &std::panic::PanicHookInfo) {
1208    let backtrace = std::backtrace::Backtrace::capture();
1209    let path = crate::widget::WIDGET.trace_path();
1210    let panic = PanicInfo::from_hook(info);
1211    eprintln!("{panic}widget path:\n   {path}\nstack backtrace:\n{backtrace}");
1212}
1213
1214fn minidump_attach() {
1215    let channel_name = match std::env::var(DUMP_CHANNEL) {
1216        Ok(n) if !n.is_empty() => PathBuf::from(n),
1217        _ => {
1218            eprintln!("expected minidump channel name, this instance will not handle crashes");
1219            return;
1220        }
1221    };
1222    let client = match minidumper::Client::with_name(channel_name.as_path()) {
1223        Ok(c) => c,
1224        Err(e) => {
1225            eprintln!("failed to connect minidump client, this instance will not handle crashes, {e}");
1226            return;
1227        }
1228    };
1229    struct Handler(minidumper::Client);
1230    // SAFETY: on_crash does the minimal possible work
1231    unsafe impl crash_handler::CrashEvent for Handler {
1232        fn on_crash(&self, context: &crash_handler::CrashContext) -> crash_handler::CrashEventResult {
1233            crash_handler::CrashEventResult::Handled(self.0.request_dump(context).is_ok())
1234        }
1235    }
1236    let handler = match crash_handler::CrashHandler::attach(Box::new(Handler(client))) {
1237        Ok(h) => h,
1238        Err(e) => {
1239            eprintln!("failed attach minidump crash handler, this instance will not handle crashes, {e}");
1240            return;
1241        }
1242    };
1243
1244    *CRASH_HANDLER.lock() = Some(handler);
1245}
1246static CRASH_HANDLER: Mutex<Option<crash_handler::CrashHandler>> = Mutex::new(None);
1247
1248#[derive(Debug)]
1249struct PanicInfo {
1250    pub thread: Txt,
1251    pub msg: Txt,
1252    pub file: Txt,
1253    pub line: u32,
1254    pub column: u32,
1255}
1256impl PanicInfo {
1257    pub fn from_hook(info: &std::panic::PanicHookInfo) -> Self {
1258        let current_thread = std::thread::current();
1259        let thread = current_thread.name().unwrap_or("<unnamed>");
1260        let msg = Self::payload(info.payload());
1261
1262        let (file, line, column) = if let Some(l) = info.location() {
1263            (l.file(), l.line(), l.column())
1264        } else {
1265            ("<unknown>", 0, 0)
1266        };
1267        Self {
1268            thread: thread.to_txt(),
1269            msg,
1270            file: file.to_txt(),
1271            line,
1272            column,
1273        }
1274    }
1275
1276    fn payload(p: &dyn std::any::Any) -> Txt {
1277        match p.downcast_ref::<&'static str>() {
1278            Some(s) => s,
1279            None => match p.downcast_ref::<String>() {
1280                Some(s) => &s[..],
1281                None => "Box<dyn Any>",
1282            },
1283        }
1284        .to_txt()
1285    }
1286}
1287impl std::error::Error for PanicInfo {}
1288impl fmt::Display for PanicInfo {
1289    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1290        writeln!(
1291            f,
1292            "thread '{}' panicked at {}:{}:{}:",
1293            self.thread, self.file, self.line, self.column
1294        )?;
1295        for line in self.msg.lines() {
1296            writeln!(f, "   {line}")?;
1297        }
1298        Ok(())
1299    }
1300}