zng_app/
crash_handler.rs

1#![cfg(all(
2    feature = "crash_handler",
3    not(any(target_arch = "wasm32", target_os = "android", target_os = "ios"))
4))]
5
6//! App-process crash handler.
7//!
8//! See the `zng::app::crash_handler` documentation for more details.
9
10use parking_lot::Mutex;
11use std::{
12    fmt,
13    io::{BufRead, Write},
14    path::{Path, PathBuf},
15    sync::{Arc, atomic::AtomicBool},
16    time::SystemTime,
17};
18use zng_clone_move::clmv;
19use zng_layout::unit::TimeUnits as _;
20
21use zng_txt::{ToTxt as _, Txt};
22
/// Environment variable that causes the crash handler to not start if set.
///
/// The handler only checks that the variable can be read, the value itself is not inspected.
///
/// This is particularly useful to set in debugger launch configs. The crash handler spawns
/// a different process for the app, so breakpoints set on the launched process will not work.
pub const NO_CRASH_HANDLER: &str = "ZNG_NO_CRASH_HANDLER";
28
// Process start handler that selects the crash handler role of the current process.
//
// In order: does nothing if the crash handler is disabled, continues as the app-process if
// `APP_PROCESS` is set, runs the crash dialog if `DIALOG_PROCESS` is set, otherwise becomes the
// monitor-process.
zng_env::on_process_start!(|process_start_args| {
    // disabled by environment variable
    if std::env::var(NO_CRASH_HANDLER).is_ok() {
        return;
    }

    // apply every `crash_handler_config!` registration, stop as soon as one disables the handler
    let mut config = CrashConfig::new();
    for ext in CRASH_CONFIG {
        ext(&mut config);
        if config.no_crash_handler {
            return;
        }
    }

    // let the other pending start handlers run first, unless the yield budget is almost spent
    if process_start_args.next_handlers_count > 0 && process_start_args.yield_count < zng_env::ProcessStartArgs::MAX_YIELD_COUNT - 10 {
        // extra sure that this is the app-process
        return process_start_args.yield_once();
    }

    // any result other than "not present" means the monitor-process spawned this process as the app
    if std::env::var(APP_PROCESS) != Err(std::env::VarError::NotPresent) {
        return crash_handler_app_process(config.dump_dir.is_some());
    }

    match std::env::var(DIALOG_PROCESS) {
        // dialog-process, `dialog` has priority over `default_dialog`
        // NOTE(review): `config` is used again after this match, so this call presumably never returns — confirm
        Ok(args_file) => crash_handler_dialog_process(
            config.dump_dir.is_some(),
            config
                .dialog
                .or(config.default_dialog)
                .expect("dialog-process spawned without dialog handler"),
            args_file,
        ),
        Err(e) => match e {
            // not the dialog-process, continue as monitor-process
            std::env::VarError::NotPresent => {}
            e => panic!("invalid dialog env args, {e:?}"),
        },
    }

    // monitor-process, this call never returns
    crash_handler_monitor_process(
        config.dump_dir,
        config.app_process,
        config.dialog_process,
        config.default_dialog.is_some() || config.dialog.is_some(),
    );
});
73
74/// Gets the number of crash restarts in the app-process.
75///
76/// Always returns zero if called in other processes.
77pub fn restart_count() -> usize {
78    match std::env::var(APP_PROCESS) {
79        Ok(c) => c.strip_prefix("restart-").unwrap_or("0").parse().unwrap_or(0),
80        Err(_) => 0,
81    }
82}
83
/// Environment variable set by the monitor-process on the spawned app-process.
///
/// The value is `restart-{count}`, where the count is the number of crashes so far.
const APP_PROCESS: &str = "ZNG_CRASH_HANDLER_APP";
/// Environment variable that marks the dialog-process, the value is passed on as the `args_file`.
const DIALOG_PROCESS: &str = "ZNG_CRASH_HANDLER_DIALOG";
/// Environment variable name for the minidump channel.
///
/// NOTE(review): not used in the visible part of this file, confirm usage.
const DUMP_CHANNEL: &str = "ZNG_MINIDUMP_CHANNEL";
/// Prefix of the stdout line printed by the [`CrashArgs`] response methods (`restart`, `exit`).
const RESPONSE_PREFIX: &str = "zng_crash_response: ";
88
// Config functions registered by `crash_handler_config!`, all are called on process start.
#[linkme::distributed_slice]
static CRASH_CONFIG: [fn(&mut CrashConfig)];
91
/// <span data-del-macro-root></span> Register a `FnOnce(&mut CrashConfig)` closure to be
/// called on process init to configure the crash handler.
///
/// The closure is wrapped in a function that is registered in a static, the macro expands to items
/// named `_CRASH_CONFIG` and `_crash_config`.
///
/// See [`CrashConfig`] for more details.
#[macro_export]
macro_rules! crash_handler_config {
    ($closure:expr) => {
        // expanded from:
        // #[linkme::distributed_slice(CRASH_CONFIG)]
        // static _CRASH_CONFIG: fn(&mut CrashConfig) = _crash_config;
        // so that users don't need to depend on linkme just to call this macro.
        //
        // The link section names must match what linkme generates for `CRASH_CONFIG`,
        // do not edit them by hand.
        #[used]
        #[cfg_attr(
            any(
                target_os = "none",
                target_os = "linux",
                target_os = "android",
                target_os = "fuchsia",
                target_os = "psp"
            ),
            unsafe(link_section = "linkme_CRASH_CONFIG")
        )]
        #[cfg_attr(
            any(target_os = "macos", target_os = "ios", target_os = "tvos"),
            unsafe(link_section = "__DATA,__linkmeK3uV0Fq0,regular,no_dead_strip")
        )]
        #[cfg_attr(
            any(target_os = "uefi", target_os = "windows"),
            unsafe(link_section = ".linkme_CRASH_CONFIG$b")
        )]
        #[cfg_attr(target_os = "illumos", unsafe(link_section = "set_linkme_CRASH_CONFIG"))]
        #[cfg_attr(
            any(target_os = "freebsd", target_os = "openbsd"),
            unsafe(link_section = "linkme_CRASH_CONFIG")
        )]
        #[doc(hidden)]
        static _CRASH_CONFIG: fn(&mut $crate::crash_handler::CrashConfig) = _crash_config;
        #[doc(hidden)]
        fn _crash_config(cfg: &mut $crate::crash_handler::CrashConfig) {
            // the inner generic function gives the closure expression its expected `FnOnce` signature
            fn crash_config(cfg: &mut $crate::crash_handler::CrashConfig, handler: impl FnOnce(&mut $crate::crash_handler::CrashConfig)) {
                handler(cfg)
            }
            crash_config(cfg, $closure)
        }
    };
}
138pub use crate::crash_handler_config;
139
// Closures that customize a process `Command` before it is spawned, each receives the current crash args.
type ConfigProcess = Vec<Box<dyn for<'a, 'b> FnMut(&'a mut std::process::Command, &'b CrashArgs) -> &'a mut std::process::Command>>;
// Crash dialog handler, called at most once with the crash args.
type CrashDialogHandler = Box<dyn FnOnce(CrashArgs)>;
142
/// Crash handler config.
///
/// Use [`crash_handler_config!`] to set config.
///
/// [`crash_handler_config!`]: crate::crash_handler_config!
pub struct CrashConfig {
    // fallback dialog handler, only used when `dialog` is not set
    default_dialog: Option<CrashDialogHandler>,
    // app defined dialog handler
    dialog: Option<CrashDialogHandler>,
    // closures called before the app-process is spawned
    app_process: ConfigProcess,
    // closures called before the dialog-process is spawned
    dialog_process: ConfigProcess,
    // minidump directory, `None` disables minidump collection
    dump_dir: Option<PathBuf>,
    // if set the process starts without crash handler
    no_crash_handler: bool,
}
impl CrashConfig {
    // Default config: no dialog handlers, no process config, minidump enabled in the cache dir.
    fn new() -> Self {
        Self {
            default_dialog: None,
            dialog: None,
            app_process: vec![],
            dialog_process: vec![],
            dump_dir: Some(zng_env::cache("zng_minidump")),
            no_crash_handler: false,
        }
    }

    /// Set the crash dialog process handler.
    ///
    /// The dialog `handler` can run an app or show a native dialog, it must use the [`CrashArgs`] process
    /// terminating methods to respond, if it returns [`CrashArgs::exit`] will run.
    ///
    /// Note that the handler does not need to actually show any dialog, it can just save crash info and
    /// restart the app for example.
    ///
    /// Only the first handler set is used, subsequent calls are ignored.
    pub fn dialog(&mut self, handler: impl FnOnce(CrashArgs) + 'static) {
        if self.dialog.is_none() {
            self.dialog = Some(Box::new(handler));
        }
    }

    /// Set the crash dialog-handler used if [`dialog`] is not set.
    ///
    /// This is used by app libraries or themes to provide a default dialog. Each call replaces
    /// the previous default handler.
    ///
    /// [`dialog`]: Self::dialog
    pub fn default_dialog(&mut self, handler: impl FnOnce(CrashArgs) + 'static) {
        self.default_dialog = Some(Box::new(handler));
    }

    /// Add a closure that is called just before the app-process is spawned.
    ///
    /// The closure receives the process command and the crashes collected so far.
    pub fn app_process(
        &mut self,
        cfg: impl for<'a, 'b> FnMut(&'a mut std::process::Command, &'b CrashArgs) -> &'a mut std::process::Command + 'static,
    ) {
        self.app_process.push(Box::new(cfg));
    }

    /// Add a closure that is called just before the dialog-process is spawned.
    pub fn dialog_process(
        &mut self,
        cfg: impl for<'a, 'b> FnMut(&'a mut std::process::Command, &'b CrashArgs) -> &'a mut std::process::Command + 'static,
    ) {
        self.dialog_process.push(Box::new(cfg));
    }

    /// Change the minidump directory.
    ///
    /// Is `zng::env::cache("zng_minidump")` by default.
    pub fn minidump_dir(&mut self, dir: impl Into<PathBuf>) {
        self.dump_dir = Some(dir.into());
    }

    /// Do not collect a minidump.
    pub fn no_minidump(&mut self) {
        self.dump_dir = None;
    }

    /// Does not run with crash handler.
    ///
    /// This is equivalent to running with the [`NO_CRASH_HANDLER`] (`ZNG_NO_CRASH_HANDLER`) env var set.
    pub fn no_crash_handler(&mut self) {
        self.no_crash_handler = true;
    }
}
223
/// Arguments for the crash handler dialog function.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct CrashArgs {
    /// Info about the app-process crashes.
    ///
    /// Has at least one entry, latest is last. Includes all crashes since the start of the monitor-process.
    pub app_crashes: Vec<CrashError>,

    /// Info about a crash in the dialog-process spawned to handle the latest app-process crash.
    ///
    /// If set, this is the last chance to show something to the end user. If the current dialog crashes too,
    /// the monitor-process will give up. If you started an `APP` to show a crash dialog, try using a native
    /// dialog directly now, or just give up, clearly things are far from ok.
    pub dialog_crash: Option<CrashError>,
}
239impl CrashArgs {
240    /// Latest crash.
241    pub fn latest(&self) -> &CrashError {
242        self.app_crashes.last().unwrap()
243    }
244
245    /// Restart the app-process with same argument as the latest crash.
246    pub fn restart(&self) -> ! {
247        let json_args = serde_json::to_string(&self.latest().args[..]).unwrap();
248        println!("{RESPONSE_PREFIX}restart {json_args}");
249        zng_env::exit(0)
250    }
251
252    /// Restart the app-process with custom arguments.
253    pub fn restart_with(&self, args: &[Txt]) -> ! {
254        let json_args = serde_json::to_string(&args).unwrap();
255        println!("{RESPONSE_PREFIX}restart {json_args}");
256        zng_env::exit(0)
257    }
258
259    /// Exit the monitor-process (application) with code.
260    pub fn exit(&self, code: i32) -> ! {
261        println!("{RESPONSE_PREFIX}exit {code}");
262        zng_env::exit(0)
263    }
264}
265impl fmt::Display for CrashArgs {
266    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
267        writeln!(f, "APP CRASHES:\n")?;
268
269        for c in self.app_crashes.iter() {
270            writeln!(f, "{c}")?;
271        }
272
273        if let Some(c) = &self.dialog_crash {
274            writeln!(f, "\nDIALOG CRASH:\n")?;
275            writeln!(f, "{c}")?;
276        }
277
278        Ok(())
279    }
280}
281
/// Info about an app-process crash.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CrashError {
    /// Crash moment.
    pub timestamp: SystemTime,
    /// Process exit code.
    ///
    /// Panic detection methods only inspect `stderr` when this is `Some(101)`.
    pub code: Option<i32>,
    /// Unix signal that terminated the process.
    pub signal: Option<i32>,
    /// Full capture of the app stdout.
    ///
    /// May contain ANSI escape sequences.
    pub stdout: Txt,
    /// Full capture of the app stderr.
    ///
    /// May contain ANSI escape sequences.
    pub stderr: Txt,
    /// Arguments used.
    pub args: Box<[Txt]>,
    /// Minidump file.
    pub minidump: Option<PathBuf>,
    /// Operating system.
    ///
    /// See [`std::env::consts::OS`] for details.
    pub os: Txt,
}
304/// Alternate mode `{:#}` prints plain stdout and stderr (no ANSI escape sequences).
305impl fmt::Display for CrashError {
306    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
307        writeln!(f, "timestamp: {}", self.unix_time())?;
308        if let Some(c) = self.code {
309            writeln!(f, "exit code: {c:#X}")?
310        }
311        if let Some(c) = self.signal {
312            writeln!(f, "exit signal: {c}")?
313        }
314        if let Some(p) = self.minidump.as_ref() {
315            writeln!(f, "minidump: {}", p.display())?
316        }
317        if f.alternate() {
318            write!(f, "\nSTDOUT:\n{}\nSTDERR:\n{}\n", self.stdout_plain(), self.stderr_plain())
319        } else {
320            write!(f, "\nSTDOUT:\n{}\nSTDERR:\n{}\n", self.stdout, self.stderr)
321        }
322    }
323}
324impl CrashError {
325    fn new(
326        timestamp: SystemTime,
327        code: Option<i32>,
328        signal: Option<i32>,
329        stdout: Txt,
330        stderr: Txt,
331        minidump: Option<PathBuf>,
332        args: Box<[Txt]>,
333    ) -> Self {
334        Self {
335            timestamp,
336            code,
337            signal,
338            stdout,
339            stderr,
340            args,
341            minidump,
342            os: std::env::consts::OS.into(),
343        }
344    }
345
346    /// Seconds since Unix epoch.
347    pub fn unix_time(&self) -> u64 {
348        self.timestamp.duration_since(SystemTime::UNIX_EPOCH).unwrap_or_default().as_secs()
349    }
350
351    /// Gets if `stdout` does not contain any ANSI scape sequences.
352    pub fn is_stdout_plain(&self) -> bool {
353        !self.stdout.contains(CSI)
354    }
355
356    /// Gets if `stderr` does not contain any ANSI scape sequences.
357    pub fn is_stderr_plain(&self) -> bool {
358        !self.stderr.contains(CSI)
359    }
360
361    /// Get `stdout` without any ANSI escape sequences (CSI).
362    pub fn stdout_plain(&self) -> Txt {
363        remove_ansi_csi(&self.stdout)
364    }
365
366    /// Get `stderr` without any ANSI escape sequences (CSI).
367    pub fn stderr_plain(&self) -> Txt {
368        remove_ansi_csi(&self.stderr)
369    }
370
371    /// Gets if `stderr` contains a crash panic.
372    pub fn has_panic(&self) -> bool {
373        if self.code == Some(101) {
374            CrashPanic::contains(&self.stderr_plain())
375        } else {
376            false
377        }
378    }
379
380    /// Gets if `stderr` contains a crash panic that traced widget/window path.
381    pub fn has_panic_widget(&self) -> bool {
382        if self.code == Some(101) {
383            CrashPanic::contains_widget(&self.stderr_plain())
384        } else {
385            false
386        }
387    }
388
389    /// Try parse `stderr` for the crash panic.
390    ///
391    /// Only reliably works if the panic fully printed correctly and was formatted by the panic
392    /// hook installed by `crash_handler` or by the display print of [`CrashPanic`].
393    pub fn find_panic(&self) -> Option<CrashPanic> {
394        if self.code == Some(101) {
395            CrashPanic::find(&self.stderr_plain())
396        } else {
397            None
398        }
399    }
400
401    /// Best attempt at generating a readable error message.
402    ///
403    /// Is the panic message, or the minidump exception, with the exit code and signal.
404    pub fn message(&self) -> Txt {
405        let mut msg = if let Some(msg) = self.find_panic().map(|p| p.message) {
406            msg
407        } else if let Some(msg) = self.minidump_message() {
408            msg
409        } else {
410            "".into()
411        };
412        use std::fmt::Write as _;
413
414        if let Some(c) = self.code {
415            let sep = if msg.is_empty() { "" } else { "\n" };
416            write!(&mut msg, "{sep}Code: {c:#X}").unwrap();
417        }
418        if let Some(c) = self.signal {
419            let sep = if msg.is_empty() { "" } else { "\n" };
420            write!(&mut msg, "{sep}Signal: {c}").unwrap();
421        }
422        msg.end_mut();
423        msg
424    }
425
426    fn minidump_message(&self) -> Option<Txt> {
427        use minidump::*;
428
429        let dump = match Minidump::read_path(self.minidump.as_ref()?) {
430            Ok(d) => d,
431            Err(e) => {
432                tracing::error!("error reading minidump, {e}");
433                return None;
434            }
435        };
436
437        let system_info = match dump.get_stream::<MinidumpSystemInfo>() {
438            Ok(s) => s,
439            Err(e) => {
440                tracing::error!("error reading minidump system info, {e}");
441                return None;
442            }
443        };
444        let exception = match dump.get_stream::<MinidumpException>() {
445            Ok(s) => s,
446            Err(e) => {
447                tracing::error!("error reading minidump exception, {e}");
448                return None;
449            }
450        };
451
452        let crash_reason = exception.get_crash_reason(system_info.os, system_info.cpu);
453
454        Some(zng_txt::formatx!("{crash_reason}"))
455    }
456}
457
458const CSI: &str = "\x1b[";
459
460/// Remove ANSI escape sequences (CSI) from `s`.
461pub fn remove_ansi_csi(mut s: &str) -> Txt {
462    fn is_esc_end(byte: u8) -> bool {
463        (0x40..=0x7e).contains(&byte)
464    }
465
466    let mut r = String::new();
467    while let Some(i) = s.find(CSI) {
468        r.push_str(&s[..i]);
469        s = &s[i + CSI.len()..];
470        let mut esc_end = 0;
471        while esc_end < s.len() && !is_esc_end(s.as_bytes()[esc_end]) {
472            esc_end += 1;
473        }
474        esc_end += 1;
475        s = &s[esc_end..];
476    }
477    r.push_str(s);
478    r.into()
479}
480
/// Panic parsed from a `stderr` dump.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct CrashPanic {
    /// Name of thread that panicked.
    pub thread: Txt,
    /// Panic message.
    pub message: Txt,
    /// Path to file that defines the panic.
    pub file: Txt,
    /// Line of code that defines the panic.
    ///
    /// Is zero if it could not be parsed.
    pub line: u32,
    /// Column in the line of code that defines the panic.
    ///
    /// Is zero if it could not be parsed.
    pub column: u32,
    /// Widget where the panic happened.
    ///
    /// Is empty if the panic print did not include a widget path.
    pub widget_path: Txt,
    /// Stack backtrace.
    ///
    /// Is empty if the panic print did not include a backtrace.
    pub backtrace: Txt,
}
499
500/// Alternate mode `{:#}` prints full backtrace.
501impl fmt::Display for CrashPanic {
502    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
503        writeln!(
504            f,
505            "thread '{}' panicked at {}:{}:{}:",
506            self.thread, self.file, self.line, self.column
507        )?;
508        for line in self.message.lines() {
509            writeln!(f, "   {line}")?;
510        }
511        writeln!(f, "widget path:\n   {}", self.widget_path)?;
512
513        if f.alternate() {
514            writeln!(f, "stack backtrace:\n{}", self.backtrace)
515        } else {
516            writeln!(f, "stack backtrace:")?;
517            let mut snippet = 9;
518            for frame in self.backtrace_frames().skip_while(|f| f.is_after_panic) {
519                write!(f, "{frame}")?;
520                if snippet > 0 {
521                    let code = frame.code_snippet();
522                    if !code.is_empty() {
523                        snippet -= 1;
524                        writeln!(f, "{}", code)?;
525                    }
526                }
527            }
528            Ok(())
529        }
530    }
531}
532impl CrashPanic {
533    /// Gets if `stderr` contains a panic that can be parsed by [`find`].
534    ///
535    /// [`find`]: Self::find
536    pub fn contains(stderr: &str) -> bool {
537        Self::find_impl(stderr, false).is_some()
538    }
539
540    /// Gets if `stderr` contains a panic that can be parsed by [`find`] and traced a widget/window path.
541    ///
542    /// [`find`]: Self::find
543    pub fn contains_widget(stderr: &str) -> bool {
544        match Self::find_impl(stderr, false) {
545            Some(p) => !p.widget_path.is_empty(),
546            None => false,
547        }
548    }
549
550    /// Try parse `stderr` for the crash panic.
551    ///
552    /// Only reliably works if the panic fully printed correctly and was formatted by the panic
553    /// hook installed by `crash_handler` or by the display print of this type.
554    pub fn find(stderr: &str) -> Option<Self> {
555        Self::find_impl(stderr, true)
556    }
557
558    fn find_impl(stderr: &str, parse: bool) -> Option<Self> {
559        let mut panic_at = usize::MAX;
560        let mut widget_path = usize::MAX;
561        let mut stack_backtrace = usize::MAX;
562        let mut i = 0;
563        for line in stderr.lines() {
564            if line.starts_with("thread '") && line.contains("' panicked at ") && line.ends_with(':') {
565                panic_at = i;
566                widget_path = usize::MAX;
567                stack_backtrace = usize::MAX;
568            } else if line == "widget path:" {
569                widget_path = i + "widget path:\n".len();
570            } else if line == "stack backtrace:" {
571                stack_backtrace = i + "stack backtrace:\n".len();
572            }
573            i += line.len() + "\n".len();
574        }
575
576        if panic_at == usize::MAX {
577            return None;
578        }
579
580        if !parse {
581            return Some(Self {
582                thread: Txt::from(""),
583                message: Txt::from(""),
584                file: Txt::from(""),
585                line: 0,
586                column: 0,
587                widget_path: if widget_path < stderr.len() {
588                    Txt::from("true")
589                } else {
590                    Txt::from("")
591                },
592                backtrace: Txt::from(""),
593            });
594        }
595
596        let panic_str = stderr[panic_at..].lines().next().unwrap();
597        let (thread, location) = panic_str.strip_prefix("thread '").unwrap().split_once("' panicked at ").unwrap();
598        let mut location = location.split(':');
599        let file = location.next().unwrap_or("");
600        let line: u32 = location.next().unwrap_or("0").parse().unwrap_or(0);
601        let column: u32 = location.next().unwrap_or("0").parse().unwrap_or(0);
602
603        let mut message = String::new();
604        let mut sep = "";
605        for line in stderr[panic_at + panic_str.len() + "\n".len()..].lines() {
606            if let Some(line) = line.strip_prefix("   ") {
607                message.push_str(sep);
608                message.push_str(line);
609                sep = "\n";
610            } else {
611                if message.is_empty() && line != "widget path:" && line != "stack backtrace:" {
612                    // not formatted by us, probably by Rust
613                    line.clone_into(&mut message);
614                }
615                break;
616            }
617        }
618
619        let widget_path = if widget_path < stderr.len() {
620            stderr[widget_path..].lines().next().unwrap().trim()
621        } else {
622            ""
623        };
624
625        let backtrace = if stack_backtrace < stderr.len() {
626            let mut i = stack_backtrace;
627            'backtrace_seek: for line in stderr[stack_backtrace..].lines() {
628                if !line.starts_with(' ') {
629                    'digit_check: for c in line.chars() {
630                        if !c.is_ascii_digit() {
631                            if c == ':' {
632                                break 'digit_check;
633                            } else {
634                                break 'backtrace_seek;
635                            }
636                        }
637                    }
638                }
639                i += line.len() + "\n".len();
640            }
641            &stderr[stack_backtrace..i]
642        } else {
643            ""
644        };
645
646        Some(Self {
647            thread: thread.to_txt(),
648            message: message.into(),
649            file: file.to_txt(),
650            line,
651            column,
652            widget_path: widget_path.to_txt(),
653            backtrace: backtrace.to_txt(),
654        })
655    }
656
657    /// Iterate over frames parsed from the `backtrace`.
658    pub fn backtrace_frames(&self) -> impl Iterator<Item = BacktraceFrame> + '_ {
659        BacktraceFrame::parse(&self.backtrace)
660    }
661}
662
/// Represents a frame parsed from a stack backtrace.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct BacktraceFrame {
    /// Position on the backtrace.
    pub n: usize,

    /// Function name.
    pub name: Txt,
    /// Source code file.
    ///
    /// Is empty if the frame has no source location.
    pub file: Txt,
    /// Source code line.
    ///
    /// Is zero if the frame has no source location.
    pub line: u32,

    /// If this frame is inside the Rust panic code.
    ///
    /// Is `true` for all frames up to and including `core::panicking::panic_fmt`.
    pub is_after_panic: bool,
}
679impl fmt::Display for BacktraceFrame {
680    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
681        writeln!(f, "{:>4}: {}", self.n, self.name)?;
682        if !self.file.is_empty() {
683            writeln!(f, "      at {}:{}", self.file, self.line)?;
684        }
685        Ok(())
686    }
687}
688impl BacktraceFrame {
689    /// Iterate over frames parsed from the `backtrace`.
690    pub fn parse(mut backtrace: &str) -> impl Iterator<Item = BacktraceFrame> + '_ {
691        let mut is_after_panic = backtrace.lines().any(|l| l.ends_with("core::panicking::panic_fmt"));
692        std::iter::from_fn(move || {
693            if backtrace.is_empty() {
694                None
695            } else {
696                let n_name = backtrace.lines().next().unwrap();
697                let (n, name) = if let Some((n, name)) = n_name.split_once(':') {
698                    let n = match n.trim_start().parse() {
699                        Ok(n) => n,
700                        Err(_) => {
701                            backtrace = "";
702                            return None;
703                        }
704                    };
705                    let name = name.trim();
706                    if name.is_empty() {
707                        backtrace = "";
708                        return None;
709                    }
710                    (n, name)
711                } else {
712                    backtrace = "";
713                    return None;
714                };
715
716                backtrace = &backtrace[n_name.len() + 1..];
717                let r = if backtrace.trim_start().starts_with("at ") {
718                    let file_line = backtrace.lines().next().unwrap();
719                    let (file, line) = if let Some((file, line)) = file_line.rsplit_once(':') {
720                        let file = file.trim_start().strip_prefix("at ").unwrap();
721                        let line = match line.trim_end().parse() {
722                            Ok(l) => l,
723                            Err(_) => {
724                                backtrace = "";
725                                return None;
726                            }
727                        };
728                        (file, line)
729                    } else {
730                        backtrace = "";
731                        return None;
732                    };
733
734                    backtrace = &backtrace[file_line.len() + 1..];
735
736                    BacktraceFrame {
737                        n,
738                        name: name.to_txt(),
739                        file: file.to_txt(),
740                        line,
741                        is_after_panic,
742                    }
743                } else {
744                    BacktraceFrame {
745                        n,
746                        name: name.to_txt(),
747                        file: Txt::from(""),
748                        line: 0,
749                        is_after_panic,
750                    }
751                };
752
753                if is_after_panic && name == "core::panicking::panic_fmt" {
754                    is_after_panic = false;
755                }
756
757                Some(r)
758            }
759        })
760    }
761
762    /// Reads the code line + four surrounding lines if the code file can be found.
763    pub fn code_snippet(&self) -> Txt {
764        if !self.file.is_empty() && self.line > 0 {
765            if let Ok(file) = std::fs::File::open(&self.file) {
766                use std::fmt::Write as _;
767                let mut r = String::new();
768
769                let reader = std::io::BufReader::new(file);
770
771                let line_s = self.line - 2.min(self.line - 1);
772                let lines = reader.lines().skip(line_s as usize - 1).take(5);
773                for (line, line_n) in lines.zip(line_s..) {
774                    let line = match line {
775                        Ok(l) => l,
776                        Err(_) => return Txt::from(""),
777                    };
778
779                    if line_n == self.line {
780                        writeln!(&mut r, "      {:>4} > {}", line_n, line).unwrap();
781                    } else {
782                        writeln!(&mut r, "      {:>4} │ {}", line_n, line).unwrap();
783                    }
784                }
785
786                return r.into();
787            }
788        }
789        Txt::from("")
790    }
791}
792
793fn crash_handler_monitor_process(
794    dump_dir: Option<PathBuf>,
795    mut cfg_app: ConfigProcess,
796    mut cfg_dialog: ConfigProcess,
797    has_dialog_handler: bool,
798) -> ! {
799    // monitor-process:
800    tracing::info!("crash monitor-process is running");
801
802    let exe = std::env::current_exe()
803        .and_then(dunce::canonicalize)
804        .expect("failed to get the current executable");
805
806    let mut args: Box<[_]> = std::env::args().skip(1).map(Txt::from).collect();
807
808    let mut dialog_args = CrashArgs {
809        app_crashes: vec![],
810        dialog_crash: None,
811    };
812    loop {
813        let mut app_process = std::process::Command::new(&exe);
814        for cfg in &mut cfg_app {
815            cfg(&mut app_process, &dialog_args);
816        }
817
818        match run_process(
819            dump_dir.as_deref(),
820            app_process
821                .env(APP_PROCESS, format!("restart-{}", dialog_args.app_crashes.len()))
822                .args(args.iter()),
823        ) {
824            Ok((status, [stdout, stderr], dump_file)) => {
825                if status.success() {
826                    let code = status.code().unwrap_or(0);
827                    tracing::info!(
828                        "crash monitor-process exiting with success code ({code}), {} crashes",
829                        dialog_args.app_crashes.len()
830                    );
831                    zng_env::exit(code);
832                } else {
833                    let code = status.code();
834                    #[allow(unused_mut)] // Windows has no signal
835                    let mut signal = None::<i32>;
836
837                    #[cfg(windows)]
838                    if code == Some(1) {
839                        tracing::warn!(
840                            "app-process exit code (1), probably killed by the system, \
841                                        will exit monitor-process with the same code"
842                        );
843                        zng_env::exit(1);
844                    }
845                    #[cfg(unix)]
846                    if code.is_none() {
847                        use std::os::unix::process::ExitStatusExt as _;
848                        signal = status.signal();
849
850                        if let Some(sig) = signal {
851                            if [2, 9, 17, 19, 23].contains(&sig) {
852                                tracing::warn!(
853                                    "app-process exited by signal ({sig}), \
854                                                will exit monitor-process with code 1"
855                                );
856                                zng_env::exit(1);
857                            }
858                        }
859                    }
860
861                    tracing::error!(
862                        "app-process crashed with exit code ({:#X}), signal ({:#?}), {} crashes previously",
863                        code.unwrap_or(0),
864                        signal.unwrap_or(0),
865                        dialog_args.app_crashes.len()
866                    );
867
868                    let timestamp = SystemTime::now();
869
870                    dialog_args.app_crashes.push(CrashError::new(
871                        timestamp,
872                        code,
873                        signal,
874                        stdout.into(),
875                        stderr.into(),
876                        dump_file,
877                        args.clone(),
878                    ));
879
880                    // show dialog, retries once if dialog crashes too.
881                    for _ in 0..2 {
882                        // serialize app-crashes to a temp JSON file
883                        let timestamp_nanos = timestamp.duration_since(SystemTime::UNIX_EPOCH).map(|d| d.as_nanos()).unwrap_or(0);
884                        let mut timestamp = timestamp_nanos;
885                        let mut retries = 0;
886                        let crash_file = loop {
887                            let path = std::env::temp_dir().join(format!("zng-crash-errors-{timestamp:#x}"));
888                            match std::fs::File::create_new(&path) {
889                                Ok(f) => match serde_json::to_writer(std::io::BufWriter::new(f), &dialog_args) {
890                                    Ok(_) => break path,
891                                    Err(e) => {
892                                        if e.is_io() {
893                                            if retries > 20 {
894                                                panic!("error writing crash errors, {e}");
895                                            } else if retries > 5 {
896                                                timestamp += 1;
897                                            }
898                                            std::thread::sleep(100.ms());
899                                        } else {
900                                            panic!("error serializing crash errors, {e}");
901                                        }
902                                    }
903                                },
904                                Err(e) => {
905                                    if e.kind() == std::io::ErrorKind::AlreadyExists {
906                                        timestamp += 1;
907                                    } else {
908                                        if retries > 20 {
909                                            panic!("error creating crash errors file, {e}");
910                                        } else if retries > 5 {
911                                            timestamp += 1;
912                                        }
913                                        std::thread::sleep(100.ms());
914                                    }
915                                }
916                            }
917                            retries += 1;
918                        };
919
920                        let dialog_result = if has_dialog_handler {
921                            let mut dialog_process = std::process::Command::new(&exe);
922                            for cfg in &mut cfg_dialog {
923                                cfg(&mut dialog_process, &dialog_args);
924                            }
925                            run_process(dump_dir.as_deref(), dialog_process.env(DIALOG_PROCESS, &crash_file))
926                        } else {
927                            Ok((std::process::ExitStatus::default(), [String::new(), String::new()], None))
928                        };
929
930                        for _ in 0..5 {
931                            if !crash_file.exists() || std::fs::remove_file(&crash_file).is_ok() {
932                                break;
933                            }
934                            std::thread::sleep(100.ms());
935                        }
936
937                        let response = match dialog_result {
938                            Ok((dlg_status, [dlg_stdout, dlg_stderr], dlg_dump_file)) => {
939                                if dlg_status.success() {
940                                    dlg_stdout
941                                        .lines()
942                                        .filter_map(|l| l.trim().strip_prefix(RESPONSE_PREFIX))
943                                        .next_back()
944                                        .unwrap_or("exit 0")
945                                        .to_owned()
946                                } else {
947                                    let code = dlg_status.code();
948                                    #[allow(unused_mut)] // Windows has no signal
949                                    let mut signal = None::<i32>;
950
951                                    #[cfg(windows)]
952                                    if code == Some(1) {
953                                        tracing::warn!(
954                                            "dialog-process exit code (1), probably killed by the system, \
955                                                        will exit monitor-process with the same code"
956                                        );
957                                        zng_env::exit(1);
958                                    }
959                                    #[cfg(unix)]
960                                    if code.is_none() {
961                                        use std::os::unix::process::ExitStatusExt as _;
962                                        signal = status.signal();
963
964                                        if let Some(sig) = signal {
965                                            if [2, 9, 17, 19, 23].contains(&sig) {
966                                                tracing::warn!(
967                                                    "dialog-process exited by signal ({sig}), \
968                                                                will exit monitor-process with code 1"
969                                                );
970                                                zng_env::exit(1);
971                                            }
972                                        }
973                                    }
974
975                                    let dialog_crash = CrashError::new(
976                                        SystemTime::now(),
977                                        code,
978                                        signal,
979                                        dlg_stdout.into(),
980                                        dlg_stderr.into(),
981                                        dlg_dump_file,
982                                        Box::new([]),
983                                    );
984                                    tracing::error!("crash dialog-process crashed, {dialog_crash}");
985
986                                    if dialog_args.dialog_crash.is_none() {
987                                        dialog_args.dialog_crash = Some(dialog_crash);
988                                        continue;
989                                    } else {
990                                        let latest = dialog_args.latest();
991                                        eprintln!("{latest}");
992                                        zng_env::exit(latest.code.unwrap_or(1));
993                                    }
994                                }
995                            }
996                            Err(e) => panic!("error running dialog-process, {e}"),
997                        };
998
999                        if let Some(args_json) = response.strip_prefix("restart ") {
1000                            args = serde_json::from_str(args_json).expect("crash dialog-process did not respond 'restart' correctly");
1001                            break;
1002                        } else if let Some(code) = response.strip_prefix("exit ") {
1003                            let code: i32 = code.parse().expect("crash dialog-process did not respond 'code' correctly");
1004                            zng_env::exit(code);
1005                        } else {
1006                            panic!("crash dialog-process did not respond correctly")
1007                        }
1008                    }
1009                }
1010            }
1011            Err(e) => panic!("error running app-process, {e}"),
1012        }
1013    }
1014}
1015fn run_process(
1016    dump_dir: Option<&Path>,
1017    command: &mut std::process::Command,
1018) -> std::io::Result<(std::process::ExitStatus, [String; 2], Option<PathBuf>)> {
1019    struct DumpServer {
1020        shutdown: Arc<AtomicBool>,
1021        runner: std::thread::JoinHandle<Option<PathBuf>>,
1022    }
1023    let mut dump_server = None;
1024    if let Some(dump_dir) = dump_dir {
1025        match std::fs::create_dir_all(dump_dir) {
1026            Ok(_) => {
1027                let uuid = uuid::Uuid::new_v4();
1028                let dump_file = dump_dir.join(format!("{}.dmp", uuid.simple()));
1029                let dump_channel = std::env::temp_dir().join(format!("zng-crash-{}", uuid.simple()));
1030                match minidumper::Server::with_name(dump_channel.as_path()) {
1031                    Ok(mut s) => {
1032                        command.env(DUMP_CHANNEL, &dump_channel);
1033                        let shutdown = Arc::new(AtomicBool::new(false));
1034                        let runner = std::thread::spawn(clmv!(shutdown, || {
1035                            let created_file = Arc::new(Mutex::new(None));
1036                            if let Err(e) = s.run(
1037                                Box::new(MinidumpServerHandler {
1038                                    dump_file,
1039                                    created_file: created_file.clone(),
1040                                }),
1041                                &shutdown,
1042                                None,
1043                            ) {
1044                                tracing::error!("minidump server exited with error, {e}");
1045                            }
1046                            let r = created_file.lock().take();
1047                            r
1048                        }));
1049                        dump_server = Some(DumpServer { shutdown, runner });
1050                    }
1051                    Err(e) => tracing::error!("failed to spawn minidump server, will not enable crash handling, {e}"),
1052                }
1053            }
1054            Err(e) => tracing::error!("cannot create minidump dir, will not enable crash handling, {e}"),
1055        }
1056    }
1057
1058    let mut app_process = command
1059        .env("RUST_BACKTRACE", "full")
1060        .env("CLICOLOR_FORCE", "1")
1061        .stdout(std::process::Stdio::piped())
1062        .stderr(std::process::Stdio::piped())
1063        .spawn()?;
1064
1065    let stdout = capture_and_print(app_process.stdout.take().unwrap(), false);
1066    let stderr = capture_and_print(app_process.stderr.take().unwrap(), true);
1067
1068    let status = app_process.wait()?;
1069
1070    let stdout = match stdout.join() {
1071        Ok(r) => r,
1072        Err(p) => std::panic::resume_unwind(p),
1073    };
1074    let stderr = match stderr.join() {
1075        Ok(r) => r,
1076        Err(p) => std::panic::resume_unwind(p),
1077    };
1078
1079    let mut dump_file = None;
1080    if let Some(s) = dump_server {
1081        s.shutdown.store(true, atomic::Ordering::Relaxed);
1082        match s.runner.join() {
1083            Ok(r) => dump_file = r,
1084            Err(p) => std::panic::resume_unwind(p),
1085        };
1086    }
1087
1088    Ok((status, [stdout, stderr], dump_file))
1089}
1090struct MinidumpServerHandler {
1091    dump_file: PathBuf,
1092    created_file: Arc<Mutex<Option<PathBuf>>>,
1093}
1094impl minidumper::ServerHandler for MinidumpServerHandler {
1095    fn create_minidump_file(&self) -> Result<(std::fs::File, PathBuf), std::io::Error> {
1096        let file = std::fs::File::create_new(&self.dump_file)?;
1097        Ok((file, self.dump_file.clone()))
1098    }
1099
1100    fn on_minidump_created(&self, result: Result<minidumper::MinidumpBinary, minidumper::Error>) -> minidumper::LoopAction {
1101        match result {
1102            Ok(b) => *self.created_file.lock() = Some(b.path),
1103            Err(e) => tracing::error!("failed to write minidump file, {e}"),
1104        }
1105        minidumper::LoopAction::Exit
1106    }
1107
1108    fn on_message(&self, _: u32, _: Vec<u8>) {}
1109
1110    fn on_client_connected(&self, num_clients: usize) -> minidumper::LoopAction {
1111        if num_clients > 1 {
1112            tracing::error!("expected only one minidump client, {num_clients} connected, exiting server");
1113            minidumper::LoopAction::Exit
1114        } else {
1115            minidumper::LoopAction::Continue
1116        }
1117    }
1118
1119    fn on_client_disconnected(&self, num_clients: usize) -> minidumper::LoopAction {
1120        if num_clients != 0 {
1121            tracing::error!("expected only one minidump client disconnect, {num_clients} still connected");
1122        }
1123        minidumper::LoopAction::Exit
1124    }
1125}
/// Spawns a thread that reads `stream` until it ends, each chunk read is immediately written to
/// this process stderr, if `is_err`, or stdout, and is also captured.
///
/// The thread returns all captured bytes converted to a string, invalid UTF-8 sequences are
/// replaced with the replacement character.
///
/// Reads that fail with [`std::io::ErrorKind::Interrupted`] are retried.
///
/// # Panics
///
/// The spawned thread panics on any other read error and on any write or flush error, the panic
/// is observed when the returned handle is joined.
fn capture_and_print(mut stream: impl std::io::Read + Send + 'static, is_err: bool) -> std::thread::JoinHandle<String> {
    std::thread::spawn(move || {
        let name = if is_err { "stderr" } else { "stdout" };
        let mut capture = vec![];
        let mut buffer = [0u8; 32];
        loop {
            let n = match stream.read(&mut buffer) {
                // end of stream
                Ok(0) => break,
                Ok(n) => n,
                // interrupted reads are not failures, the `Read` contract asks for a retry
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(e) => panic!("{} read error, {}", name, e),
            };

            let new = &buffer[..n];
            capture.extend_from_slice(new);

            // flush on every chunk so the output is printed as soon as it is read
            let r = if is_err {
                let mut s = std::io::stderr();
                s.write_all(new).and_then(|_| s.flush())
            } else {
                let mut s = std::io::stdout();
                s.write_all(new).and_then(|_| s.flush())
            };
            if let Err(e) = r {
                panic!("{} write error, {}", name, e)
            }
        }
        // chunks may split a multi-byte char, so the conversion is only done on the full capture
        String::from_utf8_lossy(&capture).into_owned()
    })
}
1156
1157fn crash_handler_app_process(dump_enabled: bool) {
1158    tracing::info!("app-process is running");
1159
1160    std::panic::set_hook(Box::new(panic_handler));
1161    if dump_enabled {
1162        minidump_attach();
1163    }
1164
1165    // app-process execution happens after this.
1166}
1167
1168fn crash_handler_dialog_process(dump_enabled: bool, dialog: CrashDialogHandler, args_file: String) -> ! {
1169    tracing::info!("crash dialog-process is running");
1170
1171    std::panic::set_hook(Box::new(panic_handler));
1172    if dump_enabled {
1173        minidump_attach();
1174    }
1175
1176    let mut retries = 0;
1177    let args = loop {
1178        match std::fs::read_to_string(&args_file) {
1179            Ok(args) => break args,
1180            Err(e) => {
1181                if e.kind() != std::io::ErrorKind::NotFound && retries < 10 {
1182                    retries += 1;
1183                    continue;
1184                }
1185                panic!("error reading args file, {e}");
1186            }
1187        }
1188    };
1189
1190    dialog(serde_json::from_str(&args).expect("error deserializing args"));
1191    CrashArgs {
1192        app_crashes: vec![],
1193        dialog_crash: None,
1194    }
1195    .exit(0)
1196}
1197
/// Panic hook installed by the app-process and dialog-process setup.
///
/// Prints the panic message and location, the trace path of the current widget and a stack
/// backtrace to stderr.
fn panic_handler(info: &std::panic::PanicHookInfo) {
    // `Backtrace::capture` only collects frames when enabled by environment variable,
    // `run_process` sets `RUST_BACKTRACE=full` for the processes it spawns.
    let backtrace = std::backtrace::Backtrace::capture();
    let path = crate::widget::WIDGET.trace_path();
    let panic = PanicInfo::from_hook(info);
    // the `PanicInfo` display ends with a line break, so "widget path:" starts on its own line
    eprintln!("{panic}widget path:\n   {path}\nstack backtrace:\n{backtrace}");
}
1204
1205fn minidump_attach() {
1206    let channel_name = match std::env::var(DUMP_CHANNEL) {
1207        Ok(n) if !n.is_empty() => PathBuf::from(n),
1208        _ => {
1209            eprintln!("expected minidump channel name, this instance will not handle crashes");
1210            return;
1211        }
1212    };
1213    let client = match minidumper::Client::with_name(channel_name.as_path()) {
1214        Ok(c) => c,
1215        Err(e) => {
1216            eprintln!("failed to connect minidump client, this instance will not handle crashes, {e}");
1217            return;
1218        }
1219    };
1220    struct Handler(minidumper::Client);
1221    // SAFETY: on_crash does the minimal possible work
1222    unsafe impl crash_handler::CrashEvent for Handler {
1223        fn on_crash(&self, context: &crash_handler::CrashContext) -> crash_handler::CrashEventResult {
1224            crash_handler::CrashEventResult::Handled(self.0.request_dump(context).is_ok())
1225        }
1226    }
1227    let handler = match crash_handler::CrashHandler::attach(Box::new(Handler(client))) {
1228        Ok(h) => h,
1229        Err(e) => {
1230            eprintln!("failed attach minidump crash handler, this instance will not handle crashes, {e}");
1231            return;
1232        }
1233    };
1234
1235    *CRASH_HANDLER.lock() = Some(handler);
1236}
/// Crash handler attached by `minidump_attach`, `None` until it attaches successfully.
///
/// NOTE(review): presumably held in a static so the handler is never dropped while the process
/// runs, confirm against the `crash_handler::CrashHandler` drop behavior.
static CRASH_HANDLER: Mutex<Option<crash_handler::CrashHandler>> = Mutex::new(None);
1238
1239#[derive(Debug)]
1240struct PanicInfo {
1241    pub thread: Txt,
1242    pub msg: Txt,
1243    pub file: Txt,
1244    pub line: u32,
1245    pub column: u32,
1246}
1247impl PanicInfo {
1248    pub fn from_hook(info: &std::panic::PanicHookInfo) -> Self {
1249        let current_thread = std::thread::current();
1250        let thread = current_thread.name().unwrap_or("<unnamed>");
1251        let msg = Self::payload(info.payload());
1252
1253        let (file, line, column) = if let Some(l) = info.location() {
1254            (l.file(), l.line(), l.column())
1255        } else {
1256            ("<unknown>", 0, 0)
1257        };
1258        Self {
1259            thread: thread.to_txt(),
1260            msg,
1261            file: file.to_txt(),
1262            line,
1263            column,
1264        }
1265    }
1266
1267    fn payload(p: &dyn std::any::Any) -> Txt {
1268        match p.downcast_ref::<&'static str>() {
1269            Some(s) => s,
1270            None => match p.downcast_ref::<String>() {
1271                Some(s) => &s[..],
1272                None => "Box<dyn Any>",
1273            },
1274        }
1275        .to_txt()
1276    }
1277}
1278impl std::error::Error for PanicInfo {}
1279impl fmt::Display for PanicInfo {
1280    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1281        writeln!(
1282            f,
1283            "thread '{}' panicked at {}:{}:{}:",
1284            self.thread, self.file, self.line, self.column
1285        )?;
1286        for line in self.msg.lines() {
1287            writeln!(f, "   {line}")?;
1288        }
1289        Ok(())
1290    }
1291}