zng_app/
crash_handler.rs

1#![cfg(all(
2    feature = "crash_handler",
3    not(any(target_arch = "wasm32", target_os = "android", target_os = "ios"))
4))]
5
6//! App-process crash handler.
7//!
8//! See the `zng::app::crash_handler` documentation for more details.
9
10use parking_lot::Mutex;
11use std::{
12    fmt,
13    io::{BufRead, Write},
14    path::{Path, PathBuf},
15    sync::{Arc, atomic::AtomicBool},
16    time::SystemTime,
17};
18use zng_clone_move::clmv;
19use zng_layout::unit::TimeUnits as _;
20
21use zng_txt::{ToTxt as _, Txt};
22
/// Environment variable that causes the crash handler to not start if set.
///
/// The value is not inspected, the crash handler is disabled when the variable is set to any
/// valid Unicode value.
///
/// This is particularly useful to set in debugger launch configs. The crash handler spawns
/// a different process for the app, so breakpoints set for the launched process will not work.
pub const NO_CRASH_HANDLER: &str = "ZNG_NO_CRASH_HANDLER";
28
// Process start handler that decides the role of the current process: no crash handler,
// app-process, dialog-process or monitor-process.
zng_env::on_process_start!(|process_start_args| {
    // opt-out by env var, the value is ignored
    if std::env::var(NO_CRASH_HANDLER).is_ok() {
        return;
    }

    // run all closures registered by `crash_handler_config!`, any of them can disable the crash handler
    let mut config = CrashConfig::new();
    for ext in CRASH_CONFIG {
        ext(&mut config);
        if config.no_crash_handler {
            return;
        }
    }

    // NOTE(review): presumably yields so that the other pending process-start handlers run before
    // this one, up to a limit below `MAX_YIELD_COUNT` — confirm against `zng_env`.
    if process_start_args.next_handlers_count > 0 && process_start_args.yield_count < zng_env::ProcessStartArgs::MAX_YIELD_COUNT - 10 {
        // extra sure that this is the app-process
        return process_start_args.yield_once();
    }

    // any result other than "not present" (including a non-Unicode value) means the monitor-process
    // spawned this process as the app-process
    if std::env::var(APP_PROCESS) != Err(std::env::VarError::NotPresent) {
        return crash_handler_app_process(config.dump_dir.is_some());
    }

    // the dialog-process receives the crash args file in the env var, the explicit dialog handler
    // has priority over the default dialog handler
    match std::env::var(DIALOG_PROCESS) {
        Ok(args_file) => crash_handler_dialog_process(
            config.dump_dir.is_some(),
            config
                .dialog
                .or(config.default_dialog)
                .expect("dialog-process spawned without dialog handler"),
            args_file,
        ),
        Err(e) => match e {
            std::env::VarError::NotPresent => {}
            e => panic!("invalid dialog env args, {e:?}"),
        },
    }

    // no role env var set, this is the first process, it becomes the monitor-process
    crash_handler_monitor_process(
        config.dump_dir,
        config.app_process,
        config.dialog_process,
        config.default_dialog.is_some() || config.dialog.is_some(),
    );
});
73
74/// Gets the number of crash restarts in the app-process.
75///
76/// Always returns zero if called in other processes.
77pub fn restart_count() -> usize {
78    match std::env::var(APP_PROCESS) {
79        Ok(c) => c.strip_prefix("restart-").unwrap_or("0").parse().unwrap_or(0),
80        Err(_) => 0,
81    }
82}
83
/// Env var set by the monitor-process on the app-process it spawns, the value is `restart-{count}`.
const APP_PROCESS: &str = "ZNG_CRASH_HANDLER_APP";
/// Env var that marks the dialog-process, the value is passed to the dialog runner as the args file.
const DIALOG_PROCESS: &str = "ZNG_CRASH_HANDLER_DIALOG";
/// NOTE(review): presumably the env var that names the minidump channel, not used in this part of the file — confirm.
const DUMP_CHANNEL: &str = "ZNG_MINIDUMP_CHANNEL";
/// Prefix of the stdout line printed by the [`CrashArgs`] response methods.
const RESPONSE_PREFIX: &str = "zng_crash_response: ";
88
// Config functions registered by `crash_handler_config!`, all are called on process start.
#[doc(hidden)]
#[linkme::distributed_slice]
pub static CRASH_CONFIG: [fn(&mut CrashConfig)];

// Re-export used by the `crash_handler_config!` expansion to reach `linkme` through `$crate`.
#[doc(hidden)]
pub use linkme as __linkme;
95
/// <span data-del-macro-root></span> Register a `FnOnce(&mut CrashConfig)` closure to be
/// called on process init to configure the crash handler.
///
/// The closure is not called if the [`NO_CRASH_HANDLER`] env var is set or if a closure called
/// before it requested [`CrashConfig::no_crash_handler`].
///
/// See [`CrashConfig`] for more details.
#[macro_export]
macro_rules! crash_handler_config {
    ($closure:expr) => {
        // registers `_crash_config` in the `CRASH_CONFIG` distributed slice
        #[$crate::crash_handler::__linkme::distributed_slice($crate::crash_handler::CRASH_CONFIG)]
        #[linkme(crate = $crate::crash_handler::__linkme)]
        #[doc(hidden)]
        static _CRASH_CONFIG: fn(&mut $crate::crash_handler::CrashConfig) = _crash_config;
        #[doc(hidden)]
        fn _crash_config(cfg: &mut $crate::crash_handler::CrashConfig) {
            // generic helper, the `impl FnOnce` bound gives `$closure` its expected signature
            fn crash_config(cfg: &mut $crate::crash_handler::CrashConfig, handler: impl FnOnce(&mut $crate::crash_handler::CrashConfig)) {
                handler(cfg)
            }
            crash_config(cfg, $closure)
        }
    };
}
pub use crate::crash_handler_config;
118
/// Closures that customize a process [`Command`] before it is spawned, each closure also
/// receives the crash args collected so far.
///
/// [`Command`]: std::process::Command
type ConfigProcess = Vec<Box<dyn for<'a, 'b> FnMut(&'a mut std::process::Command, &'b CrashArgs) -> &'a mut std::process::Command>>;
/// Boxed crash dialog handler, it is consumed by the call.
type CrashDialogHandler = Box<dyn FnOnce(CrashArgs)>;
121
/// Crash handler config.
///
/// Use [`crash_handler_config!`] to set config.
///
/// [`crash_handler_config!`]: crate::crash_handler_config!
pub struct CrashConfig {
    // fallback dialog handler, only used if `dialog` is not set
    default_dialog: Option<CrashDialogHandler>,
    // dialog handler set by the app, has priority over `default_dialog`
    dialog: Option<CrashDialogHandler>,
    // closures called before each app-process spawn
    app_process: ConfigProcess,
    // closures called before each dialog-process spawn
    dialog_process: ConfigProcess,
    // minidump directory, `None` disables minidump collection
    dump_dir: Option<PathBuf>,
    // if set the process does not run with a crash handler
    no_crash_handler: bool,
}
135impl CrashConfig {
136    fn new() -> Self {
137        Self {
138            default_dialog: None,
139            dialog: None,
140            app_process: vec![],
141            dialog_process: vec![],
142            dump_dir: Some(zng_env::cache("zng_minidump")),
143            no_crash_handler: false,
144        }
145    }
146
147    /// Set the crash dialog process handler.
148    ///
149    /// The dialog `handler` can run an app or show a native dialog, it must use the [`CrashArgs`] process
150    /// terminating methods to respond, if it returns [`CrashArgs::exit`] will run.
151    ///
152    /// Note that the handler does not need to actually show any dialog, it can just save crash info and
153    /// restart the app for example.
154    pub fn dialog(&mut self, handler: impl FnOnce(CrashArgs) + 'static) {
155        if self.dialog.is_none() {
156            self.dialog = Some(Box::new(handler));
157        }
158    }
159
160    /// Set the crash dialog-handler used if `crash_dialog` is not set.
161    ///
162    /// This is used by app libraries or themes to provide a default dialog.
163    pub fn default_dialog(&mut self, handler: impl FnOnce(CrashArgs) + 'static) {
164        self.default_dialog = Some(Box::new(handler));
165    }
166
167    /// Add a closure that is called just before the app-process is spawned.
168    pub fn app_process(
169        &mut self,
170        cfg: impl for<'a, 'b> FnMut(&'a mut std::process::Command, &'b CrashArgs) -> &'a mut std::process::Command + 'static,
171    ) {
172        self.app_process.push(Box::new(cfg));
173    }
174
175    /// Add a closure that is called just before the dialog-process is spawned.
176    pub fn dialog_process(
177        &mut self,
178        cfg: impl for<'a, 'b> FnMut(&'a mut std::process::Command, &'b CrashArgs) -> &'a mut std::process::Command + 'static,
179    ) {
180        self.dialog_process.push(Box::new(cfg));
181    }
182
183    /// Change the minidump directory.
184    ///
185    /// Is `zng::env::cache("zng_minidump")` by default.
186    pub fn minidump_dir(&mut self, dir: impl Into<PathBuf>) {
187        self.dump_dir = Some(dir.into());
188    }
189
190    /// Do not collect a minidump.
191    pub fn no_minidump(&mut self) {
192        self.dump_dir = None;
193    }
194
195    /// Does not run with crash handler.
196    ///
197    /// This is equivalent of running with `NO_ZNG_CRASH_HANDLER` env var.
198    pub fn no_crash_handler(&mut self) {
199        self.no_crash_handler = true;
200    }
201}
202
/// Arguments for the crash handler dialog function.
///
/// Also provided to the closures that configure the app-process and dialog-process commands.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct CrashArgs {
    /// Info about the app-process crashes.
    ///
    /// Has at least one entry when received by a dialog handler, latest is last. Includes all crashes
    /// since the start of the monitor-process.
    pub app_crashes: Vec<CrashError>,

    /// Info about a crash in the dialog-process spawned to handle the latest app-process crash.
    ///
    /// If set this is the last chance to show something to the end user, if the current dialog crashes too
    /// the monitor-process will give up. If you started an `APP` to show a crash dialog try using a native
    /// dialog directly now, or just give up, clearly things are far from ok.
    pub dialog_crash: Option<CrashError>,
}
219impl CrashArgs {
220    /// Latest crash.
221    pub fn latest(&self) -> &CrashError {
222        self.app_crashes.last().unwrap()
223    }
224
225    /// Restart the app-process with same argument as the latest crash.
226    pub fn restart(&self) -> ! {
227        let json_args = serde_json::to_string(&self.latest().args[..]).unwrap();
228        println!("{RESPONSE_PREFIX}restart {json_args}");
229        zng_env::exit(0)
230    }
231
232    /// Restart the app-process with custom arguments.
233    pub fn restart_with(&self, args: &[Txt]) -> ! {
234        let json_args = serde_json::to_string(&args).unwrap();
235        println!("{RESPONSE_PREFIX}restart {json_args}");
236        zng_env::exit(0)
237    }
238
239    /// Exit the monitor-process (application) with code.
240    pub fn exit(&self, code: i32) -> ! {
241        println!("{RESPONSE_PREFIX}exit {code}");
242        zng_env::exit(0)
243    }
244}
245impl fmt::Display for CrashArgs {
246    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
247        writeln!(f, "APP CRASHES:\n")?;
248
249        for c in self.app_crashes.iter() {
250            writeln!(f, "{c}")?;
251        }
252
253        if let Some(c) = &self.dialog_crash {
254            writeln!(f, "\nDIALOG CRASH:\n")?;
255            writeln!(f, "{c}")?;
256        }
257
258        Ok(())
259    }
260}
261
/// Info about an app-process crash.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct CrashError {
    /// Crash moment.
    pub timestamp: SystemTime,
    /// Process exit code, if the process exited with one.
    pub code: Option<i32>,
    /// Unix signal that terminated the process.
    ///
    /// Is always `None` on Windows.
    pub signal: Option<i32>,
    /// Full capture of the app stdout.
    ///
    /// Can contain ANSI escape sequences.
    pub stdout: Txt,
    /// Full capture of the app stderr.
    ///
    /// Can contain ANSI escape sequences.
    pub stderr: Txt,
    /// Command line arguments used to run the app-process that crashed.
    pub args: Box<[Txt]>,
    /// Minidump file.
    pub minidump: Option<PathBuf>,
    /// Operating system.
    ///
    /// See [`std::env::consts::OS`] for details.
    pub os: Txt,
}
285/// Alternate mode `{:#}` prints plain stdout and stderr (no ANSI escape sequences).
286impl fmt::Display for CrashError {
287    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
288        writeln!(f, "timestamp: {}", self.unix_time())?;
289        if let Some(c) = self.code {
290            writeln!(f, "exit code: {c:#X}")?
291        }
292        if let Some(c) = self.signal {
293            writeln!(f, "exit signal: {c}")?
294        }
295        if let Some(p) = self.minidump.as_ref() {
296            writeln!(f, "minidump: {}", p.display())?
297        }
298        if f.alternate() {
299            write!(f, "\nSTDOUT:\n{}\nSTDERR:\n{}\n", self.stdout_plain(), self.stderr_plain())
300        } else {
301            write!(f, "\nSTDOUT:\n{}\nSTDERR:\n{}\n", self.stdout, self.stderr)
302        }
303    }
304}
305impl CrashError {
306    fn new(
307        timestamp: SystemTime,
308        code: Option<i32>,
309        signal: Option<i32>,
310        stdout: Txt,
311        stderr: Txt,
312        minidump: Option<PathBuf>,
313        args: Box<[Txt]>,
314    ) -> Self {
315        Self {
316            timestamp,
317            code,
318            signal,
319            stdout,
320            stderr,
321            args,
322            minidump,
323            os: std::env::consts::OS.into(),
324        }
325    }
326
327    /// Seconds since Unix epoch.
328    pub fn unix_time(&self) -> u64 {
329        self.timestamp.duration_since(SystemTime::UNIX_EPOCH).unwrap_or_default().as_secs()
330    }
331
332    /// Gets if `stdout` does not contain any ANSI scape sequences.
333    pub fn is_stdout_plain(&self) -> bool {
334        !self.stdout.contains(CSI)
335    }
336
337    /// Gets if `stderr` does not contain any ANSI scape sequences.
338    pub fn is_stderr_plain(&self) -> bool {
339        !self.stderr.contains(CSI)
340    }
341
342    /// Get `stdout` without any ANSI escape sequences (CSI).
343    pub fn stdout_plain(&self) -> Txt {
344        remove_ansi_csi(&self.stdout)
345    }
346
347    /// Get `stderr` without any ANSI escape sequences (CSI).
348    pub fn stderr_plain(&self) -> Txt {
349        remove_ansi_csi(&self.stderr)
350    }
351
352    /// Gets if `stderr` contains a crash panic.
353    pub fn has_panic(&self) -> bool {
354        if self.code == Some(101) {
355            CrashPanic::contains(&self.stderr_plain())
356        } else {
357            false
358        }
359    }
360
361    /// Gets if `stderr` contains a crash panic that traced widget/window path.
362    pub fn has_panic_widget(&self) -> bool {
363        if self.code == Some(101) {
364            CrashPanic::contains_widget(&self.stderr_plain())
365        } else {
366            false
367        }
368    }
369
370    /// Try parse `stderr` for the crash panic.
371    ///
372    /// Only reliably works if the panic fully printed correctly and was formatted by the panic
373    /// hook installed by `crash_handler` or by the display print of [`CrashPanic`].
374    pub fn find_panic(&self) -> Option<CrashPanic> {
375        if self.code == Some(101) {
376            CrashPanic::find(&self.stderr_plain())
377        } else {
378            None
379        }
380    }
381
382    /// Best attempt at generating a readable error message.
383    ///
384    /// Is the panic message, or the minidump exception, with the exit code and signal.
385    pub fn message(&self) -> Txt {
386        let mut msg = if let Some(msg) = self.find_panic().map(|p| p.message) {
387            msg
388        } else if let Some(msg) = self.minidump_message() {
389            msg
390        } else {
391            "".into()
392        };
393        use std::fmt::Write as _;
394
395        if let Some(c) = self.code {
396            let sep = if msg.is_empty() { "" } else { "\n" };
397            write!(&mut msg, "{sep}Code: {c:#X}").unwrap();
398        }
399        if let Some(c) = self.signal {
400            let sep = if msg.is_empty() { "" } else { "\n" };
401            write!(&mut msg, "{sep}Signal: {c}").unwrap();
402        }
403        msg.end_mut();
404        msg
405    }
406
407    fn minidump_message(&self) -> Option<Txt> {
408        use minidump::*;
409
410        let dump = match Minidump::read_path(self.minidump.as_ref()?) {
411            Ok(d) => d,
412            Err(e) => {
413                tracing::error!("error reading minidump, {e}");
414                return None;
415            }
416        };
417
418        let system_info = match dump.get_stream::<MinidumpSystemInfo>() {
419            Ok(s) => s,
420            Err(e) => {
421                tracing::error!("error reading minidump system info, {e}");
422                return None;
423            }
424        };
425        let exception = match dump.get_stream::<MinidumpException>() {
426            Ok(s) => s,
427            Err(e) => {
428                tracing::error!("error reading minidump exception, {e}");
429                return None;
430            }
431        };
432
433        let crash_reason = exception.get_crash_reason(system_info.os, system_info.cpu);
434
435        Some(zng_txt::formatx!("{crash_reason}"))
436    }
437}
438
/// Control Sequence Introducer (`ESC [`), the prefix of the ANSI escape sequences searched and removed.
const CSI: &str = "\x1b[";
440
441/// Remove ANSI escape sequences (CSI) from `s`.
442pub fn remove_ansi_csi(mut s: &str) -> Txt {
443    fn is_esc_end(byte: u8) -> bool {
444        (0x40..=0x7e).contains(&byte)
445    }
446
447    let mut r = String::new();
448    while let Some(i) = s.find(CSI) {
449        r.push_str(&s[..i]);
450        s = &s[i + CSI.len()..];
451        let mut esc_end = 0;
452        while esc_end < s.len() && !is_esc_end(s.as_bytes()[esc_end]) {
453            esc_end += 1;
454        }
455        esc_end += 1;
456        s = &s[esc_end..];
457    }
458    r.push_str(s);
459    r.into()
460}
461
/// Panic parsed from a `stderr` dump.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct CrashPanic {
    /// Name of thread that panicked.
    pub thread: Txt,
    /// Panic message.
    pub message: Txt,
    /// Path to file that defines the panic.
    pub file: Txt,
    /// Line of code that defines the panic.
    ///
    /// Is zero if it could not be parsed.
    pub line: u32,
    /// Column in the line of code that defines the panic.
    ///
    /// Is zero if it could not be parsed.
    pub column: u32,
    /// Widget where the panic happened.
    ///
    /// Is empty if the panic did not print a widget path.
    pub widget_path: Txt,
    /// Stack backtrace.
    ///
    /// Is empty if the panic did not print a backtrace.
    pub backtrace: Txt,
}
481
482/// Alternate mode `{:#}` prints full backtrace.
483impl fmt::Display for CrashPanic {
484    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
485        writeln!(
486            f,
487            "thread '{}' panicked at {}:{}:{}:",
488            self.thread, self.file, self.line, self.column
489        )?;
490        for line in self.message.lines() {
491            writeln!(f, "   {line}")?;
492        }
493        writeln!(f, "widget path:\n   {}", self.widget_path)?;
494
495        if f.alternate() {
496            writeln!(f, "stack backtrace:\n{}", self.backtrace)
497        } else {
498            writeln!(f, "stack backtrace:")?;
499            let mut snippet = 9;
500            for frame in self.backtrace_frames().skip_while(|f| f.is_after_panic) {
501                write!(f, "{frame}")?;
502                if snippet > 0 {
503                    let code = frame.code_snippet();
504                    if !code.is_empty() {
505                        snippet -= 1;
506                        writeln!(f, "{code}")?;
507                    }
508                }
509            }
510            Ok(())
511        }
512    }
513}
514impl CrashPanic {
515    /// Gets if `stderr` contains a panic that can be parsed by [`find`].
516    ///
517    /// [`find`]: Self::find
518    pub fn contains(stderr: &str) -> bool {
519        Self::find_impl(stderr, false).is_some()
520    }
521
522    /// Gets if `stderr` contains a panic that can be parsed by [`find`] and traced a widget/window path.
523    ///
524    /// [`find`]: Self::find
525    pub fn contains_widget(stderr: &str) -> bool {
526        match Self::find_impl(stderr, false) {
527            Some(p) => !p.widget_path.is_empty(),
528            None => false,
529        }
530    }
531
532    /// Try parse `stderr` for the crash panic.
533    ///
534    /// Only reliably works if the panic fully printed correctly and was formatted by the panic
535    /// hook installed by `crash_handler` or by the display print of this type.
536    pub fn find(stderr: &str) -> Option<Self> {
537        Self::find_impl(stderr, true)
538    }
539
540    fn find_impl(stderr: &str, parse: bool) -> Option<Self> {
541        let mut panic_at = usize::MAX;
542        let mut widget_path = usize::MAX;
543        let mut stack_backtrace = usize::MAX;
544        let mut i = 0;
545        for line in stderr.lines() {
546            if line.starts_with("thread '") && line.contains("' panicked at ") && line.ends_with(':') {
547                panic_at = i;
548                widget_path = usize::MAX;
549                stack_backtrace = usize::MAX;
550            } else if line == "widget path:" {
551                widget_path = i + "widget path:\n".len();
552            } else if line == "stack backtrace:" {
553                stack_backtrace = i + "stack backtrace:\n".len();
554            }
555            i += line.len() + "\n".len();
556        }
557
558        if panic_at == usize::MAX {
559            return None;
560        }
561
562        if !parse {
563            return Some(Self {
564                thread: Txt::from(""),
565                message: Txt::from(""),
566                file: Txt::from(""),
567                line: 0,
568                column: 0,
569                widget_path: if widget_path < stderr.len() {
570                    Txt::from("true")
571                } else {
572                    Txt::from("")
573                },
574                backtrace: Txt::from(""),
575            });
576        }
577
578        let panic_str = stderr[panic_at..].lines().next().unwrap();
579        let (thread, location) = panic_str.strip_prefix("thread '").unwrap().split_once("' panicked at ").unwrap();
580        let mut location = location.split(':');
581        let file = location.next().unwrap_or("");
582        let line: u32 = location.next().unwrap_or("0").parse().unwrap_or(0);
583        let column: u32 = location.next().unwrap_or("0").parse().unwrap_or(0);
584
585        let mut message = String::new();
586        let mut sep = "";
587        for line in stderr[panic_at + panic_str.len() + "\n".len()..].lines() {
588            if let Some(line) = line.strip_prefix("   ") {
589                message.push_str(sep);
590                message.push_str(line);
591                sep = "\n";
592            } else {
593                if message.is_empty() && line != "widget path:" && line != "stack backtrace:" {
594                    // not formatted by us, probably by Rust
595                    line.clone_into(&mut message);
596                }
597                break;
598            }
599        }
600
601        let widget_path = if widget_path < stderr.len() {
602            stderr[widget_path..].lines().next().unwrap().trim()
603        } else {
604            ""
605        };
606
607        let backtrace = if stack_backtrace < stderr.len() {
608            let mut i = stack_backtrace;
609            'backtrace_seek: for line in stderr[stack_backtrace..].lines() {
610                if !line.starts_with(' ') {
611                    'digit_check: for c in line.chars() {
612                        if !c.is_ascii_digit() {
613                            if c == ':' {
614                                break 'digit_check;
615                            } else {
616                                break 'backtrace_seek;
617                            }
618                        }
619                    }
620                }
621                i += line.len() + "\n".len();
622            }
623            &stderr[stack_backtrace..i]
624        } else {
625            ""
626        };
627
628        Some(Self {
629            thread: thread.to_txt(),
630            message: message.into(),
631            file: file.to_txt(),
632            line,
633            column,
634            widget_path: widget_path.to_txt(),
635            backtrace: backtrace.to_txt(),
636        })
637    }
638
639    /// Iterate over frames parsed from the `backtrace`.
640    pub fn backtrace_frames(&self) -> impl Iterator<Item = BacktraceFrame> + '_ {
641        BacktraceFrame::parse(&self.backtrace)
642    }
643}
644
/// Represents a frame parsed from a stack backtrace.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct BacktraceFrame {
    /// Position on the backtrace.
    pub n: usize,

    /// Function name.
    pub name: Txt,
    /// Source code file.
    ///
    /// Is empty if the frame has no source code location.
    pub file: Txt,
    /// Source code line.
    ///
    /// Is zero if the frame has no source code location.
    pub line: u32,

    /// If this frame is inside the Rust panic code.
    pub is_after_panic: bool,
}
662impl fmt::Display for BacktraceFrame {
663    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
664        writeln!(f, "{:>4}: {}", self.n, self.name)?;
665        if !self.file.is_empty() {
666            writeln!(f, "      at {}:{}", self.file, self.line)?;
667        }
668        Ok(())
669    }
670}
671impl BacktraceFrame {
672    /// Iterate over frames parsed from the `backtrace`.
673    pub fn parse(mut backtrace: &str) -> impl Iterator<Item = BacktraceFrame> + '_ {
674        let mut is_after_panic = backtrace.lines().any(|l| l.ends_with("core::panicking::panic_fmt"));
675        std::iter::from_fn(move || {
676            if backtrace.is_empty() {
677                None
678            } else {
679                let n_name = backtrace.lines().next().unwrap();
680                let (n, name) = if let Some((n, name)) = n_name.split_once(':') {
681                    let n = match n.trim_start().parse() {
682                        Ok(n) => n,
683                        Err(_) => {
684                            backtrace = "";
685                            return None;
686                        }
687                    };
688                    let name = name.trim();
689                    if name.is_empty() {
690                        backtrace = "";
691                        return None;
692                    }
693                    (n, name)
694                } else {
695                    backtrace = "";
696                    return None;
697                };
698
699                backtrace = &backtrace[n_name.len() + 1..];
700                let r = if backtrace.trim_start().starts_with("at ") {
701                    let file_line = backtrace.lines().next().unwrap();
702                    let (file, line) = if let Some((file, line)) = file_line.rsplit_once(':') {
703                        let file = file.trim_start().strip_prefix("at ").unwrap();
704                        let line = match line.trim_end().parse() {
705                            Ok(l) => l,
706                            Err(_) => {
707                                backtrace = "";
708                                return None;
709                            }
710                        };
711                        (file, line)
712                    } else {
713                        backtrace = "";
714                        return None;
715                    };
716
717                    backtrace = &backtrace[file_line.len() + 1..];
718
719                    BacktraceFrame {
720                        n,
721                        name: name.to_txt(),
722                        file: file.to_txt(),
723                        line,
724                        is_after_panic,
725                    }
726                } else {
727                    BacktraceFrame {
728                        n,
729                        name: name.to_txt(),
730                        file: Txt::from(""),
731                        line: 0,
732                        is_after_panic,
733                    }
734                };
735
736                if is_after_panic && name == "core::panicking::panic_fmt" {
737                    is_after_panic = false;
738                }
739
740                Some(r)
741            }
742        })
743    }
744
745    /// Reads the code line + four surrounding lines if the code file can be found.
746    pub fn code_snippet(&self) -> Txt {
747        if !self.file.is_empty()
748            && self.line > 0
749            && let Ok(file) = std::fs::File::open(&self.file)
750        {
751            use std::fmt::Write as _;
752            let mut r = String::new();
753
754            let reader = std::io::BufReader::new(file);
755
756            let line_s = self.line - 2.min(self.line - 1);
757            let lines = reader.lines().skip(line_s as usize - 1).take(5);
758            for (line, line_n) in lines.zip(line_s..) {
759                let line = match line {
760                    Ok(l) => l,
761                    Err(_) => return Txt::from(""),
762                };
763
764                if line_n == self.line {
765                    writeln!(&mut r, "      {line_n:>4} > {line}").unwrap();
766                } else {
767                    writeln!(&mut r, "      {line_n:>4} │ {line}").unwrap();
768                }
769            }
770
771            return r.into();
772        }
773        Txt::from("")
774    }
775}
776
777fn crash_handler_monitor_process(
778    dump_dir: Option<PathBuf>,
779    mut cfg_app: ConfigProcess,
780    mut cfg_dialog: ConfigProcess,
781    has_dialog_handler: bool,
782) -> ! {
783    zng_env::set_process_name("crash-handler-process");
784
785    let exe = std::env::current_exe()
786        .and_then(dunce::canonicalize)
787        .expect("failed to get the current executable");
788
789    let mut args: Box<[_]> = std::env::args().skip(1).map(Txt::from).collect();
790
791    let mut dialog_args = CrashArgs {
792        app_crashes: vec![],
793        dialog_crash: None,
794    };
795    loop {
796        let mut app_process = std::process::Command::new(&exe);
797        for cfg in &mut cfg_app {
798            cfg(&mut app_process, &dialog_args);
799        }
800
801        match run_process(
802            dump_dir.as_deref(),
803            app_process
804                .env(APP_PROCESS, format!("restart-{}", dialog_args.app_crashes.len()))
805                .args(args.iter()),
806        ) {
807            Ok((status, [stdout, stderr], dump_file)) => {
808                if status.success() {
809                    let code = status.code().unwrap_or(0);
810                    tracing::info!(
811                        "crash monitor-process exiting with success code ({code}), {} crashes",
812                        dialog_args.app_crashes.len()
813                    );
814                    zng_env::exit(code);
815                } else {
816                    let code = status.code();
817                    #[allow(unused_mut)] // Windows has no signal
818                    let mut signal = None::<i32>;
819
820                    #[cfg(windows)]
821                    if code == Some(1) {
822                        tracing::warn!(
823                            "app-process exit code (1), probably killed by the system, \
824                                        will exit monitor-process with the same code"
825                        );
826                        zng_env::exit(1);
827                    }
828                    #[cfg(unix)]
829                    if code.is_none() {
830                        use std::os::unix::process::ExitStatusExt as _;
831                        signal = status.signal();
832
833                        if let Some(sig) = signal
834                            && [2, 9, 17, 19, 23].contains(&sig)
835                        {
836                            tracing::warn!(
837                                "app-process exited by signal ({sig}), \
838                                                will exit monitor-process with code 1"
839                            );
840                            zng_env::exit(1);
841                        }
842                    }
843
844                    tracing::error!(
845                        "app-process crashed with exit code ({:#X}), signal ({:#?}), {} crashes previously",
846                        code.unwrap_or(0),
847                        signal.unwrap_or(0),
848                        dialog_args.app_crashes.len()
849                    );
850
851                    let timestamp = SystemTime::now();
852
853                    dialog_args.app_crashes.push(CrashError::new(
854                        timestamp,
855                        code,
856                        signal,
857                        stdout.into(),
858                        stderr.into(),
859                        dump_file,
860                        args.clone(),
861                    ));
862
863                    // show dialog, retries once if dialog crashes too.
864                    for _ in 0..2 {
865                        // serialize app-crashes to a temp JSON file
866                        let timestamp_nanos = timestamp.duration_since(SystemTime::UNIX_EPOCH).map(|d| d.as_nanos()).unwrap_or(0);
867                        let mut timestamp = timestamp_nanos;
868                        let mut retries = 0;
869                        let crash_file = loop {
870                            let path = std::env::temp_dir().join(format!("zng-crash-errors-{timestamp:#x}"));
871                            match std::fs::File::create_new(&path) {
872                                Ok(f) => match serde_json::to_writer(std::io::BufWriter::new(f), &dialog_args) {
873                                    Ok(_) => break path,
874                                    Err(e) => {
875                                        if e.is_io() {
876                                            if retries > 20 {
877                                                panic!("error writing crash errors, {e}");
878                                            } else if retries > 5 {
879                                                timestamp += 1;
880                                            }
881                                            std::thread::sleep(100.ms());
882                                        } else {
883                                            panic!("error serializing crash errors, {e}");
884                                        }
885                                    }
886                                },
887                                Err(e) => {
888                                    if e.kind() == std::io::ErrorKind::AlreadyExists {
889                                        timestamp += 1;
890                                    } else {
891                                        if retries > 20 {
892                                            panic!("error creating crash errors file, {e}");
893                                        } else if retries > 5 {
894                                            timestamp += 1;
895                                        }
896                                        std::thread::sleep(100.ms());
897                                    }
898                                }
899                            }
900                            retries += 1;
901                        };
902
903                        let dialog_result = if has_dialog_handler {
904                            let mut dialog_process = std::process::Command::new(&exe);
905                            for cfg in &mut cfg_dialog {
906                                cfg(&mut dialog_process, &dialog_args);
907                            }
908                            run_process(dump_dir.as_deref(), dialog_process.env(DIALOG_PROCESS, &crash_file))
909                        } else {
910                            Ok((std::process::ExitStatus::default(), [String::new(), String::new()], None))
911                        };
912
913                        for _ in 0..5 {
914                            if !crash_file.exists() || std::fs::remove_file(&crash_file).is_ok() {
915                                break;
916                            }
917                            std::thread::sleep(100.ms());
918                        }
919
920                        let response = match dialog_result {
921                            Ok((dlg_status, [dlg_stdout, dlg_stderr], dlg_dump_file)) => {
922                                if dlg_status.success() {
923                                    dlg_stdout
924                                        .lines()
925                                        .filter_map(|l| l.trim().strip_prefix(RESPONSE_PREFIX))
926                                        .next_back()
927                                        .unwrap_or("exit 0")
928                                        .to_owned()
929                                } else {
930                                    let code = dlg_status.code();
931                                    #[allow(unused_mut)] // Windows has no signal
932                                    let mut signal = None::<i32>;
933
934                                    #[cfg(windows)]
935                                    if code == Some(1) {
936                                        tracing::warn!(
937                                            "dialog-process exit code (1), probably killed by the system, \
938                                                        will exit monitor-process with the same code"
939                                        );
940                                        zng_env::exit(1);
941                                    }
942                                    #[cfg(unix)]
943                                    if code.is_none() {
944                                        use std::os::unix::process::ExitStatusExt as _;
945                                        signal = status.signal();
946
947                                        if let Some(sig) = signal
948                                            && [2, 9, 17, 19, 23].contains(&sig)
949                                        {
950                                            tracing::warn!(
951                                                "dialog-process exited by signal ({sig}), \
952                                                                will exit monitor-process with code 1"
953                                            );
954                                            zng_env::exit(1);
955                                        }
956                                    }
957
958                                    let dialog_crash = CrashError::new(
959                                        SystemTime::now(),
960                                        code,
961                                        signal,
962                                        dlg_stdout.into(),
963                                        dlg_stderr.into(),
964                                        dlg_dump_file,
965                                        Box::new([]),
966                                    );
967                                    tracing::error!("crash dialog-process crashed, {dialog_crash}");
968
969                                    if dialog_args.dialog_crash.is_none() {
970                                        dialog_args.dialog_crash = Some(dialog_crash);
971                                        continue;
972                                    } else {
973                                        let latest = dialog_args.latest();
974                                        eprintln!("{latest}");
975                                        zng_env::exit(latest.code.unwrap_or(1));
976                                    }
977                                }
978                            }
979                            Err(e) => panic!("error running dialog-process, {e}"),
980                        };
981
982                        if let Some(args_json) = response.strip_prefix("restart ") {
983                            args = serde_json::from_str(args_json).expect("crash dialog-process did not respond 'restart' correctly");
984                            break;
985                        } else if let Some(code) = response.strip_prefix("exit ") {
986                            let code: i32 = code.parse().expect("crash dialog-process did not respond 'code' correctly");
987                            zng_env::exit(code);
988                        } else {
989                            panic!("crash dialog-process did not respond correctly")
990                        }
991                    }
992                }
993            }
994            Err(e) => panic!("error running app-process, {e}"),
995        }
996    }
997}
998fn run_process(
999    dump_dir: Option<&Path>,
1000    command: &mut std::process::Command,
1001) -> std::io::Result<(std::process::ExitStatus, [String; 2], Option<PathBuf>)> {
1002    struct DumpServer {
1003        shutdown: Arc<AtomicBool>,
1004        runner: std::thread::JoinHandle<Option<PathBuf>>,
1005    }
1006    let mut dump_server = None;
1007    if let Some(dump_dir) = dump_dir {
1008        match std::fs::create_dir_all(dump_dir) {
1009            Ok(_) => {
1010                let uuid = uuid::Uuid::new_v4();
1011                let dump_file = dump_dir.join(format!("{}.dmp", uuid.simple()));
1012                let dump_channel = std::env::temp_dir().join(format!("zng-crash-{}", uuid.simple()));
1013                match minidumper::Server::with_name(dump_channel.as_path()) {
1014                    Ok(mut s) => {
1015                        command.env(DUMP_CHANNEL, &dump_channel);
1016                        let shutdown = Arc::new(AtomicBool::new(false));
1017                        let runner = std::thread::Builder::new()
1018                            .name("minidumper-server".into())
1019                            .stack_size(512 * 1024)
1020                            .spawn(clmv!(shutdown, || {
1021                                let created_file = Arc::new(Mutex::new(None));
1022                                if let Err(e) = s.run(
1023                                    Box::new(MinidumpServerHandler {
1024                                        dump_file,
1025                                        created_file: created_file.clone(),
1026                                    }),
1027                                    &shutdown,
1028                                    None,
1029                                ) {
1030                                    tracing::error!("minidump server exited with error, {e}");
1031                                }
1032                                created_file.lock().take()
1033                            }))
1034                            .expect("failed to spawn thread");
1035                        dump_server = Some(DumpServer { shutdown, runner });
1036                    }
1037                    Err(e) => tracing::error!("failed to spawn minidump server, will not enable crash handling, {e}"),
1038                }
1039            }
1040            Err(e) => tracing::error!("cannot create minidump dir, will not enable crash handling, {e}"),
1041        }
1042    }
1043
1044    let mut app_process = command
1045        .env("RUST_BACKTRACE", "full")
1046        .env("CLICOLOR_FORCE", "1")
1047        .stdout(std::process::Stdio::piped())
1048        .stderr(std::process::Stdio::piped())
1049        .spawn()?;
1050
1051    let stdout = capture_and_print(app_process.stdout.take().unwrap(), false);
1052    let stderr = capture_and_print(app_process.stderr.take().unwrap(), true);
1053
1054    let status = app_process.wait()?;
1055
1056    let stdout = match stdout.join() {
1057        Ok(r) => r,
1058        Err(p) => std::panic::resume_unwind(p),
1059    };
1060    let stderr = match stderr.join() {
1061        Ok(r) => r,
1062        Err(p) => std::panic::resume_unwind(p),
1063    };
1064
1065    let mut dump_file = None;
1066    if let Some(s) = dump_server {
1067        s.shutdown.store(true, atomic::Ordering::Relaxed);
1068        match s.runner.join() {
1069            Ok(r) => dump_file = r,
1070            Err(p) => std::panic::resume_unwind(p),
1071        };
1072    }
1073
1074    Ok((status, [stdout, stderr], dump_file))
1075}
1076struct MinidumpServerHandler {
1077    dump_file: PathBuf,
1078    created_file: Arc<Mutex<Option<PathBuf>>>,
1079}
1080impl minidumper::ServerHandler for MinidumpServerHandler {
1081    fn create_minidump_file(&self) -> Result<(std::fs::File, PathBuf), std::io::Error> {
1082        let file = std::fs::File::create_new(&self.dump_file)?;
1083        Ok((file, self.dump_file.clone()))
1084    }
1085
1086    fn on_minidump_created(&self, result: Result<minidumper::MinidumpBinary, minidumper::Error>) -> minidumper::LoopAction {
1087        match result {
1088            Ok(b) => *self.created_file.lock() = Some(b.path),
1089            Err(e) => tracing::error!("failed to write minidump file, {e}"),
1090        }
1091        minidumper::LoopAction::Exit
1092    }
1093
1094    fn on_message(&self, _: u32, _: Vec<u8>) {}
1095
1096    fn on_client_connected(&self, num_clients: usize) -> minidumper::LoopAction {
1097        if num_clients > 1 {
1098            tracing::error!("expected only one minidump client, {num_clients} connected, exiting server");
1099            minidumper::LoopAction::Exit
1100        } else {
1101            minidumper::LoopAction::Continue
1102        }
1103    }
1104
1105    fn on_client_disconnected(&self, num_clients: usize) -> minidumper::LoopAction {
1106        if num_clients != 0 {
1107            tracing::error!("expected only one minidump client disconnect, {num_clients} still connected");
1108        }
1109        minidumper::LoopAction::Exit
1110    }
1111}
/// Spawns a thread that forwards `stream` to the stderr (`is_err`) or stdout of this process and keeps a copy.
///
/// The thread is named `"stderr-reader"` or `"stdout-reader"`. It runs until the stream returns `0` bytes, the
/// returned handle joins with all the bytes read, lossy converted to UTF-8.
///
/// # Panics
///
/// Panics if the thread cannot be spawned.
///
/// The reader thread panics on write errors and on read errors, except for reads
/// that fail with [`std::io::ErrorKind::Interrupted`], that are retried.
fn capture_and_print(mut stream: impl std::io::Read + Send + 'static, is_err: bool) -> std::thread::JoinHandle<String> {
    let stream_name = if is_err { "stderr" } else { "stdout" };
    std::thread::Builder::new()
        .name(format!("{stream_name}-reader"))
        .stack_size(256 * 1024)
        .spawn(move || {
            let mut capture = vec![];
            let mut buffer = [0u8; 32];
            loop {
                let n = match stream.read(&mut buffer) {
                    Ok(0) => break,
                    Ok(n) => n,
                    // an interrupted read is not a failure, nothing was read and the operation can be retried
                    Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                    Err(e) => panic!("{stream_name} read error, {e}"),
                };

                let new = &buffer[..n];
                capture.extend_from_slice(new);

                // flush every chunk so the output shows as soon as it is read
                let r = if is_err {
                    let mut s = std::io::stderr();
                    s.write_all(new).and_then(|_| s.flush())
                } else {
                    let mut s = std::io::stdout();
                    s.write_all(new).and_then(|_| s.flush())
                };
                if let Err(e) = r {
                    panic!("{stream_name} write error, {e}")
                }
            }
            String::from_utf8_lossy(&capture).into_owned()
        })
        .expect("failed to spawn thread")
}
1146
1147fn crash_handler_app_process(dump_enabled: bool) {
1148    std::panic::set_hook(Box::new(panic_handler));
1149    if dump_enabled {
1150        minidump_attach();
1151    }
1152
1153    // app-process execution happens after this.
1154}
1155
1156fn crash_handler_dialog_process(dump_enabled: bool, dialog: CrashDialogHandler, args_file: String) -> ! {
1157    zng_env::set_process_name("crash-dialog-process");
1158
1159    std::panic::set_hook(Box::new(panic_handler));
1160    if dump_enabled {
1161        minidump_attach();
1162    }
1163
1164    let mut retries = 0;
1165    let args = loop {
1166        match std::fs::read_to_string(&args_file) {
1167            Ok(args) => break args,
1168            Err(e) => {
1169                if e.kind() != std::io::ErrorKind::NotFound && retries < 10 {
1170                    retries += 1;
1171                    continue;
1172                }
1173                panic!("error reading args file, {e}");
1174            }
1175        }
1176    };
1177
1178    dialog(serde_json::from_str(&args).expect("error deserializing args"));
1179    CrashArgs {
1180        app_crashes: vec![],
1181        dialog_crash: None,
1182    }
1183    .exit(0)
1184}
1185
1186fn panic_handler(info: &std::panic::PanicHookInfo) {
1187    let backtrace = std::backtrace::Backtrace::capture();
1188    let path = crate::widget::WIDGET.trace_path();
1189    let panic = PanicInfo::from_hook(info);
1190    eprintln!("{panic}widget path:\n   {path}\nstack backtrace:\n{backtrace}");
1191}
1192
1193fn minidump_attach() {
1194    let channel_name = match std::env::var(DUMP_CHANNEL) {
1195        Ok(n) if !n.is_empty() => PathBuf::from(n),
1196        _ => {
1197            eprintln!("expected minidump channel name, this instance will not handle crashes");
1198            return;
1199        }
1200    };
1201    let client = match minidumper::Client::with_name(channel_name.as_path()) {
1202        Ok(c) => c,
1203        Err(e) => {
1204            eprintln!("failed to connect minidump client, this instance will not handle crashes, {e}");
1205            return;
1206        }
1207    };
1208    struct Handler(minidumper::Client);
1209    // SAFETY: on_crash does the minimal possible work
1210    unsafe impl crash_handler::CrashEvent for Handler {
1211        fn on_crash(&self, context: &crash_handler::CrashContext) -> crash_handler::CrashEventResult {
1212            crash_handler::CrashEventResult::Handled(self.0.request_dump(context).is_ok())
1213        }
1214    }
1215    let handler = match crash_handler::CrashHandler::attach(Box::new(Handler(client))) {
1216        Ok(h) => h,
1217        Err(e) => {
1218            eprintln!("failed attach minidump crash handler, this instance will not handle crashes, {e}");
1219            return;
1220        }
1221    };
1222
1223    *CRASH_HANDLER.lock() = Some(handler);
1224}
1225static CRASH_HANDLER: Mutex<Option<crash_handler::CrashHandler>> = Mutex::new(None);
1226
1227#[derive(Debug)]
1228struct PanicInfo {
1229    pub thread: Txt,
1230    pub msg: Txt,
1231    pub file: Txt,
1232    pub line: u32,
1233    pub column: u32,
1234}
1235impl PanicInfo {
1236    pub fn from_hook(info: &std::panic::PanicHookInfo) -> Self {
1237        let current_thread = std::thread::current();
1238        let thread = current_thread.name().unwrap_or("<unnamed>");
1239        let msg = Self::payload(info.payload());
1240
1241        let (file, line, column) = if let Some(l) = info.location() {
1242            (l.file(), l.line(), l.column())
1243        } else {
1244            ("<unknown>", 0, 0)
1245        };
1246        Self {
1247            thread: thread.to_txt(),
1248            msg,
1249            file: file.to_txt(),
1250            line,
1251            column,
1252        }
1253    }
1254
1255    fn payload(p: &dyn std::any::Any) -> Txt {
1256        match p.downcast_ref::<&'static str>() {
1257            Some(s) => s,
1258            None => match p.downcast_ref::<String>() {
1259                Some(s) => &s[..],
1260                None => "Box<dyn Any>",
1261            },
1262        }
1263        .to_txt()
1264    }
1265}
1266impl std::error::Error for PanicInfo {}
1267impl fmt::Display for PanicInfo {
1268    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1269        writeln!(
1270            f,
1271            "thread '{}' panicked at {}:{}:{}:",
1272            self.thread, self.file, self.line, self.column
1273        )?;
1274        for line in self.msg.lines() {
1275            writeln!(f, "   {line}")?;
1276        }
1277        Ok(())
1278    }
1279}