zng_app/
crash_handler.rs

1#![cfg(all(
2    feature = "crash_handler",
3    not(any(target_arch = "wasm32", target_os = "android", target_os = "ios"))
4))]
5
6//! App-process crash handler.
7//!
8//! See the `zng::app::crash_handler` documentation for more details.
9
10use parking_lot::Mutex;
11use std::{
12    fmt,
13    io::{BufRead, Write},
14    path::{Path, PathBuf},
15    sync::{Arc, atomic::AtomicBool},
16    time::SystemTime,
17};
18use zng_clone_move::clmv;
19use zng_layout::unit::TimeUnits as _;
20
21use zng_txt::{ToTxt as _, Txt};
22
/// Environment variable that causes the crash handler to not start if set.
///
/// Only the presence of the variable is checked, the value is not inspected.
///
/// This is particularly useful to set in debugger launch configs. The crash handler spawns
/// a different process for the app, so breakpoints will not work.
pub const NO_CRASH_HANDLER: &str = "ZNG_NO_CRASH_HANDLER";
28
// Process start hook that selects the crash-handler role of the current process.
//
// The same executable runs as monitor-process, app-process or dialog-process, the role is
// selected from environment variables, in this order:
//
// 1. `NO_CRASH_HANDLER` set, test process or config opt-out: the crash handler is not used.
// 2. `APP_PROCESS` present: this is the app-process spawned by the monitor-process.
// 3. `DIALOG_PROCESS` present: this is the dialog-process.
// 4. Otherwise this process becomes the monitor-process.
zng_env::on_process_start!(|process_start_args| {
    // opt-out by env var, only presence is checked
    if std::env::var(NO_CRASH_HANDLER).is_ok() {
        return;
    }
    if zng_env::about().is_test {
        tracing::debug!("ignoring crash_handler because is test process");
        return;
    }

    // run all closures registered with `crash_handler_config!`, any of them can disable the handler
    let mut config = CrashConfig::new();
    for ext in CRASH_CONFIG {
        ext(&mut config);
        if config.no_crash_handler {
            return;
        }
    }

    // NOTE(review): `yield_once` presumably lets the other registered process-start handlers run
    // before this one is called again, confirm in `zng_env`.
    if process_start_args.next_handlers_count > 0 && process_start_args.yield_count < zng_env::ProcessStartArgs::MAX_YIELD_COUNT - 10 {
        // extra sure that this is the app-process
        return process_start_args.yield_once();
    }

    // any state other than "not present" selects the app-process role, including a non-Unicode value
    if std::env::var(APP_PROCESS) != Err(std::env::VarError::NotPresent) {
        return crash_handler_app_process(config.dump_dir.is_some());
    }

    match std::env::var(DIALOG_PROCESS) {
        Ok(args_file) => crash_handler_dialog_process(
            config.dump_dir.is_some(),
            // the handler set by `CrashConfig::dialog` has priority over the default handler
            config
                .dialog
                .or(config.default_dialog)
                .expect("dialog-process spawned without dialog handler"),
            args_file,
        ),
        Err(e) => match e {
            std::env::VarError::NotPresent => {}
            e => panic!("invalid dialog env args, {e:?}"),
        },
    }

    // no role variable is set, this is the first process, it becomes the monitor-process (never returns)
    crash_handler_monitor_process(
        config.dump_dir,
        config.app_process,
        config.dialog_process,
        config.default_dialog.is_some() || config.dialog.is_some(),
    );
});
77
/// Gets the number of crash restarts in the app-process.
///
/// Always returns zero if called in other processes.
pub fn restart_count() -> usize {
    // the monitor-process sets the variable to `restart-{count}` when it spawns the app-process,
    // a missing variable, a missing prefix or an invalid number all count as zero restarts.
    std::env::var(APP_PROCESS)
        .ok()
        .and_then(|value| value.strip_prefix("restart-")?.parse::<usize>().ok())
        .unwrap_or(0)
}

// Env var set by the monitor-process in the app-process it spawns, value is `restart-{count}`.
const APP_PROCESS: &str = "ZNG_CRASH_HANDLER_APP";
// Env var that identifies the dialog-process, the value is passed to the dialog-process entry point.
const DIALOG_PROCESS: &str = "ZNG_CRASH_HANDLER_DIALOG";
// NOTE(review): not used in the visible part of the file, the name suggests it carries the minidump channel name.
const DUMP_CHANNEL: &str = "ZNG_MINIDUMP_CHANNEL";
// Prefix of the line printed to stdout by the `CrashArgs` methods that respond to a crash.
const RESPONSE_PREFIX: &str = "zng_crash_response: ";
92
// Config closures registered by `crash_handler_config!`, collected at link time.
//
// The process start handler calls every entry with the same `CrashConfig` instance.
#[doc(hidden)]
#[linkme::distributed_slice]
pub static CRASH_CONFIG: [fn(&mut CrashConfig)];

// Re-export used by the `crash_handler_config!` expansion to name `linkme` through `$crate`.
#[doc(hidden)]
pub use linkme as __linkme;
99
/// <span data-del-macro-root></span> Register a `FnOnce(&mut CrashConfig)` closure to be
/// called on process init to configure the crash handler.
///
/// The macro expands to the items `_CRASH_CONFIG` and `_crash_config` in the scope it is called,
/// so it can only be called once per module.
///
/// See [`CrashConfig`] for more details.
#[macro_export]
macro_rules! crash_handler_config {
    ($closure:expr) => {
        // register `_crash_config` in the `CRASH_CONFIG` distributed slice, `linkme` is named
        // through the `$crate` re-export.
        #[$crate::crash_handler::__linkme::distributed_slice($crate::crash_handler::CRASH_CONFIG)]
        #[linkme(crate = $crate::crash_handler::__linkme)]
        #[doc(hidden)]
        static _CRASH_CONFIG: fn(&mut $crate::crash_handler::CrashConfig) = _crash_config;
        #[doc(hidden)]
        fn _crash_config(cfg: &mut $crate::crash_handler::CrashConfig) {
            // the inner function gives the `$closure` expression an expected `FnOnce(&mut CrashConfig)` type
            fn crash_config(cfg: &mut $crate::crash_handler::CrashConfig, handler: impl FnOnce(&mut $crate::crash_handler::CrashConfig)) {
                handler(cfg)
            }
            crash_config(cfg, $closure)
        }
    };
}
pub use crate::crash_handler_config;
122
// Closures that adjust the `Command` of a process just before it is spawned, each closure receives
// the crash info collected so far and returns the same command.
type ConfigProcess = Vec<Box<dyn for<'a, 'b> FnMut(&'a mut std::process::Command, &'b CrashArgs) -> &'a mut std::process::Command>>;
// Handler that runs in the dialog-process, it takes ownership of the crash info.
type CrashDialogHandler = Box<dyn FnOnce(CrashArgs)>;
125
/// Crash handler config.
///
/// Use [`crash_handler_config!`] to set config.
///
/// [`crash_handler_config!`]: crate::crash_handler_config!
pub struct CrashConfig {
    // dialog handler used only when `dialog` is not set
    default_dialog: Option<CrashDialogHandler>,
    // dialog handler set by the app
    dialog: Option<CrashDialogHandler>,
    // closures called on the app-process command before spawn
    app_process: ConfigProcess,
    // closures called on the dialog-process command before spawn
    dialog_process: ConfigProcess,
    // minidump directory, `None` disables minidump collection
    dump_dir: Option<PathBuf>,
    // if set the process runs without crash handler
    no_crash_handler: bool,
}
impl CrashConfig {
    // Default config: no dialog handlers, no process config closures, minidump enabled in the
    // `zng_env::cache("zng_minidump")` directory.
    fn new() -> Self {
        Self {
            default_dialog: None,
            dialog: None,
            app_process: vec![],
            dialog_process: vec![],
            dump_dir: Some(zng_env::cache("zng_minidump")),
            no_crash_handler: false,
        }
    }

    /// Set the crash dialog process handler.
    ///
    /// The dialog `handler` can run an app or show a native dialog, it must use the [`CrashArgs`] process
    /// terminating methods to respond, if it returns [`CrashArgs::exit`] will run.
    ///
    /// Note that the handler does not need to actually show any dialog, it can just save crash info and
    /// restart the app for example.
    ///
    /// Only the first handler set is used, calls made after a handler is already set are ignored.
    pub fn dialog(&mut self, handler: impl FnOnce(CrashArgs) + 'static) {
        if self.dialog.is_none() {
            self.dialog = Some(Box::new(handler));
        }
    }

    /// Set the crash dialog-handler used if [`dialog`] is not set.
    ///
    /// This is used by app libraries or themes to provide a default dialog.
    ///
    /// Replaces any previously set default handler.
    ///
    /// [`dialog`]: Self::dialog
    pub fn default_dialog(&mut self, handler: impl FnOnce(CrashArgs) + 'static) {
        self.default_dialog = Some(Box::new(handler));
    }

    /// Add a closure that is called just before the app-process is spawned.
    ///
    /// The closure receives the process command and the crashes collected so far.
    pub fn app_process(
        &mut self,
        cfg: impl for<'a, 'b> FnMut(&'a mut std::process::Command, &'b CrashArgs) -> &'a mut std::process::Command + 'static,
    ) {
        self.app_process.push(Box::new(cfg));
    }

    /// Add a closure that is called just before the dialog-process is spawned.
    ///
    /// The closure receives the process command and the crashes collected so far.
    pub fn dialog_process(
        &mut self,
        cfg: impl for<'a, 'b> FnMut(&'a mut std::process::Command, &'b CrashArgs) -> &'a mut std::process::Command + 'static,
    ) {
        self.dialog_process.push(Box::new(cfg));
    }

    /// Change the minidump directory.
    ///
    /// Is `zng::env::cache("zng_minidump")` by default.
    ///
    /// Also enables minidump collection again if [`no_minidump`] was called before.
    ///
    /// [`no_minidump`]: Self::no_minidump
    pub fn minidump_dir(&mut self, dir: impl Into<PathBuf>) {
        self.dump_dir = Some(dir.into());
    }

    /// Do not collect a minidump.
    pub fn no_minidump(&mut self) {
        self.dump_dir = None;
    }

    /// Does not run with crash handler.
    ///
    /// This is equivalent of running with the [`NO_CRASH_HANDLER`] (`ZNG_NO_CRASH_HANDLER`) env var set.
    pub fn no_crash_handler(&mut self) {
        self.no_crash_handler = true;
    }
}
206
/// Arguments for the crash handler dialog function.
///
/// The monitor-process serializes this value as JSON to a temp file for each dialog attempt.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct CrashArgs {
    /// Info about the app-process crashes.
    ///
    /// Has at least one entry, latest is last. Includes all crashes since the start of the monitor-process.
    pub app_crashes: Vec<CrashError>,

    /// Info about a crash in the dialog-process spawned to handle the latest app-process crash.
    ///
    /// If set this is the last chance to show something to the end user, if the current dialog crashes too
    /// the monitor-process will give up. If you started an `APP` to show a crash dialog try using a native
    /// dialog directly now, or just give up, clearly things are far from ok.
    pub dialog_crash: Option<CrashError>,
}
impl CrashArgs {
    /// Latest crash.
    ///
    /// # Panics
    ///
    /// Panics if `app_crashes` is empty.
    pub fn latest(&self) -> &CrashError {
        self.app_crashes.last().unwrap()
    }

    /// Restart the app-process with same argument as the latest crash.
    ///
    /// Prints the restart response to stdout and exits the current process with code `0`.
    pub fn restart(&self) -> ! {
        let json_args = serde_json::to_string(&self.latest().args[..]).unwrap();
        println!("{RESPONSE_PREFIX}restart {json_args}");
        zng_env::exit(0)
    }

    /// Restart the app-process with custom arguments.
    ///
    /// Prints the restart response to stdout and exits the current process with code `0`.
    pub fn restart_with(&self, args: &[Txt]) -> ! {
        let json_args = serde_json::to_string(&args).unwrap();
        println!("{RESPONSE_PREFIX}restart {json_args}");
        zng_env::exit(0)
    }

    /// Exit the monitor-process (application) with code.
    ///
    /// The `code` is only written in the response printed to stdout, the current process always exits with code `0`.
    pub fn exit(&self, code: i32) -> ! {
        println!("{RESPONSE_PREFIX}exit {code}");
        zng_env::exit(0)
    }
}
249impl fmt::Display for CrashArgs {
250    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
251        writeln!(f, "APP CRASHES:\n")?;
252
253        for c in self.app_crashes.iter() {
254            writeln!(f, "{c}")?;
255        }
256
257        if let Some(c) = &self.dialog_crash {
258            writeln!(f, "\nDIALOG CRASH:\n")?;
259            writeln!(f, "{c}")?;
260        }
261
262        Ok(())
263    }
264}
265
/// Info about an app-process crash.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct CrashError {
    /// Crash moment.
    ///
    /// This is the time the monitor-process observed the crash.
    pub timestamp: SystemTime,
    /// Process exit code.
    ///
    /// Is `None` if the process did not exit with a code.
    pub code: Option<i32>,
    /// Unix signal that terminated the process.
    ///
    /// Only set in Unix systems, when the process has no exit code.
    pub signal: Option<i32>,
    /// Full capture of the app stdout.
    pub stdout: Txt,
    /// Full capture of the app stderr.
    pub stderr: Txt,
    /// Arguments used.
    ///
    /// These are the command line arguments of the crashed process, without the executable name.
    pub args: Box<[Txt]>,
    /// Minidump file.
    pub minidump: Option<PathBuf>,
    /// Operating system.
    ///
    /// See [`std::env::consts::OS`] for details.
    pub os: Txt,
}
289/// Alternate mode `{:#}` prints plain stdout and stderr (no ANSI escape sequences).
290impl fmt::Display for CrashError {
291    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
292        writeln!(f, "timestamp: {}", self.unix_time())?;
293        if let Some(c) = self.code {
294            writeln!(f, "exit code: {c:#X}")?
295        }
296        if let Some(c) = self.signal {
297            writeln!(f, "exit signal: {c}")?
298        }
299        if let Some(p) = self.minidump.as_ref() {
300            writeln!(f, "minidump: {}", p.display())?
301        }
302        if f.alternate() {
303            write!(f, "\nSTDOUT:\n{}\nSTDERR:\n{}\n", self.stdout_plain(), self.stderr_plain())
304        } else {
305            write!(f, "\nSTDOUT:\n{}\nSTDERR:\n{}\n", self.stdout, self.stderr)
306        }
307    }
308}
impl CrashError {
    // Creates the crash info, `os` is set to the operating system of the current process.
    fn new(
        timestamp: SystemTime,
        code: Option<i32>,
        signal: Option<i32>,
        stdout: Txt,
        stderr: Txt,
        minidump: Option<PathBuf>,
        args: Box<[Txt]>,
    ) -> Self {
        Self {
            timestamp,
            code,
            signal,
            stdout,
            stderr,
            args,
            minidump,
            os: std::env::consts::OS.into(),
        }
    }

    /// Seconds since Unix epoch.
    ///
    /// Is zero if the timestamp is before the Unix epoch.
    pub fn unix_time(&self) -> u64 {
        self.timestamp.duration_since(SystemTime::UNIX_EPOCH).unwrap_or_default().as_secs()
    }

    /// Gets if `stdout` does not contain any ANSI escape sequences.
    pub fn is_stdout_plain(&self) -> bool {
        !self.stdout.contains(CSI)
    }

    /// Gets if `stderr` does not contain any ANSI escape sequences.
    pub fn is_stderr_plain(&self) -> bool {
        !self.stderr.contains(CSI)
    }

    /// Get `stdout` without any ANSI escape sequences (CSI).
    pub fn stdout_plain(&self) -> Txt {
        remove_ansi_csi(&self.stdout)
    }

    /// Get `stderr` without any ANSI escape sequences (CSI).
    pub fn stderr_plain(&self) -> Txt {
        remove_ansi_csi(&self.stderr)
    }

    /// Gets if `stderr` contains a crash panic.
    ///
    /// Is always `false` if the exit code is not `101`.
    pub fn has_panic(&self) -> bool {
        // 101 is the exit code of a Rust process terminated by a panic
        if self.code == Some(101) {
            CrashPanic::contains(&self.stderr_plain())
        } else {
            false
        }
    }

    /// Gets if `stderr` contains a crash panic that traced widget/window path.
    ///
    /// Is always `false` if the exit code is not `101`.
    pub fn has_panic_widget(&self) -> bool {
        if self.code == Some(101) {
            CrashPanic::contains_widget(&self.stderr_plain())
        } else {
            false
        }
    }

    /// Try parse `stderr` for the crash panic.
    ///
    /// Only reliably works if the panic fully printed correctly and was formatted by the panic
    /// hook installed by `crash_handler` or by the display print of [`CrashPanic`].
    ///
    /// Is always `None` if the exit code is not `101`.
    pub fn find_panic(&self) -> Option<CrashPanic> {
        if self.code == Some(101) {
            CrashPanic::find(&self.stderr_plain())
        } else {
            None
        }
    }

    /// Best attempt at generating a readable error message.
    ///
    /// Is the panic message, or the minidump exception, with the exit code and signal.
    pub fn message(&self) -> Txt {
        // the panic message has priority, the minidump is only read if no panic is found
        let mut msg = if let Some(msg) = self.find_panic().map(|p| p.message) {
            msg
        } else if let Some(msg) = self.minidump_message() {
            msg
        } else {
            "".into()
        };
        use std::fmt::Write as _;

        // code and signal are appended in their own lines
        if let Some(c) = self.code {
            let sep = if msg.is_empty() { "" } else { "\n" };
            write!(&mut msg, "{sep}Code: {c:#X}").unwrap();
        }
        if let Some(c) = self.signal {
            let sep = if msg.is_empty() { "" } else { "\n" };
            write!(&mut msg, "{sep}Signal: {c}").unwrap();
        }
        // NOTE(review): presumably ends the mutable editing of the `Txt`, confirm in `zng_txt`
        msg.end_mut();
        msg
    }

    // Reads the crash reason from the minidump file.
    //
    // Returns `None` if there is no minidump file or it cannot be read, read errors are only logged.
    fn minidump_message(&self) -> Option<Txt> {
        use minidump::*;

        let dump = match Minidump::read_path(self.minidump.as_ref()?) {
            Ok(d) => d,
            Err(e) => {
                tracing::error!("error reading minidump, {e}");
                return None;
            }
        };

        let system_info = match dump.get_stream::<MinidumpSystemInfo>() {
            Ok(s) => s,
            Err(e) => {
                tracing::error!("error reading minidump system info, {e}");
                return None;
            }
        };
        let exception = match dump.get_stream::<MinidumpException>() {
            Ok(s) => s,
            Err(e) => {
                tracing::error!("error reading minidump exception, {e}");
                return None;
            }
        };

        // the crash reason is resolved for the OS and CPU recorded in the minidump
        let crash_reason = exception.get_crash_reason(system_info.os, system_info.cpu);

        Some(zng_txt::formatx!("{crash_reason}"))
    }
}
442
443const CSI: &str = "\x1b[";
444
445/// Remove ANSI escape sequences (CSI) from `s`.
446pub fn remove_ansi_csi(mut s: &str) -> Txt {
447    fn is_esc_end(byte: u8) -> bool {
448        (0x40..=0x7e).contains(&byte)
449    }
450
451    let mut r = String::new();
452    while let Some(i) = s.find(CSI) {
453        r.push_str(&s[..i]);
454        s = &s[i + CSI.len()..];
455        let mut esc_end = 0;
456        while esc_end < s.len() && !is_esc_end(s.as_bytes()[esc_end]) {
457            esc_end += 1;
458        }
459        esc_end += 1;
460        s = &s[esc_end..];
461    }
462    r.push_str(s);
463    r.into()
464}
465
/// Panic parsed from a `stderr` dump.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct CrashPanic {
    /// Name of thread that panicked.
    pub thread: Txt,
    /// Panic message.
    pub message: Txt,
    /// Path to file that defines the panic.
    pub file: Txt,
    /// Line of code that defines the panic.
    ///
    /// Is zero if the line could not be parsed.
    pub line: u32,
    /// Column in the line of code that defines the panic.
    ///
    /// Is zero if the column could not be parsed.
    pub column: u32,
    /// Widget where the panic happened.
    ///
    /// Is empty if the panic did not trace a widget path.
    pub widget_path: Txt,
    /// Stack backtrace.
    ///
    /// Is empty if the panic did not print a backtrace.
    pub backtrace: Txt,
}
485
/// Alternate mode `{:#}` prints full backtrace.
///
/// The default mode skips the frames that are inside the Rust panic code and prints a
/// code snippet for the first frames that have source code available.
impl fmt::Display for CrashPanic {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(
            f,
            "thread '{}' panicked at {}:{}:{}:",
            self.thread, self.file, self.line, self.column
        )?;
        // message lines are indented by three spaces
        for line in self.message.lines() {
            writeln!(f, "   {line}")?;
        }
        writeln!(f, "widget path:\n   {}", self.widget_path)?;

        if f.alternate() {
            writeln!(f, "stack backtrace:\n{}", self.backtrace)
        } else {
            writeln!(f, "stack backtrace:")?;
            // max number of frames that print a code snippet
            let mut snippet = 9;
            for frame in self.backtrace_frames().skip_while(|f| f.is_after_panic) {
                write!(f, "{frame}")?;
                if snippet > 0 {
                    let code = frame.code_snippet();
                    // only frames with code found count for the limit
                    if !code.is_empty() {
                        snippet -= 1;
                        writeln!(f, "{code}")?;
                    }
                }
            }
            Ok(())
        }
    }
}
518impl CrashPanic {
519    /// Gets if `stderr` contains a panic that can be parsed by [`find`].
520    ///
521    /// [`find`]: Self::find
522    pub fn contains(stderr: &str) -> bool {
523        Self::find_impl(stderr, false).is_some()
524    }
525
526    /// Gets if `stderr` contains a panic that can be parsed by [`find`] and traced a widget/window path.
527    ///
528    /// [`find`]: Self::find
529    pub fn contains_widget(stderr: &str) -> bool {
530        match Self::find_impl(stderr, false) {
531            Some(p) => !p.widget_path.is_empty(),
532            None => false,
533        }
534    }
535
536    /// Try parse `stderr` for the crash panic.
537    ///
538    /// Only reliably works if the panic fully printed correctly and was formatted by the panic
539    /// hook installed by `crash_handler` or by the display print of this type.
540    pub fn find(stderr: &str) -> Option<Self> {
541        Self::find_impl(stderr, true)
542    }
543
544    fn find_impl(stderr: &str, parse: bool) -> Option<Self> {
545        let mut panic_at = usize::MAX;
546        let mut widget_path = usize::MAX;
547        let mut stack_backtrace = usize::MAX;
548        let mut i = 0;
549        for line in stderr.lines() {
550            if line.starts_with("thread '") && line.contains("' panicked at ") && line.ends_with(':') {
551                panic_at = i;
552                widget_path = usize::MAX;
553                stack_backtrace = usize::MAX;
554            } else if line == "widget path:" {
555                widget_path = i + "widget path:\n".len();
556            } else if line == "stack backtrace:" {
557                stack_backtrace = i + "stack backtrace:\n".len();
558            }
559            i += line.len() + "\n".len();
560        }
561
562        if panic_at == usize::MAX {
563            return None;
564        }
565
566        if !parse {
567            return Some(Self {
568                thread: Txt::from(""),
569                message: Txt::from(""),
570                file: Txt::from(""),
571                line: 0,
572                column: 0,
573                widget_path: if widget_path < stderr.len() {
574                    Txt::from("true")
575                } else {
576                    Txt::from("")
577                },
578                backtrace: Txt::from(""),
579            });
580        }
581
582        let panic_str = stderr[panic_at..].lines().next().unwrap();
583        let (thread, location) = panic_str.strip_prefix("thread '").unwrap().split_once("' panicked at ").unwrap();
584        let mut location = location.split(':');
585        let file = location.next().unwrap_or("");
586        let line: u32 = location.next().unwrap_or("0").parse().unwrap_or(0);
587        let column: u32 = location.next().unwrap_or("0").parse().unwrap_or(0);
588
589        let mut message = String::new();
590        let mut sep = "";
591        for line in stderr[panic_at + panic_str.len() + "\n".len()..].lines() {
592            if let Some(line) = line.strip_prefix("   ") {
593                message.push_str(sep);
594                message.push_str(line);
595                sep = "\n";
596            } else {
597                if message.is_empty() && line != "widget path:" && line != "stack backtrace:" {
598                    // not formatted by us, probably by Rust
599                    line.clone_into(&mut message);
600                }
601                break;
602            }
603        }
604
605        let widget_path = if widget_path < stderr.len() {
606            stderr[widget_path..].lines().next().unwrap().trim()
607        } else {
608            ""
609        };
610
611        let backtrace = if stack_backtrace < stderr.len() {
612            let mut i = stack_backtrace;
613            'backtrace_seek: for line in stderr[stack_backtrace..].lines() {
614                if !line.starts_with(' ') {
615                    'digit_check: for c in line.chars() {
616                        if !c.is_ascii_digit() {
617                            if c == ':' {
618                                break 'digit_check;
619                            } else {
620                                break 'backtrace_seek;
621                            }
622                        }
623                    }
624                }
625                i += line.len() + "\n".len();
626            }
627            &stderr[stack_backtrace..i]
628        } else {
629            ""
630        };
631
632        Some(Self {
633            thread: thread.to_txt(),
634            message: message.into(),
635            file: file.to_txt(),
636            line,
637            column,
638            widget_path: widget_path.to_txt(),
639            backtrace: backtrace.to_txt(),
640        })
641    }
642
643    /// Iterate over frames parsed from the `backtrace`.
644    pub fn backtrace_frames(&self) -> impl Iterator<Item = BacktraceFrame> + '_ {
645        BacktraceFrame::parse(&self.backtrace)
646    }
647}
648
/// Represents a frame parsed from a stack backtrace.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct BacktraceFrame {
    /// Position on the backtrace.
    pub n: usize,

    /// Function name.
    pub name: Txt,
    /// Source code file.
    ///
    /// Is empty if the backtrace does not have the code location of the frame.
    pub file: Txt,
    /// Source code line.
    ///
    /// Is zero if the backtrace does not have the code location of the frame.
    pub line: u32,

    /// If this frame is inside the Rust panic code.
    ///
    /// Is `true` for all frames up to and including the `core::panicking::panic_fmt` frame.
    pub is_after_panic: bool,
}
666impl fmt::Display for BacktraceFrame {
667    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
668        writeln!(f, "{:>4}: {}", self.n, self.name)?;
669        if !self.file.is_empty() {
670            writeln!(f, "      at {}:{}", self.file, self.line)?;
671        }
672        Ok(())
673    }
674}
675impl BacktraceFrame {
676    /// Iterate over frames parsed from the `backtrace`.
677    pub fn parse(mut backtrace: &str) -> impl Iterator<Item = BacktraceFrame> + '_ {
678        let mut is_after_panic = backtrace.lines().any(|l| l.ends_with("core::panicking::panic_fmt"));
679        std::iter::from_fn(move || {
680            if backtrace.is_empty() {
681                None
682            } else {
683                let n_name = backtrace.lines().next().unwrap();
684                let (n, name) = if let Some((n, name)) = n_name.split_once(':') {
685                    let n = match n.trim_start().parse() {
686                        Ok(n) => n,
687                        Err(_) => {
688                            backtrace = "";
689                            return None;
690                        }
691                    };
692                    let name = name.trim();
693                    if name.is_empty() {
694                        backtrace = "";
695                        return None;
696                    }
697                    (n, name)
698                } else {
699                    backtrace = "";
700                    return None;
701                };
702
703                backtrace = &backtrace[n_name.len() + 1..];
704                let r = if backtrace.trim_start().starts_with("at ") {
705                    let file_line = backtrace.lines().next().unwrap();
706                    let (file, line) = if let Some((file, line)) = file_line.rsplit_once(':') {
707                        let file = file.trim_start().strip_prefix("at ").unwrap();
708                        let line = match line.trim_end().parse() {
709                            Ok(l) => l,
710                            Err(_) => {
711                                backtrace = "";
712                                return None;
713                            }
714                        };
715                        (file, line)
716                    } else {
717                        backtrace = "";
718                        return None;
719                    };
720
721                    backtrace = &backtrace[file_line.len() + 1..];
722
723                    BacktraceFrame {
724                        n,
725                        name: name.to_txt(),
726                        file: file.to_txt(),
727                        line,
728                        is_after_panic,
729                    }
730                } else {
731                    BacktraceFrame {
732                        n,
733                        name: name.to_txt(),
734                        file: Txt::from(""),
735                        line: 0,
736                        is_after_panic,
737                    }
738                };
739
740                if is_after_panic && name == "core::panicking::panic_fmt" {
741                    is_after_panic = false;
742                }
743
744                Some(r)
745            }
746        })
747    }
748
749    /// Reads the code line + four surrounding lines if the code file can be found.
750    pub fn code_snippet(&self) -> Txt {
751        if !self.file.is_empty()
752            && self.line > 0
753            && let Ok(file) = std::fs::File::open(&self.file)
754        {
755            use std::fmt::Write as _;
756            let mut r = String::new();
757
758            let reader = std::io::BufReader::new(file);
759
760            let line_s = self.line - 2.min(self.line - 1);
761            let lines = reader.lines().skip(line_s as usize - 1).take(5);
762            for (line, line_n) in lines.zip(line_s..) {
763                let line = match line {
764                    Ok(l) => l,
765                    Err(_) => return Txt::from(""),
766                };
767
768                if line_n == self.line {
769                    writeln!(&mut r, "      {line_n:>4} > {line}").unwrap();
770                } else {
771                    writeln!(&mut r, "      {line_n:>4} │ {line}").unwrap();
772                }
773            }
774
775            return r.into();
776        }
777        Txt::from("")
778    }
779}
780
781fn crash_handler_monitor_process(
782    dump_dir: Option<PathBuf>,
783    mut cfg_app: ConfigProcess,
784    mut cfg_dialog: ConfigProcess,
785    has_dialog_handler: bool,
786) -> ! {
787    zng_env::set_process_name("crash-handler-process");
788
789    let exe = std::env::current_exe()
790        .and_then(dunce::canonicalize)
791        .expect("failed to get the current executable");
792
793    let mut args: Box<[_]> = std::env::args().skip(1).map(Txt::from).collect();
794
795    let mut dialog_args = CrashArgs {
796        app_crashes: vec![],
797        dialog_crash: None,
798    };
799    loop {
800        let mut app_process = std::process::Command::new(&exe);
801        for cfg in &mut cfg_app {
802            cfg(&mut app_process, &dialog_args);
803        }
804
805        match run_process(
806            dump_dir.as_deref(),
807            app_process
808                .env(APP_PROCESS, format!("restart-{}", dialog_args.app_crashes.len()))
809                .args(args.iter()),
810        ) {
811            Ok((status, [stdout, stderr], dump_file)) => {
812                if status.success() {
813                    let code = status.code().unwrap_or(0);
814                    tracing::info!(
815                        "crash monitor-process exiting with success code ({code}), {} crashes",
816                        dialog_args.app_crashes.len()
817                    );
818                    zng_env::exit(code);
819                } else {
820                    let code = status.code();
821                    #[allow(unused_mut)] // Windows has no signal
822                    let mut signal = None::<i32>;
823
824                    #[cfg(windows)]
825                    if code == Some(1) {
826                        tracing::warn!(
827                            "app-process exit code (1), probably killed by the system, \
828                                        will exit monitor-process with the same code"
829                        );
830                        zng_env::exit(1);
831                    }
832                    #[cfg(unix)]
833                    if code.is_none() {
834                        use std::os::unix::process::ExitStatusExt as _;
835                        signal = status.signal();
836
837                        if let Some(sig) = signal
838                            && [2, 9, 17, 19, 23].contains(&sig)
839                        {
840                            tracing::warn!(
841                                "app-process exited by signal ({sig}), \
842                                                will exit monitor-process with code 1"
843                            );
844                            zng_env::exit(1);
845                        }
846                    }
847
848                    tracing::error!(
849                        "app-process crashed with exit code ({:#X}), signal ({:#?}), {} crashes previously",
850                        code.unwrap_or(0),
851                        signal.unwrap_or(0),
852                        dialog_args.app_crashes.len()
853                    );
854
855                    let timestamp = SystemTime::now();
856
857                    dialog_args.app_crashes.push(CrashError::new(
858                        timestamp,
859                        code,
860                        signal,
861                        stdout.into(),
862                        stderr.into(),
863                        dump_file,
864                        args.clone(),
865                    ));
866
867                    // show dialog, retries once if dialog crashes too.
868                    for _ in 0..2 {
869                        // serialize app-crashes to a temp JSON file
870                        let timestamp_nanos = timestamp.duration_since(SystemTime::UNIX_EPOCH).map(|d| d.as_nanos()).unwrap_or(0);
871                        let mut timestamp = timestamp_nanos;
872                        let mut retries = 0;
873                        let crash_file = loop {
874                            let path = std::env::temp_dir().join(format!("zng-crash-errors-{timestamp:#x}"));
875                            match std::fs::File::create_new(&path) {
876                                Ok(f) => match serde_json::to_writer(std::io::BufWriter::new(f), &dialog_args) {
877                                    Ok(_) => break path,
878                                    Err(e) => {
879                                        if e.is_io() {
880                                            if retries > 20 {
881                                                panic!("error writing crash errors, {e}");
882                                            } else if retries > 5 {
883                                                timestamp += 1;
884                                            }
885                                            std::thread::sleep(100.ms());
886                                        } else {
887                                            panic!("error serializing crash errors, {e}");
888                                        }
889                                    }
890                                },
891                                Err(e) => {
892                                    if e.kind() == std::io::ErrorKind::AlreadyExists {
893                                        timestamp += 1;
894                                    } else {
895                                        if retries > 20 {
896                                            panic!("error creating crash errors file, {e}");
897                                        } else if retries > 5 {
898                                            timestamp += 1;
899                                        }
900                                        std::thread::sleep(100.ms());
901                                    }
902                                }
903                            }
904                            retries += 1;
905                        };
906
907                        let dialog_result = if has_dialog_handler {
908                            let mut dialog_process = std::process::Command::new(&exe);
909                            for cfg in &mut cfg_dialog {
910                                cfg(&mut dialog_process, &dialog_args);
911                            }
912                            run_process(dump_dir.as_deref(), dialog_process.env(DIALOG_PROCESS, &crash_file))
913                        } else {
914                            Ok((std::process::ExitStatus::default(), [String::new(), String::new()], None))
915                        };
916
917                        for _ in 0..5 {
918                            if !crash_file.exists() || std::fs::remove_file(&crash_file).is_ok() {
919                                break;
920                            }
921                            std::thread::sleep(100.ms());
922                        }
923
924                        let response = match dialog_result {
925                            Ok((dlg_status, [dlg_stdout, dlg_stderr], dlg_dump_file)) => {
926                                if dlg_status.success() {
927                                    dlg_stdout
928                                        .lines()
929                                        .filter_map(|l| l.trim().strip_prefix(RESPONSE_PREFIX))
930                                        .next_back()
931                                        .unwrap_or("exit 0")
932                                        .to_owned()
933                                } else {
934                                    let code = dlg_status.code();
935                                    #[allow(unused_mut)] // Windows has no signal
936                                    let mut signal = None::<i32>;
937
938                                    #[cfg(windows)]
939                                    if code == Some(1) {
940                                        tracing::warn!(
941                                            "dialog-process exit code (1), probably killed by the system, \
942                                                        will exit monitor-process with the same code"
943                                        );
944                                        zng_env::exit(1);
945                                    }
946                                    #[cfg(unix)]
947                                    if code.is_none() {
948                                        use std::os::unix::process::ExitStatusExt as _;
949                                        signal = status.signal();
950
951                                        if let Some(sig) = signal
952                                            && [2, 9, 17, 19, 23].contains(&sig)
953                                        {
954                                            tracing::warn!(
955                                                "dialog-process exited by signal ({sig}), \
956                                                                will exit monitor-process with code 1"
957                                            );
958                                            zng_env::exit(1);
959                                        }
960                                    }
961
962                                    let dialog_crash = CrashError::new(
963                                        SystemTime::now(),
964                                        code,
965                                        signal,
966                                        dlg_stdout.into(),
967                                        dlg_stderr.into(),
968                                        dlg_dump_file,
969                                        Box::new([]),
970                                    );
971                                    tracing::error!("crash dialog-process crashed, {dialog_crash}");
972
973                                    if dialog_args.dialog_crash.is_none() {
974                                        dialog_args.dialog_crash = Some(dialog_crash);
975                                        continue;
976                                    } else {
977                                        let latest = dialog_args.latest();
978                                        eprintln!("{latest}");
979                                        zng_env::exit(latest.code.unwrap_or(1));
980                                    }
981                                }
982                            }
983                            Err(e) => panic!("error running dialog-process, {e}"),
984                        };
985
986                        if let Some(args_json) = response.strip_prefix("restart ") {
987                            args = serde_json::from_str(args_json).expect("crash dialog-process did not respond 'restart' correctly");
988                            break;
989                        } else if let Some(code) = response.strip_prefix("exit ") {
990                            let code: i32 = code.parse().expect("crash dialog-process did not respond 'code' correctly");
991                            zng_env::exit(code);
992                        } else {
993                            panic!("crash dialog-process did not respond correctly")
994                        }
995                    }
996                }
997            }
998            Err(e) => panic!("error running app-process, {e}"),
999        }
1000    }
1001}
1002fn run_process(
1003    dump_dir: Option<&Path>,
1004    command: &mut std::process::Command,
1005) -> std::io::Result<(std::process::ExitStatus, [String; 2], Option<PathBuf>)> {
1006    struct DumpServer {
1007        shutdown: Arc<AtomicBool>,
1008        runner: std::thread::JoinHandle<Option<PathBuf>>,
1009    }
1010    let mut dump_server = None;
1011    if let Some(dump_dir) = dump_dir {
1012        match std::fs::create_dir_all(dump_dir) {
1013            Ok(_) => {
1014                let uuid = uuid::Uuid::new_v4();
1015                let dump_file = dump_dir.join(format!("{}.dmp", uuid.simple()));
1016                let dump_channel = std::env::temp_dir().join(format!("zng-crash-{}", uuid.simple()));
1017                match minidumper::Server::with_name(minidumper::SocketName::Path(&dump_channel)) {
1018                    Ok(mut s) => {
1019                        command.env(DUMP_CHANNEL, &dump_channel);
1020                        let shutdown = Arc::new(AtomicBool::new(false));
1021                        let runner = std::thread::Builder::new()
1022                            .name("minidumper-server".into())
1023                            .stack_size(512 * 1024)
1024                            .spawn(clmv!(shutdown, || {
1025                                let created_file = Arc::new(Mutex::new(None));
1026                                if let Err(e) = s.run(
1027                                    Box::new(MinidumpServerHandler {
1028                                        dump_file,
1029                                        created_file: created_file.clone(),
1030                                    }),
1031                                    &shutdown,
1032                                    None,
1033                                ) {
1034                                    tracing::error!("minidump server exited with error, {e}");
1035                                }
1036                                created_file.lock().take()
1037                            }))
1038                            .expect("failed to spawn thread");
1039                        dump_server = Some(DumpServer { shutdown, runner });
1040                    }
1041                    Err(e) => tracing::error!("failed to spawn minidump server, will not enable crash handling, {e}"),
1042                }
1043            }
1044            Err(e) => tracing::error!("cannot create minidump dir, will not enable crash handling, {e}"),
1045        }
1046    }
1047
1048    let mut app_process = command
1049        .env("RUST_BACKTRACE", "full")
1050        .env("CLICOLOR_FORCE", "1")
1051        .stdout(std::process::Stdio::piped())
1052        .stderr(std::process::Stdio::piped())
1053        .spawn()?;
1054
1055    let stdout = capture_and_print(app_process.stdout.take().unwrap(), false);
1056    let stderr = capture_and_print(app_process.stderr.take().unwrap(), true);
1057
1058    let status = app_process.wait()?;
1059
1060    let stdout = match stdout.join() {
1061        Ok(r) => r,
1062        Err(p) => std::panic::resume_unwind(p),
1063    };
1064    let stderr = match stderr.join() {
1065        Ok(r) => r,
1066        Err(p) => std::panic::resume_unwind(p),
1067    };
1068
1069    let mut dump_file = None;
1070    if let Some(s) = dump_server {
1071        s.shutdown.store(true, atomic::Ordering::Relaxed);
1072        match s.runner.join() {
1073            Ok(r) => dump_file = r,
1074            Err(p) => std::panic::resume_unwind(p),
1075        };
1076    }
1077
1078    Ok((status, [stdout, stderr], dump_file))
1079}
1080struct MinidumpServerHandler {
1081    dump_file: PathBuf,
1082    created_file: Arc<Mutex<Option<PathBuf>>>,
1083}
1084impl minidumper::ServerHandler for MinidumpServerHandler {
1085    fn create_minidump_file(&self) -> Result<(std::fs::File, PathBuf), std::io::Error> {
1086        let file = std::fs::File::create_new(&self.dump_file)?;
1087        Ok((file, self.dump_file.clone()))
1088    }
1089
1090    fn on_minidump_created(&self, result: Result<minidumper::MinidumpBinary, minidumper::Error>) -> minidumper::LoopAction {
1091        match result {
1092            Ok(b) => *self.created_file.lock() = Some(b.path),
1093            Err(e) => tracing::error!("failed to write minidump file, {e}"),
1094        }
1095        minidumper::LoopAction::Exit
1096    }
1097
1098    fn on_message(&self, _: u32, _: Vec<u8>) {}
1099
1100    fn on_client_connected(&self, num_clients: usize) -> minidumper::LoopAction {
1101        if num_clients > 1 {
1102            tracing::error!("expected only one minidump client, {num_clients} connected, exiting server");
1103            minidumper::LoopAction::Exit
1104        } else {
1105            minidumper::LoopAction::Continue
1106        }
1107    }
1108
1109    fn on_client_disconnected(&self, num_clients: usize) -> minidumper::LoopAction {
1110        if num_clients != 0 {
1111            tracing::error!("expected only one minidump client disconnect, {num_clients} still connected");
1112        }
1113        minidumper::LoopAction::Exit
1114    }
1115}
/// Spawns a thread that reads `stream` to the end, forwarding every chunk read to this
/// process stderr (if `is_err`) or stdout, and collecting all bytes.
///
/// The returned handle joins to the collected output, converted to a string with invalid
/// UTF-8 sequences replaced.
///
/// Reads that fail with [`std::io::ErrorKind::Interrupted`] are retried.
///
/// # Panics
///
/// Panics if the thread cannot be spawned. The spawned thread panics on any other read error
/// or if forwarding the output fails.
fn capture_and_print(mut stream: impl std::io::Read + Send + 'static, is_err: bool) -> std::thread::JoinHandle<String> {
    let name = if is_err { "stderr" } else { "stdout" };
    std::thread::Builder::new()
        .name(format!("{name}-reader"))
        .stack_size(256 * 1024)
        .spawn(move || {
            let mut capture = vec![];
            let mut buffer = [0u8; 32];
            loop {
                let n = match stream.read(&mut buffer) {
                    // end of stream
                    Ok(0) => break,
                    Ok(n) => n,
                    // an interrupted read transferred no data and can be retried
                    Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                    Err(e) => panic!("{name} read error, {e}"),
                };

                let new = &buffer[..n];
                capture.extend_from_slice(new);

                // forward immediately so the output is visible while the process runs
                let forwarded = if is_err {
                    let mut s = std::io::stderr();
                    s.write_all(new).and_then(|_| s.flush())
                } else {
                    let mut s = std::io::stdout();
                    s.write_all(new).and_then(|_| s.flush())
                };
                if let Err(e) = forwarded {
                    panic!("{name} write error, {e}")
                }
            }
            String::from_utf8_lossy(&capture).into_owned()
        })
        .expect("failed to spawn thread")
}
1150
1151fn crash_handler_app_process(dump_enabled: bool) {
1152    std::panic::set_hook(Box::new(panic_handler));
1153    if dump_enabled {
1154        minidump_attach();
1155    }
1156
1157    // app-process execution happens after this.
1158}
1159
1160fn crash_handler_dialog_process(dump_enabled: bool, dialog: CrashDialogHandler, args_file: String) -> ! {
1161    zng_env::set_process_name("crash-dialog-process");
1162
1163    std::panic::set_hook(Box::new(panic_handler));
1164    if dump_enabled {
1165        minidump_attach();
1166    }
1167
1168    let mut retries = 0;
1169    let args = loop {
1170        match std::fs::read_to_string(&args_file) {
1171            Ok(args) => break args,
1172            Err(e) => {
1173                if e.kind() != std::io::ErrorKind::NotFound && retries < 10 {
1174                    retries += 1;
1175                    continue;
1176                }
1177                panic!("error reading args file, {e}");
1178            }
1179        }
1180    };
1181
1182    dialog(serde_json::from_str(&args).expect("error deserializing args"));
1183    CrashArgs {
1184        app_crashes: vec![],
1185        dialog_crash: None,
1186    }
1187    .exit(0)
1188}
1189
1190fn panic_handler(info: &std::panic::PanicHookInfo) {
1191    let backtrace = std::backtrace::Backtrace::capture();
1192    let path = crate::widget::WIDGET.trace_path();
1193    let panic = PanicInfo::from_hook(info);
1194    eprintln!("{panic}widget path:\n   {path}\nstack backtrace:\n{backtrace}");
1195}
1196
1197fn minidump_attach() {
1198    let channel_name = match std::env::var(DUMP_CHANNEL) {
1199        Ok(n) if !n.is_empty() => PathBuf::from(n),
1200        _ => {
1201            eprintln!("expected minidump channel name, this instance will not handle crashes");
1202            return;
1203        }
1204    };
1205    let client = match minidumper::Client::with_name(minidumper::SocketName::Path(&channel_name)) {
1206        Ok(c) => c,
1207        Err(e) => {
1208            eprintln!("failed to connect minidump client, this instance will not handle crashes, {e}");
1209            return;
1210        }
1211    };
1212    struct Handler(minidumper::Client);
1213    // SAFETY: on_crash does the minimal possible work
1214    unsafe impl crash_handler::CrashEvent for Handler {
1215        fn on_crash(&self, context: &crash_handler::CrashContext) -> crash_handler::CrashEventResult {
1216            crash_handler::CrashEventResult::Handled(self.0.request_dump(context).is_ok())
1217        }
1218    }
1219    let handler = match crash_handler::CrashHandler::attach(Box::new(Handler(client))) {
1220        Ok(h) => h,
1221        Err(e) => {
1222            eprintln!("failed attach minidump crash handler, this instance will not handle crashes, {e}");
1223            return;
1224        }
1225    };
1226
1227    *CRASH_HANDLER.lock() = Some(handler);
1228}
1229static CRASH_HANDLER: Mutex<Option<crash_handler::CrashHandler>> = Mutex::new(None);
1230
1231#[derive(Debug)]
1232struct PanicInfo {
1233    pub thread: Txt,
1234    pub msg: Txt,
1235    pub file: Txt,
1236    pub line: u32,
1237    pub column: u32,
1238}
1239impl PanicInfo {
1240    pub fn from_hook(info: &std::panic::PanicHookInfo) -> Self {
1241        let current_thread = std::thread::current();
1242        let thread = current_thread.name().unwrap_or("<unnamed>");
1243        let msg = Self::payload(info.payload());
1244
1245        let (file, line, column) = if let Some(l) = info.location() {
1246            (l.file(), l.line(), l.column())
1247        } else {
1248            ("<unknown>", 0, 0)
1249        };
1250        Self {
1251            thread: thread.to_txt(),
1252            msg,
1253            file: file.to_txt(),
1254            line,
1255            column,
1256        }
1257    }
1258
1259    fn payload(p: &dyn std::any::Any) -> Txt {
1260        match p.downcast_ref::<&'static str>() {
1261            Some(s) => s,
1262            None => match p.downcast_ref::<String>() {
1263                Some(s) => &s[..],
1264                None => "Box<dyn Any>",
1265            },
1266        }
1267        .to_txt()
1268    }
1269}
1270impl std::error::Error for PanicInfo {}
1271impl fmt::Display for PanicInfo {
1272    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1273        writeln!(
1274            f,
1275            "thread '{}' panicked at {}:{}:{}:",
1276            self.thread, self.file, self.line, self.column
1277        )?;
1278        for line in self.msg.lines() {
1279            writeln!(f, "   {line}")?;
1280        }
1281        Ok(())
1282    }
1283}