From 4577aea9630bd1693f0657f0e13cec034a96ceae Mon Sep 17 00:00:00 2001
From: Joakim Hulthe <joakim@hulthe.net>
Date: Thu, 21 Nov 2024 17:10:10 +0100
Subject: [PATCH 1/2] Fix exception_logging module being included twice

---
 mullvad-daemon/src/main.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mullvad-daemon/src/main.rs b/mullvad-daemon/src/main.rs
index aee5191366de..ddf325167a10 100644
--- a/mullvad-daemon/src/main.rs
+++ b/mullvad-daemon/src/main.rs
@@ -3,14 +3,14 @@ use std::{path::PathBuf, thread, time::Duration};
 #[cfg(not(windows))]
 use mullvad_daemon::cleanup_old_rpc_socket;
 use mullvad_daemon::{
-    logging, rpc_uniqueness_check, runtime, version, Daemon, DaemonCommandChannel,
+    exception_logging, logging, rpc_uniqueness_check, runtime, version, Daemon,
+    DaemonCommandChannel,
 };
 use talpid_types::ErrorExt;
 
 mod cli;
 #[cfg(target_os = "linux")]
 mod early_boot_firewall;
-mod exception_logging;
 #[cfg(target_os = "macos")]
 mod macos_launch_daemon;
 #[cfg(windows)]

From 853c849c77f1ca40e7b1a43d7a4515eeadf79d25 Mon Sep 17 00:00:00 2001
From: Joakim Hulthe <joakim.hulthe@mullvad.net>
Date: Wed, 13 Nov 2024 10:22:00 +0100
Subject: [PATCH 2/2] Make unix signal-handler signal-safe

Other changes:
- Re-enable the signal handler in release-builds.
- Disable backtrace printing by default since it's not signal-safe.
- Add `MULLVAD_BACKTRACE_ON_FAULT` env variable to enable backtracing.
- Remove the alternate signal stack. The reasons for this are:
  - Setting up an alt-stack in a safe way is not trivial, our previous
    attempt was unsound in the presence of stack overflows. It can be
    done safely with mmap, but would require careful review.
  - The alt-stack is thread-local, meaning it would need to be
    initialized on a per-thread basis. We would need to hook into tokio
    and std::thread::spawn to be able to get good coverage, and even
    then there would no good way to ensure that *all* threads have an
    alternate stack, except that...
  - Rust (by default) allocates an alternate stack for every thread.
    Unfortunately, the prescence of Go code in our linked binary
    disables this feature. IMO, we should strive towards not having any
    Go code linked into the daemon for this reason.
---
 README.md                                    |   7 +
 mullvad-daemon/src/exception_logging/mod.rs  |   2 +-
 mullvad-daemon/src/exception_logging/unix.rs | 392 +++++++++++++------
 mullvad-daemon/src/main.rs                   |  15 +-
 4 files changed, 287 insertions(+), 129 deletions(-)

diff --git a/README.md b/README.md
index 66fef5182a80..c85567afc4ca 100644
--- a/README.md
+++ b/README.md
@@ -177,6 +177,13 @@ See [this](Release.md) for instructions on how to make a new release.
   interface UDS socket to users in the specified group. This means that only users in that group can
   use the CLI and GUI. By default, everyone has access to the socket.
 
+* `MULLVAD_BACKTRACE_ON_FAULT` - When enabled, if the daemon encounters a fault (e.g. `SIGSEGV`),
+  it will log a backtrace to stdout, and to `daemon.log`. By default, this is disabled in
+  release-builds and enabled in debug-builds. Set variable to `1` or `0` to explicitly enable or
+  disable this feature. Logging the backtrace cause heap allocation. Allocation is not signal safe,
+  but here it runs in the signal handler. This in technically undefined behavior and therefore
+  disabled by default. This usually works, but enable at your own risk.
+
 ### Development builds only
 
 * `MULLVAD_API_HOST` - Set the hostname to use in API requests. E.g. `api.mullvad.net`.
diff --git a/mullvad-daemon/src/exception_logging/mod.rs b/mullvad-daemon/src/exception_logging/mod.rs
index c1962a1ca148..552bda237054 100644
--- a/mullvad-daemon/src/exception_logging/mod.rs
+++ b/mullvad-daemon/src/exception_logging/mod.rs
@@ -8,4 +8,4 @@ pub use win::enable;
 mod unix;
 
 #[cfg(unix)]
-pub use unix::enable;
+pub use unix::{enable, set_log_file};
diff --git a/mullvad-daemon/src/exception_logging/unix.rs b/mullvad-daemon/src/exception_logging/unix.rs
index 9170c4d11624..b956db8319db 100644
--- a/mullvad-daemon/src/exception_logging/unix.rs
+++ b/mullvad-daemon/src/exception_logging/unix.rs
@@ -1,145 +1,283 @@
-//! Installs signal handlers to catch critical program faults and logs them. See [`enable`].
-//!
-//! NOTE: The signal handlers are disabled in release-builds. See docs on
-//! `handler::logging_fault_handler` for reasoning.
-
-pub use handler::enable;
-
-#[cfg(not(debug_assertions))]
-mod handler {
-    /// This is a no-op on release-builds; it does NOT install a signal handler.
-    pub fn enable() {
-        // No fault handler in release-builds.
+//! Install signal handlers to catch critical program faults and log them. See [`enable`].
+#![warn(clippy::undocumented_unsafe_blocks)]
+
+use libc::siginfo_t;
+use nix::sys::signal::{sigaction, SaFlags, SigAction, SigHandler, SigSet, Signal};
+
+use core::fmt;
+use std::{
+    backtrace::Backtrace,
+    env,
+    ffi::{c_int, c_void, CString},
+    os::fd::{FromRawFd, RawFd},
+    sync::{
+        atomic::{AtomicBool, Ordering},
+        Once, OnceLock,
+    },
+};
+
+/// Write fault to this file.
+static LOG_FILE_PATH: OnceLock<CString> = OnceLock::new();
+
+/// If true, the signal-handler will log a backtrace when triggered.
+///
+/// Default value is `true` for debug-builds, and `false` for release-builds.
+/// This can be overridden by setting the env var [ENABLE_BACKTRACE_VAR] to `1` or `0`.
+///
+/// # Safety
+/// The [Backtrace] implementation we use does not guarantee signal-safety; Invoking it from a
+/// signal handler can be unsound. In practice this usually works fine, but to avoid any risk to
+/// users, we disable backtracing by default in release-builds.
+static ENABLE_BACKTRACE: AtomicBool = AtomicBool::new(cfg!(debug_assertions));
+
+/// Name of the environment variable that sets [ENABLE_BACKTRACE].
+const ENABLE_BACKTRACE_VAR: &str = "MULLVAD_BACKTRACE_ON_FAULT";
+
+/// The signals we install handlers for.
+const FAULT_SIGNALS: [Signal; 5] = [
+    // Access to invalid memory address
+    Signal::SIGBUS,
+    // Floating point exception
+    Signal::SIGFPE,
+    // Illegal instructors
+    Signal::SIGILL,
+    // Invalid memory reference
+    Signal::SIGSEGV,
+    // Bad syscall
+    Signal::SIGSYS,
+];
+
+/// Set the file path used for fault handler logging.
+///
+/// Panics if called more than once.
+pub fn set_log_file(file_path: impl Into<CString>) {
+    if let Err(_file_path) = LOG_FILE_PATH.set(file_path.into()) {
+        panic!("set_log_file may not be called more than once");
     }
 }
 
-#[cfg(debug_assertions)]
-mod handler {
-    use libc::siginfo_t;
-    use nix::sys::signal::{sigaction, SaFlags, SigAction, SigHandler, SigSet, Signal};
-
-    use std::{
-        backtrace::Backtrace,
-        ffi::{c_int, c_void},
-        sync::{
-            atomic::{AtomicBool, Ordering},
-            Once,
+/// Install signal handlers to catch critical program faults, log them, and exit the process.
+pub fn enable() {
+    static INIT_ONCE: Once = Once::new();
+
+    INIT_ONCE.call_once(|| {
+        if let Ok(override_backtrace) = env::var(ENABLE_BACKTRACE_VAR).map(|v| v != "0") {
+            ENABLE_BACKTRACE.store(override_backtrace, Ordering::Release);
+        }
+
+        // SA_ONSTACK tells the signal handler to use an alternate stack, if one is available.
+        // The purpose of an alternate stack is to have somewhere to execute the signal handler in
+        // the case of a stack overflow. I.e. if an alternate stack hasn't been configured, stack
+        // overflows will silently cause the process to exit with code SIGSEGV.
+        //
+        // `libc::sigaltstack` can be used to set up an alternate stack on the current thread. The
+        // default behaviour of the Rust runtime is to set up alternate stacks for the main thread,
+        // and for every thread spawned using `std::thread`. However, Rust will not do this if any
+        // signal handlers have been configured before the Rust runtime is initialized (note: the
+        // initialization happens before main() is called). For example, if any Go code is linked
+        // into this binary, the Go runtime will probably be initialized first, and will set up
+        // it's own signal handlers.
+        //
+        // Go requires this flag to be set for all signal handlers.
+        // https://github.com/golang/go/blob/d6fb0ab2/src/os/signal/doc.go
+        let sig_handler_flags = SaFlags::SA_ONSTACK;
+
+        let signal_action = SigAction::new(
+            SigHandler::SigAction(fault_handler),
+            sig_handler_flags,
+            SigSet::empty(),
+        );
+
+        for signal in &FAULT_SIGNALS {
+            // SAFETY:
+            // `fault_handler` is signal-safe if ENABLE_BACKTRACE is false.
+            // See docs on ENABLE_BACKTRACE.
+            if let Err(err) = unsafe { sigaction(*signal, &signal_action) } {
+                log::error!("Failed to install signal handler for {}: {}", signal, err);
+            }
+        }
+    });
+}
+
+/// Signal handler to catch signals that are used to indicate unrecoverable errors in the daemon.
+///
+/// # Safety
+/// This function is signal-safe if [ENABLE_BACKTRACE] is false.
+//
+// NOTE: When implementing, make sure to adhere to the rules of signal-safety!
+// For a detailed definition, see https://man7.org/linux/man-pages/man7/signal-safety.7.html
+// The short version is:
+// - This function must be re-entrant.
+// - This function must only call functions that are signal-safe.
+// - The man-page provides a list of posix-functions that are signal-safe. (These can be found
+//   in the `libc`-crate)
+extern "C" fn fault_handler(
+    signum: c_int,
+    _siginfo: *mut siginfo_t,
+    _thread_context_ptr: *mut c_void,
+) {
+    // 128 + signum is the convention set by bash.
+    let signum_code: c_int = signum.saturating_add(0x80);
+
+    let code: c_int = match log_fault_to_file_and_stdout(signum) {
+        // Signal numbers are positive integers.
+        Ok(()) => signum_code,
+
+        // map error to error-codes.
+        Err(err) => match err {
+            FaultHandlerErr::Open => 2,
+            FaultHandlerErr::Write => 3,
+            FaultHandlerErr::FSync => 4,
+            FaultHandlerErr::Reentrancy => 5,
         },
     };
 
-    /// The signals we install handlers for.
-    const FAULT_SIGNALS: [Signal; 5] = [
-        // Access to invalid memory address
-        Signal::SIGBUS,
-        // Floating point exception
-        Signal::SIGFPE,
-        // Illegal instructors
-        Signal::SIGILL,
-        // Invalid memory reference
-        Signal::SIGSEGV,
-        // Bad syscall
-        Signal::SIGSYS,
-    ];
-
-    /// Install the signal handlers (debug-builds only).
-    pub fn enable() {
-        static INIT_ONCE: Once = Once::new();
-
-        INIT_ONCE.call_once(|| {
-            // Setup alt stack for signal handlers to be executed in.
-            // If the daemon ever needs to be compiled for architectures where memory can't be
-            // writeable and executable, the following block of code has to be disabled.
-            // This will also mean that stack overflows may be silent and undetectable
-            // in logs.
-            let sig_handler_flags = {
-                // The kernel will use the first properly aligned address, so alignment is not an
-                // issue.
-                let alt_stack = vec![0u8; libc::SIGSTKSZ];
-                let stack_t = libc::stack_t {
-                    ss_sp: alt_stack.as_ptr() as *mut c_void,
-                    ss_flags: 0,
-                    ss_size: alt_stack.len(),
-                };
-                let ret = unsafe { libc::sigaltstack(&stack_t, std::ptr::null_mut()) };
-                if ret != 0 {
-                    log::error!(
-                        "Failed to set alternative stack: {}",
-                        std::io::Error::last_os_error()
-                    );
-                    SaFlags::empty()
-                } else {
-                    std::mem::forget(alt_stack);
-                    SaFlags::SA_ONSTACK
-                }
-            };
+    // SIGNAL-SAFETY: This function is listed in `man 7 signal-safety`.
+    // SAFETY: This function is trivially safe to call.
+    unsafe { libc::_exit(code) }
+}
+
+/// Call from a signal handler to try to print the signal (and optionally a backtrace).
+///
+/// The output is written to stdout, and to a file if [set_exception_logging_file] was called.
+///
+/// # Safety
+/// This function is signal-safe if [ENABLE_BACKTRACE] is false.
+// NOTE: See rules on signal-safety in `fn fault_handler`!
+fn log_fault_to_file_and_stdout(signum: c_int) -> Result<(), FaultHandlerErr> {
+    // Guard against reentrancy, which can happen if this fault handler triggers another fault.
+    static REENTRANT: AtomicBool = AtomicBool::new(false);
+    if REENTRANT.swap(true, Ordering::SeqCst) {
+        return Err(FaultHandlerErr::Reentrancy);
+    }
+
+    // SAFETY: calling `write` on stdout is safe, even if it's been closed.
+    let stdout = unsafe { LibcWriter::from_raw_fd(libc::STDOUT_FILENO) };
+    log_fault_to_writer(signum, stdout)?;
 
-            let signal_action = SigAction::new(
-                SigHandler::SigAction(fault_handler),
-                sig_handler_flags,
-                SigSet::empty(),
-            );
-
-            for signal in &FAULT_SIGNALS {
-                // SAFETY: fault_handler is NOT signal-safe (see logging_fault_handler). We still
-                // use it, but only in development builds because it makes debugging
-                // easier.
-                if let Err(err) = unsafe { sigaction(*signal, &signal_action) } {
-                    log::error!("Failed to install signal handler for {}: {}", signal, err);
-                }
+    // SIGNAL-SAFETY: OnceLock::get is atomic and non-blocking.
+    if let Some(log_file_path) = LOG_FILE_PATH.get() {
+        let mut log_file: LibcWriter = {
+            let open_flags = libc::O_WRONLY | libc::O_APPEND;
+
+            // This file remains open until `_exit` is called by `fault_handler`.
+            // SIGNAL-SAFETY: This function is listed in `man 7 signal-safety`.
+            // SAFETY: the path is a null-terminated string.
+            let result_code = unsafe { libc::open(log_file_path.as_ptr(), open_flags) };
+
+            match result_code {
+                // `open` returns -1 on failure.
+                ..0 => return Err(FaultHandlerErr::Open),
+
+                // SAFETY: `fd` is an open file descriptor.
+                fd => unsafe { LibcWriter::from_raw_fd(fd) },
             }
-        });
+        };
+
+        log_fault_to_writer(signum, &mut log_file)?;
+        log_file.flush()?;
     }
 
-    /// Signal handler to catch signals that are used to indicate unrecoverable errors in the
-    /// daemon.
-    extern "C" fn fault_handler(
-        signum: c_int,
-        _siginfo: *mut siginfo_t,
-        _thread_context_ptr: *mut c_void,
-    ) {
-        // SAFETY: This function is known to be potentially unsound and should not be used in prod,
-        // but we keep it in debug-builds because debugging SIGSEGV faults is a PITA.
-        // See logging_fault_handler docs for more info.
-        unsafe { logging_fault_handler(signum) };
+    Ok(())
+}
+
+/// Call from a signal handler to try to write the signal (and optionally a backtrace).
+///
+/// The output is written to the writer passed in as an argument.
+///
+/// # Safety
+/// This function is signal-safe if [ENABLE_BACKTRACE] is false.
+// NOTE: See rules on signal-safety in `fn fault_handler`.
+fn log_fault_to_writer(signum: c_int, mut w: impl fmt::Write) -> Result<(), FaultHandlerErr> {
+    // SIGNAL-SAFETY: Signal::try_from(i32) is signal-safe
+    let signal_name: &str = match Signal::try_from(signum) {
+        // SIGNAL-SAFETY: as_str is const
+        Ok(signal) => signal.as_str(),
+        Err(_) => "UNKNOWN",
+    };
+
+    // SIGNAL-SAFETY:
+    // `writeln` resolves to <LibcWriter as fmt::Write>::write, which is signal-safe.
+    // formatting &str and i32 is signal-safe.
+    writeln!(w, "Caught signal {signum} ({signal_name})")?;
+
+    // Formatting a `Backtrace` is NOT signal-safe. See docs on ENABLE_BACKTRACE.
+    if ENABLE_BACKTRACE.load(Ordering::Acquire) {
+        writeln!(w, "Backtrace:")?;
+        writeln!(w, "{}", Backtrace::force_capture())?;
+    } else {
+        writeln!(w, "Set {ENABLE_BACKTRACE_VAR}=1 to print backtrace.")?
     }
 
-    /// Call from a signal handler to [log] the signal, and the current backtrace.
-    ///
-    /// See also: [fault_handler].
-    ///
-    /// # SAFETY
-    /// Calling this function from a signal handler is potentially unsound. This is because is
-    /// performs functions that are not "signal-safe", for example: `process::exit` and writing
-    /// to to stdout. See also: <https://man7.org/linux/man-pages/man7/signal-safety.7.html>.
-    ///
-    /// For this reason, this handler is only used in debug-builds.
-    // TODO: Consider rewriting this function to e.g. use a pipe to exfiltrate the backtrace to
-    // another process that can write the backtrace to tho log file. `write` is signal-safe.
-    unsafe fn logging_fault_handler(signum: c_int) {
-        // Guard against reentrancy, which can happen if this fault handler triggers another fault.
-        static REENTRANCY_GUARD: AtomicBool = AtomicBool::new(false);
-        if REENTRANCY_GUARD.swap(true, Ordering::SeqCst) {
-            // `process::abort` is signal-safe, unlike `process::exit`.
-            std::process::abort();
+    Ok(())
+}
+
+enum FaultHandlerErr {
+    /// A call to `libc::open` failed.
+    Open,
+
+    /// A call to `libc::write` failed.
+    Write,
+
+    /// A call to `libc::fsync` failed.
+    FSync,
+
+    /// Signal handler was called reentrantly.
+    Reentrancy,
+}
+
+/// A wrapper type that implements `fmt::Write` for a file descriptor through `libc::write`.
+struct LibcWriter {
+    file_descriptor: RawFd,
+}
+
+impl LibcWriter {
+    /// Call `libc::fsync` on the file descriptor.
+    pub fn flush(&self) -> Result<(), FaultHandlerErr> {
+        // SAFETY: This function is trivially safe to call
+        match unsafe { libc::fsync(self.file_descriptor) } {
+            ..0 => Err(FaultHandlerErr::FSync),
+            _ => Ok(()),
         }
+    }
+}
 
-        let signal: Signal = match Signal::try_from(signum) {
-            Ok(signal) => signal,
-            Err(err) => {
-                log::error!(
-                    "Signal handler triggered by unknown signal {}, exiting: {}",
-                    signum,
-                    err
-                );
-                std::process::exit(2);
-            }
-        };
+impl FromRawFd for LibcWriter {
+    /// Wrap a file descriptor in a [LibcWriter].
+    ///
+    /// # Safety
+    /// The file descriptor must refer to an opened file, or to stdout/stderr.
+    /// In the case of a file, it must remain open for the lifetime of the [LibcWriter].
+    unsafe fn from_raw_fd(file_descriptor: RawFd) -> Self {
+        Self { file_descriptor }
+    }
+}
 
-        log::error!("Caught signal {}", signal);
-        log::error!("Backtrace:");
-        for line in format!("{}", Backtrace::force_capture()).lines() {
-            log::error!("{line}");
+impl fmt::Write for LibcWriter {
+    fn write_str(&mut self, string: &str) -> fmt::Result {
+        let mut bytes = string.as_bytes();
+        while !bytes.is_empty() {
+            let ptr = bytes.as_ptr() as *const c_void;
+
+            // SAFETY:
+            // - `self.file_descriptor` is an open file descriptor.
+            // - `ptr` points to the start of `bytes`
+            let n = match unsafe { libc::write(self.file_descriptor, ptr, bytes.len()) } {
+                n if n > 0 => n as usize,
+                _ => return Err(fmt::Error),
+            };
+
+            bytes = &bytes[n..];
         }
-        std::process::exit(2);
+
+        Ok(())
+    }
+}
+
+impl From<fmt::Error> for FaultHandlerErr {
+    /// Convert a [fmt::Error] into a [FaultHandlerErr::Write].
+    /// [fmt::Error] is returned by [fmt::Write]-methods.
+    fn from(_: fmt::Error) -> Self {
+        FaultHandlerErr::Write
     }
 }
diff --git a/mullvad-daemon/src/main.rs b/mullvad-daemon/src/main.rs
index ddf325167a10..3569646ccfbf 100644
--- a/mullvad-daemon/src/main.rs
+++ b/mullvad-daemon/src/main.rs
@@ -137,7 +137,21 @@ fn init_early_boot_logging(config: &cli::Config) {
 }
 
 /// Initialize logging to stderr and to file (if provided).
+///
+/// Also install the [exception_logging] signal handler to log faults.
 fn init_logger(config: &cli::Config, log_file: Option<PathBuf>) -> Result<(), String> {
+    #[cfg(unix)]
+    if let Some(log_file) = &log_file {
+        use std::os::unix::ffi::OsStrExt;
+
+        exception_logging::set_log_file(
+            std::ffi::CString::new(log_file.as_os_str().as_bytes())
+                .map_err(|_| "Log file path contains null-bytes".to_string())?,
+        );
+    }
+
+    exception_logging::enable();
+
     logging::init_logger(
         config.log_level,
         log_file.as_ref(),
@@ -145,7 +159,6 @@ fn init_logger(config: &cli::Config, log_file: Option<PathBuf>) -> Result<(), St
     )
     .map_err(|e| e.display_chain_with_msg("Unable to initialize logger"))?;
     log_panics::init();
-    exception_logging::enable();
     version::log_version();
     Ok(())
 }