From 7cd99732e87885bb43081356626b7e81e8dcfa04 Mon Sep 17 00:00:00 2001 From: Markus Pettersson Date: Wed, 27 Nov 2024 10:18:34 +0100 Subject: [PATCH 1/3] Improve documentation on how to debug pf --- talpid-core/src/firewall/macos.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/talpid-core/src/firewall/macos.rs b/talpid-core/src/firewall/macos.rs index 24f5030cff76..5d4831816ccc 100644 --- a/talpid-core/src/firewall/macos.rs +++ b/talpid-core/src/firewall/macos.rs @@ -33,7 +33,7 @@ impl Firewall { pub fn new() -> Result { // Allows controlling whether firewall rules should log to pflog0. Useful for debugging the - // rules. + // rules. The firewall rules can be inspected by running `tcpdump -netttti pflog0`. let firewall_debugging = env::var("TALPID_FIREWALL_DEBUG"); let rule_logging = match firewall_debugging.as_ref().map(String::as_str) { Ok("pass") => RuleLogging::Pass, From 3bae761212cd9711c3841fd5501c115394cfa8fd Mon Sep 17 00:00:00 2001 From: Markus Pettersson Date: Wed, 27 Nov 2024 10:16:36 +0100 Subject: [PATCH 2/3] Disable Apple services workaroudns for unaffected macOS versions Do not apply redirect rules for DNS requests to port 53 to loopback on macOS versions that do not need the apple services NAT-redirect workaround. This effectively reverts to the old behaviour were a local DNS resolver is used only in the blocked and connecting states for macOS versions *not* in the version range 14.6 <= version < 15.1. Revert the nat redirect rules that were added to force traffic through the tunnel interface. This hack is no longer needed since it was fixed by apple in macOS 15.1. --- talpid-core/src/dns/macos.rs | 8 +- talpid-core/src/firewall/macos.rs | 122 ++++++++++++------ talpid-core/src/resolver.rs | 36 +++++- .../tunnel_state_machine/connected_state.rs | 73 ++++++----- .../tunnel_state_machine/connecting_state.rs | 69 +++++----- .../src/tunnel_state_machine/error_state.rs | 13 +- 6 files changed, 208 insertions(+), 113 deletions(-) diff --git a/talpid-core/src/dns/macos.rs b/talpid-core/src/dns/macos.rs index 21c243451262..aa40c0c4bc74 100644 --- a/talpid-core/src/dns/macos.rs +++ b/talpid-core/src/dns/macos.rs @@ -78,6 +78,7 @@ impl State { } } + /// Construct [`DnsSettings`] from the arguments and apply the desired addresses to all network services. fn apply_new_config( &mut self, store: &SCDynamicStore, @@ -375,8 +376,9 @@ impl InterfaceSettings { unsafe impl Send for InterfaceSettings {} pub struct DnsMonitor { + /// The backing "System Configuration framework" store, which allow us to access and detect + /// changes to the device's network configuration. store: SCDynamicStore, - /// The current DNS injection state. If this is `None` it means we are not injecting any DNS. /// When it's `Some(state)` we are actively making sure `state.dns_settings` is configured /// on all network interfaces. @@ -403,6 +405,10 @@ impl super::DnsMonitorT for DnsMonitor { }) } + /// Update the system config to use the DNS `config`. + /// + /// Note that the `interface` parameter does nothing on macOS. Since we can't configure DNS + /// on the tunnel interface, we have to configure all interfaces. fn set(&mut self, interface: &str, config: ResolvedDnsConfig) -> Result<()> { let port = config.port; let servers: Vec<_> = config.addresses().collect(); diff --git a/talpid-core/src/firewall/macos.rs b/talpid-core/src/firewall/macos.rs index 5d4831816ccc..a45186fa2328 100644 --- a/talpid-core/src/firewall/macos.rs +++ b/talpid-core/src/firewall/macos.rs @@ -1,17 +1,20 @@ -use super::{FirewallArguments, FirewallPolicy}; +use std::env; +use std::io; +use std::net::{IpAddr, Ipv4Addr}; +use std::ptr; +use std::sync::LazyLock; + use ipnetwork::IpNetwork; use libc::{c_int, sysctlbyname}; -use pfctl::{DropAction, FilterRuleAction, Ip, Uid}; -use std::{ - env, io, - net::{IpAddr, Ipv4Addr}, - ptr, -}; +use pfctl::{DropAction, FilterRuleAction, Ip, RedirectRule, Uid}; use subslice::SubsliceExt; use talpid_types::net::{ - self, AllowedEndpoint, AllowedTunnelTraffic, ALLOWED_LAN_MULTICAST_NETS, ALLOWED_LAN_NETS, + AllowedEndpoint, AllowedTunnelTraffic, TransportProtocol, ALLOWED_LAN_MULTICAST_NETS, + ALLOWED_LAN_NETS, }; +use super::{FirewallArguments, FirewallPolicy}; + pub use pfctl::Error; type Result = std::result::Result; @@ -20,6 +23,27 @@ type Result = std::result::Result; /// replaced by allowing the anchor name to be configured from the public API of this crate. const ANCHOR_NAME: &str = "mullvad"; +/// If NAT firewall rules should be applied to force Apple services through the tunnel. +/// +/// macOS versions 14.6 <= x < 15.1 were affected by a bug where Apple services tried to bypass the +/// tunnel by going out on the physical interface instead. To mitigate this and force all traffic +/// to go through the tunnel we added NAT filtering rules to redirect traffic all deviating traffic +/// to the tunnel. +/// +/// This is not something that we deem is necessary otherwise, and as such we disable NAT filtering +/// on macOS versions that are unaffected by this naughty bug, but keep it were it is necessary for +/// Apple services to function properly together with a VPN. +pub static NAT_WORKAROUND: LazyLock = LazyLock::new(|| { + use talpid_platform_metadata::MacosVersion; + let version = MacosVersion::new().expect("Could not detect macOS version"); + let v = |s| MacosVersion::from_raw_version(s).unwrap(); + let apply_workaround = v("14.6") <= version && version < v("15.1"); + if apply_workaround { + log::debug!("Using NAT redirect workaround"); + }; + apply_workaround +}); + pub struct Firewall { pf: pfctl::PfCtl, pf_was_enabled: Option, @@ -189,7 +213,9 @@ impl Firewall { anchor_change.set_scrub_rules(Self::get_scrub_rules()?); anchor_change.set_filter_rules(new_filter_rules); anchor_change.set_redirect_rules(self.get_dns_redirect_rules(policy)?); - anchor_change.set_nat_rules(self.get_nat_rules(policy)?); + if *NAT_WORKAROUND { + anchor_change.set_nat_rules(self.get_nat_rules(policy)?); + } self.pf.set_rules(ANCHOR_NAME, anchor_change)?; Ok(()) @@ -208,24 +234,44 @@ impl Firewall { &mut self, policy: &FirewallPolicy, ) -> Result> { - let redirect_rules = match policy { - FirewallPolicy::Connected { dns_config, .. } if dns_config.is_loopback() => vec![], - FirewallPolicy::Blocked { - dns_redirect_port, .. - } - | FirewallPolicy::Connecting { - dns_redirect_port, .. + /// Redirect DNS requests to `port`. Technically this redirects UDP on port 53 to `port`. + /// + /// For this to work as expected, please make sure a DNS resolver is running on `port`. + fn redirect_dns_to(port: u16) -> Result> { + let redirect_dns = pfctl::RedirectRuleBuilder::default() + .action(pfctl::RedirectRuleAction::Redirect) + .interface("lo0") + .proto(pfctl::Proto::Udp) + .to(pfctl::Port::from(53)) + .redirect_to(pfctl::Port::from(port)) + .build()?; + Ok(vec![redirect_dns]) + } + + let redirect_rules = if *crate::resolver::LOCAL_DNS_RESOLVER { + match policy { + FirewallPolicy::Connected { dns_config, .. } if dns_config.is_loopback() => { + vec![] + } + FirewallPolicy::Blocked { + dns_redirect_port, .. + } + | FirewallPolicy::Connecting { + dns_redirect_port, .. + } + | FirewallPolicy::Connected { + dns_redirect_port, .. + } => redirect_dns_to(*dns_redirect_port)?, } - | FirewallPolicy::Connected { - dns_redirect_port, .. - } => { - vec![pfctl::RedirectRuleBuilder::default() - .action(pfctl::RedirectRuleAction::Redirect) - .interface("lo0") - .proto(pfctl::Proto::Udp) - .to(pfctl::Port::from(53)) - .redirect_to(pfctl::Port::from(*dns_redirect_port)) - .build()?] + } else { + // Only apply redirect rules in the blocked state if we should *not* use our local DNS + // resolver, since it will be running in the blocked state to work with Apple's captive + // portal check. + match policy { + FirewallPolicy::Blocked { + dns_redirect_port, .. + } => redirect_dns_to(*dns_redirect_port)?, + FirewallPolicy::Connecting { .. } | FirewallPolicy::Connected { .. } => vec![], } }; Ok(redirect_rules) @@ -402,7 +448,9 @@ impl Firewall { &mut self.get_split_tunnel_rules(&tunnel.interface, redirect_interface)?, ); } else { - rules.push(self.route_everything_to(&tunnel.interface)?); + if *NAT_WORKAROUND { + rules.push(self.route_everything_to(&tunnel.interface)?); + } rules.extend(self.get_allow_tunnel_rules( tunnel.interface.as_str(), &AllowedTunnelTraffic::All, @@ -530,10 +578,7 @@ impl Firewall { Ok(rules) } - fn get_allow_relay_rule( - &self, - relay_endpoint: &net::AllowedEndpoint, - ) -> Result { + fn get_allow_relay_rule(&self, relay_endpoint: &AllowedEndpoint) -> Result { let pfctl_proto = as_pfctl_proto(relay_endpoint.endpoint.protocol); let mut builder = self.create_rule_builder(FilterRuleAction::Pass); @@ -919,8 +964,10 @@ impl Firewall { fn add_anchor(&mut self) -> Result<()> { self.pf .try_add_anchor(ANCHOR_NAME, pfctl::AnchorKind::Scrub)?; - self.pf - .try_add_anchor(ANCHOR_NAME, pfctl::AnchorKind::Nat)?; + if *NAT_WORKAROUND { + self.pf + .try_add_anchor(ANCHOR_NAME, pfctl::AnchorKind::Nat)?; + } self.pf .try_add_anchor(ANCHOR_NAME, pfctl::AnchorKind::Filter)?; self.pf @@ -931,6 +978,9 @@ impl Firewall { fn remove_anchor(&mut self) -> Result<()> { self.pf .try_remove_anchor(ANCHOR_NAME, pfctl::AnchorKind::Scrub)?; + // Opportunistically remove Nat anchor. + // This won't fail because `try_remove_anchor` promises to convert + // `pfctl::Error::AnchorDoesNotExist` to an `Ok(())` value. self.pf .try_remove_anchor(ANCHOR_NAME, pfctl::AnchorKind::Nat)?; self.pf @@ -941,10 +991,10 @@ impl Firewall { } } -fn as_pfctl_proto(protocol: net::TransportProtocol) -> pfctl::Proto { +fn as_pfctl_proto(protocol: TransportProtocol) -> pfctl::Proto { match protocol { - net::TransportProtocol::Udp => pfctl::Proto::Udp, - net::TransportProtocol::Tcp => pfctl::Proto::Tcp, + TransportProtocol::Udp => pfctl::Proto::Udp, + TransportProtocol::Tcp => pfctl::Proto::Tcp, } } diff --git a/talpid-core/src/resolver.rs b/talpid-core/src/resolver.rs index 555edfb3e068..77b416fb95d5 100644 --- a/talpid-core/src/resolver.rs +++ b/talpid-core/src/resolver.rs @@ -43,6 +43,40 @@ use hickory_server::{ }; use std::sync::LazyLock; +/// If a local DNS resolver should be used at all times. +/// +/// This setting does not affect the error or blocked state. In those states, we will want to use +/// the local DNS resoler to work around Apple's captive portals check. Exactly how this is done is +/// documented elsewhere. +pub static LOCAL_DNS_RESOLVER: LazyLock = LazyLock::new(|| { + use talpid_platform_metadata::MacosVersion; + let version = MacosVersion::new().expect("Could not detect macOS version"); + let v = |s| MacosVersion::from_raw_version(s).unwrap(); + // Apple services tried to perform DNS lookups on the physical interface on some macOS + // versions, so we added redirect rules to always redirect DNS to our local DNS resolver. + // This seems to break some apps which do not like that we redirect DNS on port 53 to our local + // DNS resolver running on some other, arbitrary port, and so we disable this behaviour on + // macOS versions that are unaffected by this naughty bug. + // + // The workaround should only be applied to the affected macOS versions because some programs + // set the `skip filtering` pf flag on loopback, which meant that the pf filtering would break + // unexpectedly. We could clear the `skip filtering` flag to force pf filtering on loopback, + // but apparently it is good practice to enable `skip filtering` on loopback so we decided + // against this. Source: https://www.openbsd.org/faq/pf/filter.html + // + // It should be noted that most programs still works fine with this workaround enabled. Notably + // programs that use `getaddrinfo` would behave correctly when we redirect DNS to our local + // resolver, while some programs always used port 53 no matter what (nslookup for example). + // Also, most programs don't set the `skip filtering` pf flag on loopback, but some notable + // ones do for some reason. Orbstack is one such example, which meant that people running + // containers would run into the aforementioned issue. + let use_local_dns_resolver = v("14.6") <= version && version < v("15.1"); + if use_local_dns_resolver { + log::debug!("Using local DNS resolver"); + } + use_local_dns_resolver +}); + const ALLOWED_RECORD_TYPES: &[RecordType] = &[RecordType::A, RecordType::CNAME]; const CAPTIVE_PORTAL_DOMAINS: &[&str] = &["captive.apple.com", "netcts.cdn-apple.com"]; @@ -238,7 +272,7 @@ impl ResolverHandle { self.listening_port } - /// Set the DNS server to forward queries to + /// Set the DNS server to forward queries to `dns_servers` pub async fn enable_forward(&self, dns_servers: Vec) { let (response_tx, response_rx) = oneshot::channel(); let _ = self.tx.unbounded_send(ResolverMessage::SetConfig { diff --git a/talpid-core/src/tunnel_state_machine/connected_state.rs b/talpid-core/src/tunnel_state_machine/connected_state.rs index eadb313501e5..0ad41b049c14 100644 --- a/talpid-core/src/tunnel_state_machine/connected_state.rs +++ b/talpid-core/src/tunnel_state_machine/connected_state.rs @@ -1,30 +1,29 @@ -use super::{ - AfterDisconnect, ConnectingState, DisconnectingState, ErrorState, EventConsequence, - EventResult, SharedTunnelStateValues, TunnelCommand, TunnelCommandReceiver, TunnelState, - TunnelStateTransition, -}; -use crate::{ - dns::ResolvedDnsConfig, - firewall::FirewallPolicy, - tunnel::{TunnelEvent, TunnelMetadata}, -}; -use futures::{ - channel::{mpsc, oneshot}, - stream::Fuse, - StreamExt, -}; +use futures::channel::{mpsc, oneshot}; +use futures::stream::Fuse; +use futures::StreamExt; + #[cfg(target_os = "android")] use talpid_tunnel::tun_provider::Error; -use talpid_types::{ - net::{AllowedClients, AllowedEndpoint, TunnelParameters}, - tunnel::{ErrorStateCause, FirewallPolicyError}, - BoxedError, ErrorExt, -}; - +use talpid_types::net::{AllowedClients, AllowedEndpoint, TunnelParameters}; +use talpid_types::tunnel::{ErrorStateCause, FirewallPolicyError}; +use talpid_types::{BoxedError, ErrorExt}; + +#[cfg(target_os = "macos")] +use crate::dns::DnsConfig; +use crate::dns::ResolvedDnsConfig; +use crate::firewall::FirewallPolicy; +#[cfg(target_os = "macos")] +use crate::resolver::LOCAL_DNS_RESOLVER; #[cfg(windows)] use crate::tunnel::TunnelMonitor; +use crate::tunnel::{TunnelEvent, TunnelMetadata}; use super::connecting_state::TunnelCloseEvent; +use super::{ + AfterDisconnect, ConnectingState, DisconnectingState, ErrorState, EventConsequence, + EventResult, SharedTunnelStateValues, TunnelCommand, TunnelCommandReceiver, TunnelState, + TunnelStateTransition, +}; pub(crate) type TunnelEventsReceiver = Fuse)>>; @@ -171,27 +170,31 @@ impl ConnectedState { // On macOS, configure only the local DNS resolver #[cfg(target_os = "macos")] - if !dns_config.is_loopback() { + // We do not want to forward DNS queries to *our* local resolver if we do not run a local + // DNS resolver *or* if the DNS config points to a loopback address. + if dns_config.is_loopback() || !*LOCAL_DNS_RESOLVER { + log::debug!("Not enabling local DNS resolver"); + shared_values + .dns_monitor + .set(&self.metadata.interface, dns_config) + .map_err(BoxedError::new)?; + } else { + log::debug!("Enabling local DNS resolver"); + // Tell local DNS resolver to start forwarding DNS queries to whatever `dns_config` + // specifies as DNS. shared_values.runtime.block_on( shared_values .filtering_resolver .enable_forward(dns_config.addresses().collect()), ); + // Set system DNS to our local DNS resolver + let system_dns = DnsConfig::default().resolve( + &[std::net::Ipv4Addr::LOCALHOST.into()], + shared_values.filtering_resolver.listening_port(), + ); shared_values .dns_monitor - .set( - "lo", - crate::dns::DnsConfig::default().resolve( - &[std::net::Ipv4Addr::LOCALHOST.into()], - shared_values.filtering_resolver.listening_port(), - ), - ) - .map_err(BoxedError::new)?; - } else { - log::debug!("Not enabling DNS forwarding since loopback is used"); - shared_values - .dns_monitor - .set(&self.metadata.interface, dns_config) + .set("lo", system_dns) .map_err(BoxedError::new)?; } diff --git a/talpid-core/src/tunnel_state_machine/connecting_state.rs b/talpid-core/src/tunnel_state_machine/connecting_state.rs index 53ef61475e82..7c7637cd20c2 100644 --- a/talpid-core/src/tunnel_state_machine/connecting_state.rs +++ b/talpid-core/src/tunnel_state_machine/connecting_state.rs @@ -1,34 +1,31 @@ +use std::path::{Path, PathBuf}; +use std::sync::{Arc, Mutex}; +use std::thread; +use std::time::{Duration, Instant}; + +use futures::channel::{mpsc, oneshot}; +use futures::future::Fuse; +use futures::{FutureExt, StreamExt}; +use talpid_routing::RouteManagerHandle; +use talpid_tunnel::tun_provider::TunProvider; +use talpid_tunnel::{EventHook, TunnelArgs, TunnelEvent, TunnelMetadata}; +use talpid_types::net::{AllowedClients, AllowedEndpoint, AllowedTunnelTraffic, TunnelParameters}; +use talpid_types::tunnel::{ErrorStateCause, FirewallPolicyError}; +use talpid_types::ErrorExt; + +use super::connected_state::TunnelEventsReceiver; use super::{ AfterDisconnect, ConnectedState, DisconnectingState, ErrorState, EventConsequence, EventResult, SharedTunnelStateValues, TunnelCommand, TunnelCommandReceiver, TunnelState, TunnelStateTransition, }; -use crate::{ - firewall::FirewallPolicy, - tunnel::{self, TunnelMonitor}, -}; -use futures::{ - channel::{mpsc, oneshot}, - future::Fuse, - FutureExt, StreamExt, -}; -use std::{ - path::{Path, PathBuf}, - sync::{Arc, Mutex}, - thread, - time::{Duration, Instant}, -}; -use talpid_routing::RouteManagerHandle; -use talpid_tunnel::{ - tun_provider::TunProvider, EventHook, TunnelArgs, TunnelEvent, TunnelMetadata, -}; -use talpid_types::{ - net::{AllowedClients, AllowedEndpoint, AllowedTunnelTraffic, TunnelParameters}, - tunnel::{ErrorStateCause, FirewallPolicyError}, - ErrorExt, -}; -use super::connected_state::TunnelEventsReceiver; +#[cfg(target_os = "macos")] +use crate::dns::DnsConfig; +use crate::firewall::FirewallPolicy; +#[cfg(target_os = "macos")] +use crate::resolver::LOCAL_DNS_RESOLVER; +use crate::tunnel::{self, TunnelMonitor}; pub(crate) type TunnelCloseEvent = Fuse>>; @@ -57,17 +54,23 @@ impl ConnectingState { retry_attempt: u32, ) -> (Box, TunnelStateTransition) { #[cfg(target_os = "macos")] - if let Err(err) = shared_values.dns_monitor.set( - "lo", - crate::dns::DnsConfig::default().resolve( + if *LOCAL_DNS_RESOLVER { + // Set system DNS to our local DNS resolver + let system_dns = DnsConfig::default().resolve( &[std::net::Ipv4Addr::LOCALHOST.into()], shared_values.filtering_resolver.listening_port(), - ), - ) { - log::error!( - "{}", - err.display_chain_with_msg("Failed to configure system to use filtering resolver") ); + let _ = shared_values + .dns_monitor + .set("lo", system_dns) + .inspect_err(|err| { + log::error!( + "{}", + err.display_chain_with_msg( + "Failed to configure system to use filtering resolver" + ) + ); + }); } if shared_values.connectivity.is_offline() { diff --git a/talpid-core/src/tunnel_state_machine/error_state.rs b/talpid-core/src/tunnel_state_machine/error_state.rs index bdd8e9a014f8..0cf392a0e572 100644 --- a/talpid-core/src/tunnel_state_machine/error_state.rs +++ b/talpid-core/src/tunnel_state_machine/error_state.rs @@ -36,13 +36,12 @@ impl ErrorState { #[cfg(target_os = "macos")] if !block_reason.prevents_filtering_resolver() { - if let Err(err) = shared_values.dns_monitor.set( - "lo", - DnsConfig::default().resolve( - &[Ipv4Addr::LOCALHOST.into()], - shared_values.filtering_resolver.listening_port(), - ), - ) { + // Set system DNS to our local DNS resolver + let system_dns = DnsConfig::default().resolve( + &[Ipv4Addr::LOCALHOST.into()], + shared_values.filtering_resolver.listening_port(), + ); + if let Err(err) = shared_values.dns_monitor.set("lo", system_dns) { log::error!( "{}", err.display_chain_with_msg( From a24049a9a25c18b368aca1213fa5b3eb660d9e39 Mon Sep 17 00:00:00 2001 From: Markus Pettersson Date: Mon, 2 Dec 2024 15:37:41 +0100 Subject: [PATCH 3/3] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index addd434290e6..3412572eed27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,8 @@ Line wrap the file at 100 chars. Th #### macOS - Fix packets being duplicated on LAN when split tunneling is enabled. +- Fix DNS issues caused by forcibly using a local DNS resolver in all states. + Note that this fix is not present on macOS versions between 14.6 and 15.1. ### Security - Disable unix signal handler in release builds. The code was not signal safe and could potentially