From 63623346b9403da86a411c54f5e2d1029e78fcfb Mon Sep 17 00:00:00 2001 From: Reactor Scram Date: Tue, 16 Jul 2024 16:55:29 -0500 Subject: [PATCH] fix(headless-client/windows): improve Client startup times on Windows (#5375) Closes #5026 Closes #5879 On the resource-constrained Windows Server 2022 test VM, the median sign-in time dropped from 5.0 seconds to 2.2 seconds. # Changes - Measure end-to-end connection time in the GUI process - Use `ipconfig` instead of Powershell to flush DNS faster - Activate DNS control by manipulating the Windows Registry directly instead of calling Powershell - Remove deactivate step when changing DNS servers (seals a DNS leak when roaming networks) - Remove completely redundant `Set-DnsClientServerAddress` step from activating DNS control - Remove `Remove-NetRoute` powershell cmdlet that seems to do nothing # Benchmark 7 - Optimized release builds - x86-64 constrained VM (1 CPU thread, 2 GB RAM) Main with measurement added, `c1c99197e` from #5864 - 6.0 s - 5.5 s - 4.1 s - 5.0 s - 4.1 s - (Median = 5.0 s) Main with speedups added, `2128329f9` from #5375, this PR - 3.7 s - 2.2 s - 1.9 s - 2.3 s - 2.0 s - (Median = 2.2 s) ```[tasklist] ### Next steps - [x] Benchmark on the resource-constrained VM - [x] Move raw benchmark data to a comment and summarize in the description - [x] Clean up tasks that don't need to be in the commit - [x] Merge ``` # Hypothetical further optimizations - Ditch the `netsh` subprocess in `set_ips` --------- Signed-off-by: Reactor Scram --- rust/Cargo.lock | 1 + .../tunnel/src/device_channel/tun_windows.rs | 18 +-- rust/gui-client/src-tauri/src/client/gui.rs | 5 + rust/headless-client/Cargo.toml | 1 + rust/headless-client/src/dns_control.rs | 2 +- rust/headless-client/src/dns_control/linux.rs | 27 +++-- .../src/dns_control/linux/etc_resolv_conf.rs | 19 ++- .../src/dns_control/windows.rs | 108 ++++++++++-------- rust/headless-client/src/ipc_service.rs | 29 +++-- rust/headless-client/src/standalone.rs | 23 +++- website/src/components/Changelog/GUI.tsx | 4 +- 11 files changed, 140 insertions(+), 97 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 66703e4ab..fa422bf93 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -1978,6 +1978,7 @@ dependencies = [ "uuid", "windows 0.57.0", "windows-service", + "winreg 0.52.0", ] [[package]] diff --git a/rust/connlib/tunnel/src/device_channel/tun_windows.rs b/rust/connlib/tunnel/src/device_channel/tun_windows.rs index c6310cb25..fe5ba3b2b 100644 --- a/rust/connlib/tunnel/src/device_channel/tun_windows.rs +++ b/rust/connlib/tunnel/src/device_channel/tun_windows.rs @@ -1,12 +1,7 @@ use crate::MTU; -use connlib_shared::{ - windows::{CREATE_NO_WINDOW, TUNNEL_NAME}, - Result, -}; +use connlib_shared::{windows::TUNNEL_NAME, Result}; use std::{ io, - os::windows::process::CommandExt, - process::{Command, Stdio}, str::FromStr, sync::Arc, task::{ready, Context, Poll}, @@ -82,17 +77,6 @@ impl Tun { let adapter = &Adapter::create(&wintun, ADAPTER_NAME, TUNNEL_NAME, Some(uuid))?; let iface_idx = adapter.get_adapter_index()?; - // Remove any routes that were previously associated with us - // TODO: Pick a more elegant way to do this - Command::new("powershell") - .creation_flags(CREATE_NO_WINDOW) - .arg("-Command") - .arg(format!( - "Remove-NetRoute -InterfaceIndex {iface_idx} -Confirm:$false" - )) - .stdout(Stdio::null()) - .status()?; - set_iface_config(adapter.get_luid(), MTU as u32)?; let session = Arc::new(adapter.start_session(RING_BUFFER_SIZE)?); diff --git a/rust/gui-client/src-tauri/src/client/gui.rs b/rust/gui-client/src-tauri/src/client/gui.rs index bdd8a98af..16bde19d9 100644 --- a/rust/gui-client/src-tauri/src/client/gui.rs +++ b/rust/gui-client/src-tauri/src/client/gui.rs @@ -491,10 +491,12 @@ impl Controller { let api_url = self.advanced_settings.api_url.clone(); tracing::info!(api_url = api_url.to_string(), "Starting connlib..."); + // Count the start instant from before we connect let start_instant = Instant::now(); self.ipc_client .connect_to_firezone(api_url.as_str(), token) .await?; + // Change the status after we begin connecting self.status = Status::Connecting { start_instant }; self.refresh_system_tray_menu()?; @@ -804,6 +806,8 @@ async fn run_controller( let mut dns_listener = network_changes::DnsListener::new()?; loop { + // TODO: Add `ControllerRequest::NetworkChange` and `DnsChange` and replace + // `tokio::select!` with a `poll_*` function tokio::select! { () = com_worker.notified() => { let new_have_internet = network_changes::check_internet().context("Failed to check for internet")?; @@ -845,6 +849,7 @@ async fn run_controller( } }, } + // Code down here may not run because the `select` sometimes `continue`s. } if let Err(error) = com_worker.close() { diff --git a/rust/headless-client/Cargo.toml b/rust/headless-client/Cargo.toml index bc3b2ce9a..8d910a318 100644 --- a/rust/headless-client/Cargo.toml +++ b/rust/headless-client/Cargo.toml @@ -56,6 +56,7 @@ ring = "0.17" thiserror = { version = "1.0", default-features = false } tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } windows-service = "0.7.0" +winreg = "0.52.0" [target.'cfg(target_os = "windows")'.dependencies.windows] version = "0.57.0" diff --git a/rust/headless-client/src/dns_control.rs b/rust/headless-client/src/dns_control.rs index 67e23a0cc..7d4569c53 100644 --- a/rust/headless-client/src/dns_control.rs +++ b/rust/headless-client/src/dns_control.rs @@ -8,7 +8,7 @@ mod windows; #[cfg(target_os = "windows")] use windows as platform; -pub(crate) use platform::{deactivate, system_resolvers, DnsController}; +pub(crate) use platform::{system_resolvers, DnsController}; // TODO: Move DNS and network change listening to the IPC service, so this won't // need to be public. diff --git a/rust/headless-client/src/dns_control/linux.rs b/rust/headless-client/src/dns_control/linux.rs index c6cbc461c..30a38f827 100644 --- a/rust/headless-client/src/dns_control/linux.rs +++ b/rust/headless-client/src/dns_control/linux.rs @@ -25,6 +25,11 @@ enum DnsControlMethod { Systemd, } +/// Controls system-wide DNS. +/// +/// Always call `deactivate` when Firezone starts. +/// +/// Only one of these should exist on the entire system at a time. pub(crate) struct DnsController { dns_control_method: Option, } @@ -41,15 +46,23 @@ impl Default for DnsController { impl Drop for DnsController { fn drop(&mut self) { - tracing::debug!("Reverting DNS control..."); - if let Some(DnsControlMethod::EtcResolvConf) = self.dns_control_method { - // TODO: Check that nobody else modified the file while we were running. - etc_resolv_conf::revert().ok(); + if let Err(error) = self.deactivate() { + tracing::error!(?error, "Failed to deactivate DNS control"); } } } impl DnsController { + #[allow(clippy::unnecessary_wraps)] + pub(crate) fn deactivate(&mut self) -> Result<()> { + tracing::debug!("Deactivating DNS control..."); + if let Some(DnsControlMethod::EtcResolvConf) = self.dns_control_method { + // TODO: Check that nobody else modified the file while we were running. + etc_resolv_conf::revert()?; + } + Ok(()) + } + /// Set the computer's system-wide DNS servers /// /// The `mut` in `&mut self` is not needed by Rust's rules, but @@ -185,12 +198,6 @@ fn parse_resolvectl_output(s: &str) -> Vec { .collect() } -// Does nothing on Linux, needed to match the Windows interface -#[allow(clippy::unnecessary_wraps)] -pub(crate) fn deactivate() -> Result<()> { - Ok(()) -} - #[cfg(test)] mod tests { use std::net::IpAddr; diff --git a/rust/headless-client/src/dns_control/linux/etc_resolv_conf.rs b/rust/headless-client/src/dns_control/linux/etc_resolv_conf.rs index ce8e63277..8cc9ffdf5 100644 --- a/rust/headless-client/src/dns_control/linux/etc_resolv_conf.rs +++ b/rust/headless-client/src/dns_control/linux/etc_resolv_conf.rs @@ -1,5 +1,9 @@ use anyhow::{Context, Result}; -use std::{io::Write, net::IpAddr, path::PathBuf}; +use std::{ + io::{self, Write}, + net::IpAddr, + path::PathBuf, +}; pub(crate) const ETC_RESOLV_CONF: &str = "/etc/resolv.conf"; pub(crate) const ETC_RESOLV_CONF_BACKUP: &str = "/etc/resolv.conf.before-firezone"; @@ -36,7 +40,7 @@ pub(crate) async fn configure(dns_config: &[IpAddr]) -> Result<()> { /// Revert changes Firezone made to `/etc/resolv.conf` /// -/// Must be sync because it's called in `Tun::drop` +/// Must be sync because it's called in `Drop` impls #[cfg_attr(test, mutants::skip)] // Would modify system-wide `/etc/resolv.conf` pub(crate) fn revert() -> Result<()> { revert_at_paths(&ResolvPaths::default()) @@ -123,11 +127,18 @@ async fn configure_at_paths(dns_config: &[IpAddr], paths: &ResolvPaths) -> Resul } fn revert_at_paths(paths: &ResolvPaths) -> Result<()> { - std::fs::copy(&paths.backup, &paths.resolv).context("Failed to restore backup")?; + match std::fs::copy(&paths.backup, &paths.resolv) { + Err(e) if e.kind() == io::ErrorKind::NotFound => { + tracing::debug!("Didn't revert `/etc/resolv.conf`, no backup file found"); + return Ok(()); + } + Err(e) => Err(e).context("Failed to restore `/etc/resolv.conf` backup")?, + Ok(_) => {} + } // Don't delete the backup file - If we lose power here, and the revert is rolled back, // we may want it. Filesystems are not atomic by default, and have weak ordering, // so we don't want to end up in a state where the backup is deleted and the revert was rolled back. - tracing::info!("Reverted resolv.conf file"); + tracing::info!("Reverted `/etc/resolv.conf`l"); Ok(()) } diff --git a/rust/headless-client/src/dns_control/windows.rs b/rust/headless-client/src/dns_control/windows.rs index b8a8958a9..791056b50 100644 --- a/rust/headless-client/src/dns_control/windows.rs +++ b/rust/headless-client/src/dns_control/windows.rs @@ -14,13 +14,18 @@ //! use anyhow::{Context as _, Result}; -use connlib_shared::windows::{CREATE_NO_WINDOW, TUNNEL_NAME}; -use std::{net::IpAddr, os::windows::process::CommandExt, process::Command}; +use connlib_shared::windows::CREATE_NO_WINDOW; +use std::{net::IpAddr, os::windows::process::CommandExt, path::Path, process::Command}; pub fn system_resolvers_for_gui() -> Result> { system_resolvers() } +/// Controls system-wide DNS. +/// +/// Always call `deactivate` when Firezone starts. +/// +/// Only one of these should exist on the entire system at a time. #[derive(Default)] pub(crate) struct DnsController {} @@ -30,16 +35,31 @@ const FZ_MAGIC: &str = "firezone-fd0020211111"; impl Drop for DnsController { fn drop(&mut self) { - if let Err(error) = deactivate() { + if let Err(error) = self.deactivate() { tracing::error!(?error, "Failed to deactivate DNS control"); } } } impl DnsController { - /// Set the computer's system-wide DNS servers + /// Deactivate any control Firezone has over the computer's DNS /// - /// There's a gap in this because on Windows we deactivate and re-activate control. + /// Must be `sync` so we can call it from `Drop` + /// TODO: Replace this with manual registry twiddling one day if we feel safe. + pub(crate) fn deactivate(&mut self) -> Result<()> { + Command::new("powershell") + .creation_flags(CREATE_NO_WINDOW) + .args(["-Command", "Get-DnsClientNrptRule", "|"]) + .args(["where", "Comment", "-eq", FZ_MAGIC, "|"]) + .args(["foreach", "{"]) + .args(["Remove-DnsClientNrptRule", "-Name", "$_.Name", "-Force"]) + .args(["}"]) + .status()?; + tracing::info!("Deactivated DNS control"); + Ok(()) + } + + /// Set the computer's system-wide DNS servers /// /// The `mut` in `&mut self` is not needed by Rust's rules, but /// it would be bad if this was called from 2 threads at once. @@ -47,7 +67,6 @@ impl DnsController { /// Must be async to match the Linux signature #[allow(clippy::unused_async)] pub(crate) async fn set_dns(&mut self, dns_config: &[IpAddr]) -> Result<()> { - deactivate().context("Failed to deactivate DNS control")?; activate(dns_config).context("Failed to activate DNS control")?; Ok(()) } @@ -57,9 +76,9 @@ impl DnsController { /// `&self` is needed to match the Linux signature pub(crate) fn flush(&self) -> Result<()> { tracing::debug!("Flushing Windows DNS cache..."); - Command::new("powershell") + Command::new("ipconfig") .creation_flags(CREATE_NO_WINDOW) - .args(["-Command", "Clear-DnsClientCache"]) + .args(["/flushdns"]) .status()?; tracing::debug!("Flushed DNS."); Ok(()) @@ -82,57 +101,48 @@ pub(crate) fn system_resolvers() -> Result> { Ok(resolvers) } +/// A UUID for the Firezone Client NRPT rule, chosen randomly at dev time. +/// +/// Our NRPT rule should always live in the registry at +/// `Computer\HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\Dnscache\Parameters\DnsPolicyConfig\$NRPT_REG_KEY` +/// +/// We can use this UUID as a handle to enable, disable, or modify the rule. +const NRPT_REG_KEY: &str = "{6C0507CB-C884-4A78-BC55-0ACEE21227F6}"; + /// Tells Windows to send all DNS queries to our sentinels /// /// Parameters: /// - `dns_config_string`: Comma-separated IP addresses of DNS servers, e.g. "1.1.1.1,8.8.8.8" -pub(crate) fn activate(dns_config: &[IpAddr]) -> Result<()> { - let dns_config_string = dns_config - .iter() - .map(|ip| format!("\"{ip}\"")) - .collect::>() - .join(","); - - // Set our DNS IP as the DNS server for our interface +fn activate(dns_config: &[IpAddr]) -> Result<()> { // TODO: Known issue where web browsers will keep a connection open to a site, // using QUIC, HTTP/2, or even HTTP/1.1, and so they won't resolve the DNS // again unless you let that connection time out: // - Command::new("powershell") - .creation_flags(CREATE_NO_WINDOW) - .arg("-Command") - .arg(format!( - "Set-DnsClientServerAddress {TUNNEL_NAME} -ServerAddresses({dns_config_string})" - )) - .status()?; tracing::info!("Activating DNS control"); - Command::new("powershell") - .creation_flags(CREATE_NO_WINDOW) - .args([ - "-Command", - "Add-DnsClientNrptRule", - "-Namespace", - ".", - "-Comment", - FZ_MAGIC, - "-NameServers", - &dns_config_string, - ]) - .status()?; - Ok(()) -} + let dns_config_string = dns_config + .iter() + .map(|ip| ip.to_string()) + .collect::>() + .join(";"); + + let hkcu = winreg::RegKey::predef(winreg::enums::HKEY_LOCAL_MACHINE); + let base = Path::new("SYSTEM") + .join("CurrentControlSet") + .join("Services") + .join("Dnscache") + .join("Parameters") + .join("DnsPolicyConfig") + .join(NRPT_REG_KEY); + + let (key, _) = hkcu.create_subkey(base)?; + key.set_value("Comment", &FZ_MAGIC)?; + key.set_value("ConfigOptions", &0x8u32)?; + key.set_value("DisplayName", &"Firezone SplitDNS")?; + key.set_value("GenericDNSServers", &dns_config_string)?; + key.set_value("IPSECCARestriction", &"")?; + key.set_value("Name", &vec!["."])?; + key.set_value("Version", &0x2u32)?; -// Must be `sync` so we can call it from `Drop` -pub(crate) fn deactivate() -> Result<()> { - Command::new("powershell") - .creation_flags(CREATE_NO_WINDOW) - .args(["-Command", "Get-DnsClientNrptRule", "|"]) - .args(["where", "Comment", "-eq", FZ_MAGIC, "|"]) - .args(["foreach", "{"]) - .args(["Remove-DnsClientNrptRule", "-Name", "$_.Name", "-Force"]) - .args(["}"]) - .status()?; - tracing::info!("Deactivated DNS control"); Ok(()) } diff --git a/rust/headless-client/src/ipc_service.rs b/rust/headless-client/src/ipc_service.rs index cc62196bb..880cf832d 100644 --- a/rust/headless-client/src/ipc_service.rs +++ b/rust/headless-client/src/ipc_service.rs @@ -120,13 +120,20 @@ fn run_smoke_test() -> Result<()> { crate::setup_stdout_logging()?; let rt = tokio::runtime::Runtime::new()?; let _guard = rt.enter(); + let mut dns_controller = DnsController::default(); + // Deactivate Firezone DNS control in case the system or IPC service crashed + // and we need to recover. + dns_controller.deactivate()?; let mut signals = signals::Terminate::new()?; // Couldn't get the loop to work here yet, so SIGHUP is not implemented rt.block_on(async { device_id::get_or_create().context("Failed to read / create device ID")?; let mut server = IpcServer::new(ServiceId::Prod).await?; - let _ = Handler::new(&mut server).await?.run(&mut signals).await; + let _ = Handler::new(&mut server, &mut dns_controller) + .await? + .run(&mut signals) + .await; Ok::<_, anyhow::Error>(()) }) } @@ -140,8 +147,9 @@ async fn ipc_listen(signals: &mut signals::Terminate) -> Result<()> { // This also gives the GUI a safe place to put the log filter config device_id::get_or_create().context("Failed to read / create device ID")?; let mut server = IpcServer::new(ServiceId::Prod).await?; + let mut dns_controller = DnsController::default(); loop { - let mut handler_fut = pin!(Handler::new(&mut server)); + let mut handler_fut = pin!(Handler::new(&mut server, &mut dns_controller)); let Some(handler) = poll_fn(|cx| { if let Poll::Ready(()) = signals.poll_recv(cx) { Poll::Ready(None) @@ -165,11 +173,11 @@ async fn ipc_listen(signals: &mut signals::Terminate) -> Result<()> { } /// Handles one IPC client -struct Handler { +struct Handler<'a> { callback_handler: CallbackHandler, cb_rx: mpsc::Receiver, connlib: Option, - dns_controller: DnsController, + dns_controller: &'a mut DnsController, ipc_rx: ipc::ServerRead, ipc_tx: ipc::ServerWrite, last_connlib_start_instant: Option, @@ -193,9 +201,9 @@ enum HandlerOk { ServiceTerminating, } -impl Handler { - async fn new(server: &mut IpcServer) -> Result { - dns_control::deactivate()?; +impl<'a> Handler<'a> { + async fn new(server: &mut IpcServer, dns_controller: &'a mut DnsController) -> Result { + dns_controller.deactivate()?; let (ipc_rx, ipc_tx) = server .next_client_split() .await @@ -207,7 +215,7 @@ impl Handler { callback_handler: CallbackHandler { cb_tx }, cb_rx, connlib: None, - dns_controller: Default::default(), + dns_controller, ipc_rx, ipc_tx, last_connlib_start_instant: None, @@ -295,8 +303,7 @@ impl Handler { // The first `OnUpdateResources` marks when connlib is fully initialized if let IpcServerMsg::OnUpdateResources(_) = &msg { if let Some(instant) = self.last_connlib_start_instant.take() { - let dur = instant.elapsed(); - tracing::info!(?dur, "Connlib started"); + tracing::info!(elapsed = ?instant.elapsed(), "Tunnel ready"); } // On every resources update, flush DNS to mitigate @@ -357,7 +364,7 @@ impl Handler { ClientMsg::Disconnect => { if let Some(connlib) = self.connlib.take() { connlib.disconnect(); - dns_control::deactivate()?; + self.dns_controller.deactivate()?; } else { tracing::error!("Error - Got Disconnect when we're already not connected"); } diff --git a/rust/headless-client/src/standalone.rs b/rust/headless-client/src/standalone.rs index 001650f54..fcd2fe0d6 100644 --- a/rust/headless-client/src/standalone.rs +++ b/rust/headless-client/src/standalone.rs @@ -15,7 +15,7 @@ use std::{ pin::pin, sync::Arc, }; -use tokio::sync::mpsc; +use tokio::{sync::mpsc, time::Instant}; use tokio_stream::wrappers::ReceiverStream; /// Command-line args for the headless Client @@ -46,6 +46,12 @@ struct Cli { #[arg(long)] check: bool, + /// Connect to the Firezone network and initialize, then exit + /// + /// Use this to check how fast you can connect. + #[arg(long)] + exit: bool, + /// Friendly name for this client to display in the UI. #[arg(long, env = "FIREZONE_NAME")] firezone_name: Option, @@ -127,7 +133,6 @@ pub fn run_only_headless_client() -> Result<()> { cli.token_path.display() ) })?; - tracing::info!("Running in headless / standalone mode"); // TODO: Should this default to 30 days? let max_partition_time = cli.common.max_partition_time.map(|d| d.into()); @@ -154,6 +159,8 @@ pub fn run_only_headless_client() -> Result<()> { let (cb_tx, cb_rx) = mpsc::channel(10); let callbacks = CallbackHandler { cb_tx }; + // The name matches that in `ipc_service.rs` + let mut last_connlib_start_instant = Some(Instant::now()); platform::setup_before_connlib()?; let args = ConnectArgs { url, @@ -174,6 +181,9 @@ pub fn run_only_headless_client() -> Result<()> { let mut terminate = pin!(terminate.recv().fuse()); let mut hangup = pin!(hangup.recv().fuse()); let mut dns_controller = DnsController::default(); + // Deactivate Firezone DNS control in case the system or IPC service crashed + // and we need to recover. + dns_controller.deactivate()?; let mut tun_device = TunDeviceManager::new()?; let mut cb_rx = ReceiverStream::new(cb_rx).fuse(); @@ -213,7 +223,14 @@ pub fn run_only_headless_client() -> Result<()> { dns_controller.set_dns(&dns).await?; } InternalServerMsg::OnUpdateRoutes { ipv4, ipv6 } => { - tun_device.set_routes(ipv4, ipv6).await? + tun_device.set_routes(ipv4, ipv6).await?; + if let Some(instant) = last_connlib_start_instant.take() { + tracing::info!(elapsed = ?instant.elapsed(), "Tunnel ready"); + } + if cli.exit { + tracing::info!("Exiting due to `--exit` CLI flag"); + break Ok(()); + } } } } diff --git a/website/src/components/Changelog/GUI.tsx b/website/src/components/Changelog/GUI.tsx index 8a8f0af52..33bbed83c 100644 --- a/website/src/components/Changelog/GUI.tsx +++ b/website/src/components/Changelog/GUI.tsx @@ -17,8 +17,8 @@ export default function GUI({ title }: { title: string }) { If the GUI is open during an update, close it and prompt the user to restart the GUI. - - This is a maintenance release with no user-facing changes. + + Improves sign-in speed and fixes a DNS leak */}