fix(windows): patch some DNS leaks (#4530)

Fixes #4488 

```[tasklist]
# Before merging
- [x] There's one call site that won't compile on Linux. Make this cross-platform.
- [x] Does the rule get removed every time when you quit gracefully?
- [x] Will this NRPT rule prevent connlib from re-resolving the portal IP if it needs to?
- [x] Test network switching. Does this work worse, better, or the same?
- [ ] Is the Windows DNS cache flushed exactly when it needs to be?
```

- After connlib connects to the portal, we add an NRPT rule asking
Windows to send **all** DNS queries to our sentinels. This should also
be called whenever the interface is re-configured, which might change
the sentinel IPs
- When exiting gracefully, we delete the rule to restore normal DNS
behavior without having to back up and restore the other IPs
- We also delete the rule at startup so that if Firezone crashes or
misbehaves, restarting it should restore normal DNS
- We also flush the system-wide DNS cache whenever we claim different
routes. This may flush too often, and it may also miss some flushes that
we should do. It needs double-checking.
- There is still a gap when changing networks, DNS can leak there, but I
don't think it's worse than before.
This commit is contained in:
Reactor Scram
2024-04-15 16:10:30 -05:00
committed by GitHub
parent 7bc1d51b0f
commit 53968063a5
7 changed files with 153 additions and 30 deletions

View File

@@ -39,7 +39,6 @@ proptest = { version = "1.4.0", optional = true }
log = "0.4"
[dev-dependencies]
anyhow = "1.0"
itertools = "0.12"
tempfile = "3.10.1"
mutants = "0.0.3" # Needed to mark functions as exempt from `cargo-mutants` testing

View File

@@ -51,6 +51,19 @@ pub const BUNDLE_ID: &str = "dev.firezone.client";
const VERSION: &str = env!("CARGO_PKG_VERSION");
const LIB_NAME: &str = "connlib";
/// Deactivates DNS control on Windows
#[cfg(target_os = "windows")]
pub fn deactivate_dns_control() -> anyhow::Result<()> {
windows::dns::deactivate()
}
/// Deactivates DNS control on Windows
#[cfg(not(target_os = "windows"))]
#[allow(clippy::unnecessary_wraps)]
pub fn deactivate_dns_control() -> anyhow::Result<()> {
Ok(())
}
pub fn keypair() -> (StaticSecret, PublicKey) {
let private_key = StaticSecret::random_from_rng(OsRng);
let public_key = PublicKey::from(&private_key);

View File

@@ -16,6 +16,113 @@ pub fn app_local_data_dir() -> Result<PathBuf, Error> {
Ok(path)
}
pub mod dns {
//! Gives Firezone DNS privilege over other DNS resolvers on the system
//!
//! This uses NRPT and claims all domains, similar to the `systemd-resolved` control method
//! on Linux.
//! This allows us to "shadow" DNS resolvers that are configured by the user or DHCP on
//! physical interfaces, as long as they don't have any NRPT rules that outrank us.
//!
//! If Firezone crashes, restarting Firezone and closing it gracefully will resume
//! normal DNS operation. The Powershell command to remove the NRPT rule can also be run
//! by hand.
//!
//! The system default resolvers don't need to be reverted because they're never deleted.
//!
//! <https://superuser.com/a/1752670>
use anyhow::Result;
use std::{net::IpAddr, os::windows::process::CommandExt, process::Command};
/// Hides Powershell's console on Windows
///
/// <https://stackoverflow.com/questions/59692146/is-it-possible-to-use-the-standard-library-to-spawn-a-process-without-showing-th#60958956>
/// Also used for self-elevation
const CREATE_NO_WINDOW: u32 = 0x08000000;
// Unique magic number that we can use to delete our well-known NRPT rule.
// Copied from the deep link schema
const FZ_MAGIC: &str = "firezone-fd0020211111";
/// Tells Windows to send all DNS queries to our sentinels
///
/// Parameters:
/// - `dns_config_string`: Comma-separated IP addresses of DNS servers, e.g. "1.1.1.1,8.8.8.8"
pub fn activate(dns_config: &[IpAddr], iface_idx: u32) -> Result<()> {
let dns_config_string = dns_config
.iter()
.map(|ip| format!("\"{ip}\""))
.collect::<Vec<_>>()
.join(",");
// Set our DNS IP as the DNS server for our interface
// TODO: Known issue where web browsers will keep a connection open to a site,
// using QUIC, HTTP/2, or even HTTP/1.1, and so they won't resolve the DNS
// again unless you let that connection time out:
// <https://github.com/firezone/firezone/issues/3113#issuecomment-1882096111>
// TODO: If we have a Windows gateway, it shouldn't configure DNS, right?
Command::new("powershell")
.creation_flags(CREATE_NO_WINDOW)
.arg("-Command")
.arg(format!("Set-DnsClientServerAddress -InterfaceIndex {iface_idx} -ServerAddresses({dns_config_string})"))
.status()?;
tracing::info!("Activating DNS control");
Command::new("powershell")
.creation_flags(CREATE_NO_WINDOW)
.args([
"-Command",
"Add-DnsClientNrptRule",
"-Namespace",
".",
"-Comment",
FZ_MAGIC,
"-NameServers",
&dns_config_string,
])
.status()?;
Ok(())
}
/// Tells Windows to send all DNS queries to this new set of sentinels
///
/// Currently implemented as just removing the rule and re-adding it, which
/// creates a gap but doesn't require us to parse Powershell output to figure
/// out the rule's UUID.
///
/// Parameters:
/// - `dns_config_string` - Passed verbatim to [`activate`]
pub fn change(dns_config: &[IpAddr], iface_idx: u32) -> Result<()> {
deactivate()?;
activate(dns_config, iface_idx)?;
Ok(())
}
pub fn deactivate() -> Result<()> {
Command::new("powershell")
.creation_flags(CREATE_NO_WINDOW)
.args(["-Command", "Get-DnsClientNrptRule", "|"])
.args(["where", "Comment", "-eq", FZ_MAGIC, "|"])
.args(["foreach", "{"])
.args(["Remove-DnsClientNrptRule", "-Name", "$_.Name", "-Force"])
.args(["}"])
.status()?;
tracing::info!("Deactivated DNS control");
Ok(())
}
/// Flush Windows' system-wide DNS cache
pub fn flush() -> Result<()> {
tracing::info!("Flushing Windows DNS cache");
Command::new("powershell")
.creation_flags(CREATE_NO_WINDOW)
.args(["-Command", "Clear-DnsClientCache"])
.status()?;
Ok(())
}
}
/// Returns the absolute path for installing and loading `wintun.dll`
///
/// e.g. `C:\Users\User\AppData\Local\dev.firezone.client\data\wintun.dll`

View File

@@ -39,6 +39,9 @@ pub struct Tun {
impl Drop for Tun {
fn drop(&mut self) {
if let Err(error) = connlib_shared::windows::dns::deactivate() {
tracing::error!(?error, "Failed to deactivate DNS control");
}
if let Err(e) = self.session.shutdown() {
tracing::error!("wintun::Session::shutdown: {e:#?}");
}
@@ -137,25 +140,8 @@ impl Tun {
let iface_idx = self.adapter.get_adapter_index()?;
// Set our DNS IP as the DNS server for our interface
// TODO: Known issue where web browsers will keep a connection open to a site,
// using QUIC, HTTP/2, or even HTTP/1.1, and so they won't resolve the DNS
// again unless you let that connection time out:
// <https://github.com/firezone/firezone/issues/3113#issuecomment-1882096111>
// TODO: If we have a Windows gateway, it shouldn't configure DNS, right?
Command::new("powershell")
.creation_flags(CREATE_NO_WINDOW)
.arg("-Command")
.arg(format!(
"Set-DnsClientServerAddress -InterfaceIndex {iface_idx} -ServerAddresses({})",
dns_config
.iter()
.map(|ip| format!("\"{ip}\""))
.collect::<Vec<_>>()
.join(",")
))
.stdout(Stdio::null())
.status()?;
connlib_shared::windows::dns::change(dns_config, iface_idx)
.expect("Should be able to control DNS");
Ok(())
}
@@ -176,6 +162,9 @@ impl Tun {
self.routes = new_routes;
// TODO: Might be calling this more often than it needs
connlib_shared::windows::dns::flush().expect("Should be able to flush Windows' DNS cache");
Ok(())
}

View File

@@ -71,8 +71,9 @@ mod imp {
}
pub(crate) fn elevate() -> Result<()> {
// Hides Powershell's console on Windows
// <https://stackoverflow.com/questions/59692146/is-it-possible-to-use-the-standard-library-to-spawn-a-process-without-showing-th#60958956>
/// Hides Powershell's console on Windows
///
/// <https://stackoverflow.com/questions/59692146/is-it-possible-to-use-the-standard-library-to-spawn-a-process-without-showing-th#60958956>
const CREATE_NO_WINDOW: u32 = 0x08000000;
let current_exe = tauri_utils::platform::current_exe()?;

View File

@@ -280,20 +280,21 @@ pub(crate) fn run(cli: client::Cli) -> Result<(), Error> {
// This seems to be a platform limitation that Tauri is unable to hide
// from us. It was the source of much consternation at time of writing.
match task.await {
let exit_code = match task.await {
Err(error) => {
tracing::error!(?error, "run_controller panicked");
app_handle.exit(1);
1
}
Ok(Err(error)) => {
tracing::error!(?error, "run_controller returned an error");
app_handle.exit(1);
1
}
Ok(Ok(_)) => {
tracing::info!("GUI controller task exited cleanly. Exiting process");
app_handle.exit(0);
}
}
Ok(Ok(_)) => 0,
};
cleanup();
tracing::info!(?exit_code);
app_handle.exit(exit_code);
});
Ok(())
};
@@ -334,6 +335,13 @@ pub(crate) fn run(cli: client::Cli) -> Result<(), Error> {
Ok(())
}
/// Best-effort cleanup of things like DNS control before graceful exit
fn cleanup() {
// Do this redundant deactivation because `Tun` will not automatically Drop before
// the process exits
connlib_shared::deactivate_dns_control().ok();
}
/// Runs a smoke test and then asks Controller to exit gracefully
///
/// You can purposely fail this test by deleting the exported zip file during

View File

@@ -76,6 +76,12 @@ pub fn connect(
let login = LoginUrl::client(api_url, &token, device_id.id, None, public_key.to_bytes())?;
// Deactivate DNS control since that can prevent us from bootstrapping a connection
// to the portal. Maybe we could bring up a sentinel resolver before
// connecting to the portal, but right now the portal seems to need system DNS
// for the first connection.
connlib_shared::deactivate_dns_control()?;
// All direct calls into connlib must be in the tunnel process
let session = connlib_client_shared::Session::connect(
login,