From fa790b231ae4cdde42b15dc5bdb5175bd0138660 Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Fri, 9 May 2025 15:25:48 +0930 Subject: [PATCH] fix(gateway): respond with SERVFAIL for missing nameserver (#9061) When we implemented #8350, we chose an error handling strategy that would shutdown the Gateway in case we didn't have a nameserver selected for handling those SRV and TXT queries. At the time, this was deemed to be sufficiently rare to be an adequate strategy. We have since learned that this can indeed happen when the Gateway starts without network connectivity which is quite common when using tools such as terraform to provision infrastructure. In #9060, we fix this by re-evaluating the fastest nameserver on a timer. This however doesn't change the error handling strategy when we don't have a working nameserver at all. It is practically impossible to have a working Gateway yet us being unable to select a nameserver. We read them from `/etc/resolv.conf` which is what `libc` uses to also resolve the domain we connect to for the WebSocket. A working WebSocket connection is required for us to establish connections to Clients, which in turn is a precursor to us receiving DNS queries from a Client. It causes unnecessary complexity to have a code path that can potentially terminate the Gateway, yet is practically unreachable. To fix this situation, we remove this code path and instead reply with a DNS SERVFAIL error. --------- Signed-off-by: Thomas Eizinger Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- rust/connlib/tunnel/src/io.rs | 13 ++----------- rust/connlib/tunnel/src/lib.rs | 23 ++++++++++++++++++++--- rust/gateway/src/eventloop.rs | 6 ------ 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/rust/connlib/tunnel/src/io.rs b/rust/connlib/tunnel/src/io.rs index 8febe9e7b..cb4e5ebb0 100644 --- a/rust/connlib/tunnel/src/io.rs +++ b/rust/connlib/tunnel/src/io.rs @@ -149,13 +149,8 @@ impl Io { self.sockets.poll_has_sockets(cx) } - pub fn fastest_nameserver(&self) -> io::Result { - let ns = self - .nameservers - .fastest() - .ok_or(io::Error::other(NoNameserverAvailable))?; - - Ok(ns) + pub fn fastest_nameserver(&self) -> Option { + self.nameservers.fastest() } pub fn poll<'b>( @@ -418,10 +413,6 @@ impl Io { } } -#[derive(Debug, thiserror::Error)] -#[error("No nameserver available to handle DNS query")] -pub struct NoNameserverAvailable; - fn is_max_wg_packet_size(d: &DatagramIn) -> bool { let len = d.packet.len(); if len > MAX_FZ_PAYLOAD { diff --git a/rust/connlib/tunnel/src/lib.rs b/rust/connlib/tunnel/src/lib.rs index 64a303277..afc142c15 100644 --- a/rust/connlib/tunnel/src/lib.rs +++ b/rust/connlib/tunnel/src/lib.rs @@ -70,7 +70,6 @@ pub type ClientTunnel = Tunnel; pub use client::ClientState; pub use gateway::{DnsResourceNatEntry, GatewayState, ResolveDnsRequest}; -pub use io::NoNameserverAvailable; pub use sockets::UdpSocketThreadStopped; pub use utils::turn; @@ -352,7 +351,16 @@ impl GatewayTunnel { continue; } Poll::Ready(io::Input::UdpDnsQuery(query)) => { - let nameserver = self.io.fastest_nameserver()?; + let Some(nameserver) = self.io.fastest_nameserver() else { + tracing::warn!(query = ?query.message, "No nameserver available to handle UDP DNS query"); + + self.io.send_udp_dns_response( + query.source, + dns_types::Response::servfail(&query.message), + )?; + + continue; + }; self.io.send_dns_query(dns::RecursiveQuery::via_udp( query.source, @@ -361,7 +369,16 @@ impl GatewayTunnel { )); } Poll::Ready(io::Input::TcpDnsQuery(query)) => { - let nameserver = self.io.fastest_nameserver()?; + let Some(nameserver) = self.io.fastest_nameserver() else { + tracing::warn!(query = ?query.message, "No nameserver available to handle TCP DNS query"); + + self.io.send_tcp_dns_response( + query.remote, + dns_types::Response::servfail(&query.message), + )?; + + continue; + }; self.io.send_dns_query(dns::RecursiveQuery::via_tcp( query.local, diff --git a/rust/gateway/src/eventloop.rs b/rust/gateway/src/eventloop.rs index 59b029d28..fa6b3d756 100644 --- a/rust/gateway/src/eventloop.rs +++ b/rust/gateway/src/eventloop.rs @@ -122,12 +122,6 @@ impl Eventloop { continue; } - if e.root_cause() - .is::() - { - return Poll::Ready(Err(e)); - } - if e.root_cause() .is::() {