feat(gateway): revise handling of DNS resolution errors (#10623)

Even prior to #10373, failures in resolving a name on the Gateway for a
DNS resource resulted in a failure of setting up the DNS resource NAT.
Without the DNS resource NAT, packets for that resource bounced on the
Gateway because we didn't have any traffic filters.

A non-existent filter is being treated as a "traffic not allowed" error
and we respond with an ICMP permission denied error. For domains where
both the A and AAAA query result in NXDOMAIN, that isn't necessarily
appropriate. Instead, I am proposing that for such cases, we want to
return a regular "address/host unreachable" ICMP error instead of the
more specific "permission denied" variant.

To achieve that, we refactor the Gateway's peer state to be able to hold
an `Option<IpAddr>` inside the `TranslationState`. This allows us to
always insert an entry for each proxy IP, even if we did not resolve any
IPs for it. Then, when receiving traffic for a proxy IP where the
resolved IP is `None`, we reply with the appropriate ICMP error.

As part of this, we also simplify the assignment of the proxy IPs. With
the NAT64 module removed, there is no more reason to cross-assign IPv4
and IPv6 addresses. We can simply leave the mappings for e.g. IPv6 proxy
addresses empty if the AAAA query didn't resolve anything.

From the Client's perspective, not much changes. The DNS resource NAT
setup will now succeed, even for domains that don't resolve to anything.
This doesn't change any behaviour though as we are currently already
passing packets through for failed DNS resource NAT setups. The main
change is that we now send back a different ICMP error. Most
importantly, the "address/host unreachable variant" does not trigger
#10462.
This commit is contained in:
Thomas Eizinger
2025-10-23 06:14:45 +11:00
committed by GitHub
parent d734ee0d21
commit ed2bc0bd25
9 changed files with 125 additions and 77 deletions

View File

@@ -116,6 +116,7 @@ jobs:
rg --count --no-ignore "Revoking resource authorization" "$TESTCASES_DIR"
rg --count --no-ignore "Re-seeding records for DNS resources" "$TESTCASES_DIR"
rg --count --no-ignore "Resource is known but its addressability changed" "$TESTCASES_DIR"
rg --count --no-ignore "No A / AAAA records for domain" "$TESTCASES_DIR"
# Make sure we are recovering from ICE disconnect
rg --count --no-ignore "State change \(got new possible\): Disconnected -> Checking" "$TESTCASES_DIR"

View File

@@ -245,6 +245,7 @@ services:
RUST_LOG: ${RUST_LOG:-wire=trace,debug,flow_logs=trace}
FIREZONE_API_URL: ws://api:8081
FIREZONE_ID: 4694E56C-7643-4A15-9DF3-638E5B05F570
FZFF_GATEWAY_USERSPACE_DNS_A_AAAA_RECORDS: true
command:
- sh
- -c

View File

@@ -220,3 +220,5 @@ cc 3bdd819cda2577278b0372cb7598418227ecab83271c48f5b28dc192f766061e
cc 764c22e664da06820cd02cba259196edeec94cce45e450959ce9354be7bc9f1c
cc 04193ee1047f542c469aa0893bf636df9c317943022d922e231de3e821b39486
cc e8520f159df085f7dbe6dce8b121336d33708af9f804a8a14bf6b5a3eb3a9d4d
cc fd95c0fcd3af20d73849004cb642b09c5bacfa7ca25781d0268441d49fe3b6cf
cc 4f65d01188bb870aa8f4893530f77ace3434c133cf24735619196df6d043f4cf

View File

@@ -10,6 +10,7 @@ use dns_types::DomainName;
use ip_network::{IpNetwork, Ipv4Network, Ipv6Network};
use ip_network_table::IpNetworkTable;
use ip_packet::{IpPacket, Protocol, UnsupportedProtocol};
use itertools::{EitherOrBoth, Itertools};
use crate::client::{IPV4_RESOURCES, IPV6_RESOURCES};
use crate::gateway::filter_engine::FilterEngine;
@@ -105,35 +106,51 @@ impl ClientOnGateway {
anyhow::ensure!(crate::dns::is_subdomain(&name, address));
let mapped_ipv4 = mapped_ipv4(&resolved_ips);
let mapped_ipv6 = mapped_ipv6(&resolved_ips);
if resolved_ips.is_empty() {
tracing::debug!(domain = %name, %resource_id, "No A / AAAA records for domain")
}
let ipv4_maps = proxy_ips
.iter()
.filter(|ip| ip.is_ipv4())
.zip(mapped_ipv4.iter().cycle().copied());
let ipv4_maps = proxy_ips.iter().filter(|ip| ip.is_ipv4()).zip_longest(
resolved_ips
.iter()
.filter(|ip| ip.is_ipv4())
.copied()
.cycle(),
);
let ipv6_maps = proxy_ips.iter().filter(|ip| ip.is_ipv6()).zip_longest(
resolved_ips
.iter()
.filter(|ip| ip.is_ipv6())
.copied()
.cycle(),
);
let ipv6_maps = proxy_ips
.iter()
.filter(|ip| ip.is_ipv6())
.zip(mapped_ipv6.iter().cycle().copied());
let mut ip_maps = ipv4_maps.chain(ipv6_maps);
let ip_maps = ipv4_maps.chain(ipv6_maps);
loop {
let Some(either_or_both) = ip_maps.next() else {
break;
};
let (proxy_ip, maybe_real_ip) = match either_or_both {
EitherOrBoth::Both(proxy_ip, real_ip) => (proxy_ip, Some(real_ip)),
EitherOrBoth::Left(proxy_ip) => (proxy_ip, None),
EitherOrBoth::Right(_) => break,
};
for (proxy_ip, real_ip) in ip_maps {
if let Some(state) = self.permanent_translations.get(proxy_ip)
&& self.nat_table.has_entry_for_inside(*proxy_ip)
&& state.resolved_ip != real_ip
&& state.resolved_ip != maybe_real_ip
{
tracing::debug!(%name, %proxy_ip, new_real_ip = %real_ip, current_real_ip = %state.resolved_ip, "Skipping DNS resource NAT entry because we have open NAT sessions for it");
tracing::debug!(%name, %proxy_ip, new_real_ip = ?maybe_real_ip, current_real_ip = ?state.resolved_ip, "Skipping DNS resource NAT entry because we have open NAT sessions for it");
continue;
}
tracing::debug!(%name, %proxy_ip, %real_ip);
tracing::debug!(%name, %proxy_ip, real_ip = ?maybe_real_ip);
self.permanent_translations.insert(
*proxy_ip,
TranslationState::new(resource_id, real_ip, name.clone()),
TranslationState::new(resource_id, maybe_real_ip, name.clone()),
);
}
@@ -404,10 +421,16 @@ impl ClientOnGateway {
));
};
if state.resolved_ip.is_ipv4() != dst.is_ipv4() {
let Some(resolved_ip) = state.resolved_ip else {
return Ok(TranslateOutboundResult::DestinationUnreachable(
ip_packet::make::icmp_dest_unreachable_network(&packet)?,
));
};
if resolved_ip.is_ipv4() != dst.is_ipv4() {
tracing::debug!(
%dst,
resolved = %state.resolved_ip,
resolved = %resolved_ip,
"Cannot translate between IP versions"
);
@@ -418,7 +441,7 @@ impl ClientOnGateway {
let (source_protocol, real_ip) =
self.nat_table
.translate_outgoing(&packet, state.resolved_ip, now)?;
.translate_outgoing(&packet, resolved_ip, now)?;
packet
.translate_destination(source_protocol, real_ip)
@@ -651,13 +674,13 @@ struct TranslationState {
/// Which (DNS) resource we belong to.
resource_id: ResourceId,
/// The IP we have resolved for the domain.
resolved_ip: IpAddr,
resolved_ip: Option<IpAddr>,
/// The domain we have resolved.
domain: DomainName,
}
impl TranslationState {
fn new(resource_id: ResourceId, resolved_ip: IpAddr, domain: DomainName) -> Self {
fn new(resource_id: ResourceId, resolved_ip: Option<IpAddr>, domain: DomainName) -> Self {
Self {
resource_id,
resolved_ip,
@@ -666,30 +689,6 @@ impl TranslationState {
}
}
fn ipv4_addresses(ip: &BTreeSet<IpAddr>) -> BTreeSet<IpAddr> {
ip.iter().filter(|ip| ip.is_ipv4()).copied().collect()
}
fn ipv6_addresses(ip: &BTreeSet<IpAddr>) -> BTreeSet<IpAddr> {
ip.iter().filter(|ip| ip.is_ipv6()).copied().collect()
}
fn mapped_ipv4(ips: &BTreeSet<IpAddr>) -> BTreeSet<IpAddr> {
if !ipv4_addresses(ips).is_empty() {
ipv4_addresses(ips)
} else {
ipv6_addresses(ips)
}
}
fn mapped_ipv6(ips: &BTreeSet<IpAddr>) -> BTreeSet<IpAddr> {
if !ipv6_addresses(ips).is_empty() {
ipv6_addresses(ips)
} else {
ipv4_addresses(ips)
}
}
fn is_dns_addr(addr: IpAddr) -> bool {
IpNetwork::from(IPV4_RESOURCES).contains(addr) || IpNetwork::from(IPV6_RESOURCES).contains(addr)
}
@@ -1280,7 +1279,6 @@ mod proptests {
};
use crate::proptest::*;
use ip_packet::make::{TcpFlags, icmp_request_packet, tcp_packet, udp_packet};
use itertools::Itertools as _;
use proptest::{
arbitrary::any,
collection, prop_oneof,

View File

@@ -1,4 +1,5 @@
use connlib_model::{ClientId, GatewayId, IpStack, RelayId, ResourceId, Site, SiteId};
use dns_types::DomainName;
use ip_network::{IpNetwork, Ipv4Network, Ipv6Network};
use proptest::{
arbitrary::{any, any_with},
@@ -38,7 +39,7 @@ pub fn dns_resource(sites: impl Strategy<Value = Vec<Site>>) -> impl Strategy<Va
.prop_map(
move |(id, name, address, address_description, ip_stack, sites)| DnsResource {
id,
address,
address: address.to_string(),
name,
sites,
address_description,
@@ -132,8 +133,10 @@ pub fn domain_label() -> impl Strategy<Value = String> {
any_with::<String>("[a-z]{3,6}".into())
}
pub fn domain_name(depth: Range<usize>) -> impl Strategy<Value = String> {
collection::vec(domain_label(), depth).prop_map(|labels| labels.join("."))
pub fn domain_name(depth: Range<usize>) -> impl Strategy<Value = DomainName> {
collection::vec(domain_label(), depth)
.prop_map(|labels| labels.join("."))
.prop_map(|d| d.parse().unwrap())
}
/// A strategy of IP networks, configurable by the size of the host mask.

View File

@@ -42,10 +42,6 @@ impl DnsRecords {
self.inner.keys().cloned()
}
pub(crate) fn contains_domain(&self, name: &DomainName) -> bool {
self.inner.contains_key(name)
}
pub(crate) fn merge(&mut self, other: Self) {
self.inner.extend(other.inner);
}

View File

@@ -5,6 +5,7 @@ use super::{
strategies::*, stub_portal::StubPortal, transition::*,
};
use crate::client;
use crate::proptest::domain_label;
use crate::{dns::is_subdomain, proptest::relay_id};
use connlib_model::{GatewayId, RelayId, Site, StaticSecret};
use dns_types::{DomainName, RecordType};
@@ -351,6 +352,33 @@ impl ReferenceState {
.prop_map(Transition::SendDnsQueries)
},
)
.with_if_not_empty(
2,
(
state.wildcard_dns_resources_on_client(),
state.reachable_dns_servers(),
),
|(wildcard_dns_resources, dns_servers)| {
dns_queries(
(
sample::select(wildcard_dns_resources).prop_flat_map(|r| {
let base = r.address.trim_start_matches("*.").to_owned();
domain_label().prop_map(move |label| {
format!("{label}.{base}").parse().unwrap()
})
}),
prop_oneof![
Just(vec![RecordType::A]),
Just(vec![RecordType::AAAA]),
Just(vec![RecordType::A, RecordType::AAAA])
],
),
sample::select(dns_servers),
)
.prop_map(Transition::SendDnsQueries)
},
)
.with_if_not_empty(
1,
state
@@ -683,12 +711,6 @@ impl ReferenceState {
.sending_socket_for(query.dns_server.ip())
.is_some();
let is_ptr_query = matches!(query.r_type, RecordType::PTR);
let is_known_domain = state.global_dns_records.contains_domain(&query.domain);
// In case we sampled a PTR query, the domain doesn't have to exist.
let ptr_or_known_domain = is_ptr_query || is_known_domain;
let has_dns_server = state
.client
.inner()
@@ -707,7 +729,6 @@ impl ReferenceState {
};
has_socket_for_server
&& ptr_or_known_domain
&& has_dns_server
&& gateway_is_present_in_case_dns_server_is_cidr_resource
}),
@@ -873,6 +894,19 @@ impl ReferenceState {
.collect()
}
fn wildcard_dns_resources_on_client(&self) -> Vec<client::DnsResource> {
self.portal
.all_resources()
.into_iter()
.flat_map(|r| match r {
client::Resource::Dns(r) => Some(r),
client::Resource::Cidr(_) | client::Resource::Internet(_) => None,
})
.filter(|r| self.client.inner().has_resource(r.id))
.filter(|r| r.address.starts_with("*."))
.collect()
}
fn regular_sites(&self) -> Vec<Site> {
let all_sites = self
.portal

View File

@@ -23,7 +23,7 @@ use std::{
pub(crate) fn global_dns_records() -> impl Strategy<Value = DnsRecords> {
collection::btree_map(
domain_name(2..4).prop_map(|d| d.parse().unwrap()),
domain_name(2..4),
collection::btree_set(dns_record(), 1..6),
0..5,
)
@@ -62,15 +62,9 @@ fn txt_record() -> impl Strategy<Value = Vec<u8>> {
}
fn srv_record() -> impl Strategy<Value = OwnedRecordData> {
(
any::<u16>(),
any::<u16>(),
any::<u16>(),
domain_name(2..4).prop_map(|d| d.parse().unwrap()),
(any::<u16>(), any::<u16>(), any::<u16>(), domain_name(2..4)).prop_map(
|(priority, weight, port, target)| dns_types::records::srv(priority, weight, port, target),
)
.prop_map(|(priority, weight, port, target)| {
dns_types::records::srv(priority, weight, port, target)
})
}
pub(crate) fn latency(max: u64) -> impl Strategy<Value = Duration> {

View File

@@ -16,7 +16,7 @@ use firezone_tunnel::{
DnsResourceNatEntry, GatewayEvent, GatewayTunnel, IPV4_TUNNEL, IPV6_TUNNEL, IpConfig,
ResolveDnsRequest, TunnelError,
};
use futures::FutureExt as _;
use futures::{FutureExt as _, TryFutureExt};
use hickory_resolver::TokioResolver;
use phoenix_channel::{PhoenixChannel, PublicKeyParam};
use std::collections::{BTreeMap, BTreeSet};
@@ -27,7 +27,7 @@ use std::pin::pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::{Duration, Instant};
use std::{io, mem};
use std::{io, iter, mem};
use tokio::sync::mpsc;
use crate::RELEASE;
@@ -664,12 +664,31 @@ impl Eventloop {
let resolver = self.resolver.clone();
async move {
let ips = resolver
.lookup_ip(domain.to_string())
.await
.with_context(|| format!("Failed to lookup domain '{domain}'"))?
.iter()
.collect::<Vec<_>>();
let ipv4_lookup = resolver
.ipv4_lookup(domain.to_string())
.map_ok(|ipv4| ipv4.into_iter().map(|r| IpAddr::V4(r.0)));
let ipv6_lookup = resolver
.ipv6_lookup(domain.to_string())
.map_ok(|ipv6| ipv6.into_iter().map(|r| IpAddr::V6(r.0)));
let ips = match futures::future::join(ipv4_lookup, ipv6_lookup).await {
(Ok(ipv4), Ok(ipv6)) => iter::empty().chain(ipv4).chain(ipv6).collect(),
(Ok(ipv4), Err(e)) => {
tracing::debug!(%domain, "AAAA lookup failed: {e}");
ipv4.collect()
}
(Err(e), Ok(ipv6)) => {
tracing::debug!(%domain, "A lookup failed: {e}");
ipv6.collect()
}
(Err(e1), Err(e2)) => {
tracing::debug!(%domain, "A and AAAA lookup failed: [{e1}; {e2}]");
vec![]
}
};
Ok(ips)
}