fix(gateway): don't route packets from expired NAT sessions (#8124)

When we receive an inbound packet from the TUN device on the Gateway, we
make a lookup in the NAT table to see if it needs to be translated back
to a DNS proxy IP.

At present, non-existence of such a NAT entry results in the packet
being sent entirely unmodified because that is what needs to happen for
CIDR resources. Whilst that is important, the same code path is
currently being executed for DNS resources whose NAT session expired!
Those packets should be dropped instead which is what we do with this
PR.

To differentiate between not having a NAT session at all or whether a
previous one existed but is expired now, we keep around all previous
"outside" tuples of NAT sessions around. Those are only very small in
their memory-footprint. The entire NAT table is scoped to a connection
to the given peer and will thus eventually freed once the peer
disconnects. This allows us to reliably and cheaply detect, whether a
packet is using an expired NAT session. This check must be cheap because
all traffic of CIDR resources and the Internet resource needs to perform
this check such that we know that they don't have to be translated.

This might be the source of some of the "Source not allowed" errors we
have been seeing in client logs.
This commit is contained in:
Thomas Eizinger
2025-02-14 19:21:23 +11:00
committed by GitHub
parent 8f0db6ad47
commit 9cce4fd637
4 changed files with 108 additions and 19 deletions

View File

@@ -95,9 +95,12 @@ impl GatewayState {
};
let cid = peer.id();
let packet = peer
let Some(packet) = peer
.translate_inbound(packet, now)
.context("Failed to translate inbound packet")?;
.context("Failed to translate inbound packet")?
else {
return Ok(None);
};
let Some(encrypted_packet) = self
.node

View File

@@ -287,7 +287,7 @@ impl ClientOnGateway {
&mut self,
packet: IpPacket,
now: Instant,
) -> anyhow::Result<IpPacket> {
) -> anyhow::Result<Option<IpPacket>> {
let (proto, ip) = match self.nat_table.translate_incoming(&packet, now)? {
TranslateIncomingResult::Ok { proto, src } => (proto, src),
TranslateIncomingResult::DestinationUnreachable(prototype) => {
@@ -297,9 +297,17 @@ impl ClientOnGateway {
.into_packet(self.ipv4, self.ipv6)
.context("Failed to create `DestinationUnreachable` ICMP error")?;
return Ok(icmp_error);
return Ok(Some(icmp_error));
}
TranslateIncomingResult::NoNatSession => return Ok(packet),
TranslateIncomingResult::ExpiredNatSession => {
tracing::debug!(
?packet,
"Expired NAT session for inbound packet of DNS resource; dropping"
);
return Ok(None);
}
TranslateIncomingResult::NoNatSession => return Ok(Some(packet)),
};
let mut packet = packet
@@ -307,7 +315,7 @@ impl ClientOnGateway {
.context("Failed to translate packet to new source")?;
packet.update_checksum();
Ok(packet)
Ok(Some(packet))
}
pub(crate) fn is_allowed(&self, resource: ResourceId) -> bool {
@@ -554,8 +562,9 @@ mod tests {
time::{Duration, Instant},
};
use crate::messages::gateway::{
Filter, PortRange, ResourceDescription, ResourceDescriptionCidr,
use crate::{
messages::gateway::{Filter, PortRange, ResourceDescription, ResourceDescriptionCidr},
peer::nat_table,
};
use chrono::Utc;
use connlib_model::{ClientId, ResourceId};
@@ -744,6 +753,68 @@ mod tests {
assert!(peer.translate_outbound(pkt, Instant::now()).is_ok());
}
#[test]
fn dns_resource_packet_is_dropped_after_nat_session_expires() {
let _guard = firezone_logging::test("trace");
let mut peer = ClientOnGateway::new(client_id(), source_v4_addr(), source_v6_addr());
peer.add_resource(foo_dns_resource(), None);
peer.setup_nat(
foo_name().parse().unwrap(),
resource_id(),
BTreeSet::from([foo_real_ip().into()]),
BTreeSet::from([foo_proxy_ip().into()]),
)
.unwrap();
let request = ip_packet::make::udp_packet(
source_v4_addr(),
foo_proxy_ip(),
1,
foo_allowed_port(),
vec![0, 0, 0, 0, 0, 0, 0, 0],
)
.unwrap();
let mut now = Instant::now();
assert!(matches!(peer.translate_outbound(request, now), Ok(Some(_))));
let response = ip_packet::make::udp_packet(
foo_real_ip(),
source_v4_addr(),
foo_allowed_port(),
1,
vec![0, 0, 0, 0, 0, 0, 0, 0],
)
.unwrap();
now += Duration::from_secs(30);
peer.handle_timeout(now);
assert!(
matches!(peer.translate_inbound(response, now), Ok(Some(_))),
"After 30s remote should still be able to send a packet back"
);
let response = ip_packet::make::udp_packet(
foo_real_ip(),
source_v4_addr(),
foo_allowed_port(),
1,
vec![0, 0, 0, 0, 0, 0, 0, 0],
)
.unwrap();
now += nat_table::TTL;
peer.handle_timeout(now);
assert!(
matches!(peer.translate_inbound(response, now), Ok(None)),
"After 1 minute of inactivity, NAT session should be freed"
);
}
fn foo_dns_resource() -> crate::messages::gateway::ResourceDescription {
crate::messages::gateway::ResourceDescription::Dns(
crate::messages::gateway::ResourceDescriptionDns {

View File

@@ -2,7 +2,7 @@
use anyhow::{Context, Result};
use bimap::BiMap;
use ip_packet::{DestUnreachable, FailedPacket, IpPacket, PacketBuilder, Protocol};
use std::collections::BTreeMap;
use std::collections::{BTreeMap, HashSet};
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
use std::time::{Duration, Instant};
@@ -20,26 +20,23 @@ use std::time::{Duration, Instant};
pub(crate) struct NatTable {
pub(crate) table: BiMap<(Protocol, IpAddr), (Protocol, IpAddr)>,
pub(crate) last_seen: BTreeMap<(Protocol, IpAddr), Instant>,
// We don't bother with proactively freeing this because a single entry is only ~20 bytes and it gets cleanup once the connection to the client goes away.
expired: HashSet<(Protocol, IpAddr)>,
}
const TTL: Duration = Duration::from_secs(60);
pub(crate) const TTL: Duration = Duration::from_secs(60);
impl NatTable {
pub(crate) fn handle_timeout(&mut self, now: Instant) {
let mut removed = Vec::new();
for (outside, e) in self.last_seen.iter() {
if now.duration_since(*e) >= TTL {
if let Some((inside, _)) = self.table.remove_by_right(outside) {
tracing::debug!(?inside, ?outside, "NAT session expired");
self.expired.insert(*outside);
}
removed.push(*outside);
}
}
for r in removed {
self.last_seen.remove(&r);
}
}
pub(crate) fn translate_outgoing(
@@ -101,6 +98,10 @@ impl NatTable {
));
}
if self.expired.contains(&outside) {
return Ok(TranslateIncomingResult::ExpiredNatSession);
}
tracing::trace!(?outside, "No active NAT session; skipping translation");
return Ok(TranslateIncomingResult::NoNatSession);
@@ -112,6 +113,10 @@ impl NatTable {
return Ok(TranslateIncomingResult::Ok { proto, src });
}
if self.expired.contains(&outside) {
return Ok(TranslateIncomingResult::ExpiredNatSession);
}
tracing::trace!(?outside, "No active NAT session; skipping translation");
Ok(TranslateIncomingResult::NoNatSession)
@@ -201,6 +206,7 @@ impl DestinationUnreachablePrototype {
pub enum TranslateIncomingResult {
Ok { proto: Protocol, src: IpAddr },
DestinationUnreachable(DestinationUnreachablePrototype),
ExpiredNatSession,
NoNatSession,
}
@@ -246,7 +252,10 @@ mod tests {
// Assert
if response_delay >= Duration::from_secs(60) {
assert_eq!(translate_incoming, TranslateIncomingResult::NoNatSession);
assert_eq!(
translate_incoming,
TranslateIncomingResult::ExpiredNatSession
);
} else {
assert_eq!(
translate_incoming,
@@ -298,6 +307,7 @@ mod tests {
match res {
TranslateIncomingResult::Ok { proto, src } => (proto, src),
TranslateIncomingResult::NoNatSession
| TranslateIncomingResult::ExpiredNatSession
| TranslateIncomingResult::DestinationUnreachable(_) => panic!("Wrong result"),
}
});

View File

@@ -22,7 +22,12 @@ export default function Gateway() {
return (
<Entries downloadLinks={downloadLinks} title="Gateway">
<Unreleased></Unreleased>
<Unreleased>
<ChangeItem pull="8124">
Fixes a bug in the routing of DNS resources that would lead to "Source
not allowed" errors in the Client logs.
</ChangeItem>
</Unreleased>
<Entry version="1.4.4" date={new Date("2025-02-11")}>
<ChangeItem pull="7944">
Fixes an edge case where a busy Gateway could experience a deadlock