diff --git a/rust/connlib/tunnel/src/gateway.rs b/rust/connlib/tunnel/src/gateway.rs index 329412b67..340ae269d 100644 --- a/rust/connlib/tunnel/src/gateway.rs +++ b/rust/connlib/tunnel/src/gateway.rs @@ -95,9 +95,12 @@ impl GatewayState { }; let cid = peer.id(); - let packet = peer + let Some(packet) = peer .translate_inbound(packet, now) - .context("Failed to translate inbound packet")?; + .context("Failed to translate inbound packet")? + else { + return Ok(None); + }; let Some(encrypted_packet) = self .node diff --git a/rust/connlib/tunnel/src/peer.rs b/rust/connlib/tunnel/src/peer.rs index ef501e29e..120c7936b 100644 --- a/rust/connlib/tunnel/src/peer.rs +++ b/rust/connlib/tunnel/src/peer.rs @@ -287,7 +287,7 @@ impl ClientOnGateway { &mut self, packet: IpPacket, now: Instant, - ) -> anyhow::Result { + ) -> anyhow::Result> { let (proto, ip) = match self.nat_table.translate_incoming(&packet, now)? { TranslateIncomingResult::Ok { proto, src } => (proto, src), TranslateIncomingResult::DestinationUnreachable(prototype) => { @@ -297,9 +297,17 @@ impl ClientOnGateway { .into_packet(self.ipv4, self.ipv6) .context("Failed to create `DestinationUnreachable` ICMP error")?; - return Ok(icmp_error); + return Ok(Some(icmp_error)); } - TranslateIncomingResult::NoNatSession => return Ok(packet), + TranslateIncomingResult::ExpiredNatSession => { + tracing::debug!( + ?packet, + "Expired NAT session for inbound packet of DNS resource; dropping" + ); + + return Ok(None); + } + TranslateIncomingResult::NoNatSession => return Ok(Some(packet)), }; let mut packet = packet @@ -307,7 +315,7 @@ impl ClientOnGateway { .context("Failed to translate packet to new source")?; packet.update_checksum(); - Ok(packet) + Ok(Some(packet)) } pub(crate) fn is_allowed(&self, resource: ResourceId) -> bool { @@ -554,8 +562,9 @@ mod tests { time::{Duration, Instant}, }; - use crate::messages::gateway::{ - Filter, PortRange, ResourceDescription, ResourceDescriptionCidr, + use crate::{ + messages::gateway::{Filter, PortRange, ResourceDescription, ResourceDescriptionCidr}, + peer::nat_table, }; use chrono::Utc; use connlib_model::{ClientId, ResourceId}; @@ -744,6 +753,68 @@ mod tests { assert!(peer.translate_outbound(pkt, Instant::now()).is_ok()); } + #[test] + fn dns_resource_packet_is_dropped_after_nat_session_expires() { + let _guard = firezone_logging::test("trace"); + + let mut peer = ClientOnGateway::new(client_id(), source_v4_addr(), source_v6_addr()); + peer.add_resource(foo_dns_resource(), None); + peer.setup_nat( + foo_name().parse().unwrap(), + resource_id(), + BTreeSet::from([foo_real_ip().into()]), + BTreeSet::from([foo_proxy_ip().into()]), + ) + .unwrap(); + + let request = ip_packet::make::udp_packet( + source_v4_addr(), + foo_proxy_ip(), + 1, + foo_allowed_port(), + vec![0, 0, 0, 0, 0, 0, 0, 0], + ) + .unwrap(); + + let mut now = Instant::now(); + + assert!(matches!(peer.translate_outbound(request, now), Ok(Some(_)))); + + let response = ip_packet::make::udp_packet( + foo_real_ip(), + source_v4_addr(), + foo_allowed_port(), + 1, + vec![0, 0, 0, 0, 0, 0, 0, 0], + ) + .unwrap(); + + now += Duration::from_secs(30); + peer.handle_timeout(now); + + assert!( + matches!(peer.translate_inbound(response, now), Ok(Some(_))), + "After 30s remote should still be able to send a packet back" + ); + + let response = ip_packet::make::udp_packet( + foo_real_ip(), + source_v4_addr(), + foo_allowed_port(), + 1, + vec![0, 0, 0, 0, 0, 0, 0, 0], + ) + .unwrap(); + + now += nat_table::TTL; + peer.handle_timeout(now); + + assert!( + matches!(peer.translate_inbound(response, now), Ok(None)), + "After 1 minute of inactivity, NAT session should be freed" + ); + } + fn foo_dns_resource() -> crate::messages::gateway::ResourceDescription { crate::messages::gateway::ResourceDescription::Dns( crate::messages::gateway::ResourceDescriptionDns { diff --git a/rust/connlib/tunnel/src/peer/nat_table.rs b/rust/connlib/tunnel/src/peer/nat_table.rs index c6858c141..ae4bb30fd 100644 --- a/rust/connlib/tunnel/src/peer/nat_table.rs +++ b/rust/connlib/tunnel/src/peer/nat_table.rs @@ -2,7 +2,7 @@ use anyhow::{Context, Result}; use bimap::BiMap; use ip_packet::{DestUnreachable, FailedPacket, IpPacket, PacketBuilder, Protocol}; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashSet}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; use std::time::{Duration, Instant}; @@ -20,26 +20,23 @@ use std::time::{Duration, Instant}; pub(crate) struct NatTable { pub(crate) table: BiMap<(Protocol, IpAddr), (Protocol, IpAddr)>, pub(crate) last_seen: BTreeMap<(Protocol, IpAddr), Instant>, + + // We don't bother with proactively freeing this because a single entry is only ~20 bytes and it gets cleanup once the connection to the client goes away. + expired: HashSet<(Protocol, IpAddr)>, } -const TTL: Duration = Duration::from_secs(60); +pub(crate) const TTL: Duration = Duration::from_secs(60); impl NatTable { pub(crate) fn handle_timeout(&mut self, now: Instant) { - let mut removed = Vec::new(); for (outside, e) in self.last_seen.iter() { if now.duration_since(*e) >= TTL { if let Some((inside, _)) = self.table.remove_by_right(outside) { tracing::debug!(?inside, ?outside, "NAT session expired"); + self.expired.insert(*outside); } - - removed.push(*outside); } } - - for r in removed { - self.last_seen.remove(&r); - } } pub(crate) fn translate_outgoing( @@ -101,6 +98,10 @@ impl NatTable { )); } + if self.expired.contains(&outside) { + return Ok(TranslateIncomingResult::ExpiredNatSession); + } + tracing::trace!(?outside, "No active NAT session; skipping translation"); return Ok(TranslateIncomingResult::NoNatSession); @@ -112,6 +113,10 @@ impl NatTable { return Ok(TranslateIncomingResult::Ok { proto, src }); } + if self.expired.contains(&outside) { + return Ok(TranslateIncomingResult::ExpiredNatSession); + } + tracing::trace!(?outside, "No active NAT session; skipping translation"); Ok(TranslateIncomingResult::NoNatSession) @@ -201,6 +206,7 @@ impl DestinationUnreachablePrototype { pub enum TranslateIncomingResult { Ok { proto: Protocol, src: IpAddr }, DestinationUnreachable(DestinationUnreachablePrototype), + ExpiredNatSession, NoNatSession, } @@ -246,7 +252,10 @@ mod tests { // Assert if response_delay >= Duration::from_secs(60) { - assert_eq!(translate_incoming, TranslateIncomingResult::NoNatSession); + assert_eq!( + translate_incoming, + TranslateIncomingResult::ExpiredNatSession + ); } else { assert_eq!( translate_incoming, @@ -298,6 +307,7 @@ mod tests { match res { TranslateIncomingResult::Ok { proto, src } => (proto, src), TranslateIncomingResult::NoNatSession + | TranslateIncomingResult::ExpiredNatSession | TranslateIncomingResult::DestinationUnreachable(_) => panic!("Wrong result"), } }); diff --git a/website/src/components/Changelog/Gateway.tsx b/website/src/components/Changelog/Gateway.tsx index 215c55fcb..2697d5e4c 100644 --- a/website/src/components/Changelog/Gateway.tsx +++ b/website/src/components/Changelog/Gateway.tsx @@ -22,7 +22,12 @@ export default function Gateway() { return ( - + + + Fixes a bug in the routing of DNS resources that would lead to "Source + not allowed" errors in the Client logs. + + Fixes an edge case where a busy Gateway could experience a deadlock