chore(gateway): log unroutable packets only on DEBUG (#10897)

Currently, the Gateway logs all kinds of errors during packet processing
on WARN. Whilst it is generally good to be aware of warnings / errors,
some of these scenarios are particularly noisy. For various reasons, we
may not be able to route a packet arriving from the TUN device.

In such cases, we now return an `UnroutablePacket` error to the
event-loop which is special-cased to only log on DEBUG. It also includes
the 5 tuple as variables, which should make log analysis a bit easier if
we want to filter on specific parts of the 5 tuple.
This commit is contained in:
Thomas Eizinger
2025-11-18 15:23:14 +11:00
committed by GitHub
parent a742e0a10d
commit 6e2be658b0
8 changed files with 237 additions and 103 deletions

View File

@@ -366,7 +366,7 @@ where
self.pending_events.push_back(Event::ConnectionClosed(cid));
match connection.encapsulate(cid, peer_socket, goodbye, now, &mut self.allocations) {
match connection.encapsulate(cid, peer_socket, &goodbye, now, &mut self.allocations) {
Ok(Some(transmit)) => {
tracing::info!("Connection closed proactively (sent goodbye)");
@@ -496,7 +496,7 @@ where
pub fn encapsulate(
&mut self,
cid: TId,
packet: IpPacket,
packet: &IpPacket,
now: Instant,
) -> Result<Option<Transmit>> {
let conn = self.connections.get_established_mut(&cid, now)?;
@@ -512,7 +512,7 @@ where
let socket = match &mut conn.state {
ConnectionState::Connecting { ip_buffer, .. } => {
ip_buffer.enqueue(packet);
ip_buffer.enqueue(packet.clone());
let num_buffered = ip_buffer.len();
tracing::debug!(%num_buffered, %cid, "ICE is still in progress, buffering IP packet");
@@ -1970,7 +1970,7 @@ where
);
transmits.extend(ip_buffer.into_iter().flat_map(|packet| {
let transmit = self
.encapsulate(cid, remote_socket, packet, now, allocations)
.encapsulate(cid, remote_socket, &packet, now, allocations)
.inspect_err(|e| {
tracing::debug!(
"Failed to encapsulate buffered IP packet: {e:#}"
@@ -2128,14 +2128,14 @@ where
&mut self,
cid: TId,
socket: PeerSocket,
packet: IpPacket,
packet: &IpPacket,
now: Instant,
allocations: &mut BTreeMap<RId, Allocation>,
) -> Result<Option<Transmit>>
where
TId: fmt::Display,
{
self.state.on_outgoing(cid, &mut self.agent, &packet, now);
self.state.on_outgoing(cid, &mut self.agent, packet, now);
let packet_start = if socket.send_from_relay() { 4 } else { 0 };

View File

@@ -601,7 +601,7 @@ impl ClientState {
let transmit = self
.node
.encapsulate(gid, packet, now)
.encapsulate(gid, &packet, now)
.inspect_err(|e| tracing::debug!(%gid, "Failed to encapsulate: {e:#}"))
.ok()??;
@@ -1870,7 +1870,7 @@ fn encapsulate_and_buffer(
buffered_transmits: &mut VecDeque<Transmit>,
) {
let Some(transmit) = node
.encapsulate(gid, packet, now)
.encapsulate(gid, &packet, now)
.inspect_err(|e| tracing::debug!(%gid, "Failed to encapsulate: {e}"))
.ok()
.flatten()

View File

@@ -2,8 +2,12 @@ mod client_on_gateway;
mod filter_engine;
mod flow_tracker;
mod nat_table;
mod unroutable_packet;
pub use crate::gateway::unroutable_packet::UnroutablePacket;
pub(crate) use crate::gateway::client_on_gateway::ClientOnGateway;
pub(crate) use crate::gateway::unroutable_packet::RoutingError;
use crate::gateway::client_on_gateway::TranslateOutboundResult;
use crate::gateway::flow_tracker::FlowTracker;
@@ -111,31 +115,28 @@ impl GatewayState {
let dst = packet.destination();
if !crate::is_peer(dst) {
return Ok(None);
}
anyhow::ensure!(crate::is_peer(dst), UnroutablePacket::not_a_peer(&packet));
let peer = self
.peers
.peer_by_ip_mut(dst)
.with_context(|| UnroutablePacket::no_peer_state(&packet))?;
let Some(peer) = self.peers.peer_by_ip_mut(dst) else {
tracing::debug!(%dst, "Unknown client, perhaps already disconnected?");
return Ok(None);
};
let cid = peer.id();
flow_tracker::inbound_tun::record_client(cid);
let Some(packet) = peer
let packet = peer
.translate_inbound(packet, now)
.context("Failed to translate inbound packet")?
else {
return Ok(None);
};
.context("Failed to translate inbound packet")?;
let Some(encrypted_packet) = self
.node
.encapsulate(cid, packet, now)
.context("Failed to encapsulate")?
else {
return Ok(None);
let encrypted_packet = match self.node.encapsulate(cid, &packet, now) {
Ok(Some(encrypted_packet)) => encrypted_packet,
Ok(None) => return Ok(None),
Err(e) if e.is::<snownet::UnknownConnection>() => {
return Err(e.context(UnroutablePacket::not_connected(&packet)));
}
Err(e) => return Err(e),
};
flow_tracker::inbound_tun::record_wireguard_packet(
@@ -719,7 +720,7 @@ fn encrypt_packet(
now: Instant,
) -> Result<Option<Transmit>> {
let transmit = node
.encapsulate(cid, packet, now)
.encapsulate(cid, &packet, now)
.context("Failed to encapsulate")?;
Ok(transmit)

View File

@@ -16,10 +16,11 @@ use crate::client::{IPV4_RESOURCES, IPV6_RESOURCES};
use crate::gateway::filter_engine::FilterEngine;
use crate::gateway::flow_tracker;
use crate::gateway::nat_table::{NatTable, TranslateIncomingResult};
use crate::gateway::unroutable_packet::UnroutablePacket;
use crate::messages::gateway::Filters;
use crate::messages::gateway::ResourceDescription;
use crate::utils::network_contains_network;
use crate::{GatewayEvent, IpConfig, NotAllowedResource, NotClientIp, otel};
use crate::{GatewayEvent, IpConfig, NotAllowedResource, NotClientIp};
/// The state of one client on a gateway.
pub struct ClientOnGateway {
@@ -37,8 +38,6 @@ pub struct ClientOnGateway {
permanent_translations: BTreeMap<IpAddr, TranslationState>,
nat_table: NatTable,
buffered_events: VecDeque<GatewayEvent>,
num_dropped_packets: opentelemetry::metrics::Counter<u64>,
}
#[derive(Debug, PartialEq)]
@@ -66,7 +65,6 @@ impl ClientOnGateway {
nat_table: Default::default(),
buffered_events: Default::default(),
internet_resource_enabled: None,
num_dropped_packets: otel::metrics::network_packet_dropped(),
}
}
@@ -339,46 +337,30 @@ impl ClientOnGateway {
&mut self,
packet: IpPacket,
now: Instant,
) -> anyhow::Result<Option<IpPacket>> {
) -> anyhow::Result<IpPacket> {
// Traffic from our own IP is allowed.
match packet.source() {
IpAddr::V4(src) if src == self.gateway_tun.v4 => return Ok(Some(packet)),
IpAddr::V6(src) if src == self.gateway_tun.v6 => return Ok(Some(packet)),
IpAddr::V4(src) if src == self.gateway_tun.v4 => return Ok(packet),
IpAddr::V6(src) if src == self.gateway_tun.v6 => return Ok(packet),
IpAddr::V4(_) | IpAddr::V6(_) => {}
}
let Some(packet) = self.transform_tun_to_network(packet, now)? else {
return Ok(None);
};
let packet = self.transform_tun_to_network(packet, now)?;
self.ensure_client_ip(packet.destination())?;
// Always allow ICMP errors to pass through, even in the presence of filters that don't allow ICMP.
if packet.icmp_error().is_ok_and(|e| e.is_some()) {
return Ok(Some(packet));
return Ok(packet);
}
match self.classify_resource(packet.source(), packet.source_protocol()) {
Ok(rid) => {
flow_tracker::inbound_tun::record_resource(rid);
}
Err(e) => {
tracing::debug!(
"Inbound packet is not allowed, perhaps from an old client session? error = {e:#}"
);
self.num_dropped_packets.add(
1,
&[
otel::attr::network_type_for_packet(&packet),
otel::attr::network_io_direction_receive(),
otel::attr::error_type(e.root_cause().to_string()),
],
);
return Ok(None);
}
}
let rid = self
.classify_resource(packet.source(), packet.source_protocol())
.with_context(|| UnroutablePacket::not_allowed(&packet))?;
Ok(Some(packet))
flow_tracker::inbound_tun::record_resource(rid);
Ok(packet)
}
fn transform_network_to_tun(
@@ -442,7 +424,7 @@ impl ClientOnGateway {
&mut self,
mut packet: IpPacket,
now: Instant,
) -> anyhow::Result<Option<IpPacket>> {
) -> anyhow::Result<IpPacket> {
let (proto, ip) = match self.nat_table.translate_incoming(&packet, now)? {
TranslateIncomingResult::Ok { proto, src } => (proto, src),
TranslateIncomingResult::IcmpError(prototype) => {
@@ -452,29 +434,15 @@ impl ClientOnGateway {
.into_packet(self.client_tun.v4, self.client_tun.v6)
.context("Failed to create ICMP error")?;
return Ok(Some(icmp_error));
return Ok(icmp_error);
}
TranslateIncomingResult::ExpiredNatSession => {
tracing::debug!(
?packet,
"Expired NAT session for inbound packet of DNS resource; dropping"
);
self.num_dropped_packets.add(
1,
&[
otel::attr::network_type_for_packet(&packet),
otel::attr::network_io_direction_receive(),
otel::attr::error_type("ExpiredNatSession"),
],
);
return Ok(None);
bail!(UnroutablePacket::expired_nat_session(&packet))
}
TranslateIncomingResult::NoNatSession => {
// No NAT session means packet is likely for Internet Resource or a CIDR resource.
return Ok(Some(packet));
return Ok(packet);
}
};
@@ -483,7 +451,7 @@ impl ClientOnGateway {
.context("Failed to translate packet to new source")?;
packet.update_checksum();
Ok(Some(packet))
Ok(packet)
}
pub(crate) fn is_allowed(&self, resource: ResourceId) -> bool {
@@ -740,7 +708,7 @@ mod tests {
use ip_packet::make::TcpFlags;
use crate::{
gateway::nat_table,
gateway::{RoutingError, nat_table},
messages::gateway::{Filter, PortRange, ResourceDescriptionCidr},
};
@@ -866,11 +834,7 @@ mod tests {
peer.translate_outbound(request, Instant::now()).unwrap(),
TranslateOutboundResult::Send(_)
));
assert!(
peer.translate_inbound(response, Instant::now())
.unwrap()
.is_some()
);
peer.translate_inbound(response, Instant::now()).unwrap();
}
#[test]
@@ -1046,10 +1010,7 @@ mod tests {
now += Duration::from_secs(30);
peer.handle_timeout(now);
assert!(
matches!(peer.translate_inbound(response, now), Ok(Some(_))),
"After 30s remote should still be able to send a packet back"
);
peer.translate_inbound(response, now).unwrap();
let response = ip_packet::make::udp_packet(
foo_real_ip1(),
@@ -1063,10 +1024,13 @@ mod tests {
now += nat_table::UDP_TTL;
peer.handle_timeout(now);
assert!(
matches!(peer.translate_inbound(response, now), Ok(None)),
"After 1 minute of inactivity, NAT session should be freed"
);
let err = peer
.translate_inbound(response, now)
.unwrap_err()
.downcast::<UnroutablePacket>()
.unwrap();
assert_eq!(err.reason(), RoutingError::ExpiredNatSession);
}
#[test]
@@ -1125,9 +1089,7 @@ mod tests {
)
.unwrap();
let response = peer.translate_inbound(response, now).unwrap();
assert!(response.is_some());
peer.translate_inbound(response, now).unwrap();
}
{
@@ -1161,9 +1123,7 @@ mod tests {
)
.unwrap();
let response = peer.translate_inbound(response, now).unwrap();
assert!(response.is_some());
peer.translate_inbound(response, now).unwrap();
}
}

View File

@@ -0,0 +1,156 @@
use std::{
fmt::Display,
net::{IpAddr, SocketAddr},
};
use ip_packet::{IpPacket, Protocol};
#[derive(Debug, thiserror::Error)]
#[error("Unroutable packet: {error}")]
pub struct UnroutablePacket {
five_tuple: FiveTuple,
error: RoutingError,
}
impl UnroutablePacket {
pub fn not_allowed(packet: &IpPacket) -> Self {
Self {
five_tuple: FiveTuple::for_packet(packet),
error: RoutingError::NotAllowed,
}
}
pub fn expired_nat_session(packet: &IpPacket) -> Self {
Self {
five_tuple: FiveTuple::for_packet(packet),
error: RoutingError::ExpiredNatSession,
}
}
pub fn not_a_peer(packet: &IpPacket) -> Self {
Self {
five_tuple: FiveTuple::for_packet(packet),
error: RoutingError::NotAPeer,
}
}
pub fn no_peer_state(packet: &IpPacket) -> Self {
Self {
five_tuple: FiveTuple::for_packet(packet),
error: RoutingError::NoPeerState,
}
}
pub fn not_connected(packet: &IpPacket) -> Self {
Self {
five_tuple: FiveTuple::for_packet(packet),
error: RoutingError::NotConnected,
}
}
pub fn reason(&self) -> RoutingError {
self.error
}
pub fn source(&self) -> impl Display {
self.five_tuple.src
}
pub fn destination(&self) -> impl Display {
self.five_tuple.dst
}
pub fn proto(&self) -> impl Display {
self.five_tuple.proto
}
}
#[derive(Debug, derive_more::Display, Clone, Copy)]
enum MaybeIpOrSocket {
#[display("{_0}")]
Ip(IpAddr),
#[display("{_0}")]
Socket(SocketAddr),
#[display("unknown")]
Unknown,
}
#[derive(Debug, derive_more::Display, Clone, Copy)]
enum MaybeProto {
#[display("TCP")]
Tcp,
#[display("UDP")]
Udp,
#[display("ICMP")]
Icmp,
#[display("unknown")]
Unknown,
}
#[derive(Debug, Clone, Copy)]
struct FiveTuple {
src: MaybeIpOrSocket,
dst: MaybeIpOrSocket,
proto: MaybeProto,
}
impl FiveTuple {
fn for_packet(p: &IpPacket) -> Self {
let src_ip = p.source();
let dst_ip = p.destination();
let src_proto = p.source_protocol();
let dst_proto = p.destination_protocol();
match (src_proto, dst_proto) {
(Ok(Protocol::Tcp(src_port)), Ok(Protocol::Tcp(dst_port))) => Self {
src: MaybeIpOrSocket::Socket(SocketAddr::new(src_ip, src_port)),
dst: MaybeIpOrSocket::Socket(SocketAddr::new(dst_ip, dst_port)),
proto: MaybeProto::Tcp,
},
(Ok(Protocol::Udp(src_port)), Ok(Protocol::Udp(dst_port))) => Self {
src: MaybeIpOrSocket::Socket(SocketAddr::new(src_ip, src_port)),
dst: MaybeIpOrSocket::Socket(SocketAddr::new(dst_ip, dst_port)),
proto: MaybeProto::Udp,
},
(Ok(Protocol::Icmp(_)), Ok(Protocol::Icmp(_))) => Self {
src: MaybeIpOrSocket::Ip(src_ip),
dst: MaybeIpOrSocket::Ip(dst_ip),
proto: MaybeProto::Icmp,
},
_ => Self {
src: MaybeIpOrSocket::Unknown,
dst: MaybeIpOrSocket::Unknown,
proto: MaybeProto::Unknown,
},
}
}
}
#[derive(Debug, derive_more::Display, Clone, Copy, PartialEq, Eq)]
pub enum RoutingError {
#[display("Not allowed")]
NotAllowed,
#[display("Expired NAT session")]
ExpiredNatSession,
#[display("Not a Firezone peer")]
NotAPeer,
#[display("No peer state")]
NoPeerState,
#[display("No connection")]
NotConnected,
#[display("Other")]
Other,
}
impl From<RoutingError> for opentelemetry::Value {
fn from(value: RoutingError) -> Self {
match value {
RoutingError::NotAllowed => opentelemetry::Value::from("NotAllowed"),
RoutingError::ExpiredNatSession => opentelemetry::Value::from("ExpiredNatSession"),
RoutingError::NotAPeer => opentelemetry::Value::from("NotAPeer"),
RoutingError::NoPeerState => opentelemetry::Value::from("NoPeerState"),
RoutingError::NotConnected => opentelemetry::Value::from("NotConnected"),
RoutingError::Other => opentelemetry::Value::from("Other"),
}
}
}

View File

@@ -62,6 +62,7 @@ pub struct Io {
tun: Device,
outbound_packet_buffer: VecDeque<IpPacket>,
packet_counter: opentelemetry::metrics::Counter<u64>,
dropped_packets: opentelemetry::metrics::Counter<u64>,
}
#[derive(Debug)]
@@ -178,6 +179,7 @@ impl Io {
.u64_counter("system.network.packets")
.with_description("The number of packets processed.")
.build(),
dropped_packets: otel::metrics::network_packet_dropped(),
}
}
@@ -525,6 +527,10 @@ impl Io {
.ok_or(io::Error::other("No DNS server"))?
.send_response(to, message)
}
pub(crate) fn inc_dropped_packet(&self, attrs: &[opentelemetry::KeyValue]) {
self.dropped_packets.add(1, attrs);
}
}
fn is_max_wg_packet_size(d: &DatagramIn) -> bool {

View File

@@ -71,7 +71,7 @@ pub type ClientTunnel = Tunnel<ClientState>;
pub use client::ClientState;
pub use client::dns_config::DnsMapping;
pub use dns::DnsResourceRecord;
pub use gateway::{DnsResourceNatEntry, GatewayState, ResolveDnsRequest};
pub use gateway::{DnsResourceNatEntry, GatewayState, ResolveDnsRequest, UnroutablePacket};
pub use sockets::UdpSocketThreadStopped;
pub use utils::turn;
@@ -420,7 +420,20 @@ impl GatewayTunnel {
Ok(None) => {
self.role_state.handle_timeout(now, Utc::now());
}
Err(e) => error.push(e),
Err(e) => {
let routing_error = e
.downcast_ref::<gateway::UnroutablePacket>()
.map(|e| e.reason())
.unwrap_or(gateway::RoutingError::Other);
// TODO: Include more attributes here like IPv4/IPv6?
self.io.inc_dropped_packet(&[
otel::attr::error_type(routing_error),
otel::attr::network_io_direction_receive(),
]);
error.push(e);
}
}
}

View File

@@ -324,10 +324,8 @@ impl Eventloop {
continue;
}
if e.downcast_ref::<snownet::UnknownConnection>()
.is_some_and(|e| e.recently_disconnected())
{
tracing::debug!("{e:#}");
if let Some(e) = e.downcast_ref::<firezone_tunnel::UnroutablePacket>() {
tracing::debug!(src = %e.source(), dst = %e.destination(), proto = %e.proto(), "{e:#}");
continue;
}