chore(gateway): record metrics about dropped packets (#8942)

When a NAT session expires or other unallowed traffic is routed to the
Gateway, we drop these packets. It will be useful to learn how often
that actually happens and why the packets were dropped.
To do so, we add a counter metric for these packets.

---------

Signed-off-by: Thomas Eizinger <thomas@eizinger.io>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Thomas Eizinger
2025-05-01 04:24:10 +10:00
committed by GitHub
parent 1f8090c60d
commit 8dd794d8c8
3 changed files with 41 additions and 9 deletions

View File

@@ -1,7 +1,7 @@
use std::{io, net::SocketAddr};
use ip_packet::IpPacket;
use opentelemetry::KeyValue;
use opentelemetry::{KeyValue, Value};
pub fn network_transport_udp() -> KeyValue {
KeyValue::new("network.transport", "udp")
@@ -58,7 +58,23 @@ pub fn io_error_code(e: &io::Error) -> KeyValue {
}
/// Builds the `error.type` attribute for an I/O error.
///
/// The attribute value is the error's [`io::ErrorKind`] rendered with its `Debug`
/// representation and prefixed with `io::ErrorKind::`.
pub fn io_error_type(e: &io::Error) -> KeyValue {
KeyValue::new("error.type", format!("io::ErrorKind::{:?}", e.kind()))
error_type(format!("io::ErrorKind::{:?}", e.kind()))
}
/// Builds an `error.type` attribute carrying the given value.
///
/// `ty` may be anything that converts into an OpenTelemetry [`Value`],
/// e.g. a string literal or an owned `String`.
pub fn error_type(ty: impl Into<Value>) -> KeyValue {
    // Attribute key under which the error classification is recorded.
    const KEY: &str = "error.type";

    KeyValue::new(KEY, ty)
}
/// Constructors for the metric instruments used across the crate.
pub mod metrics {
    use opentelemetry::metrics::Counter;

    /// Builds the `network.packet.dropped` counter on the global `connlib` meter.
    ///
    /// The counter is measured in `{packet}` units; callers attach attributes
    /// when recording to it.
    pub fn network_packet_dropped() -> Counter<u64> {
        let meter = opentelemetry::global::meter("connlib");

        let builder = meter
            .u64_counter("network.packet.dropped")
            .with_description("Count of packets that are dropped or discarded")
            .with_unit("{packet}");

        builder.init()
    }
}
#[cfg(test)]

View File

@@ -75,6 +75,8 @@ pub struct ClientOnGateway {
permanent_translations: BTreeMap<IpAddr, TranslationState>,
nat_table: NatTable,
buffered_events: VecDeque<GatewayEvent>,
num_dropped_packets: opentelemetry::metrics::Counter<u64>,
}
impl ClientOnGateway {
@@ -93,6 +95,7 @@ impl ClientOnGateway {
nat_table: Default::default(),
buffered_events: Default::default(),
internet_resource_enabled: false,
num_dropped_packets: crate::otel::metrics::network_packet_dropped(),
}
}
@@ -358,6 +361,15 @@ impl ClientOnGateway {
"Inbound packet is not allowed, perhaps from an old client session? error = {e:#}"
);
self.num_dropped_packets.add(
1,
&[
crate::otel::network_type_for_packet(&packet),
crate::otel::network_io_direction_receive(),
crate::otel::error_type(e.root_cause().to_string()),
],
);
return Ok(None);
}
@@ -386,6 +398,15 @@ impl ClientOnGateway {
"Expired NAT session for inbound packet of DNS resource; dropping"
);
self.num_dropped_packets.add(
1,
&[
crate::otel::network_type_for_packet(&packet),
crate::otel::network_io_direction_receive(),
crate::otel::error_type("ExpiredNatSession"),
],
);
return Ok(None);
}
TranslateIncomingResult::NoNatSession => {

View File

@@ -14,13 +14,7 @@ impl UniquePacketBuffer {
Self {
buffer: AllocRingBuffer::with_capacity_power_of_2(capacity),
tag,
num_dropped_packets: opentelemetry::global::meter("connlib")
.u64_counter("system.network.packet.dropped")
.with_description(
"The number of packets which have been dropped due to buffer overflows.",
)
.with_unit("{packet}")
.init(),
num_dropped_packets: crate::otel::metrics::network_packet_dropped(),
}
}
@@ -50,6 +44,7 @@ impl UniquePacketBuffer {
crate::otel::network_type_for_packet(&new),
crate::otel::network_io_direction_transmit(),
KeyValue::new("system.buffer.pool.name", self.tag),
crate::otel::error_type("BufferFull"),
],
);
}