mirror of
https://github.com/outbackdingo/firezone.git
synced 2026-01-27 18:18:55 +00:00
chore(gateway): record metrics about dropped packets (#8942)
When a NAT session expires or other disallowed traffic is routed to the Gateway, we drop these packets. It would be useful to know how often that actually happens and why the packets were dropped. To find out, we add a counter metric for these packets. --------- Signed-off-by: Thomas Eizinger <thomas@eizinger.io> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
use std::{io, net::SocketAddr};
|
||||
|
||||
use ip_packet::IpPacket;
|
||||
use opentelemetry::KeyValue;
|
||||
use opentelemetry::{KeyValue, Value};
|
||||
|
||||
pub fn network_transport_udp() -> KeyValue {
|
||||
KeyValue::new("network.transport", "udp")
|
||||
@@ -58,7 +58,23 @@ pub fn io_error_code(e: &io::Error) -> KeyValue {
|
||||
}
|
||||
|
||||
/// Builds the `error.type` attribute for an I/O error.
///
/// The attribute value is the string `io::ErrorKind::<kind>`, where `<kind>` is the
/// `Debug` rendering of `e.kind()`.
// NOTE(review): this is a diff view without +/- markers. The two statements below
// appear to be the removed and the added version of the same line (inline
// `KeyValue::new` vs. delegation to `error_type`) — confirm against the commit.
pub fn io_error_type(e: &io::Error) -> KeyValue {
|
||||
// Presumably the pre-change line: constructs the attribute inline.
KeyValue::new("error.type", format!("io::ErrorKind::{:?}", e.kind()))
|
||||
// Presumably the post-change line: same formatted value, routed through `error_type`.
error_type(format!("io::ErrorKind::{:?}", e.kind()))
|
||||
}
|
||||
|
||||
pub fn error_type(ty: impl Into<Value>) -> KeyValue {
|
||||
KeyValue::new("error.type", ty)
|
||||
}
|
||||
|
||||
pub mod metrics {
|
||||
use opentelemetry::metrics::Counter;
|
||||
|
||||
pub fn network_packet_dropped() -> Counter<u64> {
|
||||
opentelemetry::global::meter("connlib")
|
||||
.u64_counter("network.packet.dropped")
|
||||
.with_description("Count of packets that are dropped or discarded")
|
||||
.with_unit("{packet}")
|
||||
.init()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -75,6 +75,8 @@ pub struct ClientOnGateway {
|
||||
permanent_translations: BTreeMap<IpAddr, TranslationState>,
|
||||
nat_table: NatTable,
|
||||
buffered_events: VecDeque<GatewayEvent>,
|
||||
|
||||
num_dropped_packets: opentelemetry::metrics::Counter<u64>,
|
||||
}
|
||||
|
||||
impl ClientOnGateway {
|
||||
@@ -93,6 +95,7 @@ impl ClientOnGateway {
|
||||
nat_table: Default::default(),
|
||||
buffered_events: Default::default(),
|
||||
internet_resource_enabled: false,
|
||||
num_dropped_packets: crate::otel::metrics::network_packet_dropped(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -358,6 +361,15 @@ impl ClientOnGateway {
|
||||
"Inbound packet is not allowed, perhaps from an old client session? error = {e:#}"
|
||||
);
|
||||
|
||||
self.num_dropped_packets.add(
|
||||
1,
|
||||
&[
|
||||
crate::otel::network_type_for_packet(&packet),
|
||||
crate::otel::network_io_direction_receive(),
|
||||
crate::otel::error_type(e.root_cause().to_string()),
|
||||
],
|
||||
);
|
||||
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
@@ -386,6 +398,15 @@ impl ClientOnGateway {
|
||||
"Expired NAT session for inbound packet of DNS resource; dropping"
|
||||
);
|
||||
|
||||
self.num_dropped_packets.add(
|
||||
1,
|
||||
&[
|
||||
crate::otel::network_type_for_packet(&packet),
|
||||
crate::otel::network_io_direction_receive(),
|
||||
crate::otel::error_type("ExpiredNatSession"),
|
||||
],
|
||||
);
|
||||
|
||||
return Ok(None);
|
||||
}
|
||||
TranslateIncomingResult::NoNatSession => {
|
||||
|
||||
@@ -14,13 +14,7 @@ impl UniquePacketBuffer {
|
||||
Self {
|
||||
buffer: AllocRingBuffer::with_capacity_power_of_2(capacity),
|
||||
tag,
|
||||
num_dropped_packets: opentelemetry::global::meter("connlib")
|
||||
.u64_counter("system.network.packet.dropped")
|
||||
.with_description(
|
||||
"The number of packets which have been dropped due to buffer overflows.",
|
||||
)
|
||||
.with_unit("{packet}")
|
||||
.init(),
|
||||
num_dropped_packets: crate::otel::metrics::network_packet_dropped(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,6 +44,7 @@ impl UniquePacketBuffer {
|
||||
crate::otel::network_type_for_packet(&new),
|
||||
crate::otel::network_io_direction_transmit(),
|
||||
KeyValue::new("system.buffer.pool.name", self.tag),
|
||||
crate::otel::error_type("BufferFull"),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user