chore(gateway): report more tunnel errors to event-loop (#7299)

Currently, the Gateway's state machine functions for processing packets
use type-signature that only return `Option`. Any errors while
processing packets are logged internally. This makes it difficult to
consistently log these errors.

We refactor these functions to return `Result<Option<T>>` in most cases,
indicating that they may fail for various reasons and also sometimes
succeed without producing an output.

This allows us to consistently log these errors in the event-loop.
Logging them on WARN or ERROR would be too spammy though. In order to
still be alerted about some of these, we use the `telemetry_event!`
macro which samples them at a rate of 1%. This will alert us about cases
that happen often and allows us to handle them explicitly.

Once this is deployed to staging, I will monitor the alerts in Sentry to
ensure we won't get spammed with events from customers on the next
release.
This commit is contained in:
Thomas Eizinger
2024-11-11 03:50:27 +00:00
committed by GitHub
parent ce8d8709d6
commit 62cb32b7a3
6 changed files with 117 additions and 100 deletions

View File

@@ -4,11 +4,11 @@ use crate::messages::{
use crate::utils::earliest;
use crate::{p2p_control, GatewayEvent};
use crate::{peer::ClientOnGateway, peer_store::PeerStore};
use anyhow::Context;
use anyhow::{Context, Result};
use boringtun::x25519::PublicKey;
use chrono::{DateTime, Utc};
use connlib_model::{ClientId, DomainName, RelayId, ResourceId};
use firezone_logging::{anyhow_dyn_err, std_dyn_err, telemetry_span};
use firezone_logging::{anyhow_dyn_err, telemetry_span};
use ip_network::{Ipv4Network, Ipv6Network};
use ip_packet::{FzP2pControlSlice, IpPacket};
use secrecy::{ExposeSecret as _, Secret};
@@ -85,32 +85,30 @@ impl GatewayState {
packet: IpPacket,
now: Instant,
buffer: &mut EncryptBuffer,
) -> Option<snownet::EncryptedPacket> {
) -> Result<Option<snownet::EncryptedPacket>> {
let dst = packet.destination();
if !is_client(dst) {
return None;
}
anyhow::ensure!(is_client(dst), "Packet not destined for a client");
let Some(peer) = self.peers.peer_by_ip_mut(dst) else {
tracing::trace!(%dst, "Couldn't find connection by IP");
return None;
};
let peer = self
.peers
.peer_by_ip_mut(dst)
.context("Couldn't find connection by IP")?;
let cid = peer.id();
let packet = peer
.encapsulate(packet, now)
.inspect_err(|e| tracing::debug!(%cid, "Failed to encapsulate: {e:#}"))
.ok()??;
.translate_inbound(packet, now)
.context("Failed to translate packet")?;
let transmit = self
let Some(encrypted_packet) = self
.node
.encapsulate(cid, packet, now, buffer)
.inspect_err(|e| tracing::debug!(error = std_dyn_err(e), %cid, "Failed to encapsulate"))
.ok()??;
.context("Failed to encapsulate")?
else {
return Ok(None);
};
Some(transmit)
Ok(Some(encrypted_packet))
}
/// Handles UDP packets received on the network interface.
@@ -125,40 +123,44 @@ impl GatewayState {
from: SocketAddr,
packet: &[u8],
now: Instant,
) -> Option<IpPacket> {
let (cid, packet) = self.node.decapsulate(
local,
from,
packet,
now,
)
.inspect_err(|e| tracing::debug!(error = std_dyn_err(e), %from, num_bytes = %packet.len(), "Failed to decapsulate incoming packet"))
.ok()??;
let Some(peer) = self.peers.get_mut(&cid) else {
tracing::warn!(%cid, "Couldn't find connection by ID");
return None;
) -> Result<Option<IpPacket>> {
let Some((cid, packet)) = self
.node
.decapsulate(local, from, packet, now)
.context("Failed to decapsulate")?
else {
return Ok(None);
};
let peer = self
.peers
.get_mut(&cid)
.context("Failed to find connection by ID")?;
if let Some(fz_p2p_control) = packet.as_fz_p2p_control() {
let response =
handle_p2p_control_packet(fz_p2p_control, peer, &mut self.buffered_events)?;
let Some(immediate_response) =
handle_p2p_control_packet(fz_p2p_control, peer, &mut self.buffered_events)
else {
return Ok(None);
};
let mut buffer = EncryptBuffer::new();
let transmit = encrypt_packet(response, cid, &mut self.node, &mut buffer, now)?;
let Some(transmit) =
encrypt_packet(immediate_response, cid, &mut self.node, &mut buffer, now)?
else {
return Ok(None);
};
self.buffered_transmits.push_back(transmit.into_owned());
return None;
return Ok(None);
}
let packet = peer
.decapsulate(packet, now)
.inspect_err(|e| tracing::debug!(%cid, "Invalid packet: {e:#}"))
.ok()?;
.translate_outbound(packet, now)
.context("Failed to translate packet")?;
Some(packet)
Ok(Some(packet))
}
pub fn cleanup_connection(&mut self, id: &ClientId) {
@@ -336,7 +338,7 @@ impl GatewayState {
let packet = dns_resource_nat::domain_status(req.resource, req.domain, nat_status);
let mut buffer = EncryptBuffer::new();
let Some(transmit) = encrypt_packet(packet, req.client, &mut self.node, &mut buffer, now)
let Some(transmit) = encrypt_packet(packet, req.client, &mut self.node, &mut buffer, now)?
else {
return Ok(());
};
@@ -499,13 +501,15 @@ fn encrypt_packet<'a>(
node: &mut ServerNode<ClientId, RelayId>,
buffer: &'a mut EncryptBuffer,
now: Instant,
) -> Option<Transmit<'a>> {
let encrypted_packet = node
) -> Result<Option<Transmit<'a>>> {
let Some(encrypted_packet) = node
.encapsulate(cid, packet, now, buffer)
.inspect_err(|e| tracing::debug!(error = std_dyn_err(e), %cid, "Failed to encapsulate"))
.ok()??;
.context("Failed to encapsulate packet")?
else {
return Ok(None);
};
Some(encrypted_packet.to_transmit(buffer))
Ok(Some(encrypted_packet.to_transmit(buffer)))
}
/// Opaque request struct for when a domain name needs to be resolved.

View File

@@ -246,9 +246,10 @@ impl GatewayTunnel {
}
Poll::Ready(io::Input::Device(packet)) => {
let now = Instant::now();
let Some(enc_packet) =
self.role_state
.handle_tun_input(packet, now, &mut self.encrypt_buf)
let Some(enc_packet) = self
.role_state
.handle_tun_input(packet, now, &mut self.encrypt_buf)
.map_err(std::io::Error::other)?
else {
self.role_state.handle_timeout(now, Utc::now());
continue;
@@ -264,12 +265,16 @@ impl GatewayTunnel {
let utc_now = Utc::now();
for received in packets {
let Some(packet) = self.role_state.handle_network_input(
received.local,
received.from,
received.packet,
now,
) else {
let Some(packet) = self
.role_state
.handle_network_input(
received.local,
received.from,
received.packet,
now,
)
.map_err(std::io::Error::other)?
else {
self.role_state.handle_timeout(now, utc_now);
continue;
};

View File

@@ -428,7 +428,11 @@ impl ClientOnGateway {
Ok(packet)
}
pub fn decapsulate(&mut self, packet: IpPacket, now: Instant) -> anyhow::Result<IpPacket> {
pub fn translate_outbound(
&mut self,
packet: IpPacket,
now: Instant,
) -> anyhow::Result<IpPacket> {
self.ensure_allowed_src(&packet)?;
self.ensure_allowed_dst(&packet)?;
@@ -437,18 +441,16 @@ impl ClientOnGateway {
Ok(packet)
}
pub fn encapsulate(
pub fn translate_inbound(
&mut self,
packet: IpPacket,
now: Instant,
) -> anyhow::Result<Option<IpPacket>> {
) -> anyhow::Result<IpPacket> {
let Some((proto, ip)) = self.nat_table.translate_incoming(&packet, now)? else {
return Ok(Some(packet));
return Ok(packet);
};
let Some(mut packet) = packet.translate_source(self.ipv4, self.ipv6, proto, ip) else {
return Ok(None);
};
let mut packet = packet.translate_source(self.ipv4, self.ipv6, proto, ip)?;
self.permanent_translations
.get_mut(&ip)
@@ -457,7 +459,7 @@ impl ClientOnGateway {
packet.update_checksum();
Ok(Some(packet))
Ok(packet)
}
pub(crate) fn is_allowed(&self, resource: ResourceId) -> bool {
@@ -1134,7 +1136,7 @@ mod tests {
)
.unwrap();
assert!(peer.decapsulate(pkt, Instant::now()).is_ok());
assert!(peer.translate_outbound(pkt, Instant::now()).is_ok());
let pkt = ip_packet::make::udp_packet(
source_v4_addr(),
@@ -1145,7 +1147,7 @@ mod tests {
)
.unwrap();
assert!(peer.decapsulate(pkt, Instant::now()).is_err());
assert!(peer.translate_outbound(pkt, Instant::now()).is_err());
let pkt = ip_packet::make::udp_packet(
source_v4_addr(),
@@ -1156,7 +1158,7 @@ mod tests {
)
.unwrap();
assert!(peer.decapsulate(pkt, Instant::now()).is_err());
assert!(peer.translate_outbound(pkt, Instant::now()).is_err());
let pkt = ip_packet::make::udp_packet(
source_v4_addr(),
@@ -1167,7 +1169,7 @@ mod tests {
)
.unwrap();
assert!(peer.decapsulate(pkt, Instant::now()).is_ok());
assert!(peer.translate_outbound(pkt, Instant::now()).is_ok());
}
#[test]
@@ -1193,7 +1195,7 @@ mod tests {
)
.unwrap();
assert!(peer.decapsulate(pkt, Instant::now()).is_ok());
assert!(peer.translate_outbound(pkt, Instant::now()).is_ok());
let pkt = ip_packet::make::udp_packet(
source_v4_addr(),
@@ -1204,7 +1206,7 @@ mod tests {
)
.unwrap();
assert!(peer.decapsulate(pkt, Instant::now()).is_err());
assert!(peer.translate_outbound(pkt, Instant::now()).is_err());
let pkt = ip_packet::make::udp_packet(
source_v4_addr(),
@@ -1215,7 +1217,7 @@ mod tests {
)
.unwrap();
assert!(peer.decapsulate(pkt, Instant::now()).is_ok());
assert!(peer.translate_outbound(pkt, Instant::now()).is_ok());
}
fn foo_dns_resource() -> crate::messages::gateway::ResourceDescription {

View File

@@ -57,12 +57,13 @@ impl SimGateway {
now: Instant,
utc_now: DateTime<Utc>,
) -> Option<Transmit<'static>> {
let Some(packet) = self.sut.handle_network_input(
transmit.dst,
transmit.src.unwrap(),
&transmit.payload,
now,
) else {
let Some(packet) = self
.sut
.handle_network_input(transmit.dst, transmit.src.unwrap(), &transmit.payload, now)
.inspect_err(|e| tracing::warn!("{e:#}"))
.ok()
.flatten()
else {
self.sut.handle_timeout(now, utc_now);
return None;
};
@@ -91,7 +92,8 @@ impl SimGateway {
.filter_map(|packet| {
Some(
self.sut
.handle_tun_input(packet, now, &mut self.enc_buffer)?
.handle_tun_input(packet, now, &mut self.enc_buffer)
.unwrap()?
.to_transmit(&self.enc_buffer)
.into_owned(),
)
@@ -162,7 +164,8 @@ impl SimGateway {
self.request_received(&packet);
let transmit = self
.sut
.handle_tun_input(reply, now, &mut self.enc_buffer)?
.handle_tun_input(reply, now, &mut self.enc_buffer)
.unwrap()?
.to_transmit(&self.enc_buffer)
.into_owned();
@@ -217,7 +220,8 @@ impl SimGateway {
.expect("src and dst are taken from incoming packet");
let transmit = self
.sut
.handle_tun_input(echo_response, now, &mut self.enc_buffer)?
.handle_tun_input(echo_response, now, &mut self.enc_buffer)
.unwrap()?
.to_transmit(&self.enc_buffer)
.into_owned();

View File

@@ -4,7 +4,7 @@ use connlib_model::DomainName;
use connlib_model::{ClientId, ResourceId};
#[cfg(not(target_os = "windows"))]
use dns_lookup::{AddrInfoHints, AddrInfoIter, LookupError};
use firezone_logging::{anyhow_dyn_err, std_dyn_err, telemetry_span};
use firezone_logging::{anyhow_dyn_err, std_dyn_err, telemetry_event, telemetry_span};
use firezone_tunnel::messages::gateway::{
AllowAccess, ClientIceCandidates, ClientsIceCandidates, ConnectionReady, EgressMessages,
IngressMessages, RejectAccess, RequestConnection,
@@ -79,6 +79,7 @@ impl Eventloop {
}
Poll::Ready(Err(e)) => {
tracing::debug!(error = std_dyn_err(&e), "Tunnel error");
telemetry_event!(error = std_dyn_err(&e), "Tunnel error");
continue;
}
Poll::Pending => {}

View File

@@ -21,6 +21,7 @@ pub use fz_p2p_control_slice::FzP2pControlSlice;
#[cfg(all(test, feature = "proptest"))]
mod proptests;
use anyhow::{Context as _, Result};
use icmpv4_header_slice_mut::Icmpv4HeaderSliceMut;
use icmpv6_header_slice_mut::Icmpv6EchoHeaderSliceMut;
use ipv4_header_slice_mut::Ipv4HeaderSliceMut;
@@ -188,18 +189,20 @@ impl ConvertibleIpv4Packet {
self.ip_header().destination_addr()
}
fn consume_to_ipv6(mut self, src: Ipv6Addr, dst: Ipv6Addr) -> Option<ConvertibleIpv6Packet> {
fn consume_to_ipv6(mut self, src: Ipv6Addr, dst: Ipv6Addr) -> Result<ConvertibleIpv6Packet> {
// `translate_in_place` expects the packet to sit at a 20-byte offset.
// `self.start` tells us where the packet actually starts, thus we need to pass `self.start - 20` to the function.
let start_minus_padding = self.start.checked_sub(NAT46_OVERHEAD)?;
let start_minus_padding = self
.start
.checked_sub(NAT46_OVERHEAD)
.context("Invalid `start`of IP packet in buffer")?;
let offset = nat46::translate_in_place(
&mut self.buf[start_minus_padding..(self.start + self.len)],
src,
dst,
)
.inspect_err(|e| tracing::trace!("NAT46 failed: {e:#}"))
.ok()?;
.context("NAT46 failed")?;
// We need to handle 2 cases here:
// `offset` > NAT46_OVERHEAD
@@ -211,7 +214,7 @@ impl ConvertibleIpv4Packet {
let len_diff = (NAT46_OVERHEAD as isize) - (offset as isize);
let len = (self.len as isize) + len_diff;
Some(ConvertibleIpv6Packet {
Ok(ConvertibleIpv6Packet {
buf: self.buf,
start: start_minus_padding + offset,
len: len as usize,
@@ -268,12 +271,10 @@ impl ConvertibleIpv6Packet {
self.header().destination_addr()
}
fn consume_to_ipv4(mut self, src: Ipv4Addr, dst: Ipv4Addr) -> Option<ConvertibleIpv4Packet> {
nat64::translate_in_place(self.packet_mut(), src, dst)
.inspect_err(|e| tracing::trace!("NAT64 failed: {e:#}"))
.ok()?;
fn consume_to_ipv4(mut self, src: Ipv4Addr, dst: Ipv4Addr) -> Result<ConvertibleIpv4Packet> {
nat64::translate_in_place(self.packet_mut(), src, dst).context("NAT64 failed")?;
Some(ConvertibleIpv4Packet {
Ok(ConvertibleIpv4Packet {
buf: self.buf,
start: self.start + 20,
len: self.len - 20,
@@ -330,17 +331,17 @@ impl IpPacket {
}
}
pub(crate) fn consume_to_ipv4(self, src: Ipv4Addr, dst: Ipv4Addr) -> Option<IpPacket> {
pub(crate) fn consume_to_ipv4(self, src: Ipv4Addr, dst: Ipv4Addr) -> Result<IpPacket> {
match self {
IpPacket::Ipv4(pkt) => Some(IpPacket::Ipv4(pkt)),
IpPacket::Ipv6(pkt) => Some(IpPacket::Ipv4(pkt.consume_to_ipv4(src, dst)?)),
IpPacket::Ipv4(pkt) => Ok(IpPacket::Ipv4(pkt)),
IpPacket::Ipv6(pkt) => Ok(IpPacket::Ipv4(pkt.consume_to_ipv4(src, dst)?)),
}
}
pub(crate) fn consume_to_ipv6(self, src: Ipv6Addr, dst: Ipv6Addr) -> Option<IpPacket> {
pub(crate) fn consume_to_ipv6(self, src: Ipv6Addr, dst: Ipv6Addr) -> Result<IpPacket> {
match self {
IpPacket::Ipv4(pkt) => Some(IpPacket::Ipv6(pkt.consume_to_ipv6(src, dst)?)),
IpPacket::Ipv6(pkt) => Some(IpPacket::Ipv6(pkt)),
IpPacket::Ipv4(pkt) => Ok(IpPacket::Ipv6(pkt.consume_to_ipv6(src, dst)?)),
IpPacket::Ipv6(pkt) => Ok(IpPacket::Ipv6(pkt)),
}
}
@@ -654,7 +655,7 @@ impl IpPacket {
src_v6: Ipv6Addr,
src_proto: Protocol,
dst: IpAddr,
) -> Option<IpPacket> {
) -> Result<IpPacket> {
let mut packet = match (&self, dst) {
(&IpPacket::Ipv4(_), IpAddr::V6(dst)) => self.consume_to_ipv6(src_v6, dst)?,
(&IpPacket::Ipv6(_), IpAddr::V4(dst)) => self.consume_to_ipv4(src_v4, dst)?,
@@ -665,7 +666,7 @@ impl IpPacket {
};
packet.set_source_protocol(src_proto.value());
Some(packet)
Ok(packet)
}
pub fn translate_source(
@@ -674,7 +675,7 @@ impl IpPacket {
dst_v6: Ipv6Addr,
dst_proto: Protocol,
src: IpAddr,
) -> Option<IpPacket> {
) -> Result<IpPacket> {
let mut packet = match (&self, src) {
(&IpPacket::Ipv4(_), IpAddr::V6(src)) => self.consume_to_ipv6(src, dst_v6)?,
(&IpPacket::Ipv6(_), IpAddr::V4(src)) => self.consume_to_ipv4(src, dst_v4)?,
@@ -685,7 +686,7 @@ impl IpPacket {
};
packet.set_destination_protocol(dst_proto.value());
Some(packet)
Ok(packet)
}
#[inline]