mirror of
https://github.com/outbackdingo/firezone.git
synced 2026-01-27 10:18:54 +00:00
chore(gateway): report more tunnel errors to event-loop (#7299)
Currently, the Gateway's state machine functions for processing packets use type-signature that only return `Option`. Any errors while processing packets are logged internally. This makes it difficult to consistently log these errors. We refactor these functions to return `Result<Option<T>>` in most cases, indicating that they may fail for various reasons and also sometimes succeed without producing an output. This allows us to consistently log these errors in the event-loop. Logging them on WARN or ERROR would be too spammy though. In order to still be alerted about some of these, we use the `telemetry_event!` macro which samples them at a rate of 1%. This will alert us about cases that happen often and allows us to handle them explicitly. Once this is deployed to staging, I will monitor the alerts in Sentry to ensure we won't get spammed with events from customers on the next release.
This commit is contained in:
@@ -4,11 +4,11 @@ use crate::messages::{
|
||||
use crate::utils::earliest;
|
||||
use crate::{p2p_control, GatewayEvent};
|
||||
use crate::{peer::ClientOnGateway, peer_store::PeerStore};
|
||||
use anyhow::Context;
|
||||
use anyhow::{Context, Result};
|
||||
use boringtun::x25519::PublicKey;
|
||||
use chrono::{DateTime, Utc};
|
||||
use connlib_model::{ClientId, DomainName, RelayId, ResourceId};
|
||||
use firezone_logging::{anyhow_dyn_err, std_dyn_err, telemetry_span};
|
||||
use firezone_logging::{anyhow_dyn_err, telemetry_span};
|
||||
use ip_network::{Ipv4Network, Ipv6Network};
|
||||
use ip_packet::{FzP2pControlSlice, IpPacket};
|
||||
use secrecy::{ExposeSecret as _, Secret};
|
||||
@@ -85,32 +85,30 @@ impl GatewayState {
|
||||
packet: IpPacket,
|
||||
now: Instant,
|
||||
buffer: &mut EncryptBuffer,
|
||||
) -> Option<snownet::EncryptedPacket> {
|
||||
) -> Result<Option<snownet::EncryptedPacket>> {
|
||||
let dst = packet.destination();
|
||||
|
||||
if !is_client(dst) {
|
||||
return None;
|
||||
}
|
||||
anyhow::ensure!(is_client(dst), "Packet not destined for a client");
|
||||
|
||||
let Some(peer) = self.peers.peer_by_ip_mut(dst) else {
|
||||
tracing::trace!(%dst, "Couldn't find connection by IP");
|
||||
|
||||
return None;
|
||||
};
|
||||
let peer = self
|
||||
.peers
|
||||
.peer_by_ip_mut(dst)
|
||||
.context("Couldn't find connection by IP")?;
|
||||
let cid = peer.id();
|
||||
|
||||
let packet = peer
|
||||
.encapsulate(packet, now)
|
||||
.inspect_err(|e| tracing::debug!(%cid, "Failed to encapsulate: {e:#}"))
|
||||
.ok()??;
|
||||
.translate_inbound(packet, now)
|
||||
.context("Failed to translate packet")?;
|
||||
|
||||
let transmit = self
|
||||
let Some(encrypted_packet) = self
|
||||
.node
|
||||
.encapsulate(cid, packet, now, buffer)
|
||||
.inspect_err(|e| tracing::debug!(error = std_dyn_err(e), %cid, "Failed to encapsulate"))
|
||||
.ok()??;
|
||||
.context("Failed to encapsulate")?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
Some(transmit)
|
||||
Ok(Some(encrypted_packet))
|
||||
}
|
||||
|
||||
/// Handles UDP packets received on the network interface.
|
||||
@@ -125,40 +123,44 @@ impl GatewayState {
|
||||
from: SocketAddr,
|
||||
packet: &[u8],
|
||||
now: Instant,
|
||||
) -> Option<IpPacket> {
|
||||
let (cid, packet) = self.node.decapsulate(
|
||||
local,
|
||||
from,
|
||||
packet,
|
||||
now,
|
||||
)
|
||||
.inspect_err(|e| tracing::debug!(error = std_dyn_err(e), %from, num_bytes = %packet.len(), "Failed to decapsulate incoming packet"))
|
||||
.ok()??;
|
||||
|
||||
let Some(peer) = self.peers.get_mut(&cid) else {
|
||||
tracing::warn!(%cid, "Couldn't find connection by ID");
|
||||
|
||||
return None;
|
||||
) -> Result<Option<IpPacket>> {
|
||||
let Some((cid, packet)) = self
|
||||
.node
|
||||
.decapsulate(local, from, packet, now)
|
||||
.context("Failed to decapsulate")?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let peer = self
|
||||
.peers
|
||||
.get_mut(&cid)
|
||||
.context("Failed to find connection by ID")?;
|
||||
|
||||
if let Some(fz_p2p_control) = packet.as_fz_p2p_control() {
|
||||
let response =
|
||||
handle_p2p_control_packet(fz_p2p_control, peer, &mut self.buffered_events)?;
|
||||
let Some(immediate_response) =
|
||||
handle_p2p_control_packet(fz_p2p_control, peer, &mut self.buffered_events)
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let mut buffer = EncryptBuffer::new();
|
||||
let transmit = encrypt_packet(response, cid, &mut self.node, &mut buffer, now)?;
|
||||
let Some(transmit) =
|
||||
encrypt_packet(immediate_response, cid, &mut self.node, &mut buffer, now)?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
self.buffered_transmits.push_back(transmit.into_owned());
|
||||
|
||||
return None;
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let packet = peer
|
||||
.decapsulate(packet, now)
|
||||
.inspect_err(|e| tracing::debug!(%cid, "Invalid packet: {e:#}"))
|
||||
.ok()?;
|
||||
.translate_outbound(packet, now)
|
||||
.context("Failed to translate packet")?;
|
||||
|
||||
Some(packet)
|
||||
Ok(Some(packet))
|
||||
}
|
||||
|
||||
pub fn cleanup_connection(&mut self, id: &ClientId) {
|
||||
@@ -336,7 +338,7 @@ impl GatewayState {
|
||||
let packet = dns_resource_nat::domain_status(req.resource, req.domain, nat_status);
|
||||
|
||||
let mut buffer = EncryptBuffer::new();
|
||||
let Some(transmit) = encrypt_packet(packet, req.client, &mut self.node, &mut buffer, now)
|
||||
let Some(transmit) = encrypt_packet(packet, req.client, &mut self.node, &mut buffer, now)?
|
||||
else {
|
||||
return Ok(());
|
||||
};
|
||||
@@ -499,13 +501,15 @@ fn encrypt_packet<'a>(
|
||||
node: &mut ServerNode<ClientId, RelayId>,
|
||||
buffer: &'a mut EncryptBuffer,
|
||||
now: Instant,
|
||||
) -> Option<Transmit<'a>> {
|
||||
let encrypted_packet = node
|
||||
) -> Result<Option<Transmit<'a>>> {
|
||||
let Some(encrypted_packet) = node
|
||||
.encapsulate(cid, packet, now, buffer)
|
||||
.inspect_err(|e| tracing::debug!(error = std_dyn_err(e), %cid, "Failed to encapsulate"))
|
||||
.ok()??;
|
||||
.context("Failed to encapsulate packet")?
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
Some(encrypted_packet.to_transmit(buffer))
|
||||
Ok(Some(encrypted_packet.to_transmit(buffer)))
|
||||
}
|
||||
|
||||
/// Opaque request struct for when a domain name needs to be resolved.
|
||||
|
||||
@@ -246,9 +246,10 @@ impl GatewayTunnel {
|
||||
}
|
||||
Poll::Ready(io::Input::Device(packet)) => {
|
||||
let now = Instant::now();
|
||||
let Some(enc_packet) =
|
||||
self.role_state
|
||||
.handle_tun_input(packet, now, &mut self.encrypt_buf)
|
||||
let Some(enc_packet) = self
|
||||
.role_state
|
||||
.handle_tun_input(packet, now, &mut self.encrypt_buf)
|
||||
.map_err(std::io::Error::other)?
|
||||
else {
|
||||
self.role_state.handle_timeout(now, Utc::now());
|
||||
continue;
|
||||
@@ -264,12 +265,16 @@ impl GatewayTunnel {
|
||||
let utc_now = Utc::now();
|
||||
|
||||
for received in packets {
|
||||
let Some(packet) = self.role_state.handle_network_input(
|
||||
received.local,
|
||||
received.from,
|
||||
received.packet,
|
||||
now,
|
||||
) else {
|
||||
let Some(packet) = self
|
||||
.role_state
|
||||
.handle_network_input(
|
||||
received.local,
|
||||
received.from,
|
||||
received.packet,
|
||||
now,
|
||||
)
|
||||
.map_err(std::io::Error::other)?
|
||||
else {
|
||||
self.role_state.handle_timeout(now, utc_now);
|
||||
continue;
|
||||
};
|
||||
|
||||
@@ -428,7 +428,11 @@ impl ClientOnGateway {
|
||||
Ok(packet)
|
||||
}
|
||||
|
||||
pub fn decapsulate(&mut self, packet: IpPacket, now: Instant) -> anyhow::Result<IpPacket> {
|
||||
pub fn translate_outbound(
|
||||
&mut self,
|
||||
packet: IpPacket,
|
||||
now: Instant,
|
||||
) -> anyhow::Result<IpPacket> {
|
||||
self.ensure_allowed_src(&packet)?;
|
||||
self.ensure_allowed_dst(&packet)?;
|
||||
|
||||
@@ -437,18 +441,16 @@ impl ClientOnGateway {
|
||||
Ok(packet)
|
||||
}
|
||||
|
||||
pub fn encapsulate(
|
||||
pub fn translate_inbound(
|
||||
&mut self,
|
||||
packet: IpPacket,
|
||||
now: Instant,
|
||||
) -> anyhow::Result<Option<IpPacket>> {
|
||||
) -> anyhow::Result<IpPacket> {
|
||||
let Some((proto, ip)) = self.nat_table.translate_incoming(&packet, now)? else {
|
||||
return Ok(Some(packet));
|
||||
return Ok(packet);
|
||||
};
|
||||
|
||||
let Some(mut packet) = packet.translate_source(self.ipv4, self.ipv6, proto, ip) else {
|
||||
return Ok(None);
|
||||
};
|
||||
let mut packet = packet.translate_source(self.ipv4, self.ipv6, proto, ip)?;
|
||||
|
||||
self.permanent_translations
|
||||
.get_mut(&ip)
|
||||
@@ -457,7 +459,7 @@ impl ClientOnGateway {
|
||||
|
||||
packet.update_checksum();
|
||||
|
||||
Ok(Some(packet))
|
||||
Ok(packet)
|
||||
}
|
||||
|
||||
pub(crate) fn is_allowed(&self, resource: ResourceId) -> bool {
|
||||
@@ -1134,7 +1136,7 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert!(peer.decapsulate(pkt, Instant::now()).is_ok());
|
||||
assert!(peer.translate_outbound(pkt, Instant::now()).is_ok());
|
||||
|
||||
let pkt = ip_packet::make::udp_packet(
|
||||
source_v4_addr(),
|
||||
@@ -1145,7 +1147,7 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert!(peer.decapsulate(pkt, Instant::now()).is_err());
|
||||
assert!(peer.translate_outbound(pkt, Instant::now()).is_err());
|
||||
|
||||
let pkt = ip_packet::make::udp_packet(
|
||||
source_v4_addr(),
|
||||
@@ -1156,7 +1158,7 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert!(peer.decapsulate(pkt, Instant::now()).is_err());
|
||||
assert!(peer.translate_outbound(pkt, Instant::now()).is_err());
|
||||
|
||||
let pkt = ip_packet::make::udp_packet(
|
||||
source_v4_addr(),
|
||||
@@ -1167,7 +1169,7 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert!(peer.decapsulate(pkt, Instant::now()).is_ok());
|
||||
assert!(peer.translate_outbound(pkt, Instant::now()).is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1193,7 +1195,7 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert!(peer.decapsulate(pkt, Instant::now()).is_ok());
|
||||
assert!(peer.translate_outbound(pkt, Instant::now()).is_ok());
|
||||
|
||||
let pkt = ip_packet::make::udp_packet(
|
||||
source_v4_addr(),
|
||||
@@ -1204,7 +1206,7 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert!(peer.decapsulate(pkt, Instant::now()).is_err());
|
||||
assert!(peer.translate_outbound(pkt, Instant::now()).is_err());
|
||||
|
||||
let pkt = ip_packet::make::udp_packet(
|
||||
source_v4_addr(),
|
||||
@@ -1215,7 +1217,7 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert!(peer.decapsulate(pkt, Instant::now()).is_ok());
|
||||
assert!(peer.translate_outbound(pkt, Instant::now()).is_ok());
|
||||
}
|
||||
|
||||
fn foo_dns_resource() -> crate::messages::gateway::ResourceDescription {
|
||||
|
||||
@@ -57,12 +57,13 @@ impl SimGateway {
|
||||
now: Instant,
|
||||
utc_now: DateTime<Utc>,
|
||||
) -> Option<Transmit<'static>> {
|
||||
let Some(packet) = self.sut.handle_network_input(
|
||||
transmit.dst,
|
||||
transmit.src.unwrap(),
|
||||
&transmit.payload,
|
||||
now,
|
||||
) else {
|
||||
let Some(packet) = self
|
||||
.sut
|
||||
.handle_network_input(transmit.dst, transmit.src.unwrap(), &transmit.payload, now)
|
||||
.inspect_err(|e| tracing::warn!("{e:#}"))
|
||||
.ok()
|
||||
.flatten()
|
||||
else {
|
||||
self.sut.handle_timeout(now, utc_now);
|
||||
return None;
|
||||
};
|
||||
@@ -91,7 +92,8 @@ impl SimGateway {
|
||||
.filter_map(|packet| {
|
||||
Some(
|
||||
self.sut
|
||||
.handle_tun_input(packet, now, &mut self.enc_buffer)?
|
||||
.handle_tun_input(packet, now, &mut self.enc_buffer)
|
||||
.unwrap()?
|
||||
.to_transmit(&self.enc_buffer)
|
||||
.into_owned(),
|
||||
)
|
||||
@@ -162,7 +164,8 @@ impl SimGateway {
|
||||
self.request_received(&packet);
|
||||
let transmit = self
|
||||
.sut
|
||||
.handle_tun_input(reply, now, &mut self.enc_buffer)?
|
||||
.handle_tun_input(reply, now, &mut self.enc_buffer)
|
||||
.unwrap()?
|
||||
.to_transmit(&self.enc_buffer)
|
||||
.into_owned();
|
||||
|
||||
@@ -217,7 +220,8 @@ impl SimGateway {
|
||||
.expect("src and dst are taken from incoming packet");
|
||||
let transmit = self
|
||||
.sut
|
||||
.handle_tun_input(echo_response, now, &mut self.enc_buffer)?
|
||||
.handle_tun_input(echo_response, now, &mut self.enc_buffer)
|
||||
.unwrap()?
|
||||
.to_transmit(&self.enc_buffer)
|
||||
.into_owned();
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ use connlib_model::DomainName;
|
||||
use connlib_model::{ClientId, ResourceId};
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
use dns_lookup::{AddrInfoHints, AddrInfoIter, LookupError};
|
||||
use firezone_logging::{anyhow_dyn_err, std_dyn_err, telemetry_span};
|
||||
use firezone_logging::{anyhow_dyn_err, std_dyn_err, telemetry_event, telemetry_span};
|
||||
use firezone_tunnel::messages::gateway::{
|
||||
AllowAccess, ClientIceCandidates, ClientsIceCandidates, ConnectionReady, EgressMessages,
|
||||
IngressMessages, RejectAccess, RequestConnection,
|
||||
@@ -79,6 +79,7 @@ impl Eventloop {
|
||||
}
|
||||
Poll::Ready(Err(e)) => {
|
||||
tracing::debug!(error = std_dyn_err(&e), "Tunnel error");
|
||||
telemetry_event!(error = std_dyn_err(&e), "Tunnel error");
|
||||
continue;
|
||||
}
|
||||
Poll::Pending => {}
|
||||
|
||||
@@ -21,6 +21,7 @@ pub use fz_p2p_control_slice::FzP2pControlSlice;
|
||||
#[cfg(all(test, feature = "proptest"))]
|
||||
mod proptests;
|
||||
|
||||
use anyhow::{Context as _, Result};
|
||||
use icmpv4_header_slice_mut::Icmpv4HeaderSliceMut;
|
||||
use icmpv6_header_slice_mut::Icmpv6EchoHeaderSliceMut;
|
||||
use ipv4_header_slice_mut::Ipv4HeaderSliceMut;
|
||||
@@ -188,18 +189,20 @@ impl ConvertibleIpv4Packet {
|
||||
self.ip_header().destination_addr()
|
||||
}
|
||||
|
||||
fn consume_to_ipv6(mut self, src: Ipv6Addr, dst: Ipv6Addr) -> Option<ConvertibleIpv6Packet> {
|
||||
fn consume_to_ipv6(mut self, src: Ipv6Addr, dst: Ipv6Addr) -> Result<ConvertibleIpv6Packet> {
|
||||
// `translate_in_place` expects the packet to sit at a 20-byte offset.
|
||||
// `self.start` tells us where the packet actually starts, thus we need to pass `self.start - 20` to the function.
|
||||
let start_minus_padding = self.start.checked_sub(NAT46_OVERHEAD)?;
|
||||
let start_minus_padding = self
|
||||
.start
|
||||
.checked_sub(NAT46_OVERHEAD)
|
||||
.context("Invalid `start`of IP packet in buffer")?;
|
||||
|
||||
let offset = nat46::translate_in_place(
|
||||
&mut self.buf[start_minus_padding..(self.start + self.len)],
|
||||
src,
|
||||
dst,
|
||||
)
|
||||
.inspect_err(|e| tracing::trace!("NAT46 failed: {e:#}"))
|
||||
.ok()?;
|
||||
.context("NAT46 failed")?;
|
||||
|
||||
// We need to handle 2 cases here:
|
||||
// `offset` > NAT46_OVERHEAD
|
||||
@@ -211,7 +214,7 @@ impl ConvertibleIpv4Packet {
|
||||
let len_diff = (NAT46_OVERHEAD as isize) - (offset as isize);
|
||||
let len = (self.len as isize) + len_diff;
|
||||
|
||||
Some(ConvertibleIpv6Packet {
|
||||
Ok(ConvertibleIpv6Packet {
|
||||
buf: self.buf,
|
||||
start: start_minus_padding + offset,
|
||||
len: len as usize,
|
||||
@@ -268,12 +271,10 @@ impl ConvertibleIpv6Packet {
|
||||
self.header().destination_addr()
|
||||
}
|
||||
|
||||
fn consume_to_ipv4(mut self, src: Ipv4Addr, dst: Ipv4Addr) -> Option<ConvertibleIpv4Packet> {
|
||||
nat64::translate_in_place(self.packet_mut(), src, dst)
|
||||
.inspect_err(|e| tracing::trace!("NAT64 failed: {e:#}"))
|
||||
.ok()?;
|
||||
fn consume_to_ipv4(mut self, src: Ipv4Addr, dst: Ipv4Addr) -> Result<ConvertibleIpv4Packet> {
|
||||
nat64::translate_in_place(self.packet_mut(), src, dst).context("NAT64 failed")?;
|
||||
|
||||
Some(ConvertibleIpv4Packet {
|
||||
Ok(ConvertibleIpv4Packet {
|
||||
buf: self.buf,
|
||||
start: self.start + 20,
|
||||
len: self.len - 20,
|
||||
@@ -330,17 +331,17 @@ impl IpPacket {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn consume_to_ipv4(self, src: Ipv4Addr, dst: Ipv4Addr) -> Option<IpPacket> {
|
||||
pub(crate) fn consume_to_ipv4(self, src: Ipv4Addr, dst: Ipv4Addr) -> Result<IpPacket> {
|
||||
match self {
|
||||
IpPacket::Ipv4(pkt) => Some(IpPacket::Ipv4(pkt)),
|
||||
IpPacket::Ipv6(pkt) => Some(IpPacket::Ipv4(pkt.consume_to_ipv4(src, dst)?)),
|
||||
IpPacket::Ipv4(pkt) => Ok(IpPacket::Ipv4(pkt)),
|
||||
IpPacket::Ipv6(pkt) => Ok(IpPacket::Ipv4(pkt.consume_to_ipv4(src, dst)?)),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn consume_to_ipv6(self, src: Ipv6Addr, dst: Ipv6Addr) -> Option<IpPacket> {
|
||||
pub(crate) fn consume_to_ipv6(self, src: Ipv6Addr, dst: Ipv6Addr) -> Result<IpPacket> {
|
||||
match self {
|
||||
IpPacket::Ipv4(pkt) => Some(IpPacket::Ipv6(pkt.consume_to_ipv6(src, dst)?)),
|
||||
IpPacket::Ipv6(pkt) => Some(IpPacket::Ipv6(pkt)),
|
||||
IpPacket::Ipv4(pkt) => Ok(IpPacket::Ipv6(pkt.consume_to_ipv6(src, dst)?)),
|
||||
IpPacket::Ipv6(pkt) => Ok(IpPacket::Ipv6(pkt)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -654,7 +655,7 @@ impl IpPacket {
|
||||
src_v6: Ipv6Addr,
|
||||
src_proto: Protocol,
|
||||
dst: IpAddr,
|
||||
) -> Option<IpPacket> {
|
||||
) -> Result<IpPacket> {
|
||||
let mut packet = match (&self, dst) {
|
||||
(&IpPacket::Ipv4(_), IpAddr::V6(dst)) => self.consume_to_ipv6(src_v6, dst)?,
|
||||
(&IpPacket::Ipv6(_), IpAddr::V4(dst)) => self.consume_to_ipv4(src_v4, dst)?,
|
||||
@@ -665,7 +666,7 @@ impl IpPacket {
|
||||
};
|
||||
packet.set_source_protocol(src_proto.value());
|
||||
|
||||
Some(packet)
|
||||
Ok(packet)
|
||||
}
|
||||
|
||||
pub fn translate_source(
|
||||
@@ -674,7 +675,7 @@ impl IpPacket {
|
||||
dst_v6: Ipv6Addr,
|
||||
dst_proto: Protocol,
|
||||
src: IpAddr,
|
||||
) -> Option<IpPacket> {
|
||||
) -> Result<IpPacket> {
|
||||
let mut packet = match (&self, src) {
|
||||
(&IpPacket::Ipv4(_), IpAddr::V6(src)) => self.consume_to_ipv6(src, dst_v6)?,
|
||||
(&IpPacket::Ipv6(_), IpAddr::V4(src)) => self.consume_to_ipv4(src, dst_v4)?,
|
||||
@@ -685,7 +686,7 @@ impl IpPacket {
|
||||
};
|
||||
packet.set_destination_protocol(dst_proto.value());
|
||||
|
||||
Some(packet)
|
||||
Ok(packet)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
||||
Reference in New Issue
Block a user