From 19f51568c2c5fad2e237db31485f94427140e615 Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Tue, 12 Nov 2024 04:00:02 +0000 Subject: [PATCH] chore(rust): don't pass errors as values for debug logs (#7318) Our logging library `tracing` supports structured logging. Structured logging means we can include values within a `tracing::Event` without having to immediately format it as a string. Processing these values - such as errors - as their original type allows the various `tracing` layers to capture and represent them as they see fit. One of these layers is responsible for sending ERROR and WARN events to Sentry, as part of which `std::error::Error` values get automatically captured as so-called "sentry exceptions". Unfortunately, there is a caveat: If an `std::error::Error` value is included in an event that does not get mapped to an exception, the `error` field is completely lost. See https://github.com/getsentry/sentry-rust/issues/702 for details. To work around this, we introduce a `err_with_sources` adapter that an error and all its sources together into a string. For all `tracing::debug!` statements, we then use this to report these errors. It is really unfortunate that we have to do this and cannot use the same mechanism, regardless of the log level. However, until this is fixed upstream, this will do and gives us better information in the log submitted to Sentry. --- rust/Cargo.lock | 1 + rust/connlib/snownet/src/allocation.rs | 4 +-- rust/connlib/snownet/src/node.rs | 6 ++-- rust/connlib/tunnel/src/client.rs | 18 +++++----- rust/connlib/tunnel/src/io.rs | 4 +-- rust/dns-over-tcp/src/server.rs | 3 +- rust/gateway/src/eventloop.rs | 14 ++++---- rust/logging/Cargo.toml | 3 ++ rust/logging/src/err_with_sources.rs | 49 ++++++++++++++++++++++++++ rust/logging/src/lib.rs | 2 ++ rust/logging/src/unwrap_or.rs | 4 +-- rust/phoenix-channel/src/lib.rs | 4 +-- 12 files changed, 85 insertions(+), 27 deletions(-) create mode 100644 rust/logging/src/err_with_sources.rs diff --git a/rust/Cargo.lock b/rust/Cargo.lock index ddf595625..959457b3f 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -2131,6 +2131,7 @@ dependencies = [ "nu-ansi-term 0.50.1", "rand 0.8.5", "sentry-tracing", + "thiserror", "time", "tracing", "tracing-appender", diff --git a/rust/connlib/snownet/src/allocation.rs b/rust/connlib/snownet/src/allocation.rs index faaa4cfe1..5a6ec9419 100644 --- a/rust/connlib/snownet/src/allocation.rs +++ b/rust/connlib/snownet/src/allocation.rs @@ -7,7 +7,7 @@ use crate::{ }; use ::backoff::backoff::Backoff; use bytecodec::{DecodeExt as _, EncodeExt as _}; -use firezone_logging::std_dyn_err; +use firezone_logging::{err_with_sources, std_dyn_err}; use hex_display::HexDisplayExt as _; use rand::random; use std::{ @@ -1227,7 +1227,7 @@ impl ChannelBindings { fn try_decode<'p>(&mut self, packet: &'p [u8], now: Instant) -> Option<(SocketAddr, &'p [u8])> { let (channel_number, payload) = crate::channel_data::decode(packet) .inspect_err(|e| { - tracing::debug!(error = std_dyn_err(e), "Malformed channel data message") + tracing::debug!("Malformed channel data message: {}", err_with_sources(e)) }) .ok()?; let Some(channel) = self.inner.get_mut(&channel_number) else { diff --git a/rust/connlib/snownet/src/node.rs b/rust/connlib/snownet/src/node.rs index c3f76a679..ef8b8d149 100644 --- a/rust/connlib/snownet/src/node.rs +++ b/rust/connlib/snownet/src/node.rs @@ -9,7 +9,7 @@ use boringtun::noise::{Tunn, TunnResult}; use boringtun::x25519::PublicKey; use boringtun::{noise::rate_limiter::RateLimiter, x25519::StaticSecret}; use core::fmt; -use firezone_logging::std_dyn_err; +use firezone_logging::err_with_sources; use hex_display::HexDisplayExt; use ip_packet::{ ConvertibleIpv4Packet, ConvertibleIpv6Packet, IpPacket, IpPacketBuf, MAX_DATAGRAM_PAYLOAD, @@ -336,7 +336,7 @@ where let candidate = match Candidate::from_sdp_string(&candidate) { Ok(c) => c, Err(e) => { - tracing::debug!(error = std_dyn_err(&e), "Failed to parse candidate"); + tracing::debug!("Failed to parse candidate: {}", err_with_sources(&e)); return; } }; @@ -372,7 +372,7 @@ where let candidate = match Candidate::from_sdp_string(&candidate) { Ok(c) => c, Err(e) => { - tracing::debug!(error = std_dyn_err(&e), "Failed to parse candidate"); + tracing::debug!("Failed to parse candidate: {}", err_with_sources(&e)); return; } }; diff --git a/rust/connlib/tunnel/src/client.rs b/rust/connlib/tunnel/src/client.rs index 7ac54e551..9c8fc5884 100644 --- a/rust/connlib/tunnel/src/client.rs +++ b/rust/connlib/tunnel/src/client.rs @@ -15,7 +15,7 @@ use connlib_model::PublicKey; use connlib_model::{GatewayId, RelayId, ResourceId, ResourceStatus, ResourceView}; use connlib_model::{Site, SiteId}; use firezone_logging::{ - anyhow_dyn_err, std_dyn_err, telemetry_event, unwrap_or_debug, unwrap_or_warn, + anyhow_dyn_err, err_with_sources, std_dyn_err, telemetry_event, unwrap_or_debug, unwrap_or_warn, }; use ip_network::{IpNetwork, Ipv4Network, Ipv6Network}; use ip_network_table::IpNetworkTable; @@ -327,7 +327,7 @@ impl ClientState { packet.as_ref(), now, ) - .inspect_err(|e| tracing::debug!(error = std_dyn_err(e), %local, num_bytes = %packet.len(), "Failed to decapsulate incoming packet")) + .inspect_err(|e| tracing::debug!(%local, num_bytes = %packet.len(), "Failed to decapsulate incoming packet: {}", err_with_sources(e))) .ok()??; if self.tcp_dns_client.accepts(&packet) { @@ -382,7 +382,7 @@ impl ClientState { }) .unwrap_or_else(|e| { let error = std_dyn_err(&e); - tracing::debug!(error, "Recursive UDP DNS query failed"); + tracing::debug!("Recursive UDP DNS query failed: {}", err_with_sources(&e)); telemetry_event!(error, "Recursive UDP DNS query failed"); dns::servfail(response.query.for_slice_ref()) @@ -400,7 +400,7 @@ impl ClientState { }) .unwrap_or_else(|e| { let error = std_dyn_err(&e); - tracing::debug!(error, "Recursive TCP DNS query failed"); + tracing::debug!("Recursive TCP DNS query failed: {}", err_with_sources(&e)); telemetry_event!(error, "Recursive TCP DNS query failed"); dns::servfail(response.query.for_slice_ref()) @@ -453,7 +453,9 @@ impl ClientState { let transmit = self .node .encapsulate(gid, packet, now, buffer) - .inspect_err(|e| tracing::debug!(error = std_dyn_err(e), %gid, "Failed to encapsulate")) + .inspect_err( + |e| tracing::debug!(%gid, "Failed to encapsulate: {}", err_with_sources(e)), + ) .ok()??; Some(transmit) @@ -984,7 +986,7 @@ impl ClientState { dns::ResolveStrategy::LocalResponse(response) => { unwrap_or_debug!( self.try_queue_udp_dns_response(upstream, source, &response), - "Failed to queue UDP DNS response" + "Failed to queue UDP DNS response: {}" ); } dns::ResolveStrategy::Recurse => { @@ -1026,7 +1028,7 @@ impl ClientState { dns::ResolveStrategy::LocalResponse(response) => { unwrap_or_debug!( self.tcp_dns_server.send_message(query.socket, response), - "Failed to send TCP DNS response" + "Failed to send TCP DNS response: {}" ); } dns::ResolveStrategy::Recurse => { @@ -1046,7 +1048,7 @@ impl ClientState { query.socket, dns::servfail(message.for_slice_ref()) ), - "Failed to send TCP DNS response" + "Failed to send TCP DNS response: {}" ); return; } diff --git a/rust/connlib/tunnel/src/io.rs b/rust/connlib/tunnel/src/io.rs index fcd54cfe4..c3191243d 100644 --- a/rust/connlib/tunnel/src/io.rs +++ b/rust/connlib/tunnel/src/io.rs @@ -1,6 +1,6 @@ use crate::{device_channel::Device, dns, sockets::Sockets}; use domain::base::Message; -use firezone_logging::{std_dyn_err, telemetry_span}; +use firezone_logging::{err_with_sources, std_dyn_err, telemetry_span}; use futures::{ future::{self, Either}, stream, Stream, StreamExt, @@ -352,7 +352,7 @@ async fn tun_send_recv( { Either::Left((Some(Command::SendPacket(p)), _)) => { if let Err(e) = device.write(p) { - tracing::debug!(error = std_dyn_err(&e), "Failed to write TUN packet"); + tracing::debug!("Failed to write TUN packet: {}", err_with_sources(&e)); }; } Either::Left((Some(Command::UpdateTun(tun)), _)) => { diff --git a/rust/dns-over-tcp/src/server.rs b/rust/dns-over-tcp/src/server.rs index 0dadd9c3e..862c32e4b 100644 --- a/rust/dns-over-tcp/src/server.rs +++ b/rust/dns-over-tcp/src/server.rs @@ -10,7 +10,6 @@ use crate::{ }; use anyhow::{Context as _, Result}; use domain::{base::Message, dep::octseq::OctetsInto as _}; -use firezone_logging::anyhow_dyn_err; use ip_packet::IpPacket; use smoltcp::{ iface::{Interface, PollResult, SocketSet}, @@ -188,7 +187,7 @@ impl Server { }); } Err(e) => { - tracing::debug!(error = anyhow_dyn_err(&e), "Error on receiving DNS query"); + tracing::debug!("Error on receiving DNS query: {e:#}"); socket.abort(); break; } diff --git a/rust/gateway/src/eventloop.rs b/rust/gateway/src/eventloop.rs index e2d48262e..cb49d51e4 100644 --- a/rust/gateway/src/eventloop.rs +++ b/rust/gateway/src/eventloop.rs @@ -4,7 +4,9 @@ use connlib_model::DomainName; use connlib_model::{ClientId, ResourceId}; #[cfg(not(target_os = "windows"))] use dns_lookup::{AddrInfoHints, AddrInfoIter, LookupError}; -use firezone_logging::{anyhow_dyn_err, std_dyn_err, telemetry_event, telemetry_span}; +use firezone_logging::{ + anyhow_dyn_err, err_with_sources, std_dyn_err, telemetry_event, telemetry_span, +}; use firezone_tunnel::messages::gateway::{ AllowAccess, ClientIceCandidates, ClientsIceCandidates, ConnectionReady, EgressMessages, IngressMessages, RejectAccess, RequestConnection, @@ -78,7 +80,7 @@ impl Eventloop { continue; } Poll::Ready(Err(e)) => { - tracing::debug!(error = std_dyn_err(&e), "Tunnel error"); + tracing::debug!("Tunnel error: {}", err_with_sources(&e)); telemetry_event!(error = std_dyn_err(&e), "Tunnel error"); continue; } @@ -353,7 +355,7 @@ impl Eventloop { req: RequestConnection, ) { let addresses = result - .inspect_err(|e| tracing::debug!(error = std_dyn_err(e), client = %req.client.id, reference = %req.reference, "DNS resolution timed out as part of connection request")) + .inspect_err(|e| tracing::debug!(client = %req.client.id, reference = %req.reference, "DNS resolution timed out as part of connection request: {}", err_with_sources(e))) .unwrap_or_default(); let answer = match self.tunnel.state_mut().accept( @@ -392,7 +394,7 @@ impl Eventloop { let client = req.client.id; self.tunnel.state_mut().cleanup_connection(&client); - tracing::debug!(error = anyhow_dyn_err(&e), %client, "Connection request failed"); + tracing::debug!(%client, "Connection request failed: {e:#}"); return; } @@ -409,7 +411,7 @@ impl Eventloop { pub fn allow_access(&mut self, result: Result, Timeout>, req: AllowAccess) { let addresses = result - .inspect_err(|e| tracing::debug!(error = std_dyn_err(e), client = %req.client_id, reference = %req.reference, "DNS resolution timed out as part of allow access request")) + .inspect_err(|e| tracing::debug!(client = %req.client_id, reference = %req.reference, "DNS resolution timed out as part of allow access request: {}", err_with_sources(e))) .unwrap_or_default(); if let Err(e) = self.tunnel.state_mut().allow_access( @@ -433,7 +435,7 @@ impl Eventloop { name: DomainName, ) { let addresses = result - .inspect_err(|e| tracing::debug!(error = std_dyn_err(e), %conn_id, "DNS resolution timed out as part of allow access request")) + .inspect_err(|e| tracing::debug!(%conn_id, "DNS resolution timed out as part of allow access request: {}", err_with_sources(e))) .unwrap_or_default(); self.tunnel.state_mut().refresh_translation( diff --git a/rust/logging/Cargo.toml b/rust/logging/Cargo.toml index a33f08ac5..9d15ad4fc 100644 --- a/rust/logging/Cargo.toml +++ b/rust/logging/Cargo.toml @@ -18,5 +18,8 @@ tracing-log = "0.2" tracing-stackdriver = { version = "0.11.0" } tracing-subscriber = { workspace = true, features = ["env-filter"] } +[dev-dependencies] +thiserror = "1" + [lints] workspace = true diff --git a/rust/logging/src/err_with_sources.rs b/rust/logging/src/err_with_sources.rs new file mode 100644 index 000000000..61a9c0f0d --- /dev/null +++ b/rust/logging/src/err_with_sources.rs @@ -0,0 +1,49 @@ +use core::fmt; +use std::error::Error; + +/// Returns a [`fmt::Display`] adapter that prints the error and all its sources. +pub fn err_with_sources<'a>(e: &'a (dyn Error + 'static)) -> ErrorWithSources<'a> { + ErrorWithSources { e } +} + +pub struct ErrorWithSources<'a> { + e: &'a (dyn Error + 'static), +} + +impl<'a> fmt::Display for ErrorWithSources<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.e)?; + + for cause in anyhow::Chain::new(self.e).skip(1) { + write!(f, ": {}", cause)?; + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn prints_errors_with_sources() { + let error = Error3(Error2(Error1)); + + let display = err_with_sources(&error); + + assert_eq!(display.to_string(), "Argh: Failed to do the thing: oh no!"); + } + + #[derive(thiserror::Error, Debug)] + #[error("oh no!")] + struct Error1; + + #[derive(thiserror::Error, Debug)] + #[error("Failed to do the thing")] + struct Error2(#[source] Error1); + + #[derive(thiserror::Error, Debug)] + #[error("Argh")] + struct Error3(#[source] Error2); +} diff --git a/rust/logging/src/lib.rs b/rust/logging/src/lib.rs index 6c2d1d670..661ba10a3 100644 --- a/rust/logging/src/lib.rs +++ b/rust/logging/src/lib.rs @@ -3,6 +3,7 @@ pub mod file; mod format; #[macro_use] mod unwrap_or; +mod err_with_sources; use sentry_tracing::EventFilter; use tracing::{subscriber::DefaultGuard, Subscriber}; @@ -13,6 +14,7 @@ use tracing_subscriber::{ }; pub use dyn_err::{anyhow_dyn_err, std_dyn_err}; +pub use err_with_sources::{err_with_sources, ErrorWithSources}; pub use format::Format; /// Registers a global subscriber with stdout logging and `additional_layer` diff --git a/rust/logging/src/unwrap_or.rs b/rust/logging/src/unwrap_or.rs index 522a3220e..087d12c5d 100644 --- a/rust/logging/src/unwrap_or.rs +++ b/rust/logging/src/unwrap_or.rs @@ -26,7 +26,7 @@ macro_rules! unwrap_or_debug { Err(e) => { let error: &dyn ::std::error::Error = e.as_ref(); - ::tracing::debug!(error, $($arg)*) + ::tracing::debug!($($arg)*, $crate::err_with_sources(error)) } } }; @@ -43,7 +43,7 @@ macro_rules! unwrap_or_trace { Err(e) => { let error: &dyn ::std::error::Error = e.as_ref(); - ::tracing::debug!(error, $($arg)*) + ::tracing::debug!($($arg)*, $crate::err_with_sources(error)) } } }; diff --git a/rust/phoenix-channel/src/lib.rs b/rust/phoenix-channel/src/lib.rs index b7e6b5298..ca989e770 100644 --- a/rust/phoenix-channel/src/lib.rs +++ b/rust/phoenix-channel/src/lib.rs @@ -13,7 +13,7 @@ use std::{io, mem}; use backoff::backoff::Backoff; use backoff::ExponentialBackoff; use base64::Engine; -use firezone_logging::{std_dyn_err, telemetry_span}; +use firezone_logging::{err_with_sources, std_dyn_err, telemetry_span}; use futures::future::BoxFuture; use futures::{FutureExt, SinkExt, StreamExt}; use heartbeat::{Heartbeat, MissedLastHeartbeat}; @@ -415,7 +415,7 @@ where let socket_factory = self.socket_factory.clone(); let socket_addresses = self.socket_addresses(); - tracing::debug!(error = std_dyn_err(&e), ?backoff, max_elapsed_time = ?self.reconnect_backoff.max_elapsed_time, "Reconnecting to portal on transient client error"); + tracing::debug!(?backoff, max_elapsed_time = ?self.reconnect_backoff.max_elapsed_time, "Reconnecting to portal on transient client error: {}", err_with_sources(&e)); self.state = State::Connecting(Box::pin(async move { tokio::time::sleep(backoff).await;