chore(rust): don't pass errors as values for debug logs (#7318)

Our logging library `tracing` supports structured logging. Structured
logging means we can include values within a `tracing::Event` without
having to immediately format it as a string. Processing these values -
such as errors - as their original type allows the various `tracing`
layers to capture and represent them as they see fit.

One of these layers is responsible for sending ERROR and WARN events to
Sentry, as part of which `std::error::Error` values get automatically
captured as so-called "sentry exceptions".

Unfortunately, there is a caveat: If an `std::error::Error` value is
included in an event that does not get mapped to an exception, the
`error` field is completely lost. See
https://github.com/getsentry/sentry-rust/issues/702 for details.

To work around this, we introduce a `err_with_sources` adapter that an
error and all its sources together into a string. For all
`tracing::debug!` statements, we then use this to report these errors.

It is really unfortunate that we have to do this and cannot use the same
mechanism, regardless of the log level. However, until this is fixed
upstream, this will do and gives us better information in the log
submitted to Sentry.
This commit is contained in:
Thomas Eizinger
2024-11-12 04:00:02 +00:00
committed by GitHub
parent 9e9dfd5e97
commit 19f51568c2
12 changed files with 85 additions and 27 deletions

1
rust/Cargo.lock generated
View File

@@ -2131,6 +2131,7 @@ dependencies = [
"nu-ansi-term 0.50.1",
"rand 0.8.5",
"sentry-tracing",
"thiserror",
"time",
"tracing",
"tracing-appender",

View File

@@ -7,7 +7,7 @@ use crate::{
};
use ::backoff::backoff::Backoff;
use bytecodec::{DecodeExt as _, EncodeExt as _};
use firezone_logging::std_dyn_err;
use firezone_logging::{err_with_sources, std_dyn_err};
use hex_display::HexDisplayExt as _;
use rand::random;
use std::{
@@ -1227,7 +1227,7 @@ impl ChannelBindings {
fn try_decode<'p>(&mut self, packet: &'p [u8], now: Instant) -> Option<(SocketAddr, &'p [u8])> {
let (channel_number, payload) = crate::channel_data::decode(packet)
.inspect_err(|e| {
tracing::debug!(error = std_dyn_err(e), "Malformed channel data message")
tracing::debug!("Malformed channel data message: {}", err_with_sources(e))
})
.ok()?;
let Some(channel) = self.inner.get_mut(&channel_number) else {

View File

@@ -9,7 +9,7 @@ use boringtun::noise::{Tunn, TunnResult};
use boringtun::x25519::PublicKey;
use boringtun::{noise::rate_limiter::RateLimiter, x25519::StaticSecret};
use core::fmt;
use firezone_logging::std_dyn_err;
use firezone_logging::err_with_sources;
use hex_display::HexDisplayExt;
use ip_packet::{
ConvertibleIpv4Packet, ConvertibleIpv6Packet, IpPacket, IpPacketBuf, MAX_DATAGRAM_PAYLOAD,
@@ -336,7 +336,7 @@ where
let candidate = match Candidate::from_sdp_string(&candidate) {
Ok(c) => c,
Err(e) => {
tracing::debug!(error = std_dyn_err(&e), "Failed to parse candidate");
tracing::debug!("Failed to parse candidate: {}", err_with_sources(&e));
return;
}
};
@@ -372,7 +372,7 @@ where
let candidate = match Candidate::from_sdp_string(&candidate) {
Ok(c) => c,
Err(e) => {
tracing::debug!(error = std_dyn_err(&e), "Failed to parse candidate");
tracing::debug!("Failed to parse candidate: {}", err_with_sources(&e));
return;
}
};

View File

@@ -15,7 +15,7 @@ use connlib_model::PublicKey;
use connlib_model::{GatewayId, RelayId, ResourceId, ResourceStatus, ResourceView};
use connlib_model::{Site, SiteId};
use firezone_logging::{
anyhow_dyn_err, std_dyn_err, telemetry_event, unwrap_or_debug, unwrap_or_warn,
anyhow_dyn_err, err_with_sources, std_dyn_err, telemetry_event, unwrap_or_debug, unwrap_or_warn,
};
use ip_network::{IpNetwork, Ipv4Network, Ipv6Network};
use ip_network_table::IpNetworkTable;
@@ -327,7 +327,7 @@ impl ClientState {
packet.as_ref(),
now,
)
.inspect_err(|e| tracing::debug!(error = std_dyn_err(e), %local, num_bytes = %packet.len(), "Failed to decapsulate incoming packet"))
.inspect_err(|e| tracing::debug!(%local, num_bytes = %packet.len(), "Failed to decapsulate incoming packet: {}", err_with_sources(e)))
.ok()??;
if self.tcp_dns_client.accepts(&packet) {
@@ -382,7 +382,7 @@ impl ClientState {
})
.unwrap_or_else(|e| {
let error = std_dyn_err(&e);
tracing::debug!(error, "Recursive UDP DNS query failed");
tracing::debug!("Recursive UDP DNS query failed: {}", err_with_sources(&e));
telemetry_event!(error, "Recursive UDP DNS query failed");
dns::servfail(response.query.for_slice_ref())
@@ -400,7 +400,7 @@ impl ClientState {
})
.unwrap_or_else(|e| {
let error = std_dyn_err(&e);
tracing::debug!(error, "Recursive TCP DNS query failed");
tracing::debug!("Recursive TCP DNS query failed: {}", err_with_sources(&e));
telemetry_event!(error, "Recursive TCP DNS query failed");
dns::servfail(response.query.for_slice_ref())
@@ -453,7 +453,9 @@ impl ClientState {
let transmit = self
.node
.encapsulate(gid, packet, now, buffer)
.inspect_err(|e| tracing::debug!(error = std_dyn_err(e), %gid, "Failed to encapsulate"))
.inspect_err(
|e| tracing::debug!(%gid, "Failed to encapsulate: {}", err_with_sources(e)),
)
.ok()??;
Some(transmit)
@@ -984,7 +986,7 @@ impl ClientState {
dns::ResolveStrategy::LocalResponse(response) => {
unwrap_or_debug!(
self.try_queue_udp_dns_response(upstream, source, &response),
"Failed to queue UDP DNS response"
"Failed to queue UDP DNS response: {}"
);
}
dns::ResolveStrategy::Recurse => {
@@ -1026,7 +1028,7 @@ impl ClientState {
dns::ResolveStrategy::LocalResponse(response) => {
unwrap_or_debug!(
self.tcp_dns_server.send_message(query.socket, response),
"Failed to send TCP DNS response"
"Failed to send TCP DNS response: {}"
);
}
dns::ResolveStrategy::Recurse => {
@@ -1046,7 +1048,7 @@ impl ClientState {
query.socket,
dns::servfail(message.for_slice_ref())
),
"Failed to send TCP DNS response"
"Failed to send TCP DNS response: {}"
);
return;
}

View File

@@ -1,6 +1,6 @@
use crate::{device_channel::Device, dns, sockets::Sockets};
use domain::base::Message;
use firezone_logging::{std_dyn_err, telemetry_span};
use firezone_logging::{err_with_sources, std_dyn_err, telemetry_span};
use futures::{
future::{self, Either},
stream, Stream, StreamExt,
@@ -352,7 +352,7 @@ async fn tun_send_recv(
{
Either::Left((Some(Command::SendPacket(p)), _)) => {
if let Err(e) = device.write(p) {
tracing::debug!(error = std_dyn_err(&e), "Failed to write TUN packet");
tracing::debug!("Failed to write TUN packet: {}", err_with_sources(&e));
};
}
Either::Left((Some(Command::UpdateTun(tun)), _)) => {

View File

@@ -10,7 +10,6 @@ use crate::{
};
use anyhow::{Context as _, Result};
use domain::{base::Message, dep::octseq::OctetsInto as _};
use firezone_logging::anyhow_dyn_err;
use ip_packet::IpPacket;
use smoltcp::{
iface::{Interface, PollResult, SocketSet},
@@ -188,7 +187,7 @@ impl Server {
});
}
Err(e) => {
tracing::debug!(error = anyhow_dyn_err(&e), "Error on receiving DNS query");
tracing::debug!("Error on receiving DNS query: {e:#}");
socket.abort();
break;
}

View File

@@ -4,7 +4,9 @@ use connlib_model::DomainName;
use connlib_model::{ClientId, ResourceId};
#[cfg(not(target_os = "windows"))]
use dns_lookup::{AddrInfoHints, AddrInfoIter, LookupError};
use firezone_logging::{anyhow_dyn_err, std_dyn_err, telemetry_event, telemetry_span};
use firezone_logging::{
anyhow_dyn_err, err_with_sources, std_dyn_err, telemetry_event, telemetry_span,
};
use firezone_tunnel::messages::gateway::{
AllowAccess, ClientIceCandidates, ClientsIceCandidates, ConnectionReady, EgressMessages,
IngressMessages, RejectAccess, RequestConnection,
@@ -78,7 +80,7 @@ impl Eventloop {
continue;
}
Poll::Ready(Err(e)) => {
tracing::debug!(error = std_dyn_err(&e), "Tunnel error");
tracing::debug!("Tunnel error: {}", err_with_sources(&e));
telemetry_event!(error = std_dyn_err(&e), "Tunnel error");
continue;
}
@@ -353,7 +355,7 @@ impl Eventloop {
req: RequestConnection,
) {
let addresses = result
.inspect_err(|e| tracing::debug!(error = std_dyn_err(e), client = %req.client.id, reference = %req.reference, "DNS resolution timed out as part of connection request"))
.inspect_err(|e| tracing::debug!(client = %req.client.id, reference = %req.reference, "DNS resolution timed out as part of connection request: {}", err_with_sources(e)))
.unwrap_or_default();
let answer = match self.tunnel.state_mut().accept(
@@ -392,7 +394,7 @@ impl Eventloop {
let client = req.client.id;
self.tunnel.state_mut().cleanup_connection(&client);
tracing::debug!(error = anyhow_dyn_err(&e), %client, "Connection request failed");
tracing::debug!(%client, "Connection request failed: {e:#}");
return;
}
@@ -409,7 +411,7 @@ impl Eventloop {
pub fn allow_access(&mut self, result: Result<Vec<IpAddr>, Timeout>, req: AllowAccess) {
let addresses = result
.inspect_err(|e| tracing::debug!(error = std_dyn_err(e), client = %req.client_id, reference = %req.reference, "DNS resolution timed out as part of allow access request"))
.inspect_err(|e| tracing::debug!(client = %req.client_id, reference = %req.reference, "DNS resolution timed out as part of allow access request: {}", err_with_sources(e)))
.unwrap_or_default();
if let Err(e) = self.tunnel.state_mut().allow_access(
@@ -433,7 +435,7 @@ impl Eventloop {
name: DomainName,
) {
let addresses = result
.inspect_err(|e| tracing::debug!(error = std_dyn_err(e), %conn_id, "DNS resolution timed out as part of allow access request"))
.inspect_err(|e| tracing::debug!(%conn_id, "DNS resolution timed out as part of allow access request: {}", err_with_sources(e)))
.unwrap_or_default();
self.tunnel.state_mut().refresh_translation(

View File

@@ -18,5 +18,8 @@ tracing-log = "0.2"
tracing-stackdriver = { version = "0.11.0" }
tracing-subscriber = { workspace = true, features = ["env-filter"] }
[dev-dependencies]
thiserror = "1"
[lints]
workspace = true

View File

@@ -0,0 +1,49 @@
use core::fmt;
use std::error::Error;
/// Returns a [`fmt::Display`] adapter that prints the error and all its sources.
pub fn err_with_sources<'a>(e: &'a (dyn Error + 'static)) -> ErrorWithSources<'a> {
ErrorWithSources { e }
}
pub struct ErrorWithSources<'a> {
e: &'a (dyn Error + 'static),
}
impl<'a> fmt::Display for ErrorWithSources<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.e)?;
for cause in anyhow::Chain::new(self.e).skip(1) {
write!(f, ": {}", cause)?;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn prints_errors_with_sources() {
let error = Error3(Error2(Error1));
let display = err_with_sources(&error);
assert_eq!(display.to_string(), "Argh: Failed to do the thing: oh no!");
}
#[derive(thiserror::Error, Debug)]
#[error("oh no!")]
struct Error1;
#[derive(thiserror::Error, Debug)]
#[error("Failed to do the thing")]
struct Error2(#[source] Error1);
#[derive(thiserror::Error, Debug)]
#[error("Argh")]
struct Error3(#[source] Error2);
}

View File

@@ -3,6 +3,7 @@ pub mod file;
mod format;
#[macro_use]
mod unwrap_or;
mod err_with_sources;
use sentry_tracing::EventFilter;
use tracing::{subscriber::DefaultGuard, Subscriber};
@@ -13,6 +14,7 @@ use tracing_subscriber::{
};
pub use dyn_err::{anyhow_dyn_err, std_dyn_err};
pub use err_with_sources::{err_with_sources, ErrorWithSources};
pub use format::Format;
/// Registers a global subscriber with stdout logging and `additional_layer`

View File

@@ -26,7 +26,7 @@ macro_rules! unwrap_or_debug {
Err(e) => {
let error: &dyn ::std::error::Error = e.as_ref();
::tracing::debug!(error, $($arg)*)
::tracing::debug!($($arg)*, $crate::err_with_sources(error))
}
}
};
@@ -43,7 +43,7 @@ macro_rules! unwrap_or_trace {
Err(e) => {
let error: &dyn ::std::error::Error = e.as_ref();
::tracing::debug!(error, $($arg)*)
::tracing::debug!($($arg)*, $crate::err_with_sources(error))
}
}
};

View File

@@ -13,7 +13,7 @@ use std::{io, mem};
use backoff::backoff::Backoff;
use backoff::ExponentialBackoff;
use base64::Engine;
use firezone_logging::{std_dyn_err, telemetry_span};
use firezone_logging::{err_with_sources, std_dyn_err, telemetry_span};
use futures::future::BoxFuture;
use futures::{FutureExt, SinkExt, StreamExt};
use heartbeat::{Heartbeat, MissedLastHeartbeat};
@@ -415,7 +415,7 @@ where
let socket_factory = self.socket_factory.clone();
let socket_addresses = self.socket_addresses();
tracing::debug!(error = std_dyn_err(&e), ?backoff, max_elapsed_time = ?self.reconnect_backoff.max_elapsed_time, "Reconnecting to portal on transient client error");
tracing::debug!(?backoff, max_elapsed_time = ?self.reconnect_backoff.max_elapsed_time, "Reconnecting to portal on transient client error: {}", err_with_sources(&e));
self.state = State::Connecting(Box::pin(async move {
tokio::time::sleep(backoff).await;