From 990324b2ec0db6618ef70da354e11cbf19f72ace Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Wed, 23 Oct 2024 10:23:49 +1100 Subject: [PATCH] chore(rust): enable `sentry-tracing` integration (#7105) Using the `sentry-tracing` integration, we can automatically capture events based on what we log via `tracing`. The mapping is defined as follows: - ERROR: Gets captured as a fatal error - WARN: Gets captured as a message - INFO: Gets captured as a breadcrumb - `_`: Does not get captured at all If telemetry isn't active / configured, this integration does nothing. It is therefore safe to just always enable it. --- rust/Cargo.lock | 2 +- rust/connlib/clients/shared/Cargo.toml | 1 - rust/connlib/clients/shared/src/lib.rs | 52 +++++++++------------ rust/gateway/src/main.rs | 3 +- rust/gui-client/src-common/src/logging.rs | 4 +- rust/gui-client/src-tauri/src/client/gui.rs | 2 - rust/logging/Cargo.toml | 1 + rust/logging/src/lib.rs | 33 +++++++++++-- rust/telemetry/src/lib.rs | 2 +- 9 files changed, 59 insertions(+), 41 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 11d18273d..6b9060459 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -1054,7 +1054,6 @@ dependencies = [ "chrono", "connlib-model", "firezone-logging", - "firezone-telemetry", "firezone-tunnel", "ip_network", "phoenix-channel", @@ -2020,6 +2019,7 @@ version = "0.1.0" dependencies = [ "anyhow", "nu-ansi-term 0.50.1", + "sentry-tracing", "time", "tracing", "tracing-appender", diff --git a/rust/connlib/clients/shared/Cargo.toml b/rust/connlib/clients/shared/Cargo.toml index 8ef866883..617ee2986 100644 --- a/rust/connlib/clients/shared/Cargo.toml +++ b/rust/connlib/clients/shared/Cargo.toml @@ -12,7 +12,6 @@ backoff = { workspace = true } bimap = "0.6" connlib-model = { workspace = true } firezone-logging = { workspace = true } -firezone-telemetry = { workspace = true } firezone-tunnel = { workspace = true } ip_network = { version = "0.4", default-features = false } phoenix-channel = { workspace = true } diff --git a/rust/connlib/clients/shared/src/lib.rs b/rust/connlib/clients/shared/src/lib.rs index b8117066a..a9eac86e8 100644 --- a/rust/connlib/clients/shared/src/lib.rs +++ b/rust/connlib/clients/shared/src/lib.rs @@ -3,14 +3,13 @@ pub use crate::serde_routelist::{V4RouteList, V6RouteList}; pub use callbacks::{Callbacks, DisconnectError}; pub use connlib_model::StaticSecret; pub use eventloop::Eventloop; -use firezone_logging::std_dyn_err; pub use firezone_tunnel::messages::client::{ ResourceDescription, {IngressMessages, ReplyMessages}, }; use connlib_model::ResourceId; use eventloop::Command; -use firezone_telemetry as telemetry; +use firezone_logging::std_dyn_err; use firezone_tunnel::ClientTunnel; use phoenix_channel::{PhoenixChannel, PublicKeyParam}; use socket_factory::{SocketFactory, TcpSocket, UdpSocket}; @@ -152,40 +151,33 @@ async fn connect_supervisor( } Ok(Err(e)) => { if e.is_authentication_error() { - telemetry::capture_message( - "Portal authentication error", - telemetry::Level::Warning, - ); + tracing::warn!(error = std_dyn_err(&e), "Portal authentication error"); } else { - telemetry::capture_error(&e); + tracing::error!(error = std_dyn_err(&e), "connlib failed"); } - tracing::error!(error = std_dyn_err(&e), "connlib failed"); callbacks.on_disconnect(&e); } - Err(e) => { - telemetry::capture_error(&e); - match e.try_into_panic() { - Ok(panic) => { - if let Some(msg) = panic.downcast_ref::<&str>() { - tracing::error!("connlib panicked: {msg}"); - callbacks.on_disconnect(&DisconnectError::Panic(msg.to_string())); - return; - } - if let Some(msg) = panic.downcast_ref::() { - tracing::error!("connlib panicked: {msg}"); - callbacks.on_disconnect(&DisconnectError::Panic(msg.to_string())); - return; - } + Err(e) => match e.try_into_panic() { + Ok(panic) => { + if let Some(msg) = panic.downcast_ref::<&str>() { + tracing::error!("connlib panicked: {msg}"); + callbacks.on_disconnect(&DisconnectError::Panic(msg.to_string())); + return; + } + if let Some(msg) = panic.downcast_ref::() { + tracing::error!("connlib panicked: {msg}"); + callbacks.on_disconnect(&DisconnectError::Panic(msg.to_string())); + return; + } - tracing::error!("connlib panicked with a non-string payload"); - callbacks.on_disconnect(&DisconnectError::PanicNonStringPayload); - } - Err(_) => { - tracing::error!("connlib task was cancelled"); - callbacks.on_disconnect(&DisconnectError::Cancelled); - } + tracing::error!("connlib panicked with a non-string payload"); + callbacks.on_disconnect(&DisconnectError::PanicNonStringPayload); } - } + Err(_) => { + tracing::error!("connlib task was cancelled"); + callbacks.on_disconnect(&DisconnectError::Cancelled); + } + }, } } diff --git a/rust/gateway/src/main.rs b/rust/gateway/src/main.rs index 30dd453f0..37452f623 100644 --- a/rust/gateway/src/main.rs +++ b/rust/gateway/src/main.rs @@ -55,13 +55,12 @@ async fn main() { // That looks like a "crash" but we "just" exit with a fatal error. if let Err(e) = try_main(cli).await { tracing::error!(error = anyhow_dyn_err(&e)); - firezone_telemetry::capture_anyhow(&e); std::process::exit(1); } } async fn try_main(cli: Cli) -> Result<()> { - firezone_logging::setup_global_subscriber(layer::Identity::new()); + firezone_logging::setup_global_subscriber(layer::Identity::default()); let firezone_id = get_firezone_id(cli.firezone_id).await .context("Couldn't read FIREZONE_ID or write it to disk: Please provide it through the env variable or provide rw access to /var/lib/firezone/")?; diff --git a/rust/gui-client/src-common/src/logging.rs b/rust/gui-client/src-common/src/logging.rs index 5527eb5de..7b69b6dcf 100644 --- a/rust/gui-client/src-common/src/logging.rs +++ b/rust/gui-client/src-common/src/logging.rs @@ -56,7 +56,9 @@ pub fn setup(directives: &str) -> Result { let (layer, logger) = firezone_logging::file::layer(&log_path); let layer = layer.and_then(fmt::layer()); let (filter, reloader) = reload::Layer::new(firezone_logging::try_filter(directives)?); - let subscriber = Registry::default().with(layer.with_filter(filter)); + let subscriber = Registry::default() + .with(layer.with_filter(filter)) + .with(firezone_logging::sentry_layer()); set_global_default(subscriber)?; if let Err(error) = output_vt100::try_init() { tracing::debug!( diff --git a/rust/gui-client/src-tauri/src/client/gui.rs b/rust/gui-client/src-tauri/src/client/gui.rs index 0e6484081..bde50e796 100644 --- a/rust/gui-client/src-tauri/src/client/gui.rs +++ b/rust/gui-client/src-tauri/src/client/gui.rs @@ -267,13 +267,11 @@ pub(crate) fn run( let exit_code = match task.await { Err(error) => { - telemetry::capture_error(&error); tracing::error!(?error, "run_controller panicked"); telemetry::end_session_with_status(telemetry::SessionStatus::Crashed); 1 } Ok(Err(error)) => { - telemetry::capture_error(&error); tracing::error!(?error, "run_controller returned an error"); errors::show_error_dialog(&error).unwrap(); telemetry::end_session_with_status(telemetry::SessionStatus::Crashed); diff --git a/rust/logging/Cargo.toml b/rust/logging/Cargo.toml index 9706346f4..bb6e4c59c 100644 --- a/rust/logging/Cargo.toml +++ b/rust/logging/Cargo.toml @@ -9,6 +9,7 @@ publish = false [dependencies] anyhow = "1" nu-ansi-term = { version = "0.50" } +sentry-tracing = "0.34.0" time = { version = "0.3.36", features = ["formatting"] } tracing = { workspace = true } tracing-appender = { version = "0.2.2" } diff --git a/rust/logging/src/lib.rs b/rust/logging/src/lib.rs index 5ed1327bd..7c650e433 100644 --- a/rust/logging/src/lib.rs +++ b/rust/logging/src/lib.rs @@ -3,11 +3,12 @@ pub mod file; mod format; mod log_unwrap; -use tracing::subscriber::DefaultGuard; +use sentry_tracing::EventFilter; +use tracing::{subscriber::DefaultGuard, Subscriber}; use tracing_log::LogTracer; use tracing_subscriber::{ - filter::ParseError, fmt, layer::SubscriberExt as _, util::SubscriberInitExt, EnvFilter, Layer, - Registry, + filter::ParseError, fmt, layer::SubscriberExt as _, registry::LookupSpan, + util::SubscriberInitExt, EnvFilter, Layer, Registry, }; pub use dyn_err::{anyhow_dyn_err, std_dyn_err}; @@ -23,6 +24,7 @@ where let subscriber = Registry::default() .with(additional_layer) + .with(sentry_layer()) .with(fmt::layer().event_format(Format::new())) .with(filter(&directives)); tracing::subscriber::set_global_default(subscriber).expect("Could not set global default"); @@ -66,3 +68,28 @@ pub fn test_global(directives: &str) { ) .ok(); } + +/// Constructs a [`tracing::Layer`](Layer) that captures events and spans and reports them to Sentry. +/// +/// ## Events +/// +/// - error events are reported as sentry exceptions +/// - warn events are reported as sentry messages +/// - info events are captured as breadcrumbs (and submitted together with warns & errors) +/// +/// # Spans +/// +/// The default span-filter captures all spans with level INFO, WARN and ERROR as sentry "transactions". +pub fn sentry_layer() -> sentry_tracing::SentryLayer +where + S: Subscriber + for<'a> LookupSpan<'a>, +{ + sentry_tracing::layer() + .event_filter(|md| match *md.level() { + tracing::Level::ERROR => EventFilter::Exception, + tracing::Level::WARN => EventFilter::Event, + tracing::Level::INFO => EventFilter::Breadcrumb, + _ => EventFilter::Ignore, + }) + .enable_span_attributes() +} diff --git a/rust/telemetry/src/lib.rs b/rust/telemetry/src/lib.rs index 2f83dc29c..9ffd8ad7f 100644 --- a/rust/telemetry/src/lib.rs +++ b/rust/telemetry/src/lib.rs @@ -67,7 +67,7 @@ impl Telemetry { environment: Some(environment.into()), // We can't get the release number ourselves because we don't know if we're embedded in a GUI Client or a Headless Client. release: Some(release.into()), - traces_sample_rate: 1.0, + traces_sample_rate: 0.1, ..Default::default() }, ));