chore(rust): enable sentry-tracing integration (#7105)

Using the `sentry-tracing` integration, we can automatically capture
events based on what we log via `tracing`. The mapping is defined as
follows:

- ERROR: Gets captured as an exception
- WARN: Gets captured as a message
- INFO: Gets captured as a breadcrumb
- Everything else (`DEBUG`, `TRACE`): Does not get captured at all

If telemetry isn't active / configured, this integration does nothing.
It is therefore safe to just always enable it.
This commit is contained in:
Thomas Eizinger
2024-10-23 10:23:49 +11:00
committed by GitHub
parent b7b7626cfa
commit 990324b2ec
9 changed files with 59 additions and 41 deletions

2
rust/Cargo.lock generated
View File

@@ -1054,7 +1054,6 @@ dependencies = [
"chrono",
"connlib-model",
"firezone-logging",
"firezone-telemetry",
"firezone-tunnel",
"ip_network",
"phoenix-channel",
@@ -2020,6 +2019,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"nu-ansi-term 0.50.1",
"sentry-tracing",
"time",
"tracing",
"tracing-appender",

View File

@@ -12,7 +12,6 @@ backoff = { workspace = true }
bimap = "0.6"
connlib-model = { workspace = true }
firezone-logging = { workspace = true }
firezone-telemetry = { workspace = true }
firezone-tunnel = { workspace = true }
ip_network = { version = "0.4", default-features = false }
phoenix-channel = { workspace = true }

View File

@@ -3,14 +3,13 @@ pub use crate::serde_routelist::{V4RouteList, V6RouteList};
pub use callbacks::{Callbacks, DisconnectError};
pub use connlib_model::StaticSecret;
pub use eventloop::Eventloop;
use firezone_logging::std_dyn_err;
pub use firezone_tunnel::messages::client::{
ResourceDescription, {IngressMessages, ReplyMessages},
};
use connlib_model::ResourceId;
use eventloop::Command;
use firezone_telemetry as telemetry;
use firezone_logging::std_dyn_err;
use firezone_tunnel::ClientTunnel;
use phoenix_channel::{PhoenixChannel, PublicKeyParam};
use socket_factory::{SocketFactory, TcpSocket, UdpSocket};
@@ -152,40 +151,33 @@ async fn connect_supervisor<CB>(
}
Ok(Err(e)) => {
if e.is_authentication_error() {
telemetry::capture_message(
"Portal authentication error",
telemetry::Level::Warning,
);
tracing::warn!(error = std_dyn_err(&e), "Portal authentication error");
} else {
telemetry::capture_error(&e);
tracing::error!(error = std_dyn_err(&e), "connlib failed");
}
tracing::error!(error = std_dyn_err(&e), "connlib failed");
callbacks.on_disconnect(&e);
}
Err(e) => {
telemetry::capture_error(&e);
match e.try_into_panic() {
Ok(panic) => {
if let Some(msg) = panic.downcast_ref::<&str>() {
tracing::error!("connlib panicked: {msg}");
callbacks.on_disconnect(&DisconnectError::Panic(msg.to_string()));
return;
}
if let Some(msg) = panic.downcast_ref::<String>() {
tracing::error!("connlib panicked: {msg}");
callbacks.on_disconnect(&DisconnectError::Panic(msg.to_string()));
return;
}
Err(e) => match e.try_into_panic() {
Ok(panic) => {
if let Some(msg) = panic.downcast_ref::<&str>() {
tracing::error!("connlib panicked: {msg}");
callbacks.on_disconnect(&DisconnectError::Panic(msg.to_string()));
return;
}
if let Some(msg) = panic.downcast_ref::<String>() {
tracing::error!("connlib panicked: {msg}");
callbacks.on_disconnect(&DisconnectError::Panic(msg.to_string()));
return;
}
tracing::error!("connlib panicked with a non-string payload");
callbacks.on_disconnect(&DisconnectError::PanicNonStringPayload);
}
Err(_) => {
tracing::error!("connlib task was cancelled");
callbacks.on_disconnect(&DisconnectError::Cancelled);
}
tracing::error!("connlib panicked with a non-string payload");
callbacks.on_disconnect(&DisconnectError::PanicNonStringPayload);
}
}
Err(_) => {
tracing::error!("connlib task was cancelled");
callbacks.on_disconnect(&DisconnectError::Cancelled);
}
},
}
}

View File

@@ -55,13 +55,12 @@ async fn main() {
// That looks like a "crash" but we "just" exit with a fatal error.
if let Err(e) = try_main(cli).await {
tracing::error!(error = anyhow_dyn_err(&e));
firezone_telemetry::capture_anyhow(&e);
std::process::exit(1);
}
}
async fn try_main(cli: Cli) -> Result<()> {
firezone_logging::setup_global_subscriber(layer::Identity::new());
firezone_logging::setup_global_subscriber(layer::Identity::default());
let firezone_id = get_firezone_id(cli.firezone_id).await
.context("Couldn't read FIREZONE_ID or write it to disk: Please provide it through the env variable or provide rw access to /var/lib/firezone/")?;

View File

@@ -56,7 +56,9 @@ pub fn setup(directives: &str) -> Result<Handles> {
let (layer, logger) = firezone_logging::file::layer(&log_path);
let layer = layer.and_then(fmt::layer());
let (filter, reloader) = reload::Layer::new(firezone_logging::try_filter(directives)?);
let subscriber = Registry::default().with(layer.with_filter(filter));
let subscriber = Registry::default()
.with(layer.with_filter(filter))
.with(firezone_logging::sentry_layer());
set_global_default(subscriber)?;
if let Err(error) = output_vt100::try_init() {
tracing::debug!(

View File

@@ -267,13 +267,11 @@ pub(crate) fn run(
let exit_code = match task.await {
Err(error) => {
telemetry::capture_error(&error);
tracing::error!(?error, "run_controller panicked");
telemetry::end_session_with_status(telemetry::SessionStatus::Crashed);
1
}
Ok(Err(error)) => {
telemetry::capture_error(&error);
tracing::error!(?error, "run_controller returned an error");
errors::show_error_dialog(&error).unwrap();
telemetry::end_session_with_status(telemetry::SessionStatus::Crashed);

View File

@@ -9,6 +9,7 @@ publish = false
[dependencies]
anyhow = "1"
nu-ansi-term = { version = "0.50" }
sentry-tracing = "0.34.0"
time = { version = "0.3.36", features = ["formatting"] }
tracing = { workspace = true }
tracing-appender = { version = "0.2.2" }

View File

@@ -3,11 +3,12 @@ pub mod file;
mod format;
mod log_unwrap;
use tracing::subscriber::DefaultGuard;
use sentry_tracing::EventFilter;
use tracing::{subscriber::DefaultGuard, Subscriber};
use tracing_log::LogTracer;
use tracing_subscriber::{
filter::ParseError, fmt, layer::SubscriberExt as _, util::SubscriberInitExt, EnvFilter, Layer,
Registry,
filter::ParseError, fmt, layer::SubscriberExt as _, registry::LookupSpan,
util::SubscriberInitExt, EnvFilter, Layer, Registry,
};
pub use dyn_err::{anyhow_dyn_err, std_dyn_err};
@@ -23,6 +24,7 @@ where
let subscriber = Registry::default()
.with(additional_layer)
.with(sentry_layer())
.with(fmt::layer().event_format(Format::new()))
.with(filter(&directives));
tracing::subscriber::set_global_default(subscriber).expect("Could not set global default");
@@ -66,3 +68,28 @@ pub fn test_global(directives: &str) {
)
.ok();
}
/// Constructs a [`tracing::Layer`](Layer) that captures events and spans and reports them to Sentry.
///
/// ## Events
///
/// - error events are reported as sentry exceptions
/// - warn events are reported as sentry messages
/// - info events are captured as breadcrumbs (and submitted together with warns & errors)
///
/// # Spans
///
/// The default span-filter captures all spans with level INFO, WARN and ERROR as sentry "transactions".
pub fn sentry_layer<S>() -> sentry_tracing::SentryLayer<S>
where
S: Subscriber + for<'a> LookupSpan<'a>,
{
sentry_tracing::layer()
.event_filter(|md| match *md.level() {
tracing::Level::ERROR => EventFilter::Exception,
tracing::Level::WARN => EventFilter::Event,
tracing::Level::INFO => EventFilter::Breadcrumb,
_ => EventFilter::Ignore,
})
.enable_span_attributes()
}

View File

@@ -67,7 +67,7 @@ impl Telemetry {
environment: Some(environment.into()),
// We can't get the release number ourselves because we don't know if we're embedded in a GUI Client or a Headless Client.
release: Some(release.into()),
traces_sample_rate: 1.0,
traces_sample_rate: 0.1,
..Default::default()
},
));