From 82fcad0a3b271d7a689294a2b23945aa9a165412 Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Fri, 25 Oct 2024 07:25:26 +1100 Subject: [PATCH] refactor(rust): only send `telemetry` spans to Sentry (#7153) With the introduction of the `tracing-sentry` integration in #7105, we started sending tracing spans to Sentry. By default, all spans with level INFO and above get sampled at the configured rate and sent to Sentry. This results in a lot of useless transaction in Sentry because we use INFO level spans in multiple places in connlib to attach contextual information like the current connection ID. This PR introduces the concept of `telemetry` spans which - similar to the `telemetry` log target in #7147 - qualifies a span for being sent to Sentry. By convention, these are also defined as requiring the TRACE level. This ensures we won't ever see them as part of regular log output. --- rust/headless-client/src/ipc_service.rs | 25 +++++++++++-------------- rust/headless-client/src/main.rs | 19 ++++++++----------- rust/logging/src/lib.rs | 17 +++++++++++------ rust/phoenix-channel/src/lib.rs | 9 ++++++--- rust/telemetry/src/lib.rs | 3 +-- 5 files changed, 37 insertions(+), 36 deletions(-) diff --git a/rust/headless-client/src/ipc_service.rs b/rust/headless-client/src/ipc_service.rs index 41b188ee3..6f96fb0cc 100644 --- a/rust/headless-client/src/ipc_service.rs +++ b/rust/headless-client/src/ipc_service.rs @@ -521,11 +521,9 @@ impl<'a> Handler<'a> { /// /// Throws matchable errors for bad URLs, unable to reach the portal, or unable to create the tunnel device fn connect_to_firezone(&mut self, api_url: &str, token: SecretString) -> Result<(), Error> { - let ctx = firezone_telemetry::TransactionContext::new( - "connect_to_firezone", - "Connecting to Firezone", - ); - let transaction = firezone_telemetry::start_transaction(ctx); + let _connect_span = + tracing::trace_span!(target: "telemetry", "connect_to_firezone").entered(); + assert!(self.session.is_none()); let device_id = device_id::get_or_create().map_err(|e| Error::DeviceId(e.to_string()))?; @@ -543,7 +541,6 @@ impl<'a> Handler<'a> { let callbacks = CallbackHandler { cb_tx }; // Synchronous DNS resolution here - let phoenix_span = transaction.start_child("phoenix", "Resolve DNS for PhoenixChannel"); let portal = PhoenixChannel::disconnected( Secret::new(url), // The IPC service must use the GUI's version number, not the Headless Client's. But refactoring to separate the IPC service from the Headless Client will take a while. @@ -557,7 +554,6 @@ impl<'a> Handler<'a> { Arc::new(tcp_socket_factory), ) .map_err(|e| Error::PortalConnection(e.to_string()))?; - phoenix_span.finish(); // Read the resolvers before starting connlib, in case connlib's startup interferes. let dns = self.dns_controller.system_resolvers(); @@ -571,18 +567,19 @@ impl<'a> Handler<'a> { // Call `set_dns` before `set_tun` so that the tunnel starts up with a valid list of resolvers. tracing::debug!(?dns, "Calling `set_dns`..."); connlib.set_dns(dns); - let tun_span = transaction.start_child("tun", "Raise tunnel with `make_tun`"); - let tun = self - .tun_device - .make_tun() - .map_err(|e| Error::TunnelDevice(e.to_string()))?; + + let tun = { + let _guard = tracing::trace_span!(target: "telemetry", "create_tun_device").entered(); + + self.tun_device + .make_tun() + .map_err(|e| Error::TunnelDevice(e.to_string()))? + }; connlib.set_tun(Box::new(tun)); - tun_span.finish(); let session = Session { cb_rx, connlib }; self.session = Some(session); - transaction.finish(); Ok(()) } } diff --git a/rust/headless-client/src/main.rs b/rust/headless-client/src/main.rs index ad49fa1c7..313952860 100644 --- a/rust/headless-client/src/main.rs +++ b/rust/headless-client/src/main.rs @@ -197,18 +197,14 @@ fn main() -> Result<()> { let mut last_connlib_start_instant = Some(Instant::now()); rt.block_on(async { - let ctx = firezone_telemetry::TransactionContext::new( - "connect_to_firezone", - "Connecting to Firezone", - ); - let transaction = firezone_telemetry::start_transaction(ctx); + let connect_span = + tracing::trace_span!(target: "telemetry", "connect_to_firezone").entered(); // The Headless Client will bail out here if there's no Internet, because `PhoenixChannel` will try to // resolve the portal host and fail. This is intentional behavior. The Headless Client should always be running under a manager like `systemd` or Windows' Service Controller, // so when it fails it will be restarted with backoff. `systemd` can additionally make us wait // for an Internet connection if it launches us at startup. // When running interactively, it is useful for the user to see that we can't reach the portal. - let phoenix_span = transaction.start_child("phoenix", "Connect PhoenixChannel"); let portal = PhoenixChannel::disconnected( Secret::new(url), get_user_agent(None, env!("CARGO_PKG_VERSION")), @@ -219,7 +215,6 @@ fn main() -> Result<()> { .build(), Arc::new(tcp_socket_factory), )?; - phoenix_span.finish(); let session = Session::connect( Arc::new(tcp_socket_factory), Arc::new(udp_socket_factory), @@ -244,13 +239,15 @@ fn main() -> Result<()> { new_network_notifier(tokio_handle.clone(), dns_control_method).await?; drop(tokio_handle); - let tun_span = transaction.start_child("tun", "Raise tunnel"); - let tun = tun_device.make_tun()?; + let tun = { + let _guard = tracing::trace_span!(target: "telemetry", "create_tun_device").entered(); + + tun_device.make_tun()? + }; session.set_tun(Box::new(tun)); - tun_span.finish(); session.set_dns(dns_controller.system_resolvers()); - transaction.finish(); + drop(connect_span); let result = loop { let mut dns_changed = pin!(dns_notifier.notified().fuse()); diff --git a/rust/logging/src/lib.rs b/rust/logging/src/lib.rs index ac412e0c0..249c91bf3 100644 --- a/rust/logging/src/lib.rs +++ b/rust/logging/src/lib.rs @@ -23,10 +23,13 @@ where let directives = std::env::var("RUST_LOG").unwrap_or_default(); let subscriber = Registry::default() - .with(additional_layer) - .with(sentry_layer()) - .with(fmt::layer().event_format(Format::new())) - .with(filter(&directives)); + .with(additional_layer.with_filter(filter(&directives))) + .with(sentry_layer()) // Sentry layer has its own event filtering mechanism. + .with( + fmt::layer() + .event_format(Format::new()) + .with_filter(filter(&directives)), + ); tracing::subscriber::set_global_default(subscriber).expect("Could not set global default"); LogTracer::init().unwrap(); } @@ -87,9 +90,10 @@ pub fn test_global(directives: &str) { /// Coupling the `telemetry` target to the `TRACE` level pretty much prevents these events from ever showing up in log files. /// By sampling them, we prevent flooding Sentry with lots of these logs. /// -/// ## Spans +/// ## Telemetry spans /// -/// The default span-filter captures all spans with level INFO, WARN and ERROR as sentry "transactions". +/// Only spans with the `telemetry` target on level `TRACE` will be submitted to Sentry. +/// They are subject to the sampling rate defined in the Sentry client configuration. pub fn sentry_layer() -> sentry_tracing::SentryLayer where S: Subscriber + for<'a> LookupSpan<'a>, @@ -109,5 +113,6 @@ where } _ => EventFilter::Ignore, }) + .span_filter(|md| *md.level() == tracing::Level::TRACE && md.target() == "telemetry") .enable_span_attributes() } diff --git a/rust/phoenix-channel/src/lib.rs b/rust/phoenix-channel/src/lib.rs index 89f72a8fc..fc2167311 100644 --- a/rust/phoenix-channel/src/lib.rs +++ b/rust/phoenix-channel/src/lib.rs @@ -258,12 +258,15 @@ where ) -> io::Result { let next_request_id = Arc::new(AtomicU64::new(0)); + let host_and_port = url.expose_secret().host_and_port(); + + let _guard = + tracing::trace_span!(target: "telemetry", "resolve_portal_url", host = %host_and_port.0).entered(); + // Statically resolve the host in the URL to a set of addresses. // We don't use these directly because we need to connect to the domain via TLS which requires a hostname. // We expose them to other components that deal with DNS stuff to ensure our domain always resolves to these IPs. - let resolved_addresses = url - .expose_secret() - .host_and_port() + let resolved_addresses = host_and_port .to_socket_addrs()? .map(|addr| addr.ip()) .collect(); diff --git a/rust/telemetry/src/lib.rs b/rust/telemetry/src/lib.rs index 9d65cf388..1471b0f53 100644 --- a/rust/telemetry/src/lib.rs +++ b/rust/telemetry/src/lib.rs @@ -3,8 +3,7 @@ use std::time::Duration; pub use sentry::{ add_breadcrumb, capture_error, capture_message, configure_scope, end_session, - end_session_with_status, start_transaction, types::protocol::v7::SessionStatus, Breadcrumb, - Hub, Level, TransactionContext, + end_session_with_status, types::protocol::v7::SessionStatus, Breadcrumb, Hub, Level, }; pub use sentry_anyhow::capture_anyhow;