refactor(rust): only send telemetry spans to Sentry (#7153)

With the introduction of the `tracing-sentry` integration in #7105, we
started sending tracing spans to Sentry. By default, all spans with
level INFO and above get sampled at the configured rate and sent to
Sentry.

This results in a lot of useless transaction in Sentry because we use
INFO level spans in multiple places in connlib to attach contextual
information like the current connection ID.

This PR introduces the concept of `telemetry` spans which - similar to
the `telemetry` log target in #7147 - qualifies a span for being sent to
Sentry. By convention, these are also defined as requiring the TRACE
level. This ensures we won't ever see them as part of regular log
output.
This commit is contained in:
Thomas Eizinger
2024-10-25 07:25:26 +11:00
committed by GitHub
parent 5cf105f073
commit 82fcad0a3b
5 changed files with 37 additions and 36 deletions

View File

@@ -521,11 +521,9 @@ impl<'a> Handler<'a> {
///
/// Throws matchable errors for bad URLs, unable to reach the portal, or unable to create the tunnel device
fn connect_to_firezone(&mut self, api_url: &str, token: SecretString) -> Result<(), Error> {
let ctx = firezone_telemetry::TransactionContext::new(
"connect_to_firezone",
"Connecting to Firezone",
);
let transaction = firezone_telemetry::start_transaction(ctx);
let _connect_span =
tracing::trace_span!(target: "telemetry", "connect_to_firezone").entered();
assert!(self.session.is_none());
let device_id = device_id::get_or_create().map_err(|e| Error::DeviceId(e.to_string()))?;
@@ -543,7 +541,6 @@ impl<'a> Handler<'a> {
let callbacks = CallbackHandler { cb_tx };
// Synchronous DNS resolution here
let phoenix_span = transaction.start_child("phoenix", "Resolve DNS for PhoenixChannel");
let portal = PhoenixChannel::disconnected(
Secret::new(url),
// The IPC service must use the GUI's version number, not the Headless Client's. But refactoring to separate the IPC service from the Headless Client will take a while.
@@ -557,7 +554,6 @@ impl<'a> Handler<'a> {
Arc::new(tcp_socket_factory),
)
.map_err(|e| Error::PortalConnection(e.to_string()))?;
phoenix_span.finish();
// Read the resolvers before starting connlib, in case connlib's startup interferes.
let dns = self.dns_controller.system_resolvers();
@@ -571,18 +567,19 @@ impl<'a> Handler<'a> {
// Call `set_dns` before `set_tun` so that the tunnel starts up with a valid list of resolvers.
tracing::debug!(?dns, "Calling `set_dns`...");
connlib.set_dns(dns);
let tun_span = transaction.start_child("tun", "Raise tunnel with `make_tun`");
let tun = self
.tun_device
.make_tun()
.map_err(|e| Error::TunnelDevice(e.to_string()))?;
let tun = {
let _guard = tracing::trace_span!(target: "telemetry", "create_tun_device").entered();
self.tun_device
.make_tun()
.map_err(|e| Error::TunnelDevice(e.to_string()))?
};
connlib.set_tun(Box::new(tun));
tun_span.finish();
let session = Session { cb_rx, connlib };
self.session = Some(session);
transaction.finish();
Ok(())
}
}

View File

@@ -197,18 +197,14 @@ fn main() -> Result<()> {
let mut last_connlib_start_instant = Some(Instant::now());
rt.block_on(async {
let ctx = firezone_telemetry::TransactionContext::new(
"connect_to_firezone",
"Connecting to Firezone",
);
let transaction = firezone_telemetry::start_transaction(ctx);
let connect_span =
tracing::trace_span!(target: "telemetry", "connect_to_firezone").entered();
// The Headless Client will bail out here if there's no Internet, because `PhoenixChannel` will try to
// resolve the portal host and fail. This is intentional behavior. The Headless Client should always be running under a manager like `systemd` or Windows' Service Controller,
// so when it fails it will be restarted with backoff. `systemd` can additionally make us wait
// for an Internet connection if it launches us at startup.
// When running interactively, it is useful for the user to see that we can't reach the portal.
let phoenix_span = transaction.start_child("phoenix", "Connect PhoenixChannel");
let portal = PhoenixChannel::disconnected(
Secret::new(url),
get_user_agent(None, env!("CARGO_PKG_VERSION")),
@@ -219,7 +215,6 @@ fn main() -> Result<()> {
.build(),
Arc::new(tcp_socket_factory),
)?;
phoenix_span.finish();
let session = Session::connect(
Arc::new(tcp_socket_factory),
Arc::new(udp_socket_factory),
@@ -244,13 +239,15 @@ fn main() -> Result<()> {
new_network_notifier(tokio_handle.clone(), dns_control_method).await?;
drop(tokio_handle);
let tun_span = transaction.start_child("tun", "Raise tunnel");
let tun = tun_device.make_tun()?;
let tun = {
let _guard = tracing::trace_span!(target: "telemetry", "create_tun_device").entered();
tun_device.make_tun()?
};
session.set_tun(Box::new(tun));
tun_span.finish();
session.set_dns(dns_controller.system_resolvers());
transaction.finish();
drop(connect_span);
let result = loop {
let mut dns_changed = pin!(dns_notifier.notified().fuse());

View File

@@ -23,10 +23,13 @@ where
let directives = std::env::var("RUST_LOG").unwrap_or_default();
let subscriber = Registry::default()
.with(additional_layer)
.with(sentry_layer())
.with(fmt::layer().event_format(Format::new()))
.with(filter(&directives));
.with(additional_layer.with_filter(filter(&directives)))
.with(sentry_layer()) // Sentry layer has its own event filtering mechanism.
.with(
fmt::layer()
.event_format(Format::new())
.with_filter(filter(&directives)),
);
tracing::subscriber::set_global_default(subscriber).expect("Could not set global default");
LogTracer::init().unwrap();
}
@@ -87,9 +90,10 @@ pub fn test_global(directives: &str) {
/// Coupling the `telemetry` target to the `TRACE` level pretty much prevents these events from ever showing up in log files.
/// By sampling them, we prevent flooding Sentry with lots of these logs.
///
/// ## Spans
/// ## Telemetry spans
///
/// The default span-filter captures all spans with level INFO, WARN and ERROR as sentry "transactions".
/// Only spans with the `telemetry` target on level `TRACE` will be submitted to Sentry.
/// They are subject to the sampling rate defined in the Sentry client configuration.
pub fn sentry_layer<S>() -> sentry_tracing::SentryLayer<S>
where
S: Subscriber + for<'a> LookupSpan<'a>,
@@ -109,5 +113,6 @@ where
}
_ => EventFilter::Ignore,
})
.span_filter(|md| *md.level() == tracing::Level::TRACE && md.target() == "telemetry")
.enable_span_attributes()
}

View File

@@ -258,12 +258,15 @@ where
) -> io::Result<Self> {
let next_request_id = Arc::new(AtomicU64::new(0));
let host_and_port = url.expose_secret().host_and_port();
let _guard =
tracing::trace_span!(target: "telemetry", "resolve_portal_url", host = %host_and_port.0).entered();
// Statically resolve the host in the URL to a set of addresses.
// We don't use these directly because we need to connect to the domain via TLS which requires a hostname.
// We expose them to other components that deal with DNS stuff to ensure our domain always resolves to these IPs.
let resolved_addresses = url
.expose_secret()
.host_and_port()
let resolved_addresses = host_and_port
.to_socket_addrs()?
.map(|addr| addr.ip())
.collect();

View File

@@ -3,8 +3,7 @@ use std::time::Duration;
pub use sentry::{
add_breadcrumb, capture_error, capture_message, configure_scope, end_session,
end_session_with_status, start_transaction, types::protocol::v7::SessionStatus, Breadcrumb,
Hub, Level, TransactionContext,
end_session_with_status, types::protocol::v7::SessionStatus, Breadcrumb, Hub, Level,
};
pub use sentry_anyhow::capture_anyhow;