mirror of
https://github.com/outbackdingo/firezone.git
synced 2026-01-27 10:18:54 +00:00
refactor(rust): only send telemetry spans to Sentry (#7153)
With the introduction of the `tracing-sentry` integration in #7105, we started sending tracing spans to Sentry. By default, all spans with level INFO and above get sampled at the configured rate and sent to Sentry. This results in a lot of useless transaction in Sentry because we use INFO level spans in multiple places in connlib to attach contextual information like the current connection ID. This PR introduces the concept of `telemetry` spans which - similar to the `telemetry` log target in #7147 - qualifies a span for being sent to Sentry. By convention, these are also defined as requiring the TRACE level. This ensures we won't ever see them as part of regular log output.
This commit is contained in:
@@ -521,11 +521,9 @@ impl<'a> Handler<'a> {
|
||||
///
|
||||
/// Throws matchable errors for bad URLs, unable to reach the portal, or unable to create the tunnel device
|
||||
fn connect_to_firezone(&mut self, api_url: &str, token: SecretString) -> Result<(), Error> {
|
||||
let ctx = firezone_telemetry::TransactionContext::new(
|
||||
"connect_to_firezone",
|
||||
"Connecting to Firezone",
|
||||
);
|
||||
let transaction = firezone_telemetry::start_transaction(ctx);
|
||||
let _connect_span =
|
||||
tracing::trace_span!(target: "telemetry", "connect_to_firezone").entered();
|
||||
|
||||
assert!(self.session.is_none());
|
||||
let device_id = device_id::get_or_create().map_err(|e| Error::DeviceId(e.to_string()))?;
|
||||
|
||||
@@ -543,7 +541,6 @@ impl<'a> Handler<'a> {
|
||||
let callbacks = CallbackHandler { cb_tx };
|
||||
|
||||
// Synchronous DNS resolution here
|
||||
let phoenix_span = transaction.start_child("phoenix", "Resolve DNS for PhoenixChannel");
|
||||
let portal = PhoenixChannel::disconnected(
|
||||
Secret::new(url),
|
||||
// The IPC service must use the GUI's version number, not the Headless Client's. But refactoring to separate the IPC service from the Headless Client will take a while.
|
||||
@@ -557,7 +554,6 @@ impl<'a> Handler<'a> {
|
||||
Arc::new(tcp_socket_factory),
|
||||
)
|
||||
.map_err(|e| Error::PortalConnection(e.to_string()))?;
|
||||
phoenix_span.finish();
|
||||
|
||||
// Read the resolvers before starting connlib, in case connlib's startup interferes.
|
||||
let dns = self.dns_controller.system_resolvers();
|
||||
@@ -571,18 +567,19 @@ impl<'a> Handler<'a> {
|
||||
// Call `set_dns` before `set_tun` so that the tunnel starts up with a valid list of resolvers.
|
||||
tracing::debug!(?dns, "Calling `set_dns`...");
|
||||
connlib.set_dns(dns);
|
||||
let tun_span = transaction.start_child("tun", "Raise tunnel with `make_tun`");
|
||||
let tun = self
|
||||
.tun_device
|
||||
.make_tun()
|
||||
.map_err(|e| Error::TunnelDevice(e.to_string()))?;
|
||||
|
||||
let tun = {
|
||||
let _guard = tracing::trace_span!(target: "telemetry", "create_tun_device").entered();
|
||||
|
||||
self.tun_device
|
||||
.make_tun()
|
||||
.map_err(|e| Error::TunnelDevice(e.to_string()))?
|
||||
};
|
||||
connlib.set_tun(Box::new(tun));
|
||||
tun_span.finish();
|
||||
|
||||
let session = Session { cb_rx, connlib };
|
||||
self.session = Some(session);
|
||||
|
||||
transaction.finish();
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -197,18 +197,14 @@ fn main() -> Result<()> {
|
||||
let mut last_connlib_start_instant = Some(Instant::now());
|
||||
|
||||
rt.block_on(async {
|
||||
let ctx = firezone_telemetry::TransactionContext::new(
|
||||
"connect_to_firezone",
|
||||
"Connecting to Firezone",
|
||||
);
|
||||
let transaction = firezone_telemetry::start_transaction(ctx);
|
||||
let connect_span =
|
||||
tracing::trace_span!(target: "telemetry", "connect_to_firezone").entered();
|
||||
|
||||
// The Headless Client will bail out here if there's no Internet, because `PhoenixChannel` will try to
|
||||
// resolve the portal host and fail. This is intentional behavior. The Headless Client should always be running under a manager like `systemd` or Windows' Service Controller,
|
||||
// so when it fails it will be restarted with backoff. `systemd` can additionally make us wait
|
||||
// for an Internet connection if it launches us at startup.
|
||||
// When running interactively, it is useful for the user to see that we can't reach the portal.
|
||||
let phoenix_span = transaction.start_child("phoenix", "Connect PhoenixChannel");
|
||||
let portal = PhoenixChannel::disconnected(
|
||||
Secret::new(url),
|
||||
get_user_agent(None, env!("CARGO_PKG_VERSION")),
|
||||
@@ -219,7 +215,6 @@ fn main() -> Result<()> {
|
||||
.build(),
|
||||
Arc::new(tcp_socket_factory),
|
||||
)?;
|
||||
phoenix_span.finish();
|
||||
let session = Session::connect(
|
||||
Arc::new(tcp_socket_factory),
|
||||
Arc::new(udp_socket_factory),
|
||||
@@ -244,13 +239,15 @@ fn main() -> Result<()> {
|
||||
new_network_notifier(tokio_handle.clone(), dns_control_method).await?;
|
||||
drop(tokio_handle);
|
||||
|
||||
let tun_span = transaction.start_child("tun", "Raise tunnel");
|
||||
let tun = tun_device.make_tun()?;
|
||||
let tun = {
|
||||
let _guard = tracing::trace_span!(target: "telemetry", "create_tun_device").entered();
|
||||
|
||||
tun_device.make_tun()?
|
||||
};
|
||||
session.set_tun(Box::new(tun));
|
||||
tun_span.finish();
|
||||
session.set_dns(dns_controller.system_resolvers());
|
||||
|
||||
transaction.finish();
|
||||
drop(connect_span);
|
||||
|
||||
let result = loop {
|
||||
let mut dns_changed = pin!(dns_notifier.notified().fuse());
|
||||
|
||||
@@ -23,10 +23,13 @@ where
|
||||
let directives = std::env::var("RUST_LOG").unwrap_or_default();
|
||||
|
||||
let subscriber = Registry::default()
|
||||
.with(additional_layer)
|
||||
.with(sentry_layer())
|
||||
.with(fmt::layer().event_format(Format::new()))
|
||||
.with(filter(&directives));
|
||||
.with(additional_layer.with_filter(filter(&directives)))
|
||||
.with(sentry_layer()) // Sentry layer has its own event filtering mechanism.
|
||||
.with(
|
||||
fmt::layer()
|
||||
.event_format(Format::new())
|
||||
.with_filter(filter(&directives)),
|
||||
);
|
||||
tracing::subscriber::set_global_default(subscriber).expect("Could not set global default");
|
||||
LogTracer::init().unwrap();
|
||||
}
|
||||
@@ -87,9 +90,10 @@ pub fn test_global(directives: &str) {
|
||||
/// Coupling the `telemetry` target to the `TRACE` level pretty much prevents these events from ever showing up in log files.
|
||||
/// By sampling them, we prevent flooding Sentry with lots of these logs.
|
||||
///
|
||||
/// ## Spans
|
||||
/// ## Telemetry spans
|
||||
///
|
||||
/// The default span-filter captures all spans with level INFO, WARN and ERROR as sentry "transactions".
|
||||
/// Only spans with the `telemetry` target on level `TRACE` will be submitted to Sentry.
|
||||
/// They are subject to the sampling rate defined in the Sentry client configuration.
|
||||
pub fn sentry_layer<S>() -> sentry_tracing::SentryLayer<S>
|
||||
where
|
||||
S: Subscriber + for<'a> LookupSpan<'a>,
|
||||
@@ -109,5 +113,6 @@ where
|
||||
}
|
||||
_ => EventFilter::Ignore,
|
||||
})
|
||||
.span_filter(|md| *md.level() == tracing::Level::TRACE && md.target() == "telemetry")
|
||||
.enable_span_attributes()
|
||||
}
|
||||
|
||||
@@ -258,12 +258,15 @@ where
|
||||
) -> io::Result<Self> {
|
||||
let next_request_id = Arc::new(AtomicU64::new(0));
|
||||
|
||||
let host_and_port = url.expose_secret().host_and_port();
|
||||
|
||||
let _guard =
|
||||
tracing::trace_span!(target: "telemetry", "resolve_portal_url", host = %host_and_port.0).entered();
|
||||
|
||||
// Statically resolve the host in the URL to a set of addresses.
|
||||
// We don't use these directly because we need to connect to the domain via TLS which requires a hostname.
|
||||
// We expose them to other components that deal with DNS stuff to ensure our domain always resolves to these IPs.
|
||||
let resolved_addresses = url
|
||||
.expose_secret()
|
||||
.host_and_port()
|
||||
let resolved_addresses = host_and_port
|
||||
.to_socket_addrs()?
|
||||
.map(|addr| addr.ip())
|
||||
.collect();
|
||||
|
||||
@@ -3,8 +3,7 @@ use std::time::Duration;
|
||||
|
||||
pub use sentry::{
|
||||
add_breadcrumb, capture_error, capture_message, configure_scope, end_session,
|
||||
end_session_with_status, start_transaction, types::protocol::v7::SessionStatus, Breadcrumb,
|
||||
Hub, Level, TransactionContext,
|
||||
end_session_with_status, types::protocol::v7::SessionStatus, Breadcrumb, Hub, Level,
|
||||
};
|
||||
pub use sentry_anyhow::capture_anyhow;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user