diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 712b2e3c3..608c2e672 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -2502,7 +2502,6 @@ dependencies = [ "nu-ansi-term 0.50.1", "output_vt100", "parking_lot", - "rand 0.8.5", "sentry-tracing", "supports-color", "tempfile", diff --git a/rust/connlib/phoenix-channel/src/lib.rs b/rust/connlib/phoenix-channel/src/lib.rs index 14469db70..1d1361154 100644 --- a/rust/connlib/phoenix-channel/src/lib.rs +++ b/rust/connlib/phoenix-channel/src/lib.rs @@ -13,7 +13,7 @@ use std::{io, mem}; use backoff::ExponentialBackoff; use backoff::backoff::Backoff; use base64::Engine; -use firezone_logging::{err_with_src, telemetry_span}; +use firezone_logging::err_with_src; use futures::future::BoxFuture; use futures::{FutureExt, SinkExt, StreamExt}; use itertools::Itertools as _; @@ -262,8 +262,6 @@ where ) -> io::Result { let host_and_port = url.expose_secret().host_and_port(); - let _span = telemetry_span!("resolve_portal_url", host = %host_and_port.0).entered(); - // Statically resolve the host in the URL to a set of addresses. // We use these when connecting the socket to avoid a dependency on DNS resolution later on. let resolved_addresses = host_and_port diff --git a/rust/connlib/tunnel/src/client.rs b/rust/connlib/tunnel/src/client.rs index fb9f56dcb..4a66b091d 100644 --- a/rust/connlib/tunnel/src/client.rs +++ b/rust/connlib/tunnel/src/client.rs @@ -19,7 +19,7 @@ use anyhow::Context; use bimap::BiMap; use connlib_model::{GatewayId, PublicKey, RelayId, ResourceId, ResourceStatus, ResourceView}; use connlib_model::{Site, SiteId}; -use firezone_logging::{err_with_src, telemetry_event, unwrap_or_debug, unwrap_or_warn}; +use firezone_logging::{err_with_src, unwrap_or_debug, unwrap_or_warn}; use ip_network::{IpNetwork, Ipv4Network, Ipv6Network}; use ip_network_table::IpNetworkTable; use ip_packet::{IpPacket, MAX_UDP_PAYLOAD}; @@ -474,7 +474,7 @@ impl ClientState { } }) .unwrap_or_else(|e| { - telemetry_event!("Recursive UDP DNS query failed: {}", err_with_src(&e)); + tracing::debug!("Recursive UDP DNS query failed: {}", err_with_src(&e)); dns_types::Response::servfail(&response.query) }); @@ -490,7 +490,7 @@ impl ClientState { tracing::trace!("Received recursive TCP DNS response"); }) .unwrap_or_else(|e| { - telemetry_event!("Recursive TCP DNS query failed: {}", err_with_src(&e)); + tracing::debug!("Recursive TCP DNS query failed: {}", err_with_src(&e)); dns_types::Response::servfail(&response.query) }); diff --git a/rust/connlib/tunnel/src/dns.rs b/rust/connlib/tunnel/src/dns.rs index f6361b045..195931684 100644 --- a/rust/connlib/tunnel/src/dns.rs +++ b/rust/connlib/tunnel/src/dns.rs @@ -5,7 +5,7 @@ use dns_types::{ DomainName, DomainNameRef, OwnedRecordData, Query, RecordType, Response, ResponseBuilder, ResponseCode, }; -use firezone_logging::{err_with_src, telemetry_span}; +use firezone_logging::err_with_src; use itertools::Itertools; use pattern::{Candidate, Pattern}; use std::io; @@ -231,8 +231,6 @@ impl StubResolver { /// /// This performs a linear search and is thus O(N) and **must not** be called in the hot-path of packet routing. fn match_resource_linear(&self, domain: &dns_types::DomainName) -> Option { - let _span = telemetry_span!("match_resource_linear").entered(); - let name = Candidate::from_domain(domain); for (pattern, r) in &self.dns_resources { diff --git a/rust/connlib/tunnel/src/io.rs b/rust/connlib/tunnel/src/io.rs index cb4e5ebb0..7a4cfb486 100644 --- a/rust/connlib/tunnel/src/io.rs +++ b/rust/connlib/tunnel/src/io.rs @@ -5,7 +5,6 @@ mod udp_dns; use crate::{device_channel::Device, dns, otel, sockets::Sockets}; use anyhow::{Context as _, Result}; -use firezone_logging::{telemetry_event, telemetry_span}; use futures::FutureExt as _; use futures_bounded::FuturesTupleSet; use gat_lending_iterator::LendingIterator; @@ -22,7 +21,6 @@ use std::{ task::{Context, Poll, ready}, time::{Duration, Instant}, }; -use tracing::Instrument; use tun::Tun; /// How many IP packets we will at most read from the MPSC-channel connected to our TUN device thread. @@ -371,8 +369,7 @@ impl Io { if self .dns_queries .try_push( - udp_dns::send(self.udp_socket_factory.clone(), query.server, query.message) - .instrument(telemetry_span!("recursive_udp_dns_query")), + udp_dns::send(self.udp_socket_factory.clone(), query.server, query.message), meta, ) .is_err() @@ -384,8 +381,7 @@ impl Io { if self .dns_queries .try_push( - tcp_dns::send(self.tcp_socket_factory.clone(), query.server, query.message) - .instrument(telemetry_span!("recursive_tcp_dns_query")), + tcp_dns::send(self.tcp_socket_factory.clone(), query.server, query.message), meta, ) .is_err() @@ -416,8 +412,6 @@ impl Io { fn is_max_wg_packet_size(d: &DatagramIn) -> bool { let len = d.packet.len(); if len > MAX_FZ_PAYLOAD { - telemetry_event!(from = %d.from, %len, "Dropping too large datagram (max allowed: {MAX_FZ_PAYLOAD} bytes)"); - return false; } diff --git a/rust/gateway/src/eventloop.rs b/rust/gateway/src/eventloop.rs index 65a7206b7..163eea393 100644 --- a/rust/gateway/src/eventloop.rs +++ b/rust/gateway/src/eventloop.rs @@ -4,8 +4,8 @@ use boringtun::x25519::PublicKey; use dns_lookup::{AddrInfoHints, AddrInfoIter, LookupError}; use dns_types::DomainName; use firezone_bin_shared::TunDeviceManager; -use firezone_logging::telemetry_span; use firezone_telemetry::{Telemetry, analytics}; + use firezone_tunnel::messages::gateway::{ AllowAccess, ClientIceCandidates, ClientsIceCandidates, ConnectionReady, EgressMessages, IngressMessages, RejectAccess, RequestConnection, @@ -24,7 +24,6 @@ use std::task::{Context, Poll}; use std::time::{Duration, Instant}; use std::{io, mem}; use tokio::sync::Mutex; -use tracing::Instrument; use crate::RELEASE; @@ -585,7 +584,6 @@ async fn resolve(domain: DomainName) -> Result> { let dname = domain.to_string(); let addresses = tokio::task::spawn_blocking(move || resolve_addresses(&dname)) - .instrument(telemetry_span!("resolve_dns_resource")) .await .context("DNS resolution task failed")? .context("DNS resolution failed")?; diff --git a/rust/gui-client/src-tauri/src/service.rs b/rust/gui-client/src-tauri/src/service.rs index 12058fee3..64e9f6324 100644 --- a/rust/gui-client/src-tauri/src/service.rs +++ b/rust/gui-client/src-tauri/src/service.rs @@ -13,7 +13,7 @@ use firezone_bin_shared::{ platform::{tcp_socket_factory, udp_socket_factory}, signals, }; -use firezone_logging::{FilterReloadHandle, err_with_src, telemetry_span}; +use firezone_logging::{FilterReloadHandle, err_with_src}; use firezone_telemetry::{Telemetry, analytics}; use futures::{ Future as _, SinkExt as _, Stream, StreamExt, @@ -577,8 +577,6 @@ impl<'a> Handler<'a> { fn try_connect(&mut self, api_url: &str, token: SecretString) -> Result { let started_at = Instant::now(); - let _connect_span = telemetry_span!("connect_to_firezone").entered(); - assert!(self.session.is_none()); let device_id = device_id::get_or_create().context("Failed to get-or-create device ID")?; @@ -624,13 +622,10 @@ impl<'a> Handler<'a> { tracing::debug!(?dns, "Calling `set_dns`..."); connlib.set_dns(dns); - let tun = { - let _guard = telemetry_span!("create_tun_device").entered(); - - self.tun_device - .make_tun() - .context("Failed to create TUN device")? - }; + let tun = self + .tun_device + .make_tun() + .context("Failed to create TUN device")?; connlib.set_tun(tun); Ok(Session::Creating { diff --git a/rust/headless-client/src/main.rs b/rust/headless-client/src/main.rs index 87d8763c6..7d6539127 100644 --- a/rust/headless-client/src/main.rs +++ b/rust/headless-client/src/main.rs @@ -11,7 +11,6 @@ use firezone_bin_shared::{ platform::{tcp_socket_factory, udp_socket_factory}, signals, }; -use firezone_logging::telemetry_span; use firezone_telemetry::{Telemetry, analytics, otel}; use opentelemetry_sdk::metrics::{PeriodicReader, SdkMeterProvider}; use phoenix_channel::PhoenixChannel; @@ -245,8 +244,6 @@ fn main() -> Result<()> { opentelemetry::global::set_meter_provider(provider); } - let connect_span = telemetry_span!("connect_to_firezone").entered(); - // The Headless Client will bail out here if there's no Internet, because `PhoenixChannel` will try to // resolve the portal host and fail. This is intentional behavior. The Headless Client should always be running under a manager like `systemd` or Windows' Service Controller, // so when it fails it will be restarted with backoff. `systemd` can additionally make us wait @@ -288,16 +285,10 @@ fn main() -> Result<()> { let mut telemetry_refresh = tokio::time::interval(Duration::from_secs(60)); - let tun = { - let _guard = telemetry_span!("create_tun_device").entered(); - - tun_device.make_tun()? - }; + let tun = tun_device.make_tun()?; session.set_tun(tun); session.set_dns(dns_controller.system_resolvers()); - drop(connect_span); - let result = loop { let event = tokio::select! { () = terminate.recv() => { diff --git a/rust/logging/Cargo.toml b/rust/logging/Cargo.toml index 84c4c7be1..ea062e4c5 100644 --- a/rust/logging/Cargo.toml +++ b/rust/logging/Cargo.toml @@ -12,7 +12,6 @@ anyhow = { workspace = true } nu-ansi-term = { workspace = true } output_vt100 = { workspace = true } parking_lot = { workspace = true } -rand = { workspace = true } sentry-tracing = { workspace = true } supports-color = { workspace = true } time = { workspace = true, features = ["formatting"] } diff --git a/rust/logging/src/lib.rs b/rust/logging/src/lib.rs index 991004613..f813e343b 100644 --- a/rust/logging/src/lib.rs +++ b/rust/logging/src/lib.rs @@ -198,21 +198,6 @@ pub fn test_global(directives: &str) { /// /// - error and warn events are reported as sentry exceptions /// - info and debug events are captured as breadcrumbs (and submitted together with warns & errors) -/// -/// ## Telemetry events -/// -/// This layer configuration supports a special `telemetry` event. -/// Telemetry events are events logged on the `TRACE` level for the `telemetry` target. -/// These events SHOULD be created using [`telemetry_event`] to ensure that they are sampled correctly. -/// The idea here is that some events logged via `tracing` should not necessarily end up in the users log file. -/// Yet, if they happen a lot, we still want to know about them. -/// Coupling the `telemetry` target to the `TRACE` level pretty much prevents these events from ever showing up in log files. -/// By sampling them, we prevent flooding Sentry with lots of these logs. -/// -/// ## Telemetry spans -/// -/// Only spans with the `telemetry` target on level `TRACE` will be submitted to Sentry. -/// Similar to telemetry events, these should be created with [`telemetry_span`] to ensure they are sampled correctly. pub fn sentry_layer() -> impl Layer + Send + Sync where S: Subscriber + for<'a> LookupSpan<'a>, @@ -223,7 +208,6 @@ where .event_filter(move |md| match *md.level() { Level::ERROR | Level::WARN => EventFilter::Event | EventFilter::Breadcrumb | EventFilter::Log, Level::INFO | Level::DEBUG => EventFilter::Breadcrumb | EventFilter::Log, - Level::TRACE if md.target() == TELEMETRY_TARGET => EventFilter::Event, _ => EventFilter::Ignore, }) .span_filter(|md| { @@ -264,43 +248,3 @@ where ], ).not()) } - -#[doc(hidden)] -pub const TELEMETRY_TARGET: &str = "telemetry"; -#[doc(hidden)] -pub const TELEMETRY_SAMPLE_RATE: f32 = 0.01; - -/// Creates a `telemetry` span that will be active until dropped. -/// -/// In order to save CPU power, `telemetry` spans are sampled at a rate of 1% at creation time. -#[macro_export] -macro_rules! telemetry_span { - ($($arg:tt)*) => { - if $crate::__export::rand::random::() < $crate::TELEMETRY_SAMPLE_RATE { - $crate::__export::tracing::trace_span!(target: $crate::TELEMETRY_TARGET, $($arg)*) - } else { - $crate::__export::tracing::Span::none() - } - }; -} - -/// Creates a `telemetry` event. -/// -/// In order to save CPU power, `telemetry` events are sampled at a rate of 1% at creation time. -/// In addition, all telemetry events are logged at the `DEBUG` level. -#[macro_export] -macro_rules! telemetry_event { - ($($arg:tt)*) => { - if $crate::__export::rand::random::() < $crate::TELEMETRY_SAMPLE_RATE { - $crate::__export::tracing::trace!(target: $crate::TELEMETRY_TARGET, $($arg)*); - } - - $crate::__export::tracing::debug!($($arg)*); - }; -} - -#[doc(hidden)] -pub mod __export { - pub use rand; - pub use tracing; -} diff --git a/rust/telemetry/src/lib.rs b/rust/telemetry/src/lib.rs index 4e48ca143..6d209c5ed 100644 --- a/rust/telemetry/src/lib.rs +++ b/rust/telemetry/src/lib.rs @@ -156,11 +156,6 @@ impl Telemetry { environment: Some(Cow::Borrowed(environment.as_str())), // We can't get the release number ourselves because we don't know if we're embedded in a GUI Client or a Headless Client. release: Some(release.to_owned().into()), - traces_sampler: Some(Arc::new(|tx| { - // Only submit `telemetry` spans to Sentry. - // Those get sampled at creation time (to save CPU power) so we want to submit all of them. - if tx.name() == "telemetry" { 1.0 } else { 0.0 } - })), max_breadcrumbs: 500, before_send: Some(event_rate_limiter(Duration::from_secs(60 * 5))), enable_logs,