diff --git a/rust/Cargo.lock b/rust/Cargo.lock index be3749b25..712b2e3c3 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -6360,9 +6360,9 @@ dependencies = [ [[package]] name = "sentry" -version = "0.38.1" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a505499b38861edd82b5a688fa06ba4ba5875bb832adeeeba22b7b23fc4bc39a" +checksum = "507ac2be9bf2da56c831da57faf1dadd81f434bd282935cdb06193d0c94e8811" dependencies = [ "httpdate", "reqwest", @@ -6372,18 +6372,18 @@ dependencies = [ "sentry-contexts", "sentry-core", "sentry-debug-images", + "sentry-log", "sentry-panic", "sentry-tracing", "tokio", "ureq", - "webpki-roots 0.26.11", ] [[package]] name = "sentry-actix" -version = "0.38.1" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39ad8bfdcfbc6e0d0dacaa5728555085ef459fa9226cfc2fe64eefa4b8038b7f" +checksum = "8402c142005ee560ae361c73ebece13a299ec3e9cce5b8654479ea9aac8dc8df" dependencies = [ "actix-http", "actix-web", @@ -6394,9 +6394,9 @@ dependencies = [ [[package]] name = "sentry-backtrace" -version = "0.38.1" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8dace796060e4ad10e3d1405b122ae184a8b2e71dce05ae450e4f81b7686b0d9" +checksum = "eb4416302fa5325181a120e0fe7d4afd83cd95e52a9b86afa34a8161383fe0dc" dependencies = [ "backtrace", "regex", @@ -6405,9 +6405,9 @@ dependencies = [ [[package]] name = "sentry-contexts" -version = "0.38.1" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87bd9e6b51ffe2bc7188ebe36cb67557cb95749c08a3f81f33e8c9b135e0d1bc" +checksum = "936752f42b6f651dcb257da0bfa235ecc79e82011c49ed3383c212cc582263ff" dependencies = [ "hostname", "libc", @@ -6419,9 +6419,9 @@ dependencies = [ [[package]] name = "sentry-core" -version = "0.38.1" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7426d4beec270cfdbb50f85f0bb2ce176ea57eed0b11741182a163055a558187" +checksum = "00e9bd2cadaeda3af41e9fa5d14645127d6f6a4aec73da3ae38e477ecafd3682" dependencies = [ "rand 0.9.1", "sentry-types", @@ -6431,19 +6431,29 @@ dependencies = [ [[package]] name = "sentry-debug-images" -version = "0.38.1" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9df15c066c04f34c4dfd496a8e76590106b93283f72ef1a47d8fb24d88493424" +checksum = "e1e074fe9a0970c91999b23ed3195e6e30990d589fba3a68f20a1686af0f5cda" dependencies = [ "findshlibs", "sentry-core", ] [[package]] -name = "sentry-panic" -version = "0.38.1" +name = "sentry-log" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c92beed69b776a162b6d269bef1eaa3e614090b6df45a88d9b239c4fdbffdfba" +checksum = "a693f27e3f63ae085cf7c176b5c44038af27c8a0170d01db30ccf776c2d40ce3" +dependencies = [ + "log", + "sentry-core", +] + +[[package]] +name = "sentry-panic" +version = "0.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4651d34f3ba649d9e6dc1268443cae6728b8f741c2f0264004f8ecf5b247330d" dependencies = [ "sentry-backtrace", "sentry-core", @@ -6451,10 +6461,11 @@ dependencies = [ [[package]] name = "sentry-tracing" -version = "0.38.1" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55c323492795de90824f3198562e33dd74ae3bc852fbb13c0cabec54a1cf73cd" +checksum = "c25c47d36bc80c74d26d568ffe970c37b337c061b7234ad6f2d159439c16f000" dependencies = [ + "bitflags 2.9.1", "sentry-backtrace", "sentry-core", "tracing-core", @@ -6463,9 +6474,9 @@ dependencies = [ [[package]] name = "sentry-types" -version = "0.38.1" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04b6c9287202294685cb1f749b944dbbce8160b81a1061ecddc073025fed129f" +checksum = "a08e7154abe2cd557f26fd70038452810748aefdf39bc973f674421224b147c1" dependencies = [ "debugid", "hex", @@ -8485,19 +8496,33 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" -version = "2.12.1" +version = "3.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +checksum = "9f0fde9bc91026e381155f8c67cb354bcd35260b2f4a29bcc84639f762760c39" dependencies = [ "base64 0.22.1", "log", - "once_cell", + "percent-encoding", "rustls", + "rustls-pemfile", "rustls-pki-types", - "url", + "ureq-proto", + "utf-8", "webpki-roots 0.26.11", ] +[[package]] +name = "ureq-proto" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59db78ad1923f2b1be62b6da81fe80b173605ca0d57f85da2e005382adf693f7" +dependencies = [ + "base64 0.22.1", + "http 1.3.1", + "httparse", + "log", +] + [[package]] name = "url" version = "2.5.4" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 5b07cde3a..2ac897ccd 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -139,8 +139,8 @@ sadness-generator = "0.6.0" sd-notify = "0.4.5" # This is a pure Rust re-implementation, so it isn't vulnerable to CVE-2024-3094 secrecy = "0.8" semver = "1.0.26" -sentry = { version = "0.38.1", default-features = false } -sentry-tracing = "0.38.1" +sentry = { version = "0.41.0", default-features = false } +sentry-tracing = "0.41.0" serde = "1.0.219" serde_json = "1.0.140" serde_variant = "0.1.3" diff --git a/rust/apple-client-ffi/src/lib.rs b/rust/apple-client-ffi/src/lib.rs index 073fb838e..a65b563eb 100644 --- a/rust/apple-client-ffi/src/lib.rs +++ b/rust/apple-client-ffi/src/lib.rs @@ -254,9 +254,14 @@ impl WrappedSession { callback_handler: ffi::CallbackHandler, device_info: String, ) -> Result { + let runtime = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .thread_name("connlib") + .enable_all() + .build()?; + let mut telemetry = Telemetry::default(); - telemetry.start(&api_url, RELEASE, APPLE_DSN); - Telemetry::set_firezone_id(device_id.clone()); + runtime.block_on(telemetry.start(&api_url, RELEASE, APPLE_DSN, device_id.clone())); Telemetry::set_account_slug(account_slug.clone()); analytics::identify( @@ -281,11 +286,6 @@ impl WrappedSession { device_info, )?; - let runtime = tokio::runtime::Builder::new_multi_thread() - .worker_threads(1) - .thread_name("connlib") - .enable_all() - .build()?; let _guard = runtime.enter(); // Constructing `PhoenixChannel` requires a runtime context. let portal = PhoenixChannel::disconnected( diff --git a/rust/client-ffi/src/lib.rs b/rust/client-ffi/src/lib.rs index f9b6bdc34..a3e14cdc9 100644 --- a/rust/client-ffi/src/lib.rs +++ b/rust/client-ffi/src/lib.rs @@ -226,9 +226,15 @@ fn connect( serde_json::from_str(&device_info).context("Failed to deserialize `DeviceInfo`")?; let secret = SecretString::from(token); + let runtime = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .thread_name("connlib") + .enable_all() + .build() + .context("Failed to create tokio runtime")?; + let mut telemetry = Telemetry::default(); - telemetry.start(&api_url, RELEASE, platform::DSN); - Telemetry::set_firezone_id(device_id.clone()); + runtime.block_on(telemetry.start(&api_url, RELEASE, platform::DSN, device_id.clone())); Telemetry::set_account_slug(account_slug.clone()); analytics::identify( @@ -250,12 +256,6 @@ fn connect( ) .context("Failed to create login URL")?; - let runtime = tokio::runtime::Builder::new_multi_thread() - .worker_threads(1) - .thread_name("connlib") - .enable_all() - .build() - .context("Failed to create tokio runtime")?; let _guard = runtime.enter(); // Constructing `PhoenixChannel` requires a runtime context. let portal = PhoenixChannel::disconnected( diff --git a/rust/gateway/src/eventloop.rs b/rust/gateway/src/eventloop.rs index 80964971e..65a7206b7 100644 --- a/rust/gateway/src/eventloop.rs +++ b/rust/gateway/src/eventloop.rs @@ -49,7 +49,7 @@ enum ResolveTrigger { SetupNat(ResolveDnsRequest), } -pub struct Eventloop { +pub struct Eventloop<'a> { tunnel: GatewayTunnel, portal: PhoenixChannel<(), IngressMessages, (), PublicKeyParam>, tun_device_manager: Arc>, @@ -61,15 +61,19 @@ pub struct Eventloop { set_interface_tasks: futures_bounded::FuturesSet>, + telemetry_refresh: tokio::time::Interval, + telemetry: &'a mut Telemetry, + logged_permission_denied: bool, } -impl Eventloop { +impl<'a> Eventloop<'a> { pub(crate) fn new( tunnel: GatewayTunnel, mut portal: PhoenixChannel<(), IngressMessages, (), PublicKeyParam>, tun_device_manager: TunDeviceManager, firezone_id: String, + telemetry: &'a mut Telemetry, ) -> Self { portal.connect(PublicKeyParam(tunnel.public_key().to_bytes())); @@ -88,11 +92,13 @@ impl Eventloop { tracing::debug!(%domain, ?ips, ?cause, "DNS cache entry evicted"); }) .build(), + telemetry_refresh: tokio::time::interval(Duration::from_secs(60)), + telemetry, } } } -impl Eventloop { +impl<'a> Eventloop<'a> { pub fn poll(&mut self, cx: &mut Context<'_>) -> Poll> { loop { match self.tunnel.poll_next_event(cx) { @@ -210,6 +216,14 @@ impl Eventloop { Poll::Pending => {} } + match self.telemetry_refresh.poll_tick(cx) { + Poll::Ready(_) => { + self.telemetry.refresh_config(); + continue; + } + Poll::Pending => {} + } + return Poll::Pending; } } diff --git a/rust/gateway/src/main.rs b/rust/gateway/src/main.rs index bbc46df07..e0280a611 100644 --- a/rust/gateway/src/main.rs +++ b/rust/gateway/src/main.rs @@ -52,13 +52,6 @@ fn main() -> ExitCode { .expect("Calling `install_default` only once per process should always succeed"); let mut telemetry = Telemetry::default(); - if cli.is_telemetry_allowed() { - telemetry.start( - cli.api_url.as_str(), - RELEASE, - firezone_telemetry::GATEWAY_DSN, - ); - } let runtime = tokio::runtime::Builder::new_current_thread() .enable_all() @@ -66,21 +59,17 @@ fn main() -> ExitCode { .expect("Failed to create tokio runtime"); match runtime - .block_on(try_main(cli)) + .block_on(try_main(cli, &mut telemetry)) .context("Failed to start Gateway") { - Ok(ExitCode::SUCCESS) => { + Ok(()) => { + tracing::info!("Received CTRL+C, goodbye!"); runtime.block_on(telemetry.stop()); ExitCode::SUCCESS } - Ok(_) => { - runtime.block_on(telemetry.stop_on_crash()); - - ExitCode::FAILURE - } Err(e) => { - tracing::error!("{e:#}"); + tracing::info!("{e:#}"); runtime.block_on(telemetry.stop_on_crash()); ExitCode::FAILURE @@ -102,15 +91,25 @@ fn has_necessary_permissions() -> bool { is_root || has_net_admin } -async fn try_main(cli: Cli) -> Result { +async fn try_main(cli: Cli, telemetry: &mut Telemetry) -> Result<()> { firezone_logging::setup_global_subscriber(layer::Identity::default()) .context("Failed to set up logging")?; tracing::debug!(?cli); - let firezone_id = get_firezone_id(cli.firezone_id).await + let firezone_id = get_firezone_id(cli.firezone_id.clone()).await .context("Couldn't read FIREZONE_ID or write it to disk: Please provide it through the env variable or provide rw access to /var/lib/firezone/")?; - Telemetry::set_firezone_id(firezone_id.clone()); + + if cli.is_telemetry_allowed() { + telemetry + .start( + cli.api_url.as_str(), + concat!("gateway@", env!("CARGO_PKG_VERSION")), + firezone_telemetry::GATEWAY_DSN, + firezone_id.clone(), + ) + .await; + } if cli.metrics { let exporter = opentelemetry_stdout::MetricExporter::default(); @@ -176,12 +175,12 @@ async fn try_main(cli: Cli) -> Result { tunnel.set_tun(tun); } - let task = tokio::spawn(future::poll_fn({ - let mut eventloop = Eventloop::new(tunnel, portal, tun_device_manager, firezone_id); + let eventloop = future::poll_fn({ + let mut eventloop = + Eventloop::new(tunnel, portal, tun_device_manager, firezone_id, telemetry); move |cx| eventloop.poll(cx) - })) - .err_into(); + }); let ctrl_c = pin!(ctrl_c().map_err(anyhow::Error::new)); tokio::spawn(http_health_check::serve( @@ -189,20 +188,12 @@ async fn try_main(cli: Cli) -> Result { || true, )); - match future::try_select(task, ctrl_c) + match future::try_select(eventloop, ctrl_c) .await .map_err(|e| e.factor_first().0)? { - future::Either::Left((Err(e), _)) => { - tracing::info!("{e}"); - - Ok(ExitCode::FAILURE) - } - future::Either::Right(((), _)) => { - tracing::info!("Received CTRL+C, goodbye!"); - - Ok(ExitCode::SUCCESS) - } + future::Either::Left((never, _)) => match never {}, + future::Either::Right(((), _)) => Ok(()), } } diff --git a/rust/gui-client/src-tauri/src/bin/firezone-gui-client.rs b/rust/gui-client/src-tauri/src/bin/firezone-gui-client.rs index 4158045d1..4129e12a6 100644 --- a/rust/gui-client/src-tauri/src/bin/firezone-gui-client.rs +++ b/rust/gui-client/src-tauri/src/bin/firezone-gui-client.rs @@ -11,6 +11,7 @@ use clap::{Args, Parser}; use controller::Failure; use firezone_gui_client::{controller, deep_link, elevation, gui, logging, settings}; use firezone_telemetry::Telemetry; +use firezone_telemetry::analytics; use settings::AdvancedSettingsLegacy; use tokio::runtime::Runtime; use tracing::subscriber::DefaultGuard; @@ -77,11 +78,22 @@ fn try_main( .unwrap_or(&advanced_settings.api_url) .to_string(); - telemetry.start( + // Get the device ID before starting Tokio, so that all the worker threads will inherit the correct scope. + // Technically this means we can fail to get the device ID on a newly-installed system, since the Tunnel service may not have fully started up when the GUI process reaches this point, but in practice it's unlikely. + let id = firezone_bin_shared::device_id::get().context("Failed to get device ID")?; + analytics::identify( + id.id.clone(), + api_url.clone(), + firezone_gui_client::RELEASE.to_owned(), + None, + ); + + rt.block_on(telemetry.start( &api_url, firezone_gui_client::RELEASE, firezone_telemetry::GUI_DSN, - ); + id.id, + )); // Don't fix the log filter for smoke tests because we can't show a dialog there. if !config.smoke_test { @@ -100,12 +112,6 @@ fn try_main( reloader, } = firezone_gui_client::logging::setup_gui(&log_filter)?; - // Get the device ID before starting Tokio, so that all the worker threads will inherit the correct scope. - // Technically this means we can fail to get the device ID on a newly-installed system, since the Tunnel service may not have fully started up when the GUI process reaches this point, but in practice it's unlikely. - if let Ok(id) = firezone_bin_shared::device_id::get() { - Telemetry::set_firezone_id(id.id); - } - match cli.command { None if cli.check_elevation() => match elevation::gui_check() { Ok(true) => {} diff --git a/rust/gui-client/src-tauri/src/service.rs b/rust/gui-client/src-tauri/src/service.rs index 74ac5fa95..12058fee3 100644 --- a/rust/gui-client/src-tauri/src/service.rs +++ b/rust/gui-client/src-tauri/src/service.rs @@ -548,8 +548,13 @@ impl<'a> Handler<'a> { account_slug, } => { self.telemetry - .start(&environment, &release, firezone_telemetry::GUI_DSN); - Telemetry::set_firezone_id(self.device_id.id.clone()); + .start( + &environment, + &release, + firezone_telemetry::GUI_DSN, + self.device_id.id.clone(), + ) + .await; if let Some(account_slug) = account_slug { Telemetry::set_account_slug(account_slug.clone()); @@ -576,7 +581,6 @@ impl<'a> Handler<'a> { assert!(self.session.is_none()); let device_id = device_id::get_or_create().context("Failed to get-or-create device ID")?; - Telemetry::set_firezone_id(device_id.id.clone()); let url = LoginUrl::client( Url::parse(api_url).context("Failed to parse URL")?, diff --git a/rust/headless-client/src/main.rs b/rust/headless-client/src/main.rs index b56bdd69e..87d8763c6 100644 --- a/rust/headless-client/src/main.rs +++ b/rust/headless-client/src/main.rs @@ -21,6 +21,7 @@ use secrecy::{Secret, SecretString}; use std::{ path::{Path, PathBuf}, sync::Arc, + time::Duration, }; use tokio::time::Instant; @@ -170,21 +171,35 @@ fn main() -> Result<()> { // and we need to recover. dns_controller.deactivate()?; - let mut telemetry = Telemetry::default(); - if cli.is_telemetry_allowed() { - telemetry.start( - cli.api_url.as_ref(), - RELEASE, - firezone_telemetry::HEADLESS_DSN, - ); - } - - tracing::info!(arch = std::env::consts::ARCH, version = VERSION); - let rt = tokio::runtime::Builder::new_current_thread() .enable_all() .build()?; + // AKA "Device ID", not the Firezone slug + let firezone_id = match cli.firezone_id.clone() { + Some(id) => id, + None => device_id::get_or_create().context("Could not get `firezone_id` from CLI, could not read it from disk, could not generate it and save it to disk")?.id, + }; + + analytics::identify( + firezone_id.clone(), + cli.api_url.to_string(), + RELEASE.to_owned(), + None, + ); + + let mut telemetry = Telemetry::default(); + if cli.is_telemetry_allowed() { + rt.block_on(telemetry.start( + cli.api_url.as_ref(), + RELEASE, + firezone_telemetry::HEADLESS_DSN, + firezone_id.clone(), + )); + } + + tracing::info!(arch = std::env::consts::ARCH, version = VERSION); + let token = get_token(token_env_var, &cli.token_path)?.with_context(|| { format!( "Can't find the Firezone token in ${TOKEN_ENV_KEY} or in `{}`", @@ -194,20 +209,6 @@ fn main() -> Result<()> { // TODO: Should this default to 30 days? let max_partition_time = cli.max_partition_time.map(|d| d.into()); - // AKA "Device ID", not the Firezone slug - let firezone_id = match cli.firezone_id { - Some(id) => id, - None => device_id::get_or_create().context("Could not get `firezone_id` from CLI, could not read it from disk, could not generate it and save it to disk")?.id, - }; - Telemetry::set_firezone_id(firezone_id.clone()); - - analytics::identify( - firezone_id.clone(), - cli.api_url.to_string(), - RELEASE.to_owned(), - None, - ); - let url = LoginUrl::client( cli.api_url.clone(), &token, @@ -285,6 +286,8 @@ fn main() -> Result<()> { new_network_notifier(tokio_handle.clone(), dns_control_method).await?; drop(tokio_handle); + let mut telemetry_refresh = tokio::time::interval(Duration::from_secs(60)); + let tun = { let _guard = telemetry_span!("create_tun_device").entered(); @@ -320,6 +323,10 @@ fn main() -> Result<()> { session.reset(); continue; }, + _ = telemetry_refresh.tick() => { + telemetry.refresh_config(); + continue; + } event = event_stream.next() => event.context("event stream unexpectedly ran empty")?, }; diff --git a/rust/logging/src/lib.rs b/rust/logging/src/lib.rs index c3e51b366..991004613 100644 --- a/rust/logging/src/lib.rs +++ b/rust/logging/src/lib.rs @@ -111,8 +111,7 @@ fn parse_filter(directives: &str) -> Result { /// /// By prepending this directive to the active log filter, a simple directive like `debug` actually produces useful logs. /// If necessary, you can still activate logs from these crates by restating them in your directive with a lower filter, i.e. `netlink_proto=debug`. - const IRRELEVANT_CRATES: &str = - "netlink_proto=warn,os_info=warn,rustls=warn,opentelemetry_sdk=info,opentelemetry=info"; + const IRRELEVANT_CRATES: &str = "netlink_proto=warn,os_info=warn,rustls=warn,opentelemetry_sdk=info,opentelemetry=info,hyper_util=info"; let env_filter = if directives.is_empty() { EnvFilter::try_new(IRRELEVANT_CRATES)? @@ -222,8 +221,8 @@ where sentry_tracing::layer() .event_filter(move |md| match *md.level() { - Level::ERROR | Level::WARN => EventFilter::Event, - Level::INFO | Level::DEBUG => EventFilter::Breadcrumb, + Level::ERROR | Level::WARN => EventFilter::Event | EventFilter::Breadcrumb | EventFilter::Log, + Level::INFO | Level::DEBUG => EventFilter::Breadcrumb | EventFilter::Log, Level::TRACE if md.target() == TELEMETRY_TARGET => EventFilter::Event, _ => EventFilter::Ignore, }) diff --git a/rust/relay/server/src/main.rs b/rust/relay/server/src/main.rs index 872714863..701c64e3f 100644 --- a/rust/relay/server/src/main.rs +++ b/rust/relay/server/src/main.rs @@ -118,20 +118,21 @@ fn main() { let args = Args::parse(); - let mut telemetry = Telemetry::default(); - if args.telemetry { - telemetry.start( - args.api_url.as_str(), - VERSION.unwrap_or("unknown"), - RELAY_DSN, - ); - } - let runtime = tokio::runtime::Builder::new_current_thread() .enable_all() .build() .expect("Failed to build tokio runtime"); + let mut telemetry = Telemetry::default(); + if args.telemetry { + runtime.block_on(telemetry.start( + args.api_url.as_str(), + VERSION.unwrap_or("unknown"), + RELAY_DSN, + String::new(), // Relays don't have a Firezone ID. + )); + } + match runtime.block_on(try_main(args)) { Ok(()) => runtime.block_on(telemetry.stop()), Err(e) => { diff --git a/rust/telemetry/Cargo.toml b/rust/telemetry/Cargo.toml index 2954794c8..a6b24dc5a 100644 --- a/rust/telemetry/Cargo.toml +++ b/rust/telemetry/Cargo.toml @@ -13,7 +13,7 @@ opentelemetry = { workspace = true } opentelemetry_sdk = { workspace = true } parking_lot = { workspace = true } reqwest = { workspace = true } -sentry = { workspace = true, features = ["contexts", "backtrace", "debug-images", "panic", "reqwest", "rustls", "tracing", "release-health"] } +sentry = { workspace = true, features = ["contexts", "backtrace", "debug-images", "panic", "reqwest", "rustls", "tracing", "release-health", "logs"] } serde = { workspace = true } serde_json = { workspace = true } sha2 = { workspace = true } diff --git a/rust/telemetry/src/feature_flags.rs b/rust/telemetry/src/feature_flags.rs index 950e01418..47350739f 100644 --- a/rust/telemetry/src/feature_flags.rs +++ b/rust/telemetry/src/feature_flags.rs @@ -24,29 +24,43 @@ pub fn drop_llmnr_nxdomain_responses() -> bool { FEATURE_FLAGS.read().drop_llmnr_nxdomain_responses } -pub(crate) fn reevaluate(user_id: String, env: &str) { - let api_key = match env.parse() { - Ok(Env::Production) => POSTHOG_API_KEY_PROD, - Ok(Env::Staging) => POSTHOG_API_KEY_STAGING, - Ok(Env::OnPrem | Env::DockerCompose | Env::Localhost) | Err(_) => return, +pub fn stream_logs() -> bool { + FEATURE_FLAGS.read().stream_logs +} + +pub(crate) async fn evaluate_now(user_id: String, env: Env) { + if user_id.is_empty() { + return; + } + + let api_key = match env { + Env::Production => POSTHOG_API_KEY_PROD, + Env::Staging => POSTHOG_API_KEY_STAGING, + Env::OnPrem | Env::DockerCompose | Env::Localhost => return, }; - RUNTIME.spawn(async move { - let flags = decide(user_id, api_key.to_owned()) - .await - .inspect_err(|e| tracing::debug!("Failed to evaluate feature flags: {e:#}")) - .unwrap_or_default(); + let flags = decide(user_id, api_key.to_owned()) + .await + .inspect_err(|e| tracing::debug!("Failed to evaluate feature flags: {e:#}")) + .unwrap_or_default(); - tracing::debug!(?flags, "Evaluated feature-flags"); + tracing::debug!(?flags, "Evaluated feature-flags"); - *FEATURE_FLAGS.write() = flags; + *FEATURE_FLAGS.write() = flags; - sentry::Hub::main().configure_scope(|scope| { - scope.set_context("flags", sentry_flag_context(flags)); - }); + sentry::Hub::main().configure_scope(|scope| { + scope.set_context("flags", sentry_flag_context(flags)); }); } +pub(crate) fn reevaluate(user_id: String, env: &str) { + let Ok(env) = env.parse() else { + return; + }; + + RUNTIME.spawn(evaluate_now(user_id, env)); +} + pub(crate) async fn reeval_timer() { loop { tokio::time::sleep(RE_EVAL_DURATION).await; @@ -117,6 +131,8 @@ struct FeatureFlags { icmp_unreachable_instead_of_nat64: bool, #[serde(default)] drop_llmnr_nxdomain_responses: bool, + #[serde(default)] + stream_logs: bool, } fn sentry_flag_context(flags: FeatureFlags) -> sentry::protocol::Context { @@ -125,12 +141,14 @@ fn sentry_flag_context(flags: FeatureFlags) -> sentry::protocol::Context { enum SentryFlag { IcmpUnreachableInsteadOfNat64 { result: bool }, DropLlmnrNxdomainResponses { result: bool }, + StreamLogs { result: bool }, } // Exhaustive destruction so we don't forget to update this when we add a flag. let FeatureFlags { icmp_unreachable_instead_of_nat64, drop_llmnr_nxdomain_responses, + stream_logs, } = flags; let value = serde_json::json!({ @@ -139,6 +157,7 @@ fn sentry_flag_context(flags: FeatureFlags) -> sentry::protocol::Context { result: icmp_unreachable_instead_of_nat64, }, SentryFlag::DropLlmnrNxdomainResponses { result: drop_llmnr_nxdomain_responses }, + SentryFlag::StreamLogs { result: stream_logs } ] }); diff --git a/rust/telemetry/src/lib.rs b/rust/telemetry/src/lib.rs index 5c31f719b..4e48ca143 100644 --- a/rust/telemetry/src/lib.rs +++ b/rust/telemetry/src/lib.rs @@ -2,10 +2,10 @@ use std::{borrow::Cow, fmt, str::FromStr, sync::Arc, time::Duration}; -use anyhow::{Ok, Result, bail}; +use anyhow::{Result, bail}; use sentry::{ - BeforeCallback, - protocol::{Event, SessionStatus}, + BeforeCallback, User, + protocol::{Event, Log, LogAttribute, SessionStatus}, }; use sha2::Digest as _; @@ -113,7 +113,7 @@ impl Drop for Telemetry { } impl Telemetry { - pub fn start(&mut self, api_url: &str, release: &str, dsn: Dsn) { + pub async fn start(&mut self, api_url: &str, release: &str, dsn: Dsn, firezone_id: String) { // Can't use URLs as `environment` directly, because Sentry doesn't allow slashes in environments. // let environment = Env::from_api_url(api_url); @@ -144,7 +144,11 @@ impl Telemetry { return; } - tracing::info!(%environment, "Starting telemetry"); + // Important: Evaluate feature flags before checking `stream_logs` to avoid hitting the default. + feature_flags::evaluate_now(firezone_id.clone(), environment).await; + let enable_logs = feature_flags::stream_logs(); + + tracing::info!(%environment, %enable_logs, "Starting telemetry"); let inner = sentry::init(( dsn.0, @@ -159,11 +163,13 @@ impl Telemetry { })), max_breadcrumbs: 500, before_send: Some(event_rate_limiter(Duration::from_secs(60 * 5))), + enable_logs, + before_send_log: Some(Arc::new(append_tracing_fields_to_message)), ..Default::default() }, )); // Configure scope on the main hub so that all threads will get the tags - sentry::Hub::main().configure_scope(|scope| { + sentry::Hub::main().configure_scope(move |scope| { scope.set_tag("api_url", api_url); let ctx = sentry::integrations::contexts::utils::device_context(); scope.set_context("device", ctx); @@ -173,11 +179,42 @@ impl Telemetry { if let Some(ctx) = sentry::integrations::contexts::utils::os_context() { scope.set_context("os", ctx); } + + scope.set_user(Some(User { + id: Some(firezone_id), + ..User::default() + })); }); self.inner.replace(inner); sentry::start_session(); } + /// Refreshes the telemetry config. + /// + /// Looks at the current values of the relevant feature flags and re-initializes the client in case they changed. + pub fn refresh_config(&mut self) { + let Some(client) = self.inner.as_ref() else { + tracing::debug!("Cannot refresh config: no client"); + return; + }; + + let enable_logs = feature_flags::stream_logs(); + + if client.options().enable_logs == enable_logs { + tracing::debug!("Config is up-to-date"); + return; + } + + let options = client.options().clone(); + + tracing::info!(%enable_logs, "Re-initializing telemetry"); + + self.inner.replace(sentry::init(sentry::ClientOptions { + enable_logs, + ..options + })); + } + /// Flushes events to sentry.io and drops the guard pub async fn stop(&mut self) { self.end_session(SessionStatus::Exited).await; @@ -261,6 +298,33 @@ fn event_rate_limiter(timeout: Duration) -> BeforeCallback> { }) } +/// Appends all but certain attributes from a sentry [`Log`] to the message body. +/// +/// Sentry stores all [`tracing`] fields as attributes and only renders the message. +/// Within Firezone, we make extensive use of attributes to provide contextual information. +/// We want to see these attributes inline with the message which is why we emulate the behaviour of `tracing_subscriber::fmt` here. +#[expect( + clippy::unnecessary_wraps, + reason = "We need to match Sentry's config signature." +)] +fn append_tracing_fields_to_message(mut log: Log) -> Option { + const IGNORED_ATTRS: &[&str] = &["os.", "sentry.", "tracing.", "server.", "user."]; + + for (key, attribute) in &log.attributes { + let LogAttribute(serde_json::Value::String(attr_string)) = &attribute else { + continue; + }; + + if IGNORED_ATTRS.iter().any(|attr| key.starts_with(attr)) { + continue; + } + + log.body.push_str(&format!(" {key}={attr_string}")); + } + + Some(log) +} + fn update_user(update: impl FnOnce(&mut sentry::User)) { sentry::Hub::main().configure_scope(|scope| { let mut user = scope.user().cloned().unwrap_or_default(); @@ -278,11 +342,15 @@ fn set_current_user(user: Option) { mod tests { use super::*; - #[test] - fn starting_session_for_unsupported_env_disables_current_one() { + #[tokio::test] + async fn starting_session_for_unsupported_env_disables_current_one() { let mut telemetry = Telemetry::default(); - telemetry.start("wss://api.firez.one", "1.0.0", TESTING); - telemetry.start("wss://example.com", "1.0.0", TESTING); + telemetry + .start("wss://api.firez.one", "1.0.0", TESTING, String::new()) + .await; + telemetry + .start("wss://example.com", "1.0.0", TESTING, String::new()) + .await; assert!(telemetry.inner.is_none()); }