From 51250faa0dd7c3b158ed88a8501acbb31bb4d6d6 Mon Sep 17 00:00:00 2001 From: Reactor Scram Date: Wed, 30 Oct 2024 16:27:17 -0500 Subject: [PATCH] chore(telemetry): make the firezone device ID a context not a tag (#7179) Closes #7175 Also fixes a bug with the initialization order of Tokio and Sentry. Previously: 1. Start Tokio, executor threads inherit main thread context 2. Load device ID and set it on the main telemetry hub Now: 1. Load device ID and set it on the main telemetry hub 2. Start Tokio, executor threads inherit main thread context The context and possibly tags didn't seem to propagate from the main hub if we set them after the worker threads spawned. Based on this understanding, the IPC service process is still wrong, but a fix will have to wait, because telemetry in the IPC service is more complicated than in the GUI process. image --- rust/gui-client/src-common/src/controller.rs | 12 +---------- rust/gui-client/src-tauri/src/client.rs | 19 ++++++++++++++++++ rust/headless-client/src/ipc_service.rs | 21 ++++++++++++++++++-- rust/headless-client/src/main.rs | 11 +++++++++- rust/telemetry/src/lib.rs | 4 +++- 5 files changed, 52 insertions(+), 15 deletions(-) diff --git a/rust/gui-client/src-common/src/controller.rs b/rust/gui-client/src-common/src/controller.rs index bee570ce9..e52f871d3 100644 --- a/rust/gui-client/src-common/src/controller.rs +++ b/rust/gui-client/src-common/src/controller.rs @@ -13,7 +13,7 @@ use firezone_headless_client::{ IpcClientMsg::{self, SetDisabledResources}, IpcServerMsg, IpcServiceError, LogFilterReloader, }; -use firezone_telemetry::{self as telemetry, Telemetry}; +use firezone_telemetry::Telemetry; use secrecy::{ExposeSecret as _, SecretString}; use std::{collections::BTreeSet, ops::ControlFlow, path::PathBuf, time::Instant}; use tokio::sync::{mpsc, oneshot}; @@ -69,16 +69,6 @@ impl Builder { let (ipc_tx, ipc_rx) = mpsc::channel(1); let ipc_client = ipc::Client::new(ipc_tx).await?; - // Get the device ID after connecting to the IPC service, this creates a happens-before relationship where we know the IPC service has written a device ID to disk. - match firezone_headless_client::device_id::get() { - Ok(id) => { - telemetry::Hub::main() - .configure_scope(|scope| scope.set_tag("firezone_id", &id.id)); - } - Err(error) => { - telemetry::capture_anyhow(&error.context("Failed to read device ID")); - } - } Ok(Controller { advanced_settings, diff --git a/rust/gui-client/src-tauri/src/client.rs b/rust/gui-client/src-tauri/src/client.rs index db8c7d147..054e5da33 100644 --- a/rust/gui-client/src-tauri/src/client.rs +++ b/rust/gui-client/src-tauri/src/client.rs @@ -98,6 +98,25 @@ fn run_gui(cli: Cli) -> Result<()> { firezone_bin_shared::git_version!("gui-client-*"), telemetry::GUI_DSN, ); + // Get the device ID before starting Tokio, so that all the worker threads will inherit the correct scope. + // Technically this means we can fail to get the device ID on a newly-installed system, since the IPC service may not have fully started up when the GUI process reaches this point, but in practice it's unlikely. + match firezone_headless_client::device_id::get() { + Ok(id) => { + // We should still be in the main thread, but explicitly get the main thread hub anyway. + telemetry::Hub::main().configure_scope(|scope| { + scope.set_context( + "firezone", + firezone_telemetry::Context::Other(BTreeMap::from([( + "id".to_string(), + id.id.into(), + )])), + ) + }); + } + Err(error) => { + telemetry::capture_anyhow(&error.context("Failed to read device ID")); + } + } fix_log_filter(&mut settings)?; let common::logging::Handles { logger: _logger, diff --git a/rust/headless-client/src/ipc_service.rs b/rust/headless-client/src/ipc_service.rs index 6f96fb0cc..ee62634f7 100644 --- a/rust/headless-client/src/ipc_service.rs +++ b/rust/headless-client/src/ipc_service.rs @@ -18,7 +18,14 @@ use futures::{ use phoenix_channel::LoginUrl; use secrecy::SecretString; use serde::{Deserialize, Serialize}; -use std::{collections::BTreeSet, net::IpAddr, path::PathBuf, pin::pin, sync::Arc, time::Duration}; +use std::{ + collections::{BTreeMap, BTreeSet}, + net::IpAddr, + path::PathBuf, + pin::pin, + sync::Arc, + time::Duration, +}; use tokio::{sync::mpsc, task::spawn_blocking, time::Instant}; use tracing::subscriber::set_global_default; use tracing_subscriber::{layer::SubscriberExt, EnvFilter, Layer, Registry}; @@ -215,7 +222,17 @@ async fn ipc_listen( let firezone_id = device_id::get_or_create() .context("Failed to read / create device ID")? .id; - firezone_telemetry::configure_scope(|scope| scope.set_tag("firezone_id", &firezone_id)); + // TODO: Telemetry is initialized wrong here: + // Sentry's contexts must be set on the main thread hub before starting Tokio, so that other thread hubs will inherit the context. + firezone_telemetry::configure_scope(|scope| { + scope.set_context( + "firezone", + firezone_telemetry::Context::Other(BTreeMap::from([( + "id".to_string(), + firezone_id.into(), + )])), + ) + }); let mut server = IpcServer::new(ServiceId::Prod).await?; let mut dns_controller = DnsController { dns_control_method }; let telemetry = Telemetry::default(); diff --git a/rust/headless-client/src/main.rs b/rust/headless-client/src/main.rs index 313952860..54c51659f 100644 --- a/rust/headless-client/src/main.rs +++ b/rust/headless-client/src/main.rs @@ -19,6 +19,7 @@ use phoenix_channel::LoginUrl; use phoenix_channel::PhoenixChannel; use secrecy::{Secret, SecretString}; use std::{ + collections::BTreeMap, path::{Path, PathBuf}, pin::pin, sync::Arc, @@ -175,7 +176,15 @@ fn main() -> Result<()> { Some(id) => id, None => device_id::get_or_create().context("Could not get `firezone_id` from CLI, could not read it from disk, could not generate it and save it to disk")?.id, }; - firezone_telemetry::configure_scope(|scope| scope.set_tag("firezone_id", &firezone_id)); + firezone_telemetry::configure_scope(|scope| { + scope.set_context( + "firezone", + firezone_telemetry::Context::Other(BTreeMap::from([( + "id".to_string(), + firezone_id.clone().into(), + )])), + ) + }); let url = LoginUrl::client( cli.api_url, diff --git a/rust/telemetry/src/lib.rs b/rust/telemetry/src/lib.rs index f25e2d1b7..bfe701ad7 100644 --- a/rust/telemetry/src/lib.rs +++ b/rust/telemetry/src/lib.rs @@ -3,7 +3,9 @@ use std::time::Duration; pub use sentry::{ add_breadcrumb, capture_error, capture_message, configure_scope, end_session, - end_session_with_status, types::protocol::v7::SessionStatus, Breadcrumb, Hub, Level, + end_session_with_status, + types::protocol::v7::{Context, SessionStatus}, + Breadcrumb, Hub, Level, }; pub use sentry_anyhow::capture_anyhow;