diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 54a1b801f..bf6e33bf8 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -181,12 +181,6 @@ dependencies = [ "x11rb", ] -[[package]] -name = "arc-swap" -version = "1.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" - [[package]] name = "arrayvec" version = "0.7.6" @@ -2013,6 +2007,7 @@ dependencies = [ "firezone-headless-client", "firezone-logging", "firezone-telemetry", + "futures", "native-dialog", "nix 0.29.0", "rand 0.8.5", @@ -2193,7 +2188,6 @@ dependencies = [ name = "firezone-telemetry" version = "0.1.0" dependencies = [ - "arc-swap", "sentry", "sentry-anyhow", "thiserror", diff --git a/rust/connlib/clients/android/src/lib.rs b/rust/connlib/clients/android/src/lib.rs index 8ea46f808..4e96deb98 100644 --- a/rust/connlib/clients/android/src/lib.rs +++ b/rust/connlib/clients/android/src/lib.rs @@ -348,8 +348,9 @@ fn connect( let device_info = string_from_jstring!(env, device_info); let device_info = serde_json::from_str(&device_info).unwrap(); - let telemetry = Telemetry::default(); + let mut telemetry = Telemetry::default(); telemetry.start(&api_url, env!("CARGO_PKG_VERSION"), ANDROID_DSN); + telemetry.set_firezone_id(device_id.clone()); let handle = init_logging(&PathBuf::from(log_dir), log_filter); install_rustls_crypto_provider(); @@ -469,7 +470,7 @@ pub unsafe extern "system" fn Java_dev_firezone_android_tunnel_ConnlibSession_di ) { let session = session_ptr as *mut SessionWrapper; catch_and_throw(&mut env, |_| { - let session = Box::from_raw(session); + let mut session = Box::from_raw(session); session.runtime.block_on(session.telemetry.stop()); session.inner.disconnect(); diff --git a/rust/connlib/clients/apple/src/lib.rs b/rust/connlib/clients/apple/src/lib.rs index 95fcd3d5e..a5f3ec44c 100644 --- a/rust/connlib/clients/apple/src/lib.rs +++ b/rust/connlib/clients/apple/src/lib.rs @@ -193,8 +193,9 @@ impl WrappedSession { callback_handler: ffi::CallbackHandler, device_info: String, ) -> Result { - let telemetry = Telemetry::default(); + let mut telemetry = Telemetry::default(); telemetry.start(&api_url, env!("CARGO_PKG_VERSION"), APPLE_DSN); + telemetry.set_firezone_id(device_id.clone()); let logger = init_logging(log_dir.into(), log_filter)?; install_rustls_crypto_provider(); @@ -264,7 +265,7 @@ impl WrappedSession { self.inner.set_disabled_resources(disabled_resources) } - fn disconnect(self) { + fn disconnect(mut self) { self.runtime.block_on(self.telemetry.stop()); self.inner.disconnect(); } diff --git a/rust/gateway/src/main.rs b/rust/gateway/src/main.rs index d5efba106..01eacd56d 100644 --- a/rust/gateway/src/main.rs +++ b/rust/gateway/src/main.rs @@ -39,7 +39,7 @@ async fn main() { .expect("Calling `install_default` only once per process should always succeed"); let cli = Cli::parse(); - let telemetry = Telemetry::default(); + let mut telemetry = Telemetry::default(); if cli.is_telemetry_allowed() { telemetry.start( cli.api_url.as_str(), @@ -53,17 +53,18 @@ async fn main() { // // By default, `anyhow` prints a stacktrace when it exits. // That looks like a "crash" but we "just" exit with a fatal error. - if let Err(e) = try_main(cli).await { + if let Err(e) = try_main(cli, &mut telemetry).await { tracing::error!(error = anyhow_dyn_err(&e)); std::process::exit(1); } } -async fn try_main(cli: Cli) -> Result<()> { +async fn try_main(cli: Cli, telemetry: &mut Telemetry) -> Result<()> { firezone_logging::setup_global_subscriber(layer::Identity::default()); let firezone_id = get_firezone_id(cli.firezone_id).await .context("Couldn't read FIREZONE_ID or write it to disk: Please provide it through the env variable or provide rw access to /var/lib/firezone/")?; + telemetry.set_firezone_id(firezone_id.clone()); let login = LoginUrl::gateway( cli.api_url, diff --git a/rust/gui-client/src-common/src/auth.rs b/rust/gui-client/src-common/src/auth.rs index 6cb3f0941..5a3fbb97c 100644 --- a/rust/gui-client/src-common/src/auth.rs +++ b/rust/gui-client/src-common/src/auth.rs @@ -73,11 +73,17 @@ pub(crate) struct Response { } #[derive(Default, Deserialize, Serialize)] -pub(crate) struct Session { +pub struct Session { pub(crate) account_slug: String, pub(crate) actor_name: String, } +impl Session { + pub fn account_slug(&self) -> &str { + &self.account_slug + } +} + struct SessionAndToken { session: Session, token: SecretString, @@ -118,7 +124,7 @@ impl Auth { } /// Returns the session iff we are signed in. - pub(crate) fn session(&self) -> Option<&Session> { + pub fn session(&self) -> Option<&Session> { match &self.state { State::SignedIn(x) => Some(x), State::NeedResponse(_) | State::SignedOut => None, @@ -234,7 +240,6 @@ impl Auth { match std::fs::read_to_string(session_data_path()?) { Ok(x) => { session = serde_json::from_str(&x).map_err(|_| Error::Serde)?; - firezone_telemetry::set_account_slug(session.account_slug.to_string()); } Err(e) if e.kind() == std::io::ErrorKind::NotFound => {} Err(e) => return Err(Error::ReadFile(e)), diff --git a/rust/gui-client/src-common/src/controller.rs b/rust/gui-client/src-common/src/controller.rs index c6f687654..957fec6fa 100644 --- a/rust/gui-client/src-common/src/controller.rs +++ b/rust/gui-client/src-common/src/controller.rs @@ -26,7 +26,7 @@ mod ran_before; pub type CtlrTx = mpsc::Sender; -pub struct Controller { +pub struct Controller<'a, I: GuiIntegration> { /// Debugging-only settings like API URL, auth URL, log filter advanced_settings: AdvancedSettings, // Sign-in state with the portal / deep links @@ -41,23 +41,23 @@ pub struct Controller { release: Option, rx: mpsc::Receiver, status: Status, - telemetry: Telemetry, + telemetry: &'a mut Telemetry, updates_rx: mpsc::Receiver>, uptime: crate::uptime::Tracker, } -pub struct Builder { +pub struct Builder<'a, I: GuiIntegration> { pub advanced_settings: AdvancedSettings, pub ctlr_tx: CtlrTx, pub integration: I, pub log_filter_reloader: LogFilterReloader, pub rx: mpsc::Receiver, - pub telemetry: Telemetry, + pub telemetry: &'a mut Telemetry, pub updates_rx: mpsc::Receiver>, } -impl Builder { - pub async fn build(self) -> Result> { +impl<'a, I: GuiIntegration> Builder<'a, I> { + pub async fn build(self) -> Result> { let Builder { advanced_settings, ctlr_tx, @@ -203,8 +203,10 @@ impl Status { } } -impl Controller { +impl<'a, I: GuiIntegration> Controller<'a, I> { pub async fn main_loop(mut self) -> Result<(), Error> { + let account_slug = self.auth.session().map(|s| s.account_slug.to_owned()); + // Start telemetry { const VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -212,10 +214,15 @@ impl Controller { let environment = self.advanced_settings.api_url.to_string(); self.telemetry .start(&environment, VERSION, firezone_telemetry::GUI_DSN); + if let Some(account_slug) = account_slug.clone() { + self.telemetry.set_account_slug(account_slug); + } + self.ipc_client .send_msg(&IpcClientMsg::StartTelemetry { environment, version: VERSION.to_owned(), + account_slug, }) .await?; } diff --git a/rust/gui-client/src-tauri/Cargo.toml b/rust/gui-client/src-tauri/Cargo.toml index dc6974f22..268b1a8eb 100644 --- a/rust/gui-client/src-tauri/Cargo.toml +++ b/rust/gui-client/src-tauri/Cargo.toml @@ -23,6 +23,7 @@ firezone-gui-client-common = { path = "../src-common" } firezone-headless-client = { path = "../../headless-client" } firezone-logging = { workspace = true } firezone-telemetry = { workspace = true } +futures = "0.3.31" native-dialog = "0.7.0" rand = "0.8.5" rustls = { workspace = true } diff --git a/rust/gui-client/src-tauri/src/client.rs b/rust/gui-client/src-tauri/src/client.rs index 09db91962..608870a6f 100644 --- a/rust/gui-client/src-tauri/src/client.rs +++ b/rust/gui-client/src-tauri/src/client.rs @@ -61,7 +61,7 @@ pub(crate) fn run() -> Result<()> { Some(Cmd::SmokeTest) => { // Can't check elevation here because the Windows CI is always elevated let settings = common::settings::load_advanced_settings().unwrap_or_default(); - let telemetry = telemetry::Telemetry::default(); + let mut telemetry = telemetry::Telemetry::default(); telemetry.start( settings.api_url.as_ref(), env!("CARGO_PKG_VERSION"), @@ -92,7 +92,7 @@ pub(crate) fn run() -> Result<()> { // Can't `instrument` this because logging isn't running when we enter it. fn run_gui(cli: Cli) -> Result<()> { let mut settings = common::settings::load_advanced_settings().unwrap_or_default(); - let telemetry = telemetry::Telemetry::default(); + let mut telemetry = telemetry::Telemetry::default(); // In the future telemetry will be opt-in per organization, that's why this isn't just at the top of `main` telemetry.start( settings.api_url.as_ref(), diff --git a/rust/gui-client/src-tauri/src/client/gui.rs b/rust/gui-client/src-tauri/src/client/gui.rs index c1430fb36..4c1e126dd 100644 --- a/rust/gui-client/src-tauri/src/client/gui.rs +++ b/rust/gui-client/src-tauri/src/client/gui.rs @@ -20,8 +20,9 @@ use firezone_gui_client_common::{ use firezone_headless_client::LogFilterReloader; use firezone_logging::{anyhow_dyn_err, std_dyn_err}; use firezone_telemetry as telemetry; +use futures::FutureExt; use secrecy::{ExposeSecret as _, SecretString}; -use std::{str::FromStr, time::Duration}; +use std::{panic::AssertUnwindSafe, str::FromStr, time::Duration}; use tauri::Manager; use tauri_plugin_shell::ShellExt as _; use tokio::sync::{mpsc, oneshot}; @@ -122,7 +123,7 @@ pub(crate) fn run( cli: client::Cli, advanced_settings: AdvancedSettings, reloader: LogFilterReloader, - telemetry: telemetry::Telemetry, + mut telemetry: telemetry::Telemetry, ) -> Result<(), Error> { // Needed for the deep link server let rt = tokio::runtime::Runtime::new().context("Couldn't start Tokio runtime")?; @@ -238,16 +239,15 @@ pub(crate) fn run( let app_handle = app.handle().clone(); let _ctlr_task = tokio::spawn(async move { - // Spawn two nested Tasks so the outer can catch panics from the inner - let task = tokio::spawn(run_controller( + let result = AssertUnwindSafe(run_controller( ctlr_tx, integration, ctlr_rx, advanced_settings, reloader, - telemetry.clone(), + &mut telemetry, updates_rx, - )); + )).catch_unwind().await; // See // This should be the ONLY place we call `app.exit` or `app_handle.exit`, @@ -256,9 +256,9 @@ pub(crate) fn run( // This seems to be a platform limitation that Tauri is unable to hide // from us. It was the source of much consternation at time of writing. - let exit_code = match task.await { - Err(error) => { - tracing::error!(error = std_dyn_err(&error), "run_controller panicked"); + let exit_code = match result { + Err(_panic) => { + // The panic will have been recorded already by Sentry's panic hook. telemetry::end_session_with_status(telemetry::SessionStatus::Crashed); 1 } @@ -476,7 +476,7 @@ async fn run_controller( rx: mpsc::Receiver, advanced_settings: AdvancedSettings, log_filter_reloader: LogFilterReloader, - telemetry: telemetry::Telemetry, + telemetry: &mut telemetry::Telemetry, updates_rx: mpsc::Receiver>, ) -> Result<(), Error> { tracing::debug!("Entered `run_controller`"); diff --git a/rust/headless-client/src/ipc_service.rs b/rust/headless-client/src/ipc_service.rs index 0ce7bfe62..dc4fd6ae4 100644 --- a/rust/headless-client/src/ipc_service.rs +++ b/rust/headless-client/src/ipc_service.rs @@ -19,14 +19,7 @@ use futures::{ use phoenix_channel::LoginUrl; use secrecy::SecretString; use serde::{Deserialize, Serialize}; -use std::{ - collections::{BTreeMap, BTreeSet}, - net::IpAddr, - path::PathBuf, - pin::pin, - sync::Arc, - time::Duration, -}; +use std::{collections::BTreeSet, net::IpAddr, path::PathBuf, pin::pin, sync::Arc, time::Duration}; use tokio::{sync::mpsc, task::spawn_blocking, time::Instant}; use tracing::subscriber::set_global_default; use tracing_subscriber::{layer::SubscriberExt, EnvFilter, Layer, Registry}; @@ -96,6 +89,7 @@ pub enum ClientMsg { StartTelemetry { environment: String, version: String, + account_slug: Option, }, StopTelemetry, } @@ -201,7 +195,7 @@ fn run_smoke_test() -> Result<()> { // and we need to recover. dns_controller.deactivate()?; let mut signals = signals::Terminate::new()?; - let telemetry = Telemetry::default(); + let mut telemetry = Telemetry::default(); // Couldn't get the loop to work here yet, so SIGHUP is not implemented rt.block_on(async { @@ -211,7 +205,7 @@ fn run_smoke_test() -> Result<()> { &mut server, &mut dns_controller, &log_filter_reloader, - telemetry, + &mut telemetry, ) .await? .run(&mut signals) @@ -234,26 +228,18 @@ async fn ipc_listen( let firezone_id = device_id::get_or_create() .context("Failed to read / create device ID")? .id; - // TODO: Telemetry is initialized wrong here: - // Sentry's contexts must be set on the main thread hub before starting Tokio, so that other thread hubs will inherit the context. - firezone_telemetry::configure_scope(|scope| { - scope.set_context( - "firezone", - firezone_telemetry::Context::Other(BTreeMap::from([( - "id".to_string(), - firezone_id.into(), - )])), - ) - }); + + let mut telemetry = Telemetry::default(); + telemetry.set_firezone_id(firezone_id); + let mut server = IpcServer::new(ServiceId::Prod).await?; let mut dns_controller = DnsController { dns_control_method }; - let telemetry = Telemetry::default(); loop { let mut handler_fut = pin!(Handler::new( &mut server, &mut dns_controller, log_filter_reloader, - telemetry.clone(), + &mut telemetry, )); let Some(handler) = poll_fn(|cx| { if let Poll::Ready(()) = signals.poll_recv(cx) { @@ -285,7 +271,7 @@ struct Handler<'a> { last_connlib_start_instant: Option, log_filter_reloader: &'a LogFilterReloader, session: Option, - telemetry: Telemetry, // Handle to the sentry.io telemetry module + telemetry: &'a mut Telemetry, // Handle to the sentry.io telemetry module tun_device: TunDeviceManager, } @@ -316,7 +302,7 @@ impl<'a> Handler<'a> { server: &mut IpcServer, dns_controller: &'a mut DnsController, log_filter_reloader: &'a LogFilterReloader, - telemetry: Telemetry, + telemetry: &'a mut Telemetry, ) -> Result { dns_controller.deactivate()?; let (ipc_rx, ipc_tx) = server @@ -549,9 +535,15 @@ impl<'a> Handler<'a> { ClientMsg::StartTelemetry { environment, version, - } => self - .telemetry - .start(&environment, &version, firezone_telemetry::IPC_SERVICE_DSN), + account_slug, + } => { + self.telemetry + .start(&environment, &version, firezone_telemetry::IPC_SERVICE_DSN); + + if let Some(account_slug) = account_slug { + self.telemetry.set_account_slug(account_slug); + } + } ClientMsg::StopTelemetry => { self.telemetry.stop().await; } @@ -569,6 +561,7 @@ impl<'a> Handler<'a> { assert!(self.session.is_none()); let device_id = device_id::get_or_create().map_err(|e| Error::DeviceId(e.to_string()))?; + self.telemetry.set_firezone_id(device_id.id.clone()); let url = LoginUrl::client( Url::parse(api_url).map_err(|e| Error::UrlParse(e.to_string()))?, diff --git a/rust/headless-client/src/main.rs b/rust/headless-client/src/main.rs index 92c9db157..ba455d360 100644 --- a/rust/headless-client/src/main.rs +++ b/rust/headless-client/src/main.rs @@ -20,7 +20,6 @@ use phoenix_channel::LoginUrl; use phoenix_channel::PhoenixChannel; use secrecy::{Secret, SecretString}; use std::{ - collections::BTreeMap, path::{Path, PathBuf}, sync::Arc, }; @@ -128,7 +127,7 @@ fn main() -> Result<()> { // and we need to recover. dns_controller.deactivate()?; - let telemetry = Telemetry::default(); + let mut telemetry = Telemetry::default(); telemetry.start( cli.api_url.as_ref(), VERSION, @@ -175,15 +174,7 @@ fn main() -> Result<()> { Some(id) => id, None => device_id::get_or_create().context("Could not get `firezone_id` from CLI, could not read it from disk, could not generate it and save it to disk")?.id, }; - firezone_telemetry::configure_scope(|scope| { - scope.set_context( - "firezone", - firezone_telemetry::Context::Other(BTreeMap::from([( - "id".to_string(), - firezone_id.clone().into(), - )])), - ) - }); + telemetry.set_firezone_id(firezone_id.clone()); let url = LoginUrl::client( cli.api_url, diff --git a/rust/telemetry/Cargo.toml b/rust/telemetry/Cargo.toml index 9c718203f..6a3bd8e5e 100644 --- a/rust/telemetry/Cargo.toml +++ b/rust/telemetry/Cargo.toml @@ -4,7 +4,6 @@ version = "0.1.0" edition = "2021" [dependencies] -arc-swap = "1.7.1" sentry = { version = "0.34.0", default-features = false, features = ["contexts", "backtrace", "debug-images", "panic", "reqwest", "rustls", "tracing"] } sentry-anyhow = "0.34.0" tokio = { workspace = true, features = ["rt"] } diff --git a/rust/telemetry/src/lib.rs b/rust/telemetry/src/lib.rs index 6501d828b..80f7ba29b 100644 --- a/rust/telemetry/src/lib.rs +++ b/rust/telemetry/src/lib.rs @@ -1,9 +1,7 @@ -use arc_swap::ArcSwapOption; use std::time::Duration; pub use sentry::{ - add_breadcrumb, capture_error, capture_message, configure_scope, end_session, - end_session_with_status, + add_breadcrumb, capture_error, capture_message, end_session, end_session_with_status, types::protocol::v7::{Context, SessionStatus}, Breadcrumb, Hub, Level, }; @@ -36,22 +34,17 @@ pub struct Telemetry { /// be able to gracefully close down Sentry, even though it won't have /// a mutable handle to it, and even though `main` is actually what holds /// the handle. Wrapping it all in `ArcSwap` makes it simpler. - inner: ArcSwapOption, -} + inner: Option, -impl Clone for Telemetry { - fn clone(&self) -> Self { - Self { - inner: ArcSwapOption::new(self.inner.load().clone()), - } - } + account_slug: Option, + firezone_id: Option, } impl Telemetry { - pub fn start(&self, api_url: &str, release: &str, dsn: Dsn) { + pub fn start(&mut self, api_url: &str, release: &str, dsn: Dsn) { // Since it's `arc_swap` and not `Option`, there is a TOCTOU here, // but in practice it should never trigger - if self.inner.load().is_some() { + if self.inner.is_some() { return; } @@ -88,13 +81,15 @@ impl Telemetry { let ctx = sentry::integrations::contexts::utils::rust_context(); scope.set_context("rust", ctx); }); - self.inner.swap(Some(inner.into())); + self.inner.replace(inner); sentry::start_session(); + + self.update_user_context(); } /// Flushes events to sentry.io and drops the guard - pub async fn stop(&self) { - let Some(inner) = self.inner.swap(None) else { + pub async fn stop(&mut self) { + let Some(inner) = self.inner.take() else { return; }; tracing::info!("Stopping telemetry"); @@ -113,16 +108,31 @@ impl Telemetry { }) .await; } -} -/// Sets the Firezone account slug on the Sentry hub -/// -/// This sets the entire set of "user" info, so it will conflict if we set any other user ID later. -pub fn set_account_slug(account_slug: String) { - let mut user = sentry::User::default(); - user.other - .insert("account_slug".to_string(), account_slug.into()); - sentry::Hub::main().configure_scope(|scope| scope.set_user(Some(user))); + pub fn set_account_slug(&mut self, slug: String) { + self.account_slug = Some(slug); + self.update_user_context(); + } + + pub fn set_firezone_id(&mut self, id: String) { + self.firezone_id = Some(id); + self.update_user_context(); + } + + fn update_user_context(&self) { + let mut user = sentry::User { + id: self.firezone_id.clone(), + ..Default::default() + }; + + user.other.extend( + self.account_slug + .clone() + .map(|slug| ("account_slug".to_owned(), slug.into())), + ); + + sentry::Hub::main().configure_scope(|scope| scope.set_user(Some(user))); + } } #[cfg(test)] @@ -136,7 +146,7 @@ mod tests { async fn sentry() { // Smoke-test Sentry itself by turning it on and off a couple times { - let tele = Telemetry::default(); + let mut tele = Telemetry::default(); // Expect no telemetry because the telemetry module needs to be enabled before it can do anything negative_error("X7X4CKH3"); @@ -166,13 +176,13 @@ mod tests { // Test starting up with the choice opted-in { { - let tele = Telemetry::default(); + let mut tele = Telemetry::default(); negative_error("4H7HFTNX"); tele.start("test", ENV, HEADLESS_DSN); } { negative_error("GF46D6IL"); - let tele = Telemetry::default(); + let mut tele = Telemetry::default(); tele.start("test", ENV, HEADLESS_DSN); error("OKOEUKSW"); }