From ae872980aed5484eb7b1c2c2e1965ae1cc43ef8f Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Wed, 21 May 2025 09:18:18 +1000 Subject: [PATCH] refactor(gui-client): scope telemetry sessions to GUI client (#9179) For our telemetry sessions with Sentry, we need to know which environment we are running in, i.e. staging, production or on-prem. The GUI client's tunnel service doesn't have a concept of an environment until a GUI connects and sends the `StartTelemetry` message. Therefore, we should scope a telemetry session to a GUI being connected over IPC. Any errors around setting up / tearing down the background service are a catch-22. Until a GUI connects, we can't initialise the telemetry connection but if we fail to set up the background service, no GUI can ever connect. Hence, the current setup and tear down of the `Telemetry` module around the `ipc_listen` calls can safely be removed as they are effectively no-ops anyway. --- rust/bin-shared/src/device_id.rs | 1 + rust/gui-client/src-tauri/src/service.rs | 49 ++++++++----------- .../gui-client/src-tauri/src/service/linux.rs | 11 +---- .../src-tauri/src/service/windows.rs | 17 +------ 4 files changed, 23 insertions(+), 55 deletions(-) diff --git a/rust/bin-shared/src/device_id.rs b/rust/bin-shared/src/device_id.rs index ae18c2cd3..2944f8fdd 100644 --- a/rust/bin-shared/src/device_id.rs +++ b/rust/bin-shared/src/device_id.rs @@ -8,6 +8,7 @@ use std::{ path::{Path, PathBuf}, }; +#[derive(Debug, Clone)] pub struct DeviceId { pub id: String, } diff --git a/rust/gui-client/src-tauri/src/service.rs b/rust/gui-client/src-tauri/src/service.rs index 3ee07b581..cd177f82c 100644 --- a/rust/gui-client/src-tauri/src/service.rs +++ b/rust/gui-client/src-tauri/src/service.rs @@ -5,7 +5,9 @@ use backoff::ExponentialBackoffBuilder; use client_shared::ConnlibMsg; use connlib_model::{ResourceId, ResourceView}; use firezone_bin_shared::{ - DnsControlMethod, DnsController, TunDeviceManager, device_id, device_info, known_dirs, + DnsControlMethod, DnsController, TunDeviceManager, + device_id::{self, DeviceId}, + device_info, known_dirs, platform::{tcp_socket_factory, udp_socket_factory}, signals, }; @@ -116,24 +118,19 @@ async fn ipc_listen( dns_control_method: DnsControlMethod, log_filter_reloader: &FilterReloadHandle, signals: &mut signals::Terminate, - telemetry: &mut Telemetry, ) -> Result<()> { // Create the device ID and Tunnel service config dir if needed // This also gives the GUI a safe place to put the log filter config - let firezone_id = device_id::get_or_create() - .context("Failed to read / create device ID")? - .id; - - Telemetry::set_firezone_id(firezone_id); + let device_id = device_id::get_or_create().context("Failed to read / create device ID")?; let mut server = ipc::Server::new(SocketId::Tunnel)?; let mut dns_controller = DnsController { dns_control_method }; loop { let mut handler_fut = pin!(Handler::new( + device_id.clone(), &mut server, &mut dns_controller, log_filter_reloader, - telemetry, )); let Some(handler) = poll_fn(|cx| { if let Poll::Ready(()) = signals.poll_recv(cx) { @@ -167,13 +164,14 @@ async fn ipc_listen( /// Handles one IPC client struct Handler<'a> { + device_id: DeviceId, dns_controller: &'a mut DnsController, ipc_rx: ipc::ServerRead, ipc_tx: ipc::ServerWrite, last_connlib_start_instant: Option, log_filter_reloader: &'a FilterReloadHandle, session: Option, - telemetry: &'a mut Telemetry, // Handle to the sentry.io telemetry module + telemetry: Telemetry, tun_device: TunDeviceManager, } @@ -201,10 +199,10 @@ enum HandlerOk { impl<'a> Handler<'a> { async fn new( + device_id: DeviceId, server: &mut ipc::Server, dns_controller: &'a mut DnsController, log_filter_reloader: &'a FilterReloadHandle, - telemetry: &'a mut Telemetry, ) -> Result { dns_controller.deactivate()?; @@ -225,13 +223,14 @@ impl<'a> Handler<'a> { .context("Failed to greet to new GUI process")?; // Greet the GUI process. If the GUI process doesn't receive this after connecting, it knows that the tunnel service isn't responding. Ok(Self { + device_id, dns_controller, ipc_rx, ipc_tx, last_connlib_start_instant: None, log_filter_reloader, session: None, - telemetry, + telemetry: Telemetry::default(), tun_device, }) } @@ -243,7 +242,7 @@ impl<'a> Handler<'a> { /// /// The return type is infallible so that we only give up on an IPC client explicitly async fn run(&mut self, signals: &mut signals::Terminate) -> HandlerOk { - loop { + let ret = loop { match poll_fn(|cx| self.next_event(cx, signals)).await { Event::Callback(x) => { if let Err(error) = self.handle_connlib_cb(x).await { @@ -282,7 +281,11 @@ impl<'a> Handler<'a> { break HandlerOk::ServiceTerminating; } } - } + }; + + self.telemetry.stop().await; // Stop the telemetry session once the client disconnects or we are shutting down. + + ret } fn next_event( @@ -430,6 +433,7 @@ impl<'a> Handler<'a> { } => { self.telemetry .start(&environment, &release, firezone_telemetry::GUI_DSN); + Telemetry::set_firezone_id(self.device_id.id.clone()); if let Some(account_slug) = account_slug { Telemetry::set_account_slug(account_slug); @@ -538,20 +542,8 @@ pub fn run_debug(dns_control: DnsControlMethod) -> Result<()> { .build()?; let _guard = rt.enter(); let mut signals = signals::Terminate::new()?; - let mut telemetry = Telemetry::default(); - rt.block_on(ipc_listen( - dns_control, - &log_filter_reloader, - &mut signals, - &mut telemetry, - )) - .inspect(|_| rt.block_on(telemetry.stop())) - .inspect_err(|e| { - tracing::error!("Tunnel service failed: {e:#}"); - - rt.block_on(telemetry.stop_on_crash()) - }) + rt.block_on(ipc_listen(dns_control, &log_filter_reloader, &mut signals)) } /// Listen for exactly one connection from a GUI, then exit @@ -578,17 +570,16 @@ pub fn run_smoke_test() -> Result<()> { // and we need to recover. dns_controller.deactivate()?; let mut signals = signals::Terminate::new()?; - let mut telemetry = Telemetry::default(); // Couldn't get the loop to work here yet, so SIGHUP is not implemented rt.block_on(async { - device_id::get_or_create().context("Failed to read / create device ID")?; + let device_id = device_id::get_or_create().context("Failed to read / create device ID")?; let mut server = ipc::Server::new(SocketId::Tunnel)?; let _ = Handler::new( + device_id, &mut server, &mut dns_controller, &log_filter_reloader, - &mut telemetry, ) .await? .run(&mut signals) diff --git a/rust/gui-client/src-tauri/src/service/linux.rs b/rust/gui-client/src-tauri/src/service/linux.rs index a54b2102a..573d7e6e6 100644 --- a/rust/gui-client/src-tauri/src/service/linux.rs +++ b/rust/gui-client/src-tauri/src/service/linux.rs @@ -3,8 +3,6 @@ use std::path::PathBuf; use anyhow::{Result, bail}; use firezone_bin_shared::{DnsControlMethod, signals}; -use firezone_telemetry::Telemetry; - /// Cross-platform entry point for systemd / Windows services /// /// Linux uses the CLI args from here, Windows does not @@ -18,20 +16,13 @@ pub fn run(log_dir: Option, dns_control: DnsControlMethod) -> Result<() .build()?; let _guard = rt.enter(); let mut signals = signals::Terminate::new()?; - let mut telemetry = Telemetry::default(); rt.block_on(super::ipc_listen( dns_control, &log_filter_reloader, &mut signals, - &mut telemetry, )) - .inspect(|_| rt.block_on(telemetry.stop())) - .inspect_err(|e| { - tracing::error!("Tunnel service failed: {e:#}"); - - rt.block_on(telemetry.stop_on_crash()) - }) + .inspect_err(|e| tracing::error!("IPC service failed: {e:#}")) } /// Returns true if the Tunnel service can run properly diff --git a/rust/gui-client/src-tauri/src/service/windows.rs b/rust/gui-client/src-tauri/src/service/windows.rs index 9fb97da06..968e9ef30 100644 --- a/rust/gui-client/src-tauri/src/service/windows.rs +++ b/rust/gui-client/src-tauri/src/service/windows.rs @@ -1,6 +1,5 @@ use anyhow::{Context as _, Result}; use firezone_bin_shared::DnsControlMethod; -use firezone_telemetry::Telemetry; use futures::channel::mpsc; use std::path::PathBuf; use std::{ @@ -279,28 +278,14 @@ fn run_service(arguments: Vec) { }); let mut signals = firezone_bin_shared::signals::Terminate::from_channel(shutdown_rx); - let mut telemetry = Telemetry::default(); - // Add new features in `service_run_async` if possible. - // We don't want to bail out of `fallible_service_run` and forget to tell - // Windows that we're shutting down. let result = rt .block_on(super::ipc_listen( DnsControlMethod::Nrpt, &log_filter_reloader, &mut signals, - &mut telemetry, )) - .inspect(|()| { - tracing::info!("Windows service exited gracefully"); - - rt.block_on(telemetry.stop()) - }) - .inspect_err(|e| { - tracing::error!("Tunnel service failed: {e:#}"); - - rt.block_on(telemetry.stop_on_crash()) - }); + .inspect_err(|e| tracing::error!("Tunnel service failed: {e:#}")); // Tell Windows that we're stopping // Per Windows docs, this will cause Windows to kill our process eventually.