fix(gui-client): flush telemetry events on IPC service exit (#7557)

Due to how we currently initialise telemetry in the IPC service, I think
we are missing out on events when it _exits_ due to an error because we
don't explicitly stop the telemetry session. We have alerts from a fair
few users in Sentry where the IPC service appears to stop / disappear
but there are no corresponding events for the IPC service.
This commit is contained in:
Thomas Eizinger
2024-12-19 21:06:47 +01:00
committed by GitHub
parent bc2febed99
commit c7a4221cb7
3 changed files with 38 additions and 11 deletions

View File

@@ -101,7 +101,6 @@ pub enum ClientMsg {
release: String,
account_slug: Option<String>,
},
StopTelemetry,
}
/// Messages that end up in the GUI, either forwarded from connlib or from the IPC service.
@@ -182,12 +181,20 @@ fn run_debug_ipc_service(cli: Cli) -> Result<()> {
.build()?;
let _guard = rt.enter();
let mut signals = signals::Terminate::new()?;
let mut telemetry = Telemetry::default();
rt.block_on(ipc_listen(
cli.common.dns_control,
&log_filter_reloader,
&mut signals,
&mut telemetry,
))
.inspect(|_| rt.block_on(telemetry.stop()))
.inspect_err(|e| {
tracing::error!(error = anyhow_dyn_err(e), "IPC service failed");
rt.block_on(telemetry.stop_on_crash())
})
}
#[cfg(not(debug_assertions))]
@@ -242,6 +249,7 @@ async fn ipc_listen(
dns_control_method: DnsControlMethod,
log_filter_reloader: &LogFilterReloader,
signals: &mut signals::Terminate,
telemetry: &mut Telemetry,
) -> Result<()> {
// Create the device ID and IPC service config dir if needed
// This also gives the GUI a safe place to put the log filter config
@@ -249,7 +257,6 @@ async fn ipc_listen(
.context("Failed to read / create device ID")?
.id;
let mut telemetry = Telemetry::default();
telemetry.set_firezone_id(firezone_id);
let mut server = IpcServer::new(ServiceId::Prod).await?;
@@ -259,7 +266,7 @@ async fn ipc_listen(
&mut server,
&mut dns_controller,
log_filter_reloader,
&mut telemetry,
telemetry,
));
let Some(handler) = poll_fn(|cx| {
if let Poll::Ready(()) = signals.poll_recv(cx) {
@@ -564,9 +571,6 @@ impl<'a> Handler<'a> {
self.telemetry.set_account_slug(account_slug);
}
}
ClientMsg::StopTelemetry => {
self.telemetry.stop().await;
}
}
Ok(())
}

View File

@@ -1,6 +1,8 @@
use super::CliCommon;
use crate::signals;
use anyhow::{bail, Result};
use firezone_logging::anyhow_dyn_err;
use firezone_telemetry::Telemetry;
/// Cross-platform entry point for systemd / Windows services
///
@@ -15,12 +17,20 @@ pub(crate) fn run_ipc_service(cli: CliCommon) -> Result<()> {
.build()?;
let _guard = rt.enter();
let mut signals = signals::Terminate::new()?;
let mut telemetry = Telemetry::default();
rt.block_on(super::ipc_listen(
cli.dns_control,
&log_filter_reloader,
&mut signals,
&mut telemetry,
))
.inspect(|_| rt.block_on(telemetry.stop()))
.inspect_err(|e| {
tracing::error!(error = anyhow_dyn_err(e), "IPC service failed");
rt.block_on(telemetry.stop_on_crash())
})
}
/// Returns true if the IPC service can run properly

View File

@@ -2,6 +2,7 @@ use crate::CliCommon;
use anyhow::{bail, Context as _, Result};
use firezone_bin_shared::platform::DnsControlMethod;
use firezone_logging::anyhow_dyn_err;
use firezone_telemetry::Telemetry;
use futures::future::{self, Either};
use std::{
ffi::{c_void, OsString},
@@ -209,13 +210,23 @@ fn fallible_service_run(
process_id: None,
})?;
let mut telemetry = Telemetry::default();
// Add new features in `service_run_async` if possible.
// We don't want to bail out of `fallible_service_run` and forget to tell
// Windows that we're shutting down.
let result = rt.block_on(service_run_async(&log_filter_reloader, shutdown_rx));
if let Err(error) = &result {
tracing::error!(error = anyhow_dyn_err(error));
}
let result = rt
.block_on(service_run_async(
&log_filter_reloader,
&mut telemetry,
shutdown_rx,
))
.inspect(|_| rt.block_on(telemetry.stop()))
.inspect_err(|e| {
tracing::error!(error = anyhow_dyn_err(e), "IPC service failed");
rt.block_on(telemetry.stop_on_crash())
});
// Drop the logging handle so it flushes the logs before we let Windows kill our process.
// There is no obvious and elegant way to do this, since the logging and `ServiceState`
@@ -254,6 +265,7 @@ fn fallible_service_run(
/// Logging must already be set up before calling this.
async fn service_run_async(
log_filter_reloader: &crate::LogFilterReloader,
telemetry: &mut Telemetry,
mut shutdown_rx: mpsc::Receiver<()>,
) -> Result<()> {
// Useless - Windows will never send us Ctrl+C when running as a service
@@ -262,7 +274,8 @@ async fn service_run_async(
let listen_fut = pin!(super::ipc_listen(
DnsControlMethod::Nrpt,
log_filter_reloader,
&mut signals
&mut signals,
telemetry
));
match future::select(listen_fut, pin!(shutdown_rx.recv())).await {
Either::Left((Err(error), _)) => Err(error).context("`ipc_listen` threw an error"),