chore(telemetry): misc. clean-up (#7326)

Bundles together several minor improvements around telemetry:

- Removes the obsolete "Firezone" context: This is now included in the
user context as of #7310.
- Entirely encapsulates `sentry` within the `telemetry` module
- Concludes sessions that were not explicitly closed as "abnormal"
This commit is contained in:
Thomas Eizinger
2024-11-13 00:16:44 +00:00
committed by GitHub
parent 19dbff51f5
commit 3e18fa8ca2
3 changed files with 30 additions and 42 deletions

View File

@@ -5,7 +5,6 @@ use firezone_gui_client_common::{
};
use firezone_logging::{anyhow_dyn_err, std_dyn_err};
use firezone_telemetry as telemetry;
use std::collections::BTreeMap;
use tracing::instrument;
use tracing_subscriber::EnvFilter;
@@ -101,22 +100,8 @@ fn run_gui(cli: Cli) -> Result<()> {
);
// Get the device ID before starting Tokio, so that all the worker threads will inherit the correct scope.
// Technically this means we can fail to get the device ID on a newly-installed system, since the IPC service may not have fully started up when the GUI process reaches this point, but in practice it's unlikely.
match firezone_headless_client::device_id::get() {
Ok(id) => {
// We should still be in the main thread, but explicitly get the main thread hub anyway.
telemetry::Hub::main().configure_scope(|scope| {
scope.set_context(
"firezone",
firezone_telemetry::Context::Other(BTreeMap::from([(
"id".to_string(),
id.id.into(),
)])),
)
});
}
Err(error) => {
telemetry::capture_anyhow(&error.context("Failed to read device ID"));
}
if let Ok(id) = firezone_headless_client::device_id::get() {
telemetry.set_firezone_id(id.id);
}
fix_log_filter(&mut settings)?;
let common::logging::Handles {
@@ -165,15 +150,6 @@ fn start_logging(directives: &str) -> Result<common::logging::Handles> {
?system_uptime_seconds,
"`gui-client` started logging"
);
telemetry::add_breadcrumb(telemetry::Breadcrumb {
ty: "logging_start".into(),
category: None,
data: BTreeMap::from([
("directives".into(), directives.into()),
("system_uptime_seconds".into(), system_uptime_seconds.into()),
]),
..Default::default()
});
Ok(logging_handles)
}

View File

@@ -204,11 +204,6 @@ pub(crate) fn run(
tracing::warn!(
"Will crash / error / panic on purpose in {delay} seconds to test error handling."
);
telemetry::add_breadcrumb(telemetry::Breadcrumb {
ty: "fail_on_purpose".into(),
message: Some("Will crash / error / panic on purpose to test error handling.".into()),
..Default::default()
});
tokio::time::sleep(Duration::from_secs(delay)).await;
tracing::warn!("Crashing / erroring / panicking on purpose");
ctlr_tx.send(ControllerRequest::Fail(failure)).await?;
@@ -259,25 +254,27 @@ pub(crate) fn run(
let exit_code = match result {
Err(_panic) => {
// The panic will have been recorded already by Sentry's panic hook.
telemetry::end_session_with_status(telemetry::SessionStatus::Crashed);
telemetry.stop_on_crash().await;
1
}
Ok(Err(error)) => {
tracing::error!(error = std_dyn_err(&error), "run_controller returned an error");
errors::show_error_dialog(&error).unwrap();
telemetry::end_session_with_status(telemetry::SessionStatus::Crashed);
telemetry.stop_on_crash().await;
1
}
Ok(Ok(_)) => {
telemetry::end_session();
telemetry.stop().await;
0
}
};
// In a normal Rust application, Sentry's guard would flush on drop: https://docs.sentry.io/platforms/rust/configuration/draining/
// But due to a limit in `tao` we cannot return from the event loop and must call `std::process::exit` (or Tauri's wrapper), so we explicitly flush here.
// TODO: This limit may not exist in Tauri v2
telemetry.stop().await;
tracing::info!(?exit_code);
app_handle.exit(exit_code);

View File

@@ -1,10 +1,6 @@
use std::time::Duration;
pub use sentry::{
add_breadcrumb, capture_error, capture_message, end_session, end_session_with_status,
types::protocol::v7::{Context, SessionStatus},
Breadcrumb, Hub, Level,
};
use sentry::protocol::SessionStatus;
pub use sentry_anyhow::capture_anyhow;
pub struct Dsn(&'static str);
@@ -40,6 +36,17 @@ pub struct Telemetry {
firezone_id: Option<String>,
}
impl Drop for Telemetry {
fn drop(&mut self) {
if self.inner.is_none() {
return;
}
// Conclude telemetry session as "abnormal" if we get dropped without closing it properly first.
sentry::end_session_with_status(SessionStatus::Abnormal);
}
}
impl Telemetry {
pub fn start(&mut self, api_url: &str, release: &str, dsn: Dsn) {
// Since it's `arc_swap` and not `Option`, there is a TOCTOU here,
@@ -89,11 +96,19 @@ impl Telemetry {
/// Flushes events to sentry.io and drops the guard
pub async fn stop(&mut self) {
self.end_session(SessionStatus::Exited).await;
}
pub async fn stop_on_crash(&mut self) {
self.end_session(SessionStatus::Crashed).await;
}
async fn end_session(&mut self, status: SessionStatus) {
let Some(inner) = self.inner.take() else {
return;
};
tracing::info!("Stopping telemetry");
sentry::end_session();
sentry::end_session_with_status(status);
// Sentry uses blocking IO for flushing ..
let _ = tokio::task::spawn_blocking(move || {