mirror of
https://github.com/outbackdingo/firezone.git
synced 2026-01-27 10:18:54 +00:00
chore(telemetry): misc. clean-up (#7326)
Bundles together several minor improvements around telemetry: - Removes the obsolete "Firezone" context: This is now included in the user context as of #7310. - Entirely encapsulates `sentry` within the `telemetry` module - Concludes sessions that were not explicitly closed as "abnormal"
This commit is contained in:
@@ -5,7 +5,6 @@ use firezone_gui_client_common::{
|
||||
};
|
||||
use firezone_logging::{anyhow_dyn_err, std_dyn_err};
|
||||
use firezone_telemetry as telemetry;
|
||||
use std::collections::BTreeMap;
|
||||
use tracing::instrument;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
@@ -101,22 +100,8 @@ fn run_gui(cli: Cli) -> Result<()> {
|
||||
);
|
||||
// Get the device ID before starting Tokio, so that all the worker threads will inherit the correct scope.
|
||||
// Technically this means we can fail to get the device ID on a newly-installed system, since the IPC service may not have fully started up when the GUI process reaches this point, but in practice it's unlikely.
|
||||
match firezone_headless_client::device_id::get() {
|
||||
Ok(id) => {
|
||||
// We should still be in the main thread, but explicitly get the main thread hub anyway.
|
||||
telemetry::Hub::main().configure_scope(|scope| {
|
||||
scope.set_context(
|
||||
"firezone",
|
||||
firezone_telemetry::Context::Other(BTreeMap::from([(
|
||||
"id".to_string(),
|
||||
id.id.into(),
|
||||
)])),
|
||||
)
|
||||
});
|
||||
}
|
||||
Err(error) => {
|
||||
telemetry::capture_anyhow(&error.context("Failed to read device ID"));
|
||||
}
|
||||
if let Ok(id) = firezone_headless_client::device_id::get() {
|
||||
telemetry.set_firezone_id(id.id);
|
||||
}
|
||||
fix_log_filter(&mut settings)?;
|
||||
let common::logging::Handles {
|
||||
@@ -165,15 +150,6 @@ fn start_logging(directives: &str) -> Result<common::logging::Handles> {
|
||||
?system_uptime_seconds,
|
||||
"`gui-client` started logging"
|
||||
);
|
||||
telemetry::add_breadcrumb(telemetry::Breadcrumb {
|
||||
ty: "logging_start".into(),
|
||||
category: None,
|
||||
data: BTreeMap::from([
|
||||
("directives".into(), directives.into()),
|
||||
("system_uptime_seconds".into(), system_uptime_seconds.into()),
|
||||
]),
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
Ok(logging_handles)
|
||||
}
|
||||
|
||||
@@ -204,11 +204,6 @@ pub(crate) fn run(
|
||||
tracing::warn!(
|
||||
"Will crash / error / panic on purpose in {delay} seconds to test error handling."
|
||||
);
|
||||
telemetry::add_breadcrumb(telemetry::Breadcrumb {
|
||||
ty: "fail_on_purpose".into(),
|
||||
message: Some("Will crash / error / panic on purpose to test error handling.".into()),
|
||||
..Default::default()
|
||||
});
|
||||
tokio::time::sleep(Duration::from_secs(delay)).await;
|
||||
tracing::warn!("Crashing / erroring / panicking on purpose");
|
||||
ctlr_tx.send(ControllerRequest::Fail(failure)).await?;
|
||||
@@ -259,25 +254,27 @@ pub(crate) fn run(
|
||||
let exit_code = match result {
|
||||
Err(_panic) => {
|
||||
// The panic will have been recorded already by Sentry's panic hook.
|
||||
telemetry::end_session_with_status(telemetry::SessionStatus::Crashed);
|
||||
telemetry.stop_on_crash().await;
|
||||
|
||||
1
|
||||
}
|
||||
Ok(Err(error)) => {
|
||||
tracing::error!(error = std_dyn_err(&error), "run_controller returned an error");
|
||||
errors::show_error_dialog(&error).unwrap();
|
||||
telemetry::end_session_with_status(telemetry::SessionStatus::Crashed);
|
||||
|
||||
telemetry.stop_on_crash().await;
|
||||
|
||||
1
|
||||
}
|
||||
Ok(Ok(_)) => {
|
||||
telemetry::end_session();
|
||||
telemetry.stop().await;
|
||||
|
||||
0
|
||||
}
|
||||
};
|
||||
|
||||
// In a normal Rust application, Sentry's guard would flush on drop: https://docs.sentry.io/platforms/rust/configuration/draining/
|
||||
// But due to a limit in `tao` we cannot return from the event loop and must call `std::process::exit` (or Tauri's wrapper), so we explicitly flush here.
|
||||
// TODO: This limit may not exist in Tauri v2
|
||||
telemetry.stop().await;
|
||||
|
||||
tracing::info!(?exit_code);
|
||||
app_handle.exit(exit_code);
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
use std::time::Duration;
|
||||
|
||||
pub use sentry::{
|
||||
add_breadcrumb, capture_error, capture_message, end_session, end_session_with_status,
|
||||
types::protocol::v7::{Context, SessionStatus},
|
||||
Breadcrumb, Hub, Level,
|
||||
};
|
||||
use sentry::protocol::SessionStatus;
|
||||
pub use sentry_anyhow::capture_anyhow;
|
||||
|
||||
pub struct Dsn(&'static str);
|
||||
@@ -40,6 +36,17 @@ pub struct Telemetry {
|
||||
firezone_id: Option<String>,
|
||||
}
|
||||
|
||||
impl Drop for Telemetry {
|
||||
fn drop(&mut self) {
|
||||
if self.inner.is_none() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Conclude telemetry session as "abnormal" if we get dropped without closing it properly first.
|
||||
sentry::end_session_with_status(SessionStatus::Abnormal);
|
||||
}
|
||||
}
|
||||
|
||||
impl Telemetry {
|
||||
pub fn start(&mut self, api_url: &str, release: &str, dsn: Dsn) {
|
||||
// Since it's `arc_swap` and not `Option`, there is a TOCTOU here,
|
||||
@@ -89,11 +96,19 @@ impl Telemetry {
|
||||
|
||||
/// Flushes events to sentry.io and drops the guard
|
||||
pub async fn stop(&mut self) {
|
||||
self.end_session(SessionStatus::Exited).await;
|
||||
}
|
||||
|
||||
pub async fn stop_on_crash(&mut self) {
|
||||
self.end_session(SessionStatus::Crashed).await;
|
||||
}
|
||||
|
||||
async fn end_session(&mut self, status: SessionStatus) {
|
||||
let Some(inner) = self.inner.take() else {
|
||||
return;
|
||||
};
|
||||
tracing::info!("Stopping telemetry");
|
||||
sentry::end_session();
|
||||
sentry::end_session_with_status(status);
|
||||
|
||||
// Sentry uses blocking IO for flushing ..
|
||||
let _ = tokio::task::spawn_blocking(move || {
|
||||
|
||||
Reference in New Issue
Block a user