chore(telemetry): make the firezone device ID a context not a tag (#7179)

Closes #7175 

Also fixes a bug with the initialization order of Tokio and Sentry.

Previously:
1. Start Tokio, executor threads inherit main thread context
2. Load device ID and set it on the main telemetry hub

Now:
1. Load device ID and set it on the main telemetry hub
2. Start Tokio, executor threads inherit main thread context

The context and possibly tags didn't seem to propagate from the main hub
if we set them after the worker threads spawned.

Based on this understanding, the IPC service process is still wrong, but
a fix will have to wait, because telemetry in the IPC service is more
complicated than in the GUI process.

<img width="818" alt="image"
src="https://github.com/user-attachments/assets/9c9efec8-fc55-4863-99eb-5fe9ba5b36fa">
This commit is contained in:
Reactor Scram
2024-10-30 16:27:17 -05:00
committed by GitHub
parent e9b2e4735a
commit 51250faa0d
5 changed files with 52 additions and 15 deletions

View File

@@ -13,7 +13,7 @@ use firezone_headless_client::{
IpcClientMsg::{self, SetDisabledResources},
IpcServerMsg, IpcServiceError, LogFilterReloader,
};
use firezone_telemetry::{self as telemetry, Telemetry};
use firezone_telemetry::Telemetry;
use secrecy::{ExposeSecret as _, SecretString};
use std::{collections::BTreeSet, ops::ControlFlow, path::PathBuf, time::Instant};
use tokio::sync::{mpsc, oneshot};
@@ -69,16 +69,6 @@ impl<I: GuiIntegration> Builder<I> {
let (ipc_tx, ipc_rx) = mpsc::channel(1);
let ipc_client = ipc::Client::new(ipc_tx).await?;
// Get the device ID after connecting to the IPC service, this creates a happens-before relationship where we know the IPC service has written a device ID to disk.
match firezone_headless_client::device_id::get() {
Ok(id) => {
telemetry::Hub::main()
.configure_scope(|scope| scope.set_tag("firezone_id", &id.id));
}
Err(error) => {
telemetry::capture_anyhow(&error.context("Failed to read device ID"));
}
}
Ok(Controller {
advanced_settings,

View File

@@ -98,6 +98,25 @@ fn run_gui(cli: Cli) -> Result<()> {
firezone_bin_shared::git_version!("gui-client-*"),
telemetry::GUI_DSN,
);
// Get the device ID before starting Tokio, so that all the worker threads will inherit the correct scope.
// Technically this means we can fail to get the device ID on a newly-installed system, since the IPC service may not have fully started up when the GUI process reaches this point, but in practice it's unlikely.
match firezone_headless_client::device_id::get() {
Ok(id) => {
// We should still be in the main thread, but explicitly get the main thread hub anyway.
telemetry::Hub::main().configure_scope(|scope| {
scope.set_context(
"firezone",
firezone_telemetry::Context::Other(BTreeMap::from([(
"id".to_string(),
id.id.into(),
)])),
)
});
}
Err(error) => {
telemetry::capture_anyhow(&error.context("Failed to read device ID"));
}
}
fix_log_filter(&mut settings)?;
let common::logging::Handles {
logger: _logger,

View File

@@ -18,7 +18,14 @@ use futures::{
use phoenix_channel::LoginUrl;
use secrecy::SecretString;
use serde::{Deserialize, Serialize};
use std::{collections::BTreeSet, net::IpAddr, path::PathBuf, pin::pin, sync::Arc, time::Duration};
use std::{
collections::{BTreeMap, BTreeSet},
net::IpAddr,
path::PathBuf,
pin::pin,
sync::Arc,
time::Duration,
};
use tokio::{sync::mpsc, task::spawn_blocking, time::Instant};
use tracing::subscriber::set_global_default;
use tracing_subscriber::{layer::SubscriberExt, EnvFilter, Layer, Registry};
@@ -215,7 +222,17 @@ async fn ipc_listen(
let firezone_id = device_id::get_or_create()
.context("Failed to read / create device ID")?
.id;
firezone_telemetry::configure_scope(|scope| scope.set_tag("firezone_id", &firezone_id));
// TODO: Telemetry is initialized wrong here:
// Sentry's contexts must be set on the main thread hub before starting Tokio, so that other thread hubs will inherit the context.
firezone_telemetry::configure_scope(|scope| {
scope.set_context(
"firezone",
firezone_telemetry::Context::Other(BTreeMap::from([(
"id".to_string(),
firezone_id.into(),
)])),
)
});
let mut server = IpcServer::new(ServiceId::Prod).await?;
let mut dns_controller = DnsController { dns_control_method };
let telemetry = Telemetry::default();

View File

@@ -19,6 +19,7 @@ use phoenix_channel::LoginUrl;
use phoenix_channel::PhoenixChannel;
use secrecy::{Secret, SecretString};
use std::{
collections::BTreeMap,
path::{Path, PathBuf},
pin::pin,
sync::Arc,
@@ -175,7 +176,15 @@ fn main() -> Result<()> {
Some(id) => id,
None => device_id::get_or_create().context("Could not get `firezone_id` from CLI, could not read it from disk, could not generate it and save it to disk")?.id,
};
firezone_telemetry::configure_scope(|scope| scope.set_tag("firezone_id", &firezone_id));
firezone_telemetry::configure_scope(|scope| {
scope.set_context(
"firezone",
firezone_telemetry::Context::Other(BTreeMap::from([(
"id".to_string(),
firezone_id.clone().into(),
)])),
)
});
let url = LoginUrl::client(
cli.api_url,

View File

@@ -3,7 +3,9 @@ use std::time::Duration;
pub use sentry::{
add_breadcrumb, capture_error, capture_message, configure_scope, end_session,
end_session_with_status, types::protocol::v7::SessionStatus, Breadcrumb, Hub, Level,
end_session_with_status,
types::protocol::v7::{Context, SessionStatus},
Breadcrumb, Hub, Level,
};
pub use sentry_anyhow::capture_anyhow;