Files
firezone/rust/telemetry/src/lib.rs
Thomas Eizinger dacc402721 chore(connlib): only log span field name into message (#9981)
When looking at logs, reducing noise is critical to make it easier to
spot important information. When sending logs to Sentry, we currently
append the fields of certain spans to message to make the output similar
to that of `tracing_subscriber::fmt`.

The actual name of a field inside a span is separated from the span name
by a colon. For example, here is a log message as we see it in Sentry
today:

> handle_input:class=success response
handle_input:from=C1A0479AA153FACA0722A5DF76343CF2BEECB10E:3478
handle_input:method=binding handle_input:rtt=34.7479ms
handle_input:tid=BB30E859ED88FFDF0786B634 request=["Software(snownet;
session=BCA42EF159C794F41AE45BF5099E54D3A193A7184C4D2C3560C2FE49C4C6CFB7)"]
response=["Software(firezone-relay; rev=e4ba5a69)",
"XorMappedAddress(B824B4035A78A6B188EF38BE13AA3C1B1B1196D6:52625)"]

Really, what we would like to see is only this:

> class=success response
from=C1A0479AA153FACA0722A5DF76343CF2BEECB10E:3478 method=binding
rtt=34.7479ms tid=BB30E859ED88FFDF0786B634 request=["Software(snownet;
session=BCA42EF159C794F41AE45BF5099E54D3A193A7184C4D2C3560C2FE49C4C6CFB7)"]
response=["Software(firezone-relay; rev=e4ba5a69)",
"XorMappedAddress(B824B4035A78A6B188EF38BE13AA3C1B1B1196D6:52625)"]

The duplication of `handle_input:` is just noise. In our local log
output, we already strip the name of the span to make it easier to read.
Here we now also do the same for the logs reported to Sentry.
2025-07-24 01:37:43 +00:00

413 lines
13 KiB
Rust

#![cfg_attr(test, allow(clippy::unwrap_used))]
use std::{borrow::Cow, collections::BTreeMap, fmt, str::FromStr, sync::Arc, time::Duration};
use anyhow::{Result, bail};
use api_url::ApiUrl;
use sentry::{
BeforeCallback, User,
protocol::{Event, Log, LogAttribute, SessionStatus},
};
use sha2::Digest as _;
pub mod analytics;
pub mod feature_flags;
pub mod otel;
mod api_url;
mod maybe_push_metrics_exporter;
mod noop_push_metrics_exporter;
mod posthog;
pub use maybe_push_metrics_exporter::MaybePushMetricsExporter;
pub use noop_push_metrics_exporter::NoopPushMetricsExporter;
pub struct Dsn(&'static str);
// TODO: Dynamic DSN
// Sentry docs say this does not need to be protected:
// > DSNs are safe to keep public because they only allow submission of new events and related event data; they do not allow read access to any information.
// <https://docs.sentry.io/concepts/key-terms/dsn-explainer/#dsn-utilization>
pub const ANDROID_DSN: Dsn = Dsn(
"https://928a6ee1f6af9734100b8bc89b2dc87d@o4507971108339712.ingest.us.sentry.io/4508175126233088",
);
pub const APPLE_DSN: Dsn = Dsn(
"https://66c71f83675f01abfffa8eb977bcbbf7@o4507971108339712.ingest.us.sentry.io/4508175177023488",
);
pub const GATEWAY_DSN: Dsn = Dsn(
"https://f763102cc3937199ec483fbdae63dfdc@o4507971108339712.ingest.us.sentry.io/4508162914451456",
);
pub const GUI_DSN: Dsn = Dsn(
"https://2e17bf5ed24a78c0ac9e84a5de2bd6fc@o4507971108339712.ingest.us.sentry.io/4508008945549312",
);
pub const HEADLESS_DSN: Dsn = Dsn(
"https://bc27dca8bb37be0142c48c4f89647c13@o4507971108339712.ingest.us.sentry.io/4508010028728320",
);
pub const RELAY_DSN: Dsn = Dsn(
"https://9d5f664d8f8f7f1716d4b63a58bcafd5@o4507971108339712.ingest.us.sentry.io/4508373298970624",
);
pub const TESTING: Dsn = Dsn(
"https://55ef451fca9054179a11f5d132c02f45@o4507971108339712.ingest.us.sentry.io/4508792604852224",
);
#[derive(Debug, PartialEq, Clone, Copy)]
pub(crate) enum Env {
Production,
Staging,
DockerCompose,
Localhost,
OnPrem,
}
impl From<ApiUrl<'_>> for Env {
fn from(value: ApiUrl) -> Self {
match value {
ApiUrl::PROD => Self::Production,
ApiUrl::STAGING => Self::Staging,
ApiUrl::DOCKER_COMPOSE | ApiUrl::LOCALHOST => Self::DockerCompose,
_ => Self::OnPrem,
}
}
}
impl Env {
pub(crate) fn as_str(&self) -> &'static str {
match self {
Env::Production => "production",
Env::Staging => "staging",
Env::DockerCompose => "docker-compose",
Env::Localhost => "localhost",
Env::OnPrem => "on-prem",
}
}
}
impl FromStr for Env {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"production" => Ok(Self::Production),
"staging" => Ok(Self::Staging),
"docker-compose" => Ok(Self::DockerCompose),
"localhost" => Ok(Self::Localhost),
"on-prem" => Ok(Self::OnPrem),
other => bail!("Unknown env `{other}`"),
}
}
}
impl fmt::Display for Env {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.as_str().fmt(f)
}
}
#[derive(Default)]
pub struct Telemetry {
inner: Option<sentry::ClientInitGuard>,
}
impl Drop for Telemetry {
fn drop(&mut self) {
if self.inner.is_none() {
return;
}
// Conclude telemetry session as "abnormal" if we get dropped without closing it properly first.
sentry::end_session_with_status(SessionStatus::Abnormal);
}
}
impl Telemetry {
pub async fn start(&mut self, api_url: &str, release: &str, dsn: Dsn, firezone_id: String) {
// Can't use URLs as `environment` directly, because Sentry doesn't allow slashes in environments.
// <https://docs.sentry.io/platforms/rust/configuration/environments/>
let environment = Env::from(ApiUrl::new(api_url));
if self
.inner
.as_ref()
.and_then(|i| i.options().environment.as_ref())
.is_some_and(|env| env == environment.as_str())
{
tracing::debug!(%environment, "Telemetry already initialised");
return;
}
// Stop any previous telemetry session.
if let Some(inner) = self.inner.take() {
tracing::debug!("Stopping previous telemetry session");
sentry::end_session();
drop(inner);
set_current_user(None);
}
if [Env::OnPrem, Env::Localhost, Env::DockerCompose].contains(&environment) {
tracing::debug!(%api_url, "Telemetry won't start in unofficial environment");
return;
}
// Important: Evaluate feature flags before checking `stream_logs` to avoid hitting the default.
feature_flags::evaluate_now(firezone_id.clone(), environment).await;
tracing::info!(%environment, "Starting telemetry");
let inner = sentry::init((
dsn.0,
sentry::ClientOptions {
environment: Some(Cow::Borrowed(environment.as_str())),
// We can't get the release number ourselves because we don't know if we're embedded in a GUI Client or a Headless Client.
release: Some(release.to_owned().into()),
max_breadcrumbs: 500,
before_send: Some(event_rate_limiter(Duration::from_secs(60 * 5))),
enable_logs: true,
before_send_log: Some(Arc::new(append_tracing_fields_to_message)),
..Default::default()
},
));
// Configure scope on the main hub so that all threads will get the tags
sentry::Hub::main().configure_scope(move |scope| {
scope.set_tag("api_url", api_url);
let ctx = sentry::integrations::contexts::utils::device_context();
scope.set_context("device", ctx);
let ctx = sentry::integrations::contexts::utils::rust_context();
scope.set_context("rust", ctx);
if let Some(ctx) = sentry::integrations::contexts::utils::os_context() {
scope.set_context("os", ctx);
}
scope.set_user(Some(compute_user(firezone_id)));
});
self.inner.replace(inner);
sentry::start_session();
}
/// Flushes events to sentry.io and drops the guard
pub async fn stop(&mut self) {
self.end_session(SessionStatus::Exited).await;
}
pub async fn stop_on_crash(&mut self) {
self.end_session(SessionStatus::Crashed).await;
}
async fn end_session(&mut self, status: SessionStatus) {
let Some(inner) = self.inner.take() else {
return;
};
tracing::info!("Stopping telemetry");
// Sentry uses blocking IO for flushing ..
let _ = tokio::task::spawn_blocking(move || {
if !inner.flush(Some(Duration::from_secs(5))) {
tracing::error!("Failed to flush telemetry events to sentry.io");
return;
};
tracing::debug!("Flushed telemetry");
})
.await;
sentry::end_session_with_status(status);
}
pub fn set_account_slug(slug: String) {
update_user(|user| {
user.other.insert("account_slug".to_owned(), slug.into());
});
}
pub(crate) fn current_env() -> Option<Env> {
let client = sentry::Hub::main().client()?;
let env = client.options().environment.as_deref()?;
let env = Env::from_str(env).ok()?;
Some(env)
}
pub(crate) fn current_user() -> Option<String> {
sentry::Hub::main().configure_scope(|s| s.user()?.id.clone())
}
}
/// Computes the [`User`] scope based on the contents of `firezone_id`.
///
/// If `firezone_id` looks like a UUID, we hash and hex-encode it.
/// This will align the ID with what we see in the portal.
///
/// If it is not a UUID, it is already from a newer installation of Firezone
/// where the ID is sent as-is.
///
/// As a result, this will allow us to always filter the user by the hex-encoded ID.
fn compute_user(firezone_id: String) -> User {
if uuid::Uuid::from_str(&firezone_id).is_ok() {
let encoded_id = hex::encode(sha2::Sha256::digest(firezone_id));
return User {
id: Some(encoded_id.clone()),
other: BTreeMap::from([("uuid".to_owned(), serde_json::Value::String(encoded_id))]),
..User::default()
};
}
User {
id: Some(firezone_id),
..User::default()
}
}
fn event_rate_limiter(timeout: Duration) -> BeforeCallback<Event<'static>> {
let cache = moka::sync::CacheBuilder::<String, (), _>::default()
.max_capacity(10_000)
.time_to_live(timeout)
.build();
Arc::new(move |event: Event<'static>| {
let Some(message) = &event.message else {
return Some(event);
};
if cache.contains_key(message) {
return None;
}
cache.insert(message.clone(), ());
Some(event)
})
}
/// Appends all but certain attributes from a sentry [`Log`] to the message body.
///
/// Sentry stores all [`tracing`] fields as attributes and only renders the message.
/// Within Firezone, we make extensive use of attributes to provide contextual information.
/// We want to see these attributes inline with the message which is why we emulate the behaviour of `tracing_subscriber::fmt` here.
#[expect(
clippy::unnecessary_wraps,
reason = "We need to match Sentry's config signature."
)]
fn append_tracing_fields_to_message(mut log: Log) -> Option<Log> {
const IGNORED_ATTRS: &[&str] = &[
"os.",
"sentry.",
"tracing.",
"server.",
"user.",
"log.",
"parent_span_id",
];
for (key, attribute) in &log.attributes {
let LogAttribute(serde_json::Value::String(attr_string)) = &attribute else {
continue;
};
if IGNORED_ATTRS.iter().any(|attr| key.starts_with(attr)) {
continue;
}
let key = match key.rsplit_once(':') {
Some((_, key)) => key,
None => key,
};
log.body.push_str(&format!(" {key}={attr_string}"));
}
Some(log)
}
fn update_user(update: impl FnOnce(&mut sentry::User)) {
sentry::Hub::main().configure_scope(|scope| {
let mut user = scope.user().cloned().unwrap_or_default();
update(&mut user);
scope.set_user(Some(user));
});
}
fn set_current_user(user: Option<sentry::User>) {
sentry::Hub::main().configure_scope(|scope| scope.set_user(user));
}
#[cfg(test)]
mod tests {
use std::time::SystemTime;
use super::*;
#[tokio::test]
async fn starting_session_for_unsupported_env_disables_current_one() {
let mut telemetry = Telemetry::default();
telemetry
.start("wss://api.firez.one", "1.0.0", TESTING, String::new())
.await;
telemetry
.start("wss://example.com", "1.0.0", TESTING, String::new())
.await;
assert!(telemetry.inner.is_none());
}
#[test]
fn rate_limits_events_with_same_message() {
let before_send = event_rate_limiter(Duration::from_secs(1));
let event1 = event("foo");
let event2 = event("bar");
assert!(before_send(event1.clone()).is_some());
assert!(before_send(event2.clone()).is_some());
assert!(before_send(event1.clone()).is_none());
std::thread::sleep(Duration::from_secs(1));
assert!(before_send(event1.clone()).is_some());
}
#[test]
fn trims_name_of_span_from_field_before_pushing_to_message() {
let log = log(
"Foobar",
&[
("handle_input:class", "success response"),
("handle_input:from", "1.1.1.1:3478"),
("handle_input:method", "binding"),
],
);
let log = append_tracing_fields_to_message(log).unwrap();
assert_eq!(
log.body,
"Foobar class=success response from=1.1.1.1:3478 method=binding"
)
}
fn event(msg: &str) -> Event<'static> {
Event {
message: Some(msg.to_owned()),
..Default::default()
}
}
fn log(msg: &str, attrs: &[(&str, &str)]) -> Log {
Log {
level: sentry::protocol::LogLevel::Info,
body: msg.to_owned(),
trace_id: None,
timestamp: SystemTime::now(),
severity_number: None,
attributes: attrs
.iter()
.map(|(k, v)| (k.to_string(), v.to_owned().into()))
.collect(),
}
}
}