feat(rust): stream logs to Sentry when enabled in PostHog (#9635)

Sentry has a new "Logs" feature where we can stream logs directly to
Sentry. Doing this for all Clients and Gateways would be way too much
data to collect though.

In order to aid debugging from customer installations, we add a
PostHog-managed feature flag that - if set to `true` - enables the
streaming of logs to Sentry. This feature flag is evaluated every time
the telemetry context is initialised:

- For all FFI usages of connlib, this happens every time a new session
is created.
- For the Windows/Linux Tunnel service, this also happens every time we
create a new session.
- For the Headless Client and Gateway, it happens on startup and
afterwards, every minute. The feature-flag context itself is only
checked every 5 minutes though so it might take up to 5 minutes before
this takes effect.

The default value - like all feature flags - is `false`. Therefore, if
there is any issue with the PostHog service, we will fall back to the
previous behaviour where logs are simply stored locally.

Resolves: #9600
This commit is contained in:
Thomas Eizinger
2025-06-25 18:14:14 +02:00
committed by GitHub
parent 02dd21018d
commit 3b972643b1
14 changed files with 286 additions and 152 deletions

73
rust/Cargo.lock generated
View File

@@ -6360,9 +6360,9 @@ dependencies = [
[[package]]
name = "sentry"
version = "0.38.1"
version = "0.41.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a505499b38861edd82b5a688fa06ba4ba5875bb832adeeeba22b7b23fc4bc39a"
checksum = "507ac2be9bf2da56c831da57faf1dadd81f434bd282935cdb06193d0c94e8811"
dependencies = [
"httpdate",
"reqwest",
@@ -6372,18 +6372,18 @@ dependencies = [
"sentry-contexts",
"sentry-core",
"sentry-debug-images",
"sentry-log",
"sentry-panic",
"sentry-tracing",
"tokio",
"ureq",
"webpki-roots 0.26.11",
]
[[package]]
name = "sentry-actix"
version = "0.38.1"
version = "0.41.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ad8bfdcfbc6e0d0dacaa5728555085ef459fa9226cfc2fe64eefa4b8038b7f"
checksum = "8402c142005ee560ae361c73ebece13a299ec3e9cce5b8654479ea9aac8dc8df"
dependencies = [
"actix-http",
"actix-web",
@@ -6394,9 +6394,9 @@ dependencies = [
[[package]]
name = "sentry-backtrace"
version = "0.38.1"
version = "0.41.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8dace796060e4ad10e3d1405b122ae184a8b2e71dce05ae450e4f81b7686b0d9"
checksum = "eb4416302fa5325181a120e0fe7d4afd83cd95e52a9b86afa34a8161383fe0dc"
dependencies = [
"backtrace",
"regex",
@@ -6405,9 +6405,9 @@ dependencies = [
[[package]]
name = "sentry-contexts"
version = "0.38.1"
version = "0.41.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87bd9e6b51ffe2bc7188ebe36cb67557cb95749c08a3f81f33e8c9b135e0d1bc"
checksum = "936752f42b6f651dcb257da0bfa235ecc79e82011c49ed3383c212cc582263ff"
dependencies = [
"hostname",
"libc",
@@ -6419,9 +6419,9 @@ dependencies = [
[[package]]
name = "sentry-core"
version = "0.38.1"
version = "0.41.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7426d4beec270cfdbb50f85f0bb2ce176ea57eed0b11741182a163055a558187"
checksum = "00e9bd2cadaeda3af41e9fa5d14645127d6f6a4aec73da3ae38e477ecafd3682"
dependencies = [
"rand 0.9.1",
"sentry-types",
@@ -6431,19 +6431,29 @@ dependencies = [
[[package]]
name = "sentry-debug-images"
version = "0.38.1"
version = "0.41.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9df15c066c04f34c4dfd496a8e76590106b93283f72ef1a47d8fb24d88493424"
checksum = "e1e074fe9a0970c91999b23ed3195e6e30990d589fba3a68f20a1686af0f5cda"
dependencies = [
"findshlibs",
"sentry-core",
]
[[package]]
name = "sentry-panic"
version = "0.38.1"
name = "sentry-log"
version = "0.41.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c92beed69b776a162b6d269bef1eaa3e614090b6df45a88d9b239c4fdbffdfba"
checksum = "a693f27e3f63ae085cf7c176b5c44038af27c8a0170d01db30ccf776c2d40ce3"
dependencies = [
"log",
"sentry-core",
]
[[package]]
name = "sentry-panic"
version = "0.41.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4651d34f3ba649d9e6dc1268443cae6728b8f741c2f0264004f8ecf5b247330d"
dependencies = [
"sentry-backtrace",
"sentry-core",
@@ -6451,10 +6461,11 @@ dependencies = [
[[package]]
name = "sentry-tracing"
version = "0.38.1"
version = "0.41.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55c323492795de90824f3198562e33dd74ae3bc852fbb13c0cabec54a1cf73cd"
checksum = "c25c47d36bc80c74d26d568ffe970c37b337c061b7234ad6f2d159439c16f000"
dependencies = [
"bitflags 2.9.1",
"sentry-backtrace",
"sentry-core",
"tracing-core",
@@ -6463,9 +6474,9 @@ dependencies = [
[[package]]
name = "sentry-types"
version = "0.38.1"
version = "0.41.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04b6c9287202294685cb1f749b944dbbce8160b81a1061ecddc073025fed129f"
checksum = "a08e7154abe2cd557f26fd70038452810748aefdf39bc973f674421224b147c1"
dependencies = [
"debugid",
"hex",
@@ -8485,19 +8496,33 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
[[package]]
name = "ureq"
version = "2.12.1"
version = "3.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d"
checksum = "9f0fde9bc91026e381155f8c67cb354bcd35260b2f4a29bcc84639f762760c39"
dependencies = [
"base64 0.22.1",
"log",
"once_cell",
"percent-encoding",
"rustls",
"rustls-pemfile",
"rustls-pki-types",
"url",
"ureq-proto",
"utf-8",
"webpki-roots 0.26.11",
]
[[package]]
name = "ureq-proto"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59db78ad1923f2b1be62b6da81fe80b173605ca0d57f85da2e005382adf693f7"
dependencies = [
"base64 0.22.1",
"http 1.3.1",
"httparse",
"log",
]
[[package]]
name = "url"
version = "2.5.4"

View File

@@ -139,8 +139,8 @@ sadness-generator = "0.6.0"
sd-notify = "0.4.5" # This is a pure Rust re-implementation, so it isn't vulnerable to CVE-2024-3094
secrecy = "0.8"
semver = "1.0.26"
sentry = { version = "0.38.1", default-features = false }
sentry-tracing = "0.38.1"
sentry = { version = "0.41.0", default-features = false }
sentry-tracing = "0.41.0"
serde = "1.0.219"
serde_json = "1.0.140"
serde_variant = "0.1.3"

View File

@@ -254,9 +254,14 @@ impl WrappedSession {
callback_handler: ffi::CallbackHandler,
device_info: String,
) -> Result<Self> {
let runtime = tokio::runtime::Builder::new_multi_thread()
.worker_threads(1)
.thread_name("connlib")
.enable_all()
.build()?;
let mut telemetry = Telemetry::default();
telemetry.start(&api_url, RELEASE, APPLE_DSN);
Telemetry::set_firezone_id(device_id.clone());
runtime.block_on(telemetry.start(&api_url, RELEASE, APPLE_DSN, device_id.clone()));
Telemetry::set_account_slug(account_slug.clone());
analytics::identify(
@@ -281,11 +286,6 @@ impl WrappedSession {
device_info,
)?;
let runtime = tokio::runtime::Builder::new_multi_thread()
.worker_threads(1)
.thread_name("connlib")
.enable_all()
.build()?;
let _guard = runtime.enter(); // Constructing `PhoenixChannel` requires a runtime context.
let portal = PhoenixChannel::disconnected(

View File

@@ -226,9 +226,15 @@ fn connect(
serde_json::from_str(&device_info).context("Failed to deserialize `DeviceInfo`")?;
let secret = SecretString::from(token);
let runtime = tokio::runtime::Builder::new_multi_thread()
.worker_threads(1)
.thread_name("connlib")
.enable_all()
.build()
.context("Failed to create tokio runtime")?;
let mut telemetry = Telemetry::default();
telemetry.start(&api_url, RELEASE, platform::DSN);
Telemetry::set_firezone_id(device_id.clone());
runtime.block_on(telemetry.start(&api_url, RELEASE, platform::DSN, device_id.clone()));
Telemetry::set_account_slug(account_slug.clone());
analytics::identify(
@@ -250,12 +256,6 @@ fn connect(
)
.context("Failed to create login URL")?;
let runtime = tokio::runtime::Builder::new_multi_thread()
.worker_threads(1)
.thread_name("connlib")
.enable_all()
.build()
.context("Failed to create tokio runtime")?;
let _guard = runtime.enter(); // Constructing `PhoenixChannel` requires a runtime context.
let portal = PhoenixChannel::disconnected(

View File

@@ -49,7 +49,7 @@ enum ResolveTrigger {
SetupNat(ResolveDnsRequest),
}
pub struct Eventloop {
pub struct Eventloop<'a> {
tunnel: GatewayTunnel,
portal: PhoenixChannel<(), IngressMessages, (), PublicKeyParam>,
tun_device_manager: Arc<Mutex<TunDeviceManager>>,
@@ -61,15 +61,19 @@ pub struct Eventloop {
set_interface_tasks: futures_bounded::FuturesSet<Result<Interface>>,
telemetry_refresh: tokio::time::Interval,
telemetry: &'a mut Telemetry,
logged_permission_denied: bool,
}
impl Eventloop {
impl<'a> Eventloop<'a> {
pub(crate) fn new(
tunnel: GatewayTunnel,
mut portal: PhoenixChannel<(), IngressMessages, (), PublicKeyParam>,
tun_device_manager: TunDeviceManager,
firezone_id: String,
telemetry: &'a mut Telemetry,
) -> Self {
portal.connect(PublicKeyParam(tunnel.public_key().to_bytes()));
@@ -88,11 +92,13 @@ impl Eventloop {
tracing::debug!(%domain, ?ips, ?cause, "DNS cache entry evicted");
})
.build(),
telemetry_refresh: tokio::time::interval(Duration::from_secs(60)),
telemetry,
}
}
}
impl Eventloop {
impl<'a> Eventloop<'a> {
pub fn poll(&mut self, cx: &mut Context<'_>) -> Poll<Result<Infallible>> {
loop {
match self.tunnel.poll_next_event(cx) {
@@ -210,6 +216,14 @@ impl Eventloop {
Poll::Pending => {}
}
match self.telemetry_refresh.poll_tick(cx) {
Poll::Ready(_) => {
self.telemetry.refresh_config();
continue;
}
Poll::Pending => {}
}
return Poll::Pending;
}
}

View File

@@ -52,13 +52,6 @@ fn main() -> ExitCode {
.expect("Calling `install_default` only once per process should always succeed");
let mut telemetry = Telemetry::default();
if cli.is_telemetry_allowed() {
telemetry.start(
cli.api_url.as_str(),
RELEASE,
firezone_telemetry::GATEWAY_DSN,
);
}
let runtime = tokio::runtime::Builder::new_current_thread()
.enable_all()
@@ -66,21 +59,17 @@ fn main() -> ExitCode {
.expect("Failed to create tokio runtime");
match runtime
.block_on(try_main(cli))
.block_on(try_main(cli, &mut telemetry))
.context("Failed to start Gateway")
{
Ok(ExitCode::SUCCESS) => {
Ok(()) => {
tracing::info!("Received CTRL+C, goodbye!");
runtime.block_on(telemetry.stop());
ExitCode::SUCCESS
}
Ok(_) => {
runtime.block_on(telemetry.stop_on_crash());
ExitCode::FAILURE
}
Err(e) => {
tracing::error!("{e:#}");
tracing::info!("{e:#}");
runtime.block_on(telemetry.stop_on_crash());
ExitCode::FAILURE
@@ -102,15 +91,25 @@ fn has_necessary_permissions() -> bool {
is_root || has_net_admin
}
async fn try_main(cli: Cli) -> Result<ExitCode> {
async fn try_main(cli: Cli, telemetry: &mut Telemetry) -> Result<()> {
firezone_logging::setup_global_subscriber(layer::Identity::default())
.context("Failed to set up logging")?;
tracing::debug!(?cli);
let firezone_id = get_firezone_id(cli.firezone_id).await
let firezone_id = get_firezone_id(cli.firezone_id.clone()).await
.context("Couldn't read FIREZONE_ID or write it to disk: Please provide it through the env variable or provide rw access to /var/lib/firezone/")?;
Telemetry::set_firezone_id(firezone_id.clone());
if cli.is_telemetry_allowed() {
telemetry
.start(
cli.api_url.as_str(),
concat!("gateway@", env!("CARGO_PKG_VERSION")),
firezone_telemetry::GATEWAY_DSN,
firezone_id.clone(),
)
.await;
}
if cli.metrics {
let exporter = opentelemetry_stdout::MetricExporter::default();
@@ -176,12 +175,12 @@ async fn try_main(cli: Cli) -> Result<ExitCode> {
tunnel.set_tun(tun);
}
let task = tokio::spawn(future::poll_fn({
let mut eventloop = Eventloop::new(tunnel, portal, tun_device_manager, firezone_id);
let eventloop = future::poll_fn({
let mut eventloop =
Eventloop::new(tunnel, portal, tun_device_manager, firezone_id, telemetry);
move |cx| eventloop.poll(cx)
}))
.err_into();
});
let ctrl_c = pin!(ctrl_c().map_err(anyhow::Error::new));
tokio::spawn(http_health_check::serve(
@@ -189,20 +188,12 @@ async fn try_main(cli: Cli) -> Result<ExitCode> {
|| true,
));
match future::try_select(task, ctrl_c)
match future::try_select(eventloop, ctrl_c)
.await
.map_err(|e| e.factor_first().0)?
{
future::Either::Left((Err(e), _)) => {
tracing::info!("{e}");
Ok(ExitCode::FAILURE)
}
future::Either::Right(((), _)) => {
tracing::info!("Received CTRL+C, goodbye!");
Ok(ExitCode::SUCCESS)
}
future::Either::Left((never, _)) => match never {},
future::Either::Right(((), _)) => Ok(()),
}
}

View File

@@ -11,6 +11,7 @@ use clap::{Args, Parser};
use controller::Failure;
use firezone_gui_client::{controller, deep_link, elevation, gui, logging, settings};
use firezone_telemetry::Telemetry;
use firezone_telemetry::analytics;
use settings::AdvancedSettingsLegacy;
use tokio::runtime::Runtime;
use tracing::subscriber::DefaultGuard;
@@ -77,11 +78,22 @@ fn try_main(
.unwrap_or(&advanced_settings.api_url)
.to_string();
telemetry.start(
// Get the device ID before starting Tokio, so that all the worker threads will inherit the correct scope.
// Technically this means we can fail to get the device ID on a newly-installed system, since the Tunnel service may not have fully started up when the GUI process reaches this point, but in practice it's unlikely.
let id = firezone_bin_shared::device_id::get().context("Failed to get device ID")?;
analytics::identify(
id.id.clone(),
api_url.clone(),
firezone_gui_client::RELEASE.to_owned(),
None,
);
rt.block_on(telemetry.start(
&api_url,
firezone_gui_client::RELEASE,
firezone_telemetry::GUI_DSN,
);
id.id,
));
// Don't fix the log filter for smoke tests because we can't show a dialog there.
if !config.smoke_test {
@@ -100,12 +112,6 @@ fn try_main(
reloader,
} = firezone_gui_client::logging::setup_gui(&log_filter)?;
// Get the device ID before starting Tokio, so that all the worker threads will inherit the correct scope.
// Technically this means we can fail to get the device ID on a newly-installed system, since the Tunnel service may not have fully started up when the GUI process reaches this point, but in practice it's unlikely.
if let Ok(id) = firezone_bin_shared::device_id::get() {
Telemetry::set_firezone_id(id.id);
}
match cli.command {
None if cli.check_elevation() => match elevation::gui_check() {
Ok(true) => {}

View File

@@ -548,8 +548,13 @@ impl<'a> Handler<'a> {
account_slug,
} => {
self.telemetry
.start(&environment, &release, firezone_telemetry::GUI_DSN);
Telemetry::set_firezone_id(self.device_id.id.clone());
.start(
&environment,
&release,
firezone_telemetry::GUI_DSN,
self.device_id.id.clone(),
)
.await;
if let Some(account_slug) = account_slug {
Telemetry::set_account_slug(account_slug.clone());
@@ -576,7 +581,6 @@ impl<'a> Handler<'a> {
assert!(self.session.is_none());
let device_id = device_id::get_or_create().context("Failed to get-or-create device ID")?;
Telemetry::set_firezone_id(device_id.id.clone());
let url = LoginUrl::client(
Url::parse(api_url).context("Failed to parse URL")?,

View File

@@ -21,6 +21,7 @@ use secrecy::{Secret, SecretString};
use std::{
path::{Path, PathBuf},
sync::Arc,
time::Duration,
};
use tokio::time::Instant;
@@ -170,21 +171,35 @@ fn main() -> Result<()> {
// and we need to recover. <https://github.com/firezone/firezone/issues/4899>
dns_controller.deactivate()?;
let mut telemetry = Telemetry::default();
if cli.is_telemetry_allowed() {
telemetry.start(
cli.api_url.as_ref(),
RELEASE,
firezone_telemetry::HEADLESS_DSN,
);
}
tracing::info!(arch = std::env::consts::ARCH, version = VERSION);
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()?;
// AKA "Device ID", not the Firezone slug
let firezone_id = match cli.firezone_id.clone() {
Some(id) => id,
None => device_id::get_or_create().context("Could not get `firezone_id` from CLI, could not read it from disk, could not generate it and save it to disk")?.id,
};
analytics::identify(
firezone_id.clone(),
cli.api_url.to_string(),
RELEASE.to_owned(),
None,
);
let mut telemetry = Telemetry::default();
if cli.is_telemetry_allowed() {
rt.block_on(telemetry.start(
cli.api_url.as_ref(),
RELEASE,
firezone_telemetry::HEADLESS_DSN,
firezone_id.clone(),
));
}
tracing::info!(arch = std::env::consts::ARCH, version = VERSION);
let token = get_token(token_env_var, &cli.token_path)?.with_context(|| {
format!(
"Can't find the Firezone token in ${TOKEN_ENV_KEY} or in `{}`",
@@ -194,20 +209,6 @@ fn main() -> Result<()> {
// TODO: Should this default to 30 days?
let max_partition_time = cli.max_partition_time.map(|d| d.into());
// AKA "Device ID", not the Firezone slug
let firezone_id = match cli.firezone_id {
Some(id) => id,
None => device_id::get_or_create().context("Could not get `firezone_id` from CLI, could not read it from disk, could not generate it and save it to disk")?.id,
};
Telemetry::set_firezone_id(firezone_id.clone());
analytics::identify(
firezone_id.clone(),
cli.api_url.to_string(),
RELEASE.to_owned(),
None,
);
let url = LoginUrl::client(
cli.api_url.clone(),
&token,
@@ -285,6 +286,8 @@ fn main() -> Result<()> {
new_network_notifier(tokio_handle.clone(), dns_control_method).await?;
drop(tokio_handle);
let mut telemetry_refresh = tokio::time::interval(Duration::from_secs(60));
let tun = {
let _guard = telemetry_span!("create_tun_device").entered();
@@ -320,6 +323,10 @@ fn main() -> Result<()> {
session.reset();
continue;
},
_ = telemetry_refresh.tick() => {
telemetry.refresh_config();
continue;
}
event = event_stream.next() => event.context("event stream unexpectedly ran empty")?,
};

View File

@@ -111,8 +111,7 @@ fn parse_filter(directives: &str) -> Result<EnvFilter, ParseError> {
///
/// By prepending this directive to the active log filter, a simple directive like `debug` actually produces useful logs.
/// If necessary, you can still activate logs from these crates by restating them in your directive with a lower filter, i.e. `netlink_proto=debug`.
const IRRELEVANT_CRATES: &str =
"netlink_proto=warn,os_info=warn,rustls=warn,opentelemetry_sdk=info,opentelemetry=info";
const IRRELEVANT_CRATES: &str = "netlink_proto=warn,os_info=warn,rustls=warn,opentelemetry_sdk=info,opentelemetry=info,hyper_util=info";
let env_filter = if directives.is_empty() {
EnvFilter::try_new(IRRELEVANT_CRATES)?
@@ -222,8 +221,8 @@ where
sentry_tracing::layer()
.event_filter(move |md| match *md.level() {
Level::ERROR | Level::WARN => EventFilter::Event,
Level::INFO | Level::DEBUG => EventFilter::Breadcrumb,
Level::ERROR | Level::WARN => EventFilter::Event | EventFilter::Breadcrumb | EventFilter::Log,
Level::INFO | Level::DEBUG => EventFilter::Breadcrumb | EventFilter::Log,
Level::TRACE if md.target() == TELEMETRY_TARGET => EventFilter::Event,
_ => EventFilter::Ignore,
})

View File

@@ -118,20 +118,21 @@ fn main() {
let args = Args::parse();
let mut telemetry = Telemetry::default();
if args.telemetry {
telemetry.start(
args.api_url.as_str(),
VERSION.unwrap_or("unknown"),
RELAY_DSN,
);
}
let runtime = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.expect("Failed to build tokio runtime");
let mut telemetry = Telemetry::default();
if args.telemetry {
runtime.block_on(telemetry.start(
args.api_url.as_str(),
VERSION.unwrap_or("unknown"),
RELAY_DSN,
String::new(), // Relays don't have a Firezone ID.
));
}
match runtime.block_on(try_main(args)) {
Ok(()) => runtime.block_on(telemetry.stop()),
Err(e) => {

View File

@@ -13,7 +13,7 @@ opentelemetry = { workspace = true }
opentelemetry_sdk = { workspace = true }
parking_lot = { workspace = true }
reqwest = { workspace = true }
sentry = { workspace = true, features = ["contexts", "backtrace", "debug-images", "panic", "reqwest", "rustls", "tracing", "release-health"] }
sentry = { workspace = true, features = ["contexts", "backtrace", "debug-images", "panic", "reqwest", "rustls", "tracing", "release-health", "logs"] }
serde = { workspace = true }
serde_json = { workspace = true }
sha2 = { workspace = true }

View File

@@ -24,29 +24,43 @@ pub fn drop_llmnr_nxdomain_responses() -> bool {
FEATURE_FLAGS.read().drop_llmnr_nxdomain_responses
}
pub(crate) fn reevaluate(user_id: String, env: &str) {
let api_key = match env.parse() {
Ok(Env::Production) => POSTHOG_API_KEY_PROD,
Ok(Env::Staging) => POSTHOG_API_KEY_STAGING,
Ok(Env::OnPrem | Env::DockerCompose | Env::Localhost) | Err(_) => return,
pub fn stream_logs() -> bool {
FEATURE_FLAGS.read().stream_logs
}
pub(crate) async fn evaluate_now(user_id: String, env: Env) {
if user_id.is_empty() {
return;
}
let api_key = match env {
Env::Production => POSTHOG_API_KEY_PROD,
Env::Staging => POSTHOG_API_KEY_STAGING,
Env::OnPrem | Env::DockerCompose | Env::Localhost => return,
};
RUNTIME.spawn(async move {
let flags = decide(user_id, api_key.to_owned())
.await
.inspect_err(|e| tracing::debug!("Failed to evaluate feature flags: {e:#}"))
.unwrap_or_default();
let flags = decide(user_id, api_key.to_owned())
.await
.inspect_err(|e| tracing::debug!("Failed to evaluate feature flags: {e:#}"))
.unwrap_or_default();
tracing::debug!(?flags, "Evaluated feature-flags");
tracing::debug!(?flags, "Evaluated feature-flags");
*FEATURE_FLAGS.write() = flags;
*FEATURE_FLAGS.write() = flags;
sentry::Hub::main().configure_scope(|scope| {
scope.set_context("flags", sentry_flag_context(flags));
});
sentry::Hub::main().configure_scope(|scope| {
scope.set_context("flags", sentry_flag_context(flags));
});
}
pub(crate) fn reevaluate(user_id: String, env: &str) {
let Ok(env) = env.parse() else {
return;
};
RUNTIME.spawn(evaluate_now(user_id, env));
}
pub(crate) async fn reeval_timer() {
loop {
tokio::time::sleep(RE_EVAL_DURATION).await;
@@ -117,6 +131,8 @@ struct FeatureFlags {
icmp_unreachable_instead_of_nat64: bool,
#[serde(default)]
drop_llmnr_nxdomain_responses: bool,
#[serde(default)]
stream_logs: bool,
}
fn sentry_flag_context(flags: FeatureFlags) -> sentry::protocol::Context {
@@ -125,12 +141,14 @@ fn sentry_flag_context(flags: FeatureFlags) -> sentry::protocol::Context {
enum SentryFlag {
IcmpUnreachableInsteadOfNat64 { result: bool },
DropLlmnrNxdomainResponses { result: bool },
StreamLogs { result: bool },
}
// Exhaustive destruction so we don't forget to update this when we add a flag.
let FeatureFlags {
icmp_unreachable_instead_of_nat64,
drop_llmnr_nxdomain_responses,
stream_logs,
} = flags;
let value = serde_json::json!({
@@ -139,6 +157,7 @@ fn sentry_flag_context(flags: FeatureFlags) -> sentry::protocol::Context {
result: icmp_unreachable_instead_of_nat64,
},
SentryFlag::DropLlmnrNxdomainResponses { result: drop_llmnr_nxdomain_responses },
SentryFlag::StreamLogs { result: stream_logs }
]
});

View File

@@ -2,10 +2,10 @@
use std::{borrow::Cow, fmt, str::FromStr, sync::Arc, time::Duration};
use anyhow::{Ok, Result, bail};
use anyhow::{Result, bail};
use sentry::{
BeforeCallback,
protocol::{Event, SessionStatus},
BeforeCallback, User,
protocol::{Event, Log, LogAttribute, SessionStatus},
};
use sha2::Digest as _;
@@ -113,7 +113,7 @@ impl Drop for Telemetry {
}
impl Telemetry {
pub fn start(&mut self, api_url: &str, release: &str, dsn: Dsn) {
pub async fn start(&mut self, api_url: &str, release: &str, dsn: Dsn, firezone_id: String) {
// Can't use URLs as `environment` directly, because Sentry doesn't allow slashes in environments.
// <https://docs.sentry.io/platforms/rust/configuration/environments/>
let environment = Env::from_api_url(api_url);
@@ -144,7 +144,11 @@ impl Telemetry {
return;
}
tracing::info!(%environment, "Starting telemetry");
// Important: Evaluate feature flags before checking `stream_logs` to avoid hitting the default.
feature_flags::evaluate_now(firezone_id.clone(), environment).await;
let enable_logs = feature_flags::stream_logs();
tracing::info!(%environment, %enable_logs, "Starting telemetry");
let inner = sentry::init((
dsn.0,
@@ -159,11 +163,13 @@ impl Telemetry {
})),
max_breadcrumbs: 500,
before_send: Some(event_rate_limiter(Duration::from_secs(60 * 5))),
enable_logs,
before_send_log: Some(Arc::new(append_tracing_fields_to_message)),
..Default::default()
},
));
// Configure scope on the main hub so that all threads will get the tags
sentry::Hub::main().configure_scope(|scope| {
sentry::Hub::main().configure_scope(move |scope| {
scope.set_tag("api_url", api_url);
let ctx = sentry::integrations::contexts::utils::device_context();
scope.set_context("device", ctx);
@@ -173,11 +179,42 @@ impl Telemetry {
if let Some(ctx) = sentry::integrations::contexts::utils::os_context() {
scope.set_context("os", ctx);
}
scope.set_user(Some(User {
id: Some(firezone_id),
..User::default()
}));
});
self.inner.replace(inner);
sentry::start_session();
}
/// Brings the running Sentry client in line with the current feature flags.
///
/// Reads the present value of the `stream_logs` feature flag and compares it with the
/// `enable_logs` option of the active client. If they differ, the client is re-initialised
/// with a copy of its existing options in which only `enable_logs` is changed.
/// Does nothing (apart from a debug log) when no client is active or nothing changed.
pub fn refresh_config(&mut self) {
    let current_options = match self.inner.as_ref() {
        Some(client) => client.options(),
        None => {
            tracing::debug!("Cannot refresh config: no client");
            return;
        }
    };

    let enable_logs = feature_flags::stream_logs();

    if enable_logs == current_options.enable_logs {
        tracing::debug!("Config is up-to-date");
        return;
    }

    // Carry over every other option unchanged; only the log-streaming switch is flipped.
    let new_options = sentry::ClientOptions {
        enable_logs,
        ..current_options.clone()
    };

    tracing::info!(%enable_logs, "Re-initializing telemetry");

    self.inner.replace(sentry::init(new_options));
}
/// Flushes events to sentry.io and drops the guard
pub async fn stop(&mut self) {
self.end_session(SessionStatus::Exited).await;
@@ -261,6 +298,33 @@ fn event_rate_limiter(timeout: Duration) -> BeforeCallback<Event<'static>> {
})
}
/// Renders the string-valued attributes of a sentry [`Log`] into its message body.
///
/// Sentry keeps [`tracing`] fields as separate attributes and only displays the message itself.
/// Firezone relies heavily on such fields for context, so they are appended to the body as
/// ` key=value` pairs, similar to the output of `tracing_subscriber::fmt`.
///
/// Attributes whose value is not a JSON string, and attributes whose key starts with one of the
/// prefixes in `IGNORED_ATTRS`, are left out of the body.
#[expect(
    clippy::unnecessary_wraps,
    reason = "We need to match Sentry's config signature."
)]
fn append_tracing_fields_to_message(mut log: Log) -> Option<Log> {
    const IGNORED_ATTRS: &[&str] = &["os.", "sentry.", "tracing.", "server.", "user."];

    let rendered_fields = log
        .attributes
        .iter()
        .filter_map(|(key, attribute)| match attribute {
            LogAttribute(serde_json::Value::String(value)) => Some((key, value)),
            _ => None,
        })
        .filter(|(key, _)| !IGNORED_ATTRS.iter().any(|prefix| key.starts_with(prefix)));

    // `rendered_fields` only borrows `log.attributes`, so `log.body` can be extended meanwhile.
    for (key, value) in rendered_fields {
        log.body.push_str(&format!(" {key}={value}"));
    }

    Some(log)
}
fn update_user(update: impl FnOnce(&mut sentry::User)) {
sentry::Hub::main().configure_scope(|scope| {
let mut user = scope.user().cloned().unwrap_or_default();
@@ -278,11 +342,15 @@ fn set_current_user(user: Option<sentry::User>) {
mod tests {
use super::*;
#[test]
fn starting_session_for_unsupported_env_disables_current_one() {
#[tokio::test]
async fn starting_session_for_unsupported_env_disables_current_one() {
let mut telemetry = Telemetry::default();
telemetry.start("wss://api.firez.one", "1.0.0", TESTING);
telemetry.start("wss://example.com", "1.0.0", TESTING);
telemetry
.start("wss://api.firez.one", "1.0.0", TESTING, String::new())
.await;
telemetry
.start("wss://example.com", "1.0.0", TESTING, String::new())
.await;
assert!(telemetry.inner.is_none());
}