From 46afa52f78b615b3dc59ab13ca01a440a6fa6653 Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Thu, 21 Aug 2025 03:28:05 +0000 Subject: [PATCH] feat(telemetry): pre-resolve Sentry ingest host (#10206) Our Sentry client needs to resolve DNS before being able to send logs or errors to the backend. Currently, this DNS resolution happens on-demand as we don't take any control of the underlying HTTP client. In addition, this will use HTTP/1.1 by default which isn't as efficient as it could be, especially with concurrent requests. Finally, if we decide to ever proxy all Sentry for traffic through our own domain, we have to take control of the underlying client anyway. To resolve all of the above, we create a custom `TransportFactory` where we reuse the existing `ReqwestHttpTransport` but provide an already configured `reqwest::Client` that always uses HTTP/2 with a pre-configured set of DNS records for the given ingest host. --- rust/Cargo.lock | 1 + rust/apple-client-ffi/src/lib.rs | 2 +- rust/client-ffi/src/lib.rs | 2 +- rust/gateway/src/main.rs | 2 +- .../src-tauri/src/bin/firezone-gui-client.rs | 14 +- rust/gui-client/src-tauri/src/service.rs | 4 +- rust/headless-client/src/main.rs | 11 +- rust/logging/src/lib.rs | 2 +- rust/relay/server/src/main.rs | 11 +- rust/telemetry/Cargo.toml | 2 +- rust/telemetry/src/lib.rs | 160 ++++++++++++++---- 11 files changed, 161 insertions(+), 50 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index aea92aab6..769a9f7f5 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -6005,6 +6005,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", + "h2", "http 1.3.1", "http-body", "http-body-util", diff --git a/rust/apple-client-ffi/src/lib.rs b/rust/apple-client-ffi/src/lib.rs index f80f6ceed..d49435a2e 100644 --- a/rust/apple-client-ffi/src/lib.rs +++ b/rust/apple-client-ffi/src/lib.rs @@ -263,7 +263,7 @@ impl WrappedSession { .enable_all() .build()?; - let mut telemetry = Telemetry::default(); + let mut telemetry = Telemetry::new().context("Failed to create telemetry client")?; runtime.block_on(telemetry.start(&api_url, RELEASE, APPLE_DSN, device_id.clone())); Telemetry::set_account_slug(account_slug.clone()); diff --git a/rust/client-ffi/src/lib.rs b/rust/client-ffi/src/lib.rs index e509e4205..c1b0c6c58 100644 --- a/rust/client-ffi/src/lib.rs +++ b/rust/client-ffi/src/lib.rs @@ -242,7 +242,7 @@ fn connect( .build() .context("Failed to create tokio runtime")?; - let mut telemetry = Telemetry::default(); + let mut telemetry = Telemetry::new().context("Failed to create telemetry client")?; runtime.block_on(telemetry.start(&api_url, RELEASE, platform::DSN, device_id.clone())); Telemetry::set_account_slug(account_slug.clone()); diff --git a/rust/gateway/src/main.rs b/rust/gateway/src/main.rs index 9550f5683..98076704c 100644 --- a/rust/gateway/src/main.rs +++ b/rust/gateway/src/main.rs @@ -50,7 +50,7 @@ fn main() -> ExitCode { .install_default() .expect("Calling `install_default` only once per process should always succeed"); - let mut telemetry = Telemetry::default(); + let mut telemetry = Telemetry::new().expect("Failed to create telemetry client"); let runtime = tokio::runtime::Builder::new_current_thread() .enable_all() diff --git a/rust/gui-client/src-tauri/src/bin/firezone-gui-client.rs b/rust/gui-client/src-tauri/src/bin/firezone-gui-client.rs index a1c237a4d..d46316e60 100644 --- a/rust/gui-client/src-tauri/src/bin/firezone-gui-client.rs +++ b/rust/gui-client/src-tauri/src/bin/firezone-gui-client.rs @@ -26,10 +26,17 @@ fn main() -> ExitCode { std::env::set_var("GDK_BACKEND", "x11"); } - let mut telemetry = Telemetry::default(); + let cli = Cli::parse(); + + let mut telemetry = if cli.is_telemetry_allowed() { + Telemetry::new().expect("Failed to create telemetry client") + } else { + Telemetry::disabled() + }; + let rt = tokio::runtime::Runtime::new().expect("failed to build runtime"); - match try_main(&rt, &mut bootstrap_log_guard, &mut telemetry) { + match try_main(cli, &rt, &mut bootstrap_log_guard, &mut telemetry) { Ok(()) => { rt.block_on(telemetry.stop()); @@ -46,12 +53,11 @@ fn main() -> ExitCode { } fn try_main( + cli: Cli, rt: &Runtime, bootstrap_log_guard: &mut Option, telemetry: &mut Telemetry, ) -> Result<()> { - let cli = Cli::parse(); - let config = gui::RunConfig { inject_faults: cli.inject_faults, debug_update_check: cli.debug_update_check, diff --git a/rust/gui-client/src-tauri/src/service.rs b/rust/gui-client/src-tauri/src/service.rs index 286b0e9da..56e74a92b 100644 --- a/rust/gui-client/src-tauri/src/service.rs +++ b/rust/gui-client/src-tauri/src/service.rs @@ -281,6 +281,8 @@ impl<'a> Handler<'a> { ) -> Result { dns_controller.deactivate()?; + let telemetry = Telemetry::new().context("Failed to create telemetry client")?; + tracing::info!( server_pid = std::process::id(), "Listening for GUI to connect over IPC..." @@ -306,7 +308,7 @@ impl<'a> Handler<'a> { ipc_tx, log_filter_reloader, session: Session::None, - telemetry: Telemetry::default(), + telemetry, tun_device, dns_notifier, network_notifier, diff --git a/rust/headless-client/src/main.rs b/rust/headless-client/src/main.rs index eeb5e7853..5b430e61c 100644 --- a/rust/headless-client/src/main.rs +++ b/rust/headless-client/src/main.rs @@ -188,8 +188,9 @@ fn main() -> Result<()> { None => device_id::get_or_create().context("Could not get `firezone_id` from CLI, could not read it from disk, could not generate it and save it to disk")?.id, }; - let mut telemetry = Telemetry::default(); - if cli.is_telemetry_allowed() { + let mut telemetry = if cli.is_telemetry_allowed() { + let mut telemetry = Telemetry::new().context("Failed to create telemetry client")?; + rt.block_on(telemetry.start( cli.api_url.as_ref(), RELEASE, @@ -198,7 +199,11 @@ fn main() -> Result<()> { )); analytics::identify(RELEASE.to_owned(), None); - } + + telemetry + } else { + Telemetry::disabled() + }; tracing::info!(arch = std::env::consts::ARCH, version = VERSION); diff --git a/rust/logging/src/lib.rs b/rust/logging/src/lib.rs index 3be7620d3..1166cf0bc 100644 --- a/rust/logging/src/lib.rs +++ b/rust/logging/src/lib.rs @@ -112,7 +112,7 @@ fn parse_filter(directives: &str) -> Result { /// /// By prepending this directive to the active log filter, a simple directive like `debug` actually produces useful logs. /// If necessary, you can still activate logs from these crates by restating them in your directive with a lower filter, i.e. `netlink_proto=debug`. - const IRRELEVANT_CRATES: &str = "netlink_proto=warn,os_info=warn,rustls=warn,opentelemetry_sdk=info,opentelemetry=info,hyper_util=info"; + const IRRELEVANT_CRATES: &str = "netlink_proto=warn,os_info=warn,rustls=warn,opentelemetry_sdk=info,opentelemetry=info,hyper_util=info,h2=info"; let env_filter = if directives.is_empty() { EnvFilter::try_new(IRRELEVANT_CRATES)? diff --git a/rust/relay/server/src/main.rs b/rust/relay/server/src/main.rs index a465a61e3..65a5ebb82 100644 --- a/rust/relay/server/src/main.rs +++ b/rust/relay/server/src/main.rs @@ -125,15 +125,20 @@ fn main() { .build() .expect("Failed to build tokio runtime"); - let mut telemetry = Telemetry::default(); - if args.telemetry { + let mut telemetry = if args.telemetry { + let mut telemetry = Telemetry::new().expect("Failed to create telemetry client"); + runtime.block_on(telemetry.start( args.api_url.as_str(), VERSION.unwrap_or("unknown"), RELAY_DSN, String::new(), // Relays don't have a Firezone ID. )); - } + + telemetry + } else { + Telemetry::disabled() + }; match runtime.block_on(try_main(args)) { Ok(()) => runtime.block_on(telemetry.stop()), diff --git a/rust/telemetry/Cargo.toml b/rust/telemetry/Cargo.toml index 47b432de5..7cf3fae33 100644 --- a/rust/telemetry/Cargo.toml +++ b/rust/telemetry/Cargo.toml @@ -13,7 +13,7 @@ moka = { workspace = true, features = ["sync"] } opentelemetry = { workspace = true } opentelemetry_sdk = { workspace = true, features = ["metrics"] } parking_lot = { workspace = true } -reqwest = { workspace = true } +reqwest = { workspace = true, features = ["http2"] } sentry = { workspace = true, features = ["contexts", "backtrace", "debug-images", "panic", "reqwest", "rustls", "tracing", "release-health", "logs"] } serde = { workspace = true } serde_json = { workspace = true } diff --git a/rust/telemetry/src/lib.rs b/rust/telemetry/src/lib.rs index 2e4c7a520..8d37dd456 100644 --- a/rust/telemetry/src/lib.rs +++ b/rust/telemetry/src/lib.rs @@ -1,12 +1,21 @@ #![cfg_attr(test, allow(clippy::unwrap_used))] -use std::{borrow::Cow, collections::BTreeMap, fmt, str::FromStr, sync::Arc, time::Duration}; +use std::{ + borrow::Cow, + collections::BTreeMap, + fmt, + net::{SocketAddr, ToSocketAddrs as _}, + str::FromStr, + sync::Arc, + time::Duration, +}; use anyhow::{Context, Result, anyhow, bail}; use api_url::ApiUrl; use sentry::{ BeforeCallback, User, protocol::{Event, Log, LogAttribute, SessionStatus}, + transports::ReqwestHttpTransport, }; use sha2::Digest as _; @@ -22,34 +31,56 @@ mod posthog; pub use maybe_push_metrics_exporter::MaybePushMetricsExporter; pub use noop_push_metrics_exporter::NoopPushMetricsExporter; -pub struct Dsn(&'static str); +pub struct Dsn { + public_key: &'static str, + project_id: u64, +} // TODO: Dynamic DSN // Sentry docs say this does not need to be protected: // > DSNs are safe to keep public because they only allow submission of new events and related event data; they do not allow read access to any information. // -pub const ANDROID_DSN: Dsn = Dsn( - "https://928a6ee1f6af9734100b8bc89b2dc87d@o4507971108339712.ingest.us.sentry.io/4508175126233088", -); -pub const APPLE_DSN: Dsn = Dsn( - "https://66c71f83675f01abfffa8eb977bcbbf7@o4507971108339712.ingest.us.sentry.io/4508175177023488", -); -pub const GATEWAY_DSN: Dsn = Dsn( - "https://f763102cc3937199ec483fbdae63dfdc@o4507971108339712.ingest.us.sentry.io/4508162914451456", -); -pub const GUI_DSN: Dsn = Dsn( - "https://2e17bf5ed24a78c0ac9e84a5de2bd6fc@o4507971108339712.ingest.us.sentry.io/4508008945549312", -); -pub const HEADLESS_DSN: Dsn = Dsn( - "https://bc27dca8bb37be0142c48c4f89647c13@o4507971108339712.ingest.us.sentry.io/4508010028728320", -); -pub const RELAY_DSN: Dsn = Dsn( - "https://9d5f664d8f8f7f1716d4b63a58bcafd5@o4507971108339712.ingest.us.sentry.io/4508373298970624", -); -pub const TESTING: Dsn = Dsn( - "https://55ef451fca9054179a11f5d132c02f45@o4507971108339712.ingest.us.sentry.io/4508792604852224", -); +const INGEST_HOST: &str = "o4507971108339712.ingest.us.sentry.io"; + +pub const ANDROID_DSN: Dsn = Dsn { + public_key: "928a6ee1f6af9734100b8bc89b2dc87d", + project_id: 4508175126233088, +}; +pub const APPLE_DSN: Dsn = Dsn { + public_key: "66c71f83675f01abfffa8eb977bcbbf7", + project_id: 4508175177023488, +}; +pub const GATEWAY_DSN: Dsn = Dsn { + public_key: "f763102cc3937199ec483fbdae63dfdc", + project_id: 4508162914451456, +}; +pub const GUI_DSN: Dsn = Dsn { + public_key: "2e17bf5ed24a78c0ac9e84a5de2bd6fc", + project_id: 4508008945549312, +}; +pub const HEADLESS_DSN: Dsn = Dsn { + public_key: "bc27dca8bb37be0142c48c4f89647c13", + project_id: 4508010028728320, +}; +pub const RELAY_DSN: Dsn = Dsn { + public_key: "9d5f664d8f8f7f1716d4b63a58bcafd5", + project_id: 4508373298970624, +}; +pub const TESTING: Dsn = Dsn { + public_key: "55ef451fca9054179a11f5d132c02f45", + project_id: 4508792604852224, +}; + +impl fmt::Display for Dsn { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "https://{}@{INGEST_HOST}/{}", + self.public_key, self.project_id + ) + } +} #[derive(Debug, PartialEq, Clone, Copy)] pub(crate) enum Env { @@ -104,9 +135,9 @@ impl fmt::Display for Env { } } -#[derive(Default)] pub struct Telemetry { inner: Option, + transport: TransportFactory, } impl Drop for Telemetry { @@ -121,6 +152,22 @@ impl Drop for Telemetry { } impl Telemetry { + pub fn new() -> Result { + Ok(Self { + inner: Default::default(), + // The instances for `Telemetry` get created at the very beginning of each program entry-point. + // Therefore, it is safe to perform DNS resolution there right away before the tunnel is up. + transport: TransportFactory::resolve_ingest_host()?, + }) + } + + pub fn disabled() -> Self { + Self { + inner: None, + transport: TransportFactory::without_addresses(), + } + } + pub async fn start(&mut self, api_url: &str, release: &str, dsn: Dsn, firezone_id: String) { // Can't use URLs as `environment` directly, because Sentry doesn't allow slashes in environments. // @@ -156,17 +203,21 @@ impl Telemetry { feature_flags::evaluate_now(firezone_id.clone(), environment).await; tracing::info!(%environment, "Starting telemetry"); + let client_options = sentry::ClientOptions { + environment: Some(Cow::Borrowed(environment.as_str())), + // We can't get the release number ourselves because we don't know if we're embedded in a GUI Client or a Headless Client. + release: Some(release.to_owned().into()), + max_breadcrumbs: 500, + before_send: Some(event_rate_limiter(Duration::from_secs(60 * 5))), + enable_logs: true, + before_send_log: Some(Arc::new(append_tracing_fields_to_message)), + ..Default::default() + }; let inner = sentry::init(( - dsn.0, + dsn.to_string(), sentry::ClientOptions { - environment: Some(Cow::Borrowed(environment.as_str())), - // We can't get the release number ourselves because we don't know if we're embedded in a GUI Client or a Headless Client. - release: Some(release.to_owned().into()), - max_breadcrumbs: 500, - before_send: Some(event_rate_limiter(Duration::from_secs(60 * 5))), - enable_logs: true, - before_send_log: Some(Arc::new(append_tracing_fields_to_message)), - ..Default::default() + transport: Some(Arc::new(self.transport.clone())), + ..client_options }, )); // Configure scope on the main hub so that all threads will get the tags @@ -345,6 +396,47 @@ fn set_current_user(user: Option) { sentry::Hub::main().configure_scope(|scope| scope.set_user(user)); } +#[derive(Debug, Clone)] +pub struct TransportFactory { + ingest_domain_addresses: Vec, +} + +impl TransportFactory { + pub fn resolve_ingest_host() -> Result { + let resolved_addresses = (INGEST_HOST, 443u16) + .to_socket_addrs() + .with_context(|| format!("Failed to resolve {INGEST_HOST}"))? + .collect(); + + tracing::debug!(host = %INGEST_HOST, addresses = ?resolved_addresses, "Resolved ingest host IPs"); + + Ok(Self { + ingest_domain_addresses: resolved_addresses, + }) + } + + fn without_addresses() -> Self { + Self { + ingest_domain_addresses: Default::default(), + } + } +} + +impl sentry::TransportFactory for TransportFactory { + fn create_transport(&self, options: &sentry::ClientOptions) -> Arc { + let client = reqwest::ClientBuilder::new() + .http2_prior_knowledge() + .http2_keep_alive_while_idle(true) + .http2_keep_alive_timeout(Duration::from_secs(1)) + .http2_keep_alive_interval(Duration::from_secs(5)) // Ensure we detect broken connections, i.e. when enabling / disabling the Internet Resource. + .resolve_to_addrs(INGEST_HOST, &self.ingest_domain_addresses) + .build() + .expect("Failed to build HTTP client"); + + Arc::new(ReqwestHttpTransport::with_client(options, client)) + } +} + #[cfg(test)] mod tests { use std::time::SystemTime; @@ -353,7 +445,7 @@ mod tests { #[tokio::test] async fn starting_session_for_unsupported_env_disables_current_one() { - let mut telemetry = Telemetry::default(); + let mut telemetry = Telemetry::new().unwrap(); telemetry .start("wss://api.firez.one", "1.0.0", TESTING, String::new()) .await;