From b7b7626cfa1beb89e6ca7b8a84a335a563b38b97 Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Wed, 23 Oct 2024 07:40:28 +1100 Subject: [PATCH] feat(gateway): add error reporting via Sentry (#7103) Similar to the GUI and headless clients, adding error reporting via Sentry should give us much better insight into how well gateways are performing. Resolves: #7099. --------- Signed-off-by: Thomas Eizinger Co-authored-by: Jamil --- rust/Cargo.lock | 1 + rust/gateway/Cargo.toml | 1 + rust/gateway/src/main.rs | 27 ++++++++++++++++--- rust/telemetry/src/lib.rs | 1 + website/src/app/kb/deploy/gateways/readme.mdx | 7 +++++ website/src/components/Changelog/Gateway.tsx | 4 +++ 6 files changed, 38 insertions(+), 3 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index d1feef4ca..11d18273d 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -1865,6 +1865,7 @@ dependencies = [ "either", "firezone-bin-shared", "firezone-logging", + "firezone-telemetry", "firezone-tunnel", "futures", "futures-bounded", diff --git a/rust/gateway/Cargo.toml b/rust/gateway/Cargo.toml index 2f0a42958..af4603f72 100644 --- a/rust/gateway/Cargo.toml +++ b/rust/gateway/Cargo.toml @@ -18,6 +18,7 @@ domain = { workspace = true } either = "1" firezone-bin-shared = { workspace = true } firezone-logging = { workspace = true } +firezone-telemetry = { workspace = true } firezone-tunnel = { workspace = true } futures = "0.3.29" futures-bounded = { workspace = true } diff --git a/rust/gateway/src/main.rs b/rust/gateway/src/main.rs index 667cd38be..30dd453f0 100644 --- a/rust/gateway/src/main.rs +++ b/rust/gateway/src/main.rs @@ -8,6 +8,7 @@ use firezone_bin_shared::{ TunDeviceManager, }; use firezone_logging::anyhow_dyn_err; +use firezone_telemetry::Telemetry; use firezone_tunnel::messages::Interface; use firezone_tunnel::{GatewayTunnel, IPV4_PEERS, IPV6_PEERS}; use phoenix_channel::get_user_agent; @@ -37,19 +38,29 @@ async fn main() { .install_default() .expect("Calling `install_default` only 
once per process should always succeed"); + let cli = Cli::parse(); + let telemetry = Telemetry::default(); + if cli.is_telemetry_allowed() { + telemetry.start( + cli.api_url.as_str(), + firezone_bin_shared::git_version!("gateway-*"), + firezone_telemetry::GATEWAY_DSN, + ); + } + // Enforce errors only being printed on a single line using the technique recommended in the anyhow docs: // https://docs.rs/anyhow/latest/anyhow/struct.Error.html#display-representations // // By default, `anyhow` prints a stacktrace when it exits. // That looks like a "crash" but we "just" exit with a fatal error. - if let Err(e) = try_main().await { + if let Err(e) = try_main(cli).await { tracing::error!(error = anyhow_dyn_err(&e)); + firezone_telemetry::capture_anyhow(&e); std::process::exit(1); } } -async fn try_main() -> Result<()> { - let cli = Cli::parse(); +async fn try_main(cli: Cli) -> Result<()> { firezone_logging::setup_global_subscriber(layer::Identity::new()); let firezone_id = get_firezone_id(cli.firezone_id).await @@ -174,6 +185,10 @@ struct Cli { #[arg(short = 'n', long, env = "FIREZONE_NAME")] firezone_name: Option, + /// Disable the sentry.io crash-reporting agent + #[arg(long, env = "FIREZONE_NO_TELEMETRY", default_value_t = false)] + no_telemetry: bool, + #[command(flatten)] health_check: http_health_check::HealthCheckArgs, @@ -181,3 +196,9 @@ struct Cli { #[arg(short = 'i', long, env = "FIREZONE_ID")] pub firezone_id: Option, } + +impl Cli { + fn is_telemetry_allowed(&self) -> bool { + !self.no_telemetry + } +} diff --git a/rust/telemetry/src/lib.rs b/rust/telemetry/src/lib.rs index bede25af2..2f83dc29c 100644 --- a/rust/telemetry/src/lib.rs +++ b/rust/telemetry/src/lib.rs @@ -15,6 +15,7 @@ pub struct Dsn(&'static str); // > DSNs are safe to keep public because they only allow submission of new events and related event data; they do not allow read access to any information. 
// +pub const GATEWAY_DSN: Dsn = Dsn("https://f763102cc3937199ec483fbdae63dfdc@o4507971108339712.ingest.us.sentry.io/4508162914451456"); pub const GUI_DSN: Dsn = Dsn("https://2e17bf5ed24a78c0ac9e84a5de2bd6fc@o4507971108339712.ingest.us.sentry.io/4508008945549312"); pub const HEADLESS_DSN: Dsn = Dsn("https://bc27dca8bb37be0142c48c4f89647c13@o4507971108339712.ingest.us.sentry.io/4508010028728320"); pub const IPC_SERVICE_DSN: Dsn = Dsn("https://0590b89fd4479494a1e7ffa4dc705001@o4507971108339712.ingest.us.sentry.io/4508008896069632"); diff --git a/website/src/app/kb/deploy/gateways/readme.mdx b/website/src/app/kb/deploy/gateways/readme.mdx index 4264d8af7..6a169e75b 100644 --- a/website/src/app/kb/deploy/gateways/readme.mdx +++ b/website/src/app/kb/deploy/gateways/readme.mdx @@ -61,6 +61,7 @@ you'll need to make sure the following outbound traffic is allowed: | N/A | See [relay-ips.json](/relay-ips.json) | `3478` | STUN | STUN protocol signaling | | N/A | See [relay-ips.json](/relay-ips.json) | `49152-65535` | TURN | TURN protocol channel data | | github.com, www.firezone.dev | Varies | `443` | HTTPS | Only required for [Gateway upgrades](/kb/administer/upgrading). | +| sentry.io | Varies | `443` | HTTPS | Crash-reporting, see [Telemetry](#telemetry) | ## Where to deploy Gateways @@ -156,6 +157,12 @@ It's a good idea to keep your Gateways up to date with the latest version available. See [upgrading Gateways](/kb/administer/upgrading) for ways to automate this. +## Telemetry + +By default, Gateways will run a https://sentry.io crash-reporting agent. If +you'd like to opt out of this, set the environment variable +`FIREZONE_NO_TELEMETRY=1`. 
+ Next: Create Resources diff --git a/website/src/components/Changelog/Gateway.tsx b/website/src/components/Changelog/Gateway.tsx index 7c191b31f..e8e8378cc 100644 --- a/website/src/components/Changelog/Gateway.tsx +++ b/website/src/components/Changelog/Gateway.tsx @@ -19,6 +19,10 @@ export default function Gateway() { Implements support for the new control protocol; delivering faster and more robust connection establishment. + + Adds on-by-default error reporting using sentry.io. + Disable by setting `FIREZONE_NO_TELEMETRY=1`. +