diff --git a/rust/Cargo.lock b/rust/Cargo.lock index a5417e0c3..342ae5420 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -2386,6 +2386,7 @@ dependencies = [ "nix 0.30.1", "num_cpus", "opentelemetry", + "opentelemetry-otlp", "opentelemetry-stdout", "opentelemetry_sdk", "phoenix-channel", @@ -2583,6 +2584,7 @@ name = "firezone-telemetry" version = "0.1.0" dependencies = [ "anyhow", + "futures", "hex", "ip-packet", "moka", diff --git a/rust/gateway/Cargo.toml b/rust/gateway/Cargo.toml index b0978d344..a0bcdc49a 100644 --- a/rust/gateway/Cargo.toml +++ b/rust/gateway/Cargo.toml @@ -27,6 +27,7 @@ libc = { workspace = true, features = ["std", "const-extern-fn", "extra_traits"] moka = { workspace = true, features = ["future"] } num_cpus = { workspace = true } opentelemetry = { workspace = true, features = ["metrics"] } +opentelemetry-otlp = { workspace = true, features = ["metrics", "grpc-tonic"] } opentelemetry-stdout = { workspace = true, features = ["metrics"] } opentelemetry_sdk = { workspace = true, features = ["rt-tokio"] } phoenix-channel = { workspace = true } diff --git a/rust/gateway/src/main.rs b/rust/gateway/src/main.rs index 8a4d4b575..a0b7a74b4 100644 --- a/rust/gateway/src/main.rs +++ b/rust/gateway/src/main.rs @@ -7,10 +7,13 @@ use firezone_bin_shared::{ platform::{tcp_socket_factory, udp_socket_factory}, }; -use firezone_telemetry::{Telemetry, otel}; +use firezone_telemetry::{ + MaybePushMetricsExporter, NoopPushMetricsExporter, Telemetry, feature_flags, otel, +}; use firezone_tunnel::GatewayTunnel; use ip_packet::IpPacket; -use opentelemetry_sdk::metrics::{PeriodicReader, SdkMeterProvider}; +use opentelemetry_otlp::WithExportConfig; +use opentelemetry_sdk::metrics::SdkMeterProvider; use phoenix_channel::LoginUrl; use phoenix_channel::get_user_agent; @@ -115,17 +118,34 @@ async fn try_main(cli: Cli, telemetry: &mut Telemetry) -> Result<()> { .await; } - if cli.metrics { - let exporter = opentelemetry_stdout::MetricExporter::default(); - let reader = PeriodicReader::builder(exporter).build(); - let provider = SdkMeterProvider::builder() - .with_reader(reader) - .with_resource(otel::default_resource_with([ - otel::attr::service_name!(), - otel::attr::service_version!(), - otel::attr::service_instance_id(firezone_id.clone()), - ])) - .build(); + if let Some(backend) = cli.metrics { + let resource = otel::default_resource_with([ + otel::attr::service_name!(), + otel::attr::service_version!(), + otel::attr::service_instance_id(firezone_id.clone()), + ]); + + let provider = match (backend, cli.otlp_grpc_endpoint) { + (MetricsExporter::Stdout, _) => SdkMeterProvider::builder() + .with_periodic_exporter(opentelemetry_stdout::MetricExporter::default()) + .with_resource(resource) + .build(), + (MetricsExporter::OtelCollector, Some(endpoint)) => SdkMeterProvider::builder() + .with_periodic_exporter(tonic_otlp_exporter(endpoint)?) + .with_resource(resource) + .build(), + (MetricsExporter::OtelCollector, None) => SdkMeterProvider::builder() + .with_periodic_exporter(MaybePushMetricsExporter { + inner: { + // TODO: Once Firezone has a hosted OTLP exporter, it will go here. + + NoopPushMetricsExporter + }, + should_export: feature_flags::export_metrics, + }) + .with_resource(resource) + .build(), + }; opentelemetry::global::set_meter_provider(provider); } @@ -195,6 +215,18 @@ async fn try_main(cli: Cli, telemetry: &mut Telemetry) -> Result<()> { } } +fn tonic_otlp_exporter( + endpoint: String, +) -> Result { + let metric_exporter = opentelemetry_otlp::MetricExporter::builder() + .with_tonic() + .with_endpoint(format!("http://{endpoint}")) + .build() + .context("Failed to build OTLP metric exporter")?; + + Ok(metric_exporter) +} + async fn get_firezone_id(env_id: Option) -> Result { if let Some(id) = env_id && !id.is_empty() @@ -250,9 +282,15 @@ struct Cli { #[arg(long, env = "FIREZONE_NUM_TUN_THREADS", default_value_t)] tun_threads: NumThreads, - /// Dump internal metrics to stdout every 60s. - #[arg(long, hide = true, env = "FIREZONE_METRICS", default_value_t = false)] - metrics: bool, + /// Where to export metrics to. + #[arg(long, hide = true, env = "FIREZONE_METRICS")] + metrics: Option, + + /// Send metrics to a custom OTLP collector. + /// + /// By default, Firezone's hosted OTLP collector is used. + #[arg(long, env, hide = true)] + otlp_grpc_endpoint: Option, /// Validates the checksums of all packets leaving the TUN device. #[arg( @@ -264,6 +302,12 @@ struct Cli { validate_checksums: bool, } +#[derive(Debug, Clone, Copy, clap::ValueEnum)] +enum MetricsExporter { + Stdout, + OtelCollector, +} + impl Cli { fn is_telemetry_allowed(&self) -> bool { !self.no_telemetry diff --git a/rust/headless-client/src/main.rs b/rust/headless-client/src/main.rs index b2454b4f6..54dc501fd 100644 --- a/rust/headless-client/src/main.rs +++ b/rust/headless-client/src/main.rs @@ -102,8 +102,8 @@ struct Cli { no_telemetry: bool, /// Dump internal metrics to stdout every 60s. - #[arg(long, env = "FIREZONE_METRICS", default_value_t = false)] - metrics: bool, + #[arg(long, hide = true, env = "FIREZONE_METRICS")] + metrics: Option, /// A filesystem path where the token can be found // Apparently passing secrets through stdin is the most secure method, but @@ -114,6 +114,11 @@ struct Cli { token_path: PathBuf, } +#[derive(Debug, Clone, Copy, clap::ValueEnum)] +enum MetricsExporter { + Stdout, +} + impl Cli { fn is_telemetry_allowed(&self) -> bool { !self.no_telemetry @@ -223,7 +228,7 @@ fn main() -> Result<()> { let mut last_connlib_start_instant = Some(Instant::now()); rt.block_on(async { - if cli.metrics { + if let Some(MetricsExporter::Stdout) = cli.metrics { let exporter = opentelemetry_stdout::MetricExporter::default(); let reader = PeriodicReader::builder(exporter).build(); let provider = SdkMeterProvider::builder() diff --git a/rust/telemetry/Cargo.toml b/rust/telemetry/Cargo.toml index a6b24dc5a..9902b2db0 100644 --- a/rust/telemetry/Cargo.toml +++ b/rust/telemetry/Cargo.toml @@ -6,11 +6,12 @@ license = { workspace = true } [dependencies] anyhow = { workspace = true } +futures = { workspace = true } hex = { workspace = true } ip-packet = { workspace = true } moka = { workspace = true, features = ["sync"] } opentelemetry = { workspace = true } -opentelemetry_sdk = { workspace = true } +opentelemetry_sdk = { workspace = true, features = ["metrics"] } parking_lot = { workspace = true } reqwest = { workspace = true } sentry = { workspace = true, features = ["contexts", "backtrace", "debug-images", "panic", "reqwest", "rustls", "tracing", "release-health", "logs"] } diff --git a/rust/telemetry/src/feature_flags.rs b/rust/telemetry/src/feature_flags.rs index d9f9c12de..e3b9be0a4 100644 --- a/rust/telemetry/src/feature_flags.rs +++ b/rust/telemetry/src/feature_flags.rs @@ -39,6 +39,10 @@ pub fn map_enobufs_to_would_block() -> bool { FEATURE_FLAGS.map_enobufs_to_wouldblock() } +pub fn export_metrics() -> bool { + false // Placeholder until we actually deploy an OTEL collector. +} + pub(crate) async fn evaluate_now(user_id: String, env: Env) { if user_id.is_empty() { return; diff --git a/rust/telemetry/src/lib.rs b/rust/telemetry/src/lib.rs index d77326bd2..1078436af 100644 --- a/rust/telemetry/src/lib.rs +++ b/rust/telemetry/src/lib.rs @@ -15,8 +15,13 @@ pub mod feature_flags; pub mod otel; mod api_url; +mod maybe_push_metrics_exporter; +mod noop_push_metrics_exporter; mod posthog; +pub use maybe_push_metrics_exporter::MaybePushMetricsExporter; +pub use noop_push_metrics_exporter::NoopPushMetricsExporter; + pub struct Dsn(&'static str); // TODO: Dynamic DSN diff --git a/rust/telemetry/src/maybe_push_metrics_exporter.rs b/rust/telemetry/src/maybe_push_metrics_exporter.rs new file mode 100644 index 000000000..da244532b --- /dev/null +++ b/rust/telemetry/src/maybe_push_metrics_exporter.rs @@ -0,0 +1,38 @@ +use std::future::Future; + +use futures::future::Either; +use opentelemetry_sdk::{ + error::OTelSdkResult, + metrics::{Temporality, data::ResourceMetrics, exporter::PushMetricExporter}, +}; + +pub struct MaybePushMetricsExporter { + pub inner: E, + pub should_export: F, +} + +impl PushMetricExporter for MaybePushMetricsExporter +where + E: PushMetricExporter, + F: Fn() -> bool + Send + Sync + 'static, +{ + fn export(&self, metrics: &mut ResourceMetrics) -> impl Future + Send { + if (self.should_export)() { + return Either::Left(self.inner.export(metrics)); + } + + Either::Right(std::future::ready(Ok(()))) + } + + fn force_flush(&self) -> OTelSdkResult { + self.inner.force_flush() + } + + fn shutdown(&self) -> OTelSdkResult { + self.inner.shutdown() + } + + fn temporality(&self) -> Temporality { + self.inner.temporality() + } +} diff --git a/rust/telemetry/src/noop_push_metrics_exporter.rs b/rust/telemetry/src/noop_push_metrics_exporter.rs new file mode 100644 index 000000000..5a7955d24 --- /dev/null +++ b/rust/telemetry/src/noop_push_metrics_exporter.rs @@ -0,0 +1,26 @@ +use std::future::Future; + +use opentelemetry_sdk::{ + error::OTelSdkResult, + metrics::{Temporality, data::ResourceMetrics, exporter::PushMetricExporter}, +}; + +pub struct NoopPushMetricsExporter; + +impl PushMetricExporter for NoopPushMetricsExporter { + fn export(&self, _: &mut ResourceMetrics) -> impl Future + Send { + std::future::ready(Ok(())) + } + + fn force_flush(&self) -> OTelSdkResult { + Ok(()) + } + + fn shutdown(&self) -> OTelSdkResult { + Ok(()) + } + + fn temporality(&self) -> Temporality { + Temporality::default() + } +} diff --git a/rust/telemetry/src/otel.rs b/rust/telemetry/src/otel.rs index 0f6d780e3..97e69c674 100644 --- a/rust/telemetry/src/otel.rs +++ b/rust/telemetry/src/otel.rs @@ -1,7 +1,7 @@ use opentelemetry::KeyValue; use opentelemetry_sdk::{ Resource, - resource::{ResourceDetector, TelemetryResourceDetector}, + resource::{EnvResourceDetector, ResourceDetector, TelemetryResourceDetector}, }; pub mod attr { @@ -117,6 +117,7 @@ pub fn default_resource_with(attributes: [KeyValue; N]) -> Resou Resource::builder_empty() .with_detector(Box::new(TelemetryResourceDetector)) .with_detector(Box::new(OsResourceDetector)) + .with_detector(Box::new(EnvResourceDetector::new())) .with_attributes(attributes) .build() }