mirror of
https://github.com/outbackdingo/firezone.git
synced 2026-01-27 10:18:54 +00:00
feat(gateway): allow exporting metrics to an OTEL collector (#9838)
As a first step in preparation for sending OTEL metrics from Clients and Gateways to a cloud-hosted OTEL collector, we extend the CLI of the Gateway with configuration options to provide a gRPC endpoint to an OTEL collector. If `FIREZONE_METRICS` is set to `otel-collector` and an endpoint is configured via `OTLP_GRPC_ENDPOINT`, we will report our metrics to that collector. The future plan for extending this is that if `FIREZONE_METRICS` is set to `otel-collector` (which will likely be the default) and no `OTLP_GRPC_ENDPOINT` is set, then we will use our own hosted OTEL collector and report metrics IF the `export-metrics` feature-flag is set to `true`. This is similar to the integration we have done for streaming logs to Sentry. We can therefore enable it at a similar granularity as we do with the logs and, e.g., only enable it for the `firezone` account to start with. In the meantime, customers can already make use of those metrics if they'd like by using the current integration. Resolves: #1550 Related: #7419 --------- Co-authored-by: Antoine Labarussias <antoinelabarussias@gmail.com>
This commit is contained in:
2
rust/Cargo.lock
generated
2
rust/Cargo.lock
generated
@@ -2386,6 +2386,7 @@ dependencies = [
|
||||
"nix 0.30.1",
|
||||
"num_cpus",
|
||||
"opentelemetry",
|
||||
"opentelemetry-otlp",
|
||||
"opentelemetry-stdout",
|
||||
"opentelemetry_sdk",
|
||||
"phoenix-channel",
|
||||
@@ -2583,6 +2584,7 @@ name = "firezone-telemetry"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"futures",
|
||||
"hex",
|
||||
"ip-packet",
|
||||
"moka",
|
||||
|
||||
@@ -27,6 +27,7 @@ libc = { workspace = true, features = ["std", "const-extern-fn", "extra_traits"]
|
||||
moka = { workspace = true, features = ["future"] }
|
||||
num_cpus = { workspace = true }
|
||||
opentelemetry = { workspace = true, features = ["metrics"] }
|
||||
opentelemetry-otlp = { workspace = true, features = ["metrics", "grpc-tonic"] }
|
||||
opentelemetry-stdout = { workspace = true, features = ["metrics"] }
|
||||
opentelemetry_sdk = { workspace = true, features = ["rt-tokio"] }
|
||||
phoenix-channel = { workspace = true }
|
||||
|
||||
@@ -7,10 +7,13 @@ use firezone_bin_shared::{
|
||||
platform::{tcp_socket_factory, udp_socket_factory},
|
||||
};
|
||||
|
||||
use firezone_telemetry::{Telemetry, otel};
|
||||
use firezone_telemetry::{
|
||||
MaybePushMetricsExporter, NoopPushMetricsExporter, Telemetry, feature_flags, otel,
|
||||
};
|
||||
use firezone_tunnel::GatewayTunnel;
|
||||
use ip_packet::IpPacket;
|
||||
use opentelemetry_sdk::metrics::{PeriodicReader, SdkMeterProvider};
|
||||
use opentelemetry_otlp::WithExportConfig;
|
||||
use opentelemetry_sdk::metrics::SdkMeterProvider;
|
||||
use phoenix_channel::LoginUrl;
|
||||
use phoenix_channel::get_user_agent;
|
||||
|
||||
@@ -115,17 +118,34 @@ async fn try_main(cli: Cli, telemetry: &mut Telemetry) -> Result<()> {
|
||||
.await;
|
||||
}
|
||||
|
||||
if cli.metrics {
|
||||
let exporter = opentelemetry_stdout::MetricExporter::default();
|
||||
let reader = PeriodicReader::builder(exporter).build();
|
||||
let provider = SdkMeterProvider::builder()
|
||||
.with_reader(reader)
|
||||
.with_resource(otel::default_resource_with([
|
||||
otel::attr::service_name!(),
|
||||
otel::attr::service_version!(),
|
||||
otel::attr::service_instance_id(firezone_id.clone()),
|
||||
]))
|
||||
.build();
|
||||
if let Some(backend) = cli.metrics {
|
||||
let resource = otel::default_resource_with([
|
||||
otel::attr::service_name!(),
|
||||
otel::attr::service_version!(),
|
||||
otel::attr::service_instance_id(firezone_id.clone()),
|
||||
]);
|
||||
|
||||
let provider = match (backend, cli.otlp_grpc_endpoint) {
|
||||
(MetricsExporter::Stdout, _) => SdkMeterProvider::builder()
|
||||
.with_periodic_exporter(opentelemetry_stdout::MetricExporter::default())
|
||||
.with_resource(resource)
|
||||
.build(),
|
||||
(MetricsExporter::OtelCollector, Some(endpoint)) => SdkMeterProvider::builder()
|
||||
.with_periodic_exporter(tonic_otlp_exporter(endpoint)?)
|
||||
.with_resource(resource)
|
||||
.build(),
|
||||
(MetricsExporter::OtelCollector, None) => SdkMeterProvider::builder()
|
||||
.with_periodic_exporter(MaybePushMetricsExporter {
|
||||
inner: {
|
||||
// TODO: Once Firezone has a hosted OTLP exporter, it will go here.
|
||||
|
||||
NoopPushMetricsExporter
|
||||
},
|
||||
should_export: feature_flags::export_metrics,
|
||||
})
|
||||
.with_resource(resource)
|
||||
.build(),
|
||||
};
|
||||
|
||||
opentelemetry::global::set_meter_provider(provider);
|
||||
}
|
||||
@@ -195,6 +215,18 @@ async fn try_main(cli: Cli, telemetry: &mut Telemetry) -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds an OTLP metric exporter that pushes metrics over gRPC (tonic) to `endpoint`.
///
/// `endpoint` may be a bare `host:port` authority, in which case `http://` is prepended,
/// or a full URL that already carries a scheme (e.g. `http://collector:4317`), which is
/// passed through unchanged.
///
/// # Errors
///
/// Returns an error if the underlying exporter builder fails.
fn tonic_otlp_exporter(
    endpoint: String,
) -> Result<opentelemetry_otlp::MetricExporter, anyhow::Error> {
    // Unconditionally prepending `http://` would turn `http://host:4317` into the
    // invalid `http://http://host:4317`, so only add a scheme when none is present.
    let url = if endpoint.contains("://") {
        endpoint
    } else {
        format!("http://{endpoint}")
    };

    opentelemetry_otlp::MetricExporter::builder()
        .with_tonic()
        .with_endpoint(url)
        .build()
        .context("Failed to build OTLP metric exporter")
}
|
||||
|
||||
async fn get_firezone_id(env_id: Option<String>) -> Result<String> {
|
||||
if let Some(id) = env_id
|
||||
&& !id.is_empty()
|
||||
@@ -250,9 +282,15 @@ struct Cli {
|
||||
#[arg(long, env = "FIREZONE_NUM_TUN_THREADS", default_value_t)]
|
||||
tun_threads: NumThreads,
|
||||
|
||||
/// Dump internal metrics to stdout every 60s.
|
||||
#[arg(long, hide = true, env = "FIREZONE_METRICS", default_value_t = false)]
|
||||
metrics: bool,
|
||||
/// Where to export metrics to.
|
||||
#[arg(long, hide = true, env = "FIREZONE_METRICS")]
|
||||
metrics: Option<MetricsExporter>,
|
||||
|
||||
/// Send metrics to a custom OTLP collector.
|
||||
///
|
||||
/// By default, Firezone's hosted OTLP collector is used.
|
||||
#[arg(long, env, hide = true)]
|
||||
otlp_grpc_endpoint: Option<String>,
|
||||
|
||||
/// Validates the checksums of all packets leaving the TUN device.
|
||||
#[arg(
|
||||
@@ -264,6 +302,12 @@ struct Cli {
|
||||
validate_checksums: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, clap::ValueEnum)]
|
||||
enum MetricsExporter {
|
||||
Stdout,
|
||||
OtelCollector,
|
||||
}
|
||||
|
||||
impl Cli {
|
||||
fn is_telemetry_allowed(&self) -> bool {
|
||||
!self.no_telemetry
|
||||
|
||||
@@ -102,8 +102,8 @@ struct Cli {
|
||||
no_telemetry: bool,
|
||||
|
||||
/// Dump internal metrics to stdout every 60s.
|
||||
#[arg(long, env = "FIREZONE_METRICS", default_value_t = false)]
|
||||
metrics: bool,
|
||||
#[arg(long, hide = true, env = "FIREZONE_METRICS")]
|
||||
metrics: Option<MetricsExporter>,
|
||||
|
||||
/// A filesystem path where the token can be found
|
||||
// Apparently passing secrets through stdin is the most secure method, but
|
||||
@@ -114,6 +114,11 @@ struct Cli {
|
||||
token_path: PathBuf,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, clap::ValueEnum)]
|
||||
enum MetricsExporter {
|
||||
Stdout,
|
||||
}
|
||||
|
||||
impl Cli {
|
||||
fn is_telemetry_allowed(&self) -> bool {
|
||||
!self.no_telemetry
|
||||
@@ -223,7 +228,7 @@ fn main() -> Result<()> {
|
||||
let mut last_connlib_start_instant = Some(Instant::now());
|
||||
|
||||
rt.block_on(async {
|
||||
if cli.metrics {
|
||||
if let Some(MetricsExporter::Stdout) = cli.metrics {
|
||||
let exporter = opentelemetry_stdout::MetricExporter::default();
|
||||
let reader = PeriodicReader::builder(exporter).build();
|
||||
let provider = SdkMeterProvider::builder()
|
||||
|
||||
@@ -6,11 +6,12 @@ license = { workspace = true }
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
hex = { workspace = true }
|
||||
ip-packet = { workspace = true }
|
||||
moka = { workspace = true, features = ["sync"] }
|
||||
opentelemetry = { workspace = true }
|
||||
opentelemetry_sdk = { workspace = true }
|
||||
opentelemetry_sdk = { workspace = true, features = ["metrics"] }
|
||||
parking_lot = { workspace = true }
|
||||
reqwest = { workspace = true }
|
||||
sentry = { workspace = true, features = ["contexts", "backtrace", "debug-images", "panic", "reqwest", "rustls", "tracing", "release-health", "logs"] }
|
||||
|
||||
@@ -39,6 +39,10 @@ pub fn map_enobufs_to_would_block() -> bool {
|
||||
FEATURE_FLAGS.map_enobufs_to_wouldblock()
|
||||
}
|
||||
|
||||
/// Reports whether metrics should be exported.
///
/// Currently hard-wired to `false`.
pub fn export_metrics() -> bool {
    // Placeholder until we actually deploy an OTEL collector.
    false
}
|
||||
|
||||
pub(crate) async fn evaluate_now(user_id: String, env: Env) {
|
||||
if user_id.is_empty() {
|
||||
return;
|
||||
|
||||
@@ -15,8 +15,13 @@ pub mod feature_flags;
|
||||
pub mod otel;
|
||||
|
||||
mod api_url;
|
||||
mod maybe_push_metrics_exporter;
|
||||
mod noop_push_metrics_exporter;
|
||||
mod posthog;
|
||||
|
||||
pub use maybe_push_metrics_exporter::MaybePushMetricsExporter;
|
||||
pub use noop_push_metrics_exporter::NoopPushMetricsExporter;
|
||||
|
||||
pub struct Dsn(&'static str);
|
||||
|
||||
// TODO: Dynamic DSN
|
||||
|
||||
38
rust/telemetry/src/maybe_push_metrics_exporter.rs
Normal file
38
rust/telemetry/src/maybe_push_metrics_exporter.rs
Normal file
@@ -0,0 +1,38 @@
|
||||
use std::future::Future;
|
||||
|
||||
use futures::future::Either;
|
||||
use opentelemetry_sdk::{
|
||||
error::OTelSdkResult,
|
||||
metrics::{Temporality, data::ResourceMetrics, exporter::PushMetricExporter},
|
||||
};
|
||||
|
||||
pub struct MaybePushMetricsExporter<E, F> {
|
||||
pub inner: E,
|
||||
pub should_export: F,
|
||||
}
|
||||
|
||||
impl<E, F> PushMetricExporter for MaybePushMetricsExporter<E, F>
|
||||
where
|
||||
E: PushMetricExporter,
|
||||
F: Fn() -> bool + Send + Sync + 'static,
|
||||
{
|
||||
fn export(&self, metrics: &mut ResourceMetrics) -> impl Future<Output = OTelSdkResult> + Send {
|
||||
if (self.should_export)() {
|
||||
return Either::Left(self.inner.export(metrics));
|
||||
}
|
||||
|
||||
Either::Right(std::future::ready(Ok(())))
|
||||
}
|
||||
|
||||
fn force_flush(&self) -> OTelSdkResult {
|
||||
self.inner.force_flush()
|
||||
}
|
||||
|
||||
fn shutdown(&self) -> OTelSdkResult {
|
||||
self.inner.shutdown()
|
||||
}
|
||||
|
||||
fn temporality(&self) -> Temporality {
|
||||
self.inner.temporality()
|
||||
}
|
||||
}
|
||||
26
rust/telemetry/src/noop_push_metrics_exporter.rs
Normal file
26
rust/telemetry/src/noop_push_metrics_exporter.rs
Normal file
@@ -0,0 +1,26 @@
|
||||
use std::future::Future;
|
||||
|
||||
use opentelemetry_sdk::{
|
||||
error::OTelSdkResult,
|
||||
metrics::{Temporality, data::ResourceMetrics, exporter::PushMetricExporter},
|
||||
};
|
||||
|
||||
pub struct NoopPushMetricsExporter;
|
||||
|
||||
impl PushMetricExporter for NoopPushMetricsExporter {
|
||||
fn export(&self, _: &mut ResourceMetrics) -> impl Future<Output = OTelSdkResult> + Send {
|
||||
std::future::ready(Ok(()))
|
||||
}
|
||||
|
||||
fn force_flush(&self) -> OTelSdkResult {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn shutdown(&self) -> OTelSdkResult {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn temporality(&self) -> Temporality {
|
||||
Temporality::default()
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,7 @@
|
||||
use opentelemetry::KeyValue;
|
||||
use opentelemetry_sdk::{
|
||||
Resource,
|
||||
resource::{ResourceDetector, TelemetryResourceDetector},
|
||||
resource::{EnvResourceDetector, ResourceDetector, TelemetryResourceDetector},
|
||||
};
|
||||
|
||||
pub mod attr {
|
||||
@@ -117,6 +117,7 @@ pub fn default_resource_with<const N: usize>(attributes: [KeyValue; N]) -> Resou
|
||||
Resource::builder_empty()
|
||||
.with_detector(Box::new(TelemetryResourceDetector))
|
||||
.with_detector(Box::new(OsResourceDetector))
|
||||
.with_detector(Box::new(EnvResourceDetector::new()))
|
||||
.with_attributes(attributes)
|
||||
.build()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user