fix(relay): don't hang when connecting to OTLP exporter (#6034)

The dependency update in #6003 introduced a regression: connecting to
the OTLP exporter hung forever, so the relay failed to start up.

The hang seems to be related to _dropping_ the `meter_provider`. This
change in behaviour is actually called out in the changelog:
https://github.com/open-telemetry/opentelemetry-rust/blob/main/opentelemetry-otlp/CHANGELOG.md#v0170.

By registering the tracer and meter providers globally instead of
letting them drop, the relay starts up just fine.
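
As a reference, here is a minimal sketch of the metrics half of the fix,
mirroring the diff below (opentelemetry-otlp 0.17-style pipeline API; the
standalone helper function and its signature are illustrative, not the
relay's actual code):

```rust
use anyhow::Context as _;
use opentelemetry::global;
use opentelemetry_otlp::WithExportConfig;
use opentelemetry_sdk::runtime::Tokio;

// Illustrative helper; the relay does the equivalent inside `setup_tracing`.
fn init_otlp_metrics(grpc_endpoint: String) -> anyhow::Result<()> {
    let exporter = opentelemetry_otlp::new_exporter()
        .tonic()
        .with_endpoint(grpc_endpoint);

    let meter_provider = opentelemetry_otlp::new_pipeline()
        .metrics(Tokio)
        .with_exporter(exporter)
        .build()
        .context("Failed to create OTLP metrics pipeline")?;

    // Register the provider globally instead of letting it fall out of scope;
    // dropping it at the end of this function is what appeared to cause the hang.
    global::set_meter_provider(meter_provider);

    Ok(())
}
```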

To ensure this doesn't regress again, we add an OTEL collector to our
`docker-compose.yml` and configure the `relay-1` service to connect to it.
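
For illustration, the collector endpoint reaches the relay as an
environment variable. The sketch below shows one way to accept
`OTLP_GRPC_ENDPOINT` via clap (requires clap's `env` feature); the struct
and field names are assumptions for the example, not the relay's real
`Args`:

```rust
use clap::Parser;

/// Hypothetical argument struct; names are illustrative only.
#[derive(Parser, Debug)]
struct Args {
    /// gRPC endpoint of an OTLP collector, e.g. `otlp:4317` as set in docker-compose.yml.
    #[arg(long, env = "OTLP_GRPC_ENDPOINT")]
    otlp_grpc_endpoint: Option<String>,
}

fn main() {
    let args = Args::parse();

    if let Some(endpoint) = args.otlp_grpc_endpoint.as_deref() {
        println!("exporting OTLP traces and metrics to {endpoint}");
    }
}
```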

Author:       Thomas Eizinger
Date:         2024-07-26 02:36:42 +10:00
Committed by: GitHub
Parent:       cc1478adc2
Commit:       f800875aff

2 changed files with 20 additions and 12 deletions

docker-compose.yml

@@ -429,6 +429,7 @@ services:
       RUST_LOG: ${RUST_LOG:-debug}
       RUST_BACKTRACE: 1
       FIREZONE_API_URL: ws://api:8081
+      OTLP_GRPC_ENDPOINT: otlp:4317
     build:
       target: dev
       context: rust
@@ -491,6 +492,10 @@ services:
       app:
         ipv4_address: ${RELAY_2_PUBLIC_IP4_ADDR:-172.28.0.201}
+  otel:
+    image: otel/opentelemetry-collector:latest
+    networks:
+      app:
 # IPv6 is currently causing flakiness with GH actions and on our testbed.
 # Disabling until there's more time to debug.

relay `main.rs`

@@ -7,8 +7,6 @@ use firezone_relay::{
     PeerSocket, Server, Sleep,
 };
 use futures::{future, FutureExt};
-use opentelemetry::KeyValue;
-use opentelemetry_otlp::WithExportConfig;
 use phoenix_channel::{Event, LoginUrl, PhoenixChannel};
 use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
@@ -175,7 +173,9 @@ async fn main() -> Result<()> {
 ///
 /// If the user has specified [`TraceCollector::Otlp`], we will set up an OTLP-exporter that connects to an OTLP collector specified at `Args.otlp_grpc_endpoint`.
 fn setup_tracing(args: &Args) -> Result<()> {
-    use opentelemetry::trace::TracerProvider as _;
+    use opentelemetry::{global, trace::TracerProvider as _, KeyValue};
+    use opentelemetry_otlp::WithExportConfig;
+    use opentelemetry_sdk::{runtime::Tokio, trace::Config, Resource};
     // Use `tracing_core` directly for the temp logger because that one does not initialize a `log` logger.
     // A `log` Logger cannot be unset once set, so we can't use that for our temp logger during the setup.
@@ -196,14 +196,16 @@ fn setup_tracing(args: &Args) -> Result<()> {
         .tonic()
         .with_endpoint(grpc_endpoint.clone());
-    let provider = opentelemetry_otlp::new_pipeline()
+    let tracer_provider = opentelemetry_otlp::new_pipeline()
         .tracing()
         .with_exporter(exporter)
-        .with_trace_config(opentelemetry_sdk::trace::Config::default().with_resource(
-            opentelemetry_sdk::Resource::new(vec![KeyValue::new("service.name", "relay")]),
-        ))
-        .install_batch(opentelemetry_sdk::runtime::Tokio)
+        .with_trace_config(
+            Config::default()
+                .with_resource(Resource::new(vec![KeyValue::new("service.name", "relay")])),
+        )
+        .install_batch(Tokio)
         .context("Failed to create OTLP trace pipeline")?;
+    global::set_tracer_provider(tracer_provider.clone());
     tracing::trace!(target: "relay", "Successfully initialized trace provider on tokio runtime");
@@ -211,19 +213,20 @@ fn setup_tracing(args: &Args) -> Result<()> {
         .tonic()
         .with_endpoint(grpc_endpoint);
-    opentelemetry_otlp::new_pipeline()
-        .metrics(opentelemetry_sdk::runtime::Tokio)
+    let meter_provider = opentelemetry_otlp::new_pipeline()
+        .metrics(Tokio)
         .with_exporter(exporter)
         .build()
         .context("Failed to create OTLP metrics pipeline")?;
+    global::set_meter_provider(meter_provider);
-    tracing::trace!(target: "relay", "Successfully initialized metric controller on tokio runtime");
+    tracing::trace!(target: "relay", "Successfully initialized metric provider on tokio runtime");
     tracing_subscriber::registry()
         .with(log_layer(args).with_filter(env_filter()))
         .with(
             tracing_opentelemetry::layer()
-                .with_tracer(provider.tracer("relay"))
+                .with_tracer(tracer_provider.tracer("relay"))
                 .with_filter(env_filter()),
         )
         .into()
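
With the providers registered via `opentelemetry::global`, instrumentation
that goes through the global OpenTelemetry API now resolves to the OTLP
pipeline instead of the default no-op implementations. A purely
illustrative call site (the instrument name and attributes are not taken
from the relay code):

```rust
use opentelemetry::{global, KeyValue};

fn record_allocation() {
    // Resolved against the globally registered meter provider; before
    // `global::set_meter_provider`, this would return a no-op meter.
    let meter = global::meter("relay");

    // In real code the instrument would typically be created once and reused.
    let counter = meter.u64_counter("relay.allocations").init();
    counter.add(1, &[KeyValue::new("address_family", "ipv4")]);
}
```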