feat(relay): push spans to OTLP-receiver (#1995)

When I built https://github.com/firezone/firezone/pull/1994, I hadn't
yet fully understood how tracing works on Google Cloud. Logs and traces
are separated in Google Cloud. Most importantly, traces need to be
_pushed_ to Google Cloud whereas logs are scraped automatically. Logs
can _reference_ traces via particular fields, in particular
`logging.googleapis.com/spanId` and
`projects/{project_id}/traces/{trace_id}`.

Within the container-optimised OS that we are running on, we are already
authenticated to all the Google APIs. Thus, we can utilize the
`GcpAuthorizer` from the `opentelemetry_stackdriver` module which will
automatically obtain a token from the internal metadata endpoint. Thus
no external configuration is necessary.

We split the configuration for logs / traces into two components:

- `LOG_FORMAT`: Specifies how the logs are formatted. Can be `human`,
`json` or `google-cloud`.
- `TRACE_RECEIVER`: Optional. Specifies where the traces are sent to. If
specified, we will also slightly tweak the log configuration to embed
the project ID which allows Google Cloud Trace to cross-reference log
entries with traces.
This commit is contained in:
Thomas Eizinger
2023-09-08 15:45:44 +10:00
committed by GitHub
parent 6750f6fd65
commit cdfe086ca9
4 changed files with 416 additions and 29 deletions

308
rust/Cargo.lock generated
View File

@@ -302,6 +302,28 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "async-stream"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51"
dependencies = [
"async-stream-impl",
"futures-core",
"pin-project-lite",
]
[[package]]
name = "async-stream-impl"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.28",
]
[[package]]
name = "async-trait"
version = "0.1.73"
@@ -855,6 +877,15 @@ version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cace84e55f07e7301bae1c519df89cdad8cc3cd868413d3fdbdeca9ff3db484"
[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.8"
@@ -1040,6 +1071,27 @@ dependencies = [
"subtle",
]
[[package]]
name = "dirs-next"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
dependencies = [
"cfg-if 1.0.0",
"dirs-sys-next",
]
[[package]]
name = "dirs-sys-next"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
dependencies = [
"libc",
"redox_users",
"winapi",
]
[[package]]
name = "displaydoc"
version = "0.2.4"
@@ -1277,6 +1329,16 @@ dependencies = [
"wintun",
]
[[package]]
name = "flate2"
version = "1.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "fnv"
version = "1.0.7"
@@ -1394,6 +1456,31 @@ dependencies = [
"url",
]
[[package]]
name = "gcp_auth"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ab5724fa45095bf1965ff491e75182818ba13f2a8755b04d484a9ec6408b622"
dependencies = [
"async-trait",
"base64 0.21.3",
"dirs-next",
"hyper",
"hyper-rustls",
"ring",
"rustls 0.20.9",
"rustls-pemfile",
"serde",
"serde_json",
"thiserror",
"time 0.3.22",
"tokio",
"tracing",
"tracing-futures",
"url",
"which",
]
[[package]]
name = "generic-array"
version = "0.14.7"
@@ -1457,6 +1544,25 @@ dependencies = [
"subtle",
]
[[package]]
name = "h2"
version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833"
dependencies = [
"bytes",
"fnv",
"futures-core",
"futures-sink",
"futures-util",
"http",
"indexmap",
"slab",
"tokio",
"tokio-util",
"tracing",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
@@ -1585,6 +1691,7 @@ dependencies = [
"futures-channel",
"futures-core",
"futures-util",
"h2",
"http",
"http-body",
"httparse",
@@ -1598,6 +1705,32 @@ dependencies = [
"want",
]
[[package]]
name = "hyper-rustls"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c"
dependencies = [
"http",
"hyper",
"rustls 0.20.9",
"rustls-native-certs",
"tokio",
"tokio-rustls 0.23.4",
]
[[package]]
name = "hyper-timeout"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1"
dependencies = [
"hyper",
"pin-project-lite",
"tokio",
"tokio-io-timeout",
]
[[package]]
name = "iana-time-zone"
version = "0.1.57"
@@ -2243,6 +2376,35 @@ dependencies = [
"opentelemetry_sdk",
]
[[package]]
name = "opentelemetry-semantic-conventions"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24e33428e6bf08c6f7fcea4ddb8e358fab0fe48ab877a87c70c6ebe20f673ce5"
dependencies = [
"opentelemetry",
]
[[package]]
name = "opentelemetry-stackdriver"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff78d1abfa634182471924e542004d1b46996a31a481da114eda4abaad9d5b61"
dependencies = [
"async-trait",
"futures",
"gcp_auth",
"hex",
"http",
"hyper",
"opentelemetry",
"opentelemetry-semantic-conventions",
"prost",
"prost-types",
"thiserror",
"tonic",
]
[[package]]
name = "opentelemetry_api"
version = "0.19.0"
@@ -2277,6 +2439,8 @@ dependencies = [
"percent-encoding",
"rand",
"thiserror",
"tokio",
"tokio-stream",
]
[[package]]
@@ -2335,7 +2499,7 @@ checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447"
dependencies = [
"cfg-if 1.0.0",
"libc",
"redox_syscall",
"redox_syscall 0.3.5",
"smallvec",
"windows-targets 0.48.1",
]
@@ -2563,6 +2727,38 @@ dependencies = [
"unarray",
]
[[package]]
name = "prost"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd"
dependencies = [
"bytes",
"prost-derive",
]
[[package]]
name = "prost-derive"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4"
dependencies = [
"anyhow",
"itertools 0.10.5",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "prost-types"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13"
dependencies = [
"prost",
]
[[package]]
name = "quick-error"
version = "1.2.3"
@@ -2649,6 +2845,15 @@ dependencies = [
"url",
]
[[package]]
name = "redox_syscall"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
dependencies = [
"bitflags 1.3.2",
]
[[package]]
name = "redox_syscall"
version = "0.3.5"
@@ -2658,6 +2863,17 @@ dependencies = [
"bitflags 1.3.2",
]
[[package]]
name = "redox_users"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b"
dependencies = [
"getrandom",
"redox_syscall 0.2.16",
"thiserror",
]
[[package]]
name = "regex"
version = "1.9.3"
@@ -2719,7 +2935,8 @@ dependencies = [
"hex",
"hex-literal",
"once_cell",
"opentelemetry_sdk",
"opentelemetry",
"opentelemetry-stackdriver",
"phoenix-channel",
"prometheus-client",
"proptest",
@@ -2876,6 +3093,18 @@ dependencies = [
"webpki 0.21.4",
]
[[package]]
name = "rustls"
version = "0.20.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b80e3dec595989ea8510028f30c408a4630db12c9cbb8de34203b89d6577e99"
dependencies = [
"log",
"ring",
"sct 0.7.0",
"webpki 0.22.0",
]
[[package]]
name = "rustls"
version = "0.21.2"
@@ -3386,7 +3615,7 @@ dependencies = [
"autocfg",
"cfg-if 1.0.0",
"fastrand",
"redox_syscall",
"redox_syscall 0.3.5",
"rustix 0.37.22",
"windows-sys 0.48.0",
]
@@ -3514,6 +3743,16 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "tokio-io-timeout"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30b74022ada614a1b4834de765f9bb43877f910cc8ce4be40e89042c9223a8bf"
dependencies = [
"pin-project-lite",
"tokio",
]
[[package]]
name = "tokio-macros"
version = "2.1.0"
@@ -3525,6 +3764,17 @@ dependencies = [
"syn 2.0.28",
]
[[package]]
name = "tokio-rustls"
version = "0.23.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59"
dependencies = [
"rustls 0.20.9",
"tokio",
"webpki 0.22.0",
]
[[package]]
name = "tokio-rustls"
version = "0.24.1"
@@ -3557,7 +3807,7 @@ dependencies = [
"rustls 0.21.2",
"rustls-native-certs",
"tokio",
"tokio-rustls",
"tokio-rustls 0.24.1",
"tungstenite",
"webpki-roots",
]
@@ -3576,6 +3826,41 @@ dependencies = [
"tracing",
]
[[package]]
name = "tonic"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f219fad3b929bef19b1f86fbc0358d35daed8f2cac972037ac0dc10bbb8d5fb"
dependencies = [
"async-stream",
"async-trait",
"axum",
"base64 0.13.1",
"bytes",
"flate2",
"futures-core",
"futures-util",
"h2",
"http",
"http-body",
"hyper",
"hyper-timeout",
"percent-encoding",
"pin-project",
"prost",
"prost-derive",
"rustls-pemfile",
"tokio",
"tokio-rustls 0.23.4",
"tokio-stream",
"tokio-util",
"tower",
"tower-layer",
"tower-service",
"tracing",
"tracing-futures",
]
[[package]]
name = "tower"
version = "0.4.13"
@@ -3584,11 +3869,16 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
dependencies = [
"futures-core",
"futures-util",
"indexmap",
"pin-project",
"pin-project-lite",
"rand",
"slab",
"tokio",
"tokio-util",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
@@ -3637,6 +3927,16 @@ dependencies = [
"valuable",
]
[[package]]
name = "tracing-futures"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2"
dependencies = [
"pin-project",
"tracing",
]
[[package]]
name = "tracing-log"
version = "0.1.3"

View File

@@ -14,10 +14,11 @@ rand = "0.8.5"
stun_codec = "0.3.1"
tokio = { version = "1.32.0", features = ["macros", "rt-multi-thread", "net", "time"] }
tracing = { version = "0.1.37", features = ["log"] }
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
tracing-stackdriver = { version = "0.7.2", features = ["opentelemetry"] }
opentelemetry-stackdriver = { version = "0.16.0", default-features = false, features = ["gcp_auth"] }
tracing-opentelemetry = "0.19.0"
opentelemetry_sdk = "0.19.0"
opentelemetry = { version = "0.19.0", features = ["rt-tokio"] }
env_logger = "0.10.0"
bytes = "1.4.0"
sha2 = "0.10.6"

View File

@@ -2,6 +2,9 @@ use anyhow::{anyhow, bail, Context, Result};
use clap::Parser;
use futures::channel::mpsc;
use futures::{future, FutureExt, SinkExt, StreamExt};
use opentelemetry::sdk::trace::TracerProvider;
use opentelemetry::trace::TracerProvider as _;
use opentelemetry_stackdriver::Authorizer;
use phoenix_channel::{Error, Event, PhoenixChannel};
use prometheus_client::registry::Registry;
use rand::rngs::StdRng;
@@ -18,6 +21,7 @@ use std::pin::Pin;
use std::task::Poll;
use std::time::SystemTime;
use tracing::level_filters::LevelFilter;
use tracing::Subscriber;
use tracing_stackdriver::CloudTraceConfiguration;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
@@ -57,35 +61,38 @@ struct Args {
#[arg(long, env)]
rng_seed: Option<u64>,
/// The Google Cloud `project-id`.
///
/// If specified, logs will be tuned for Google Cloud Trace.
/// How to format the logs.
#[arg(long, env, default_value = "human")]
log_format: LogFormat,
/// Where to send trace data to.
#[arg(long, env)]
google_cloud_project_id: Option<String>,
trace_collector: Option<TraceCollector>,
}
#[derive(clap::ValueEnum, Debug, Clone, Copy)]
enum LogFormat {
Human,
Json,
GoogleCloud,
}
#[derive(clap::ValueEnum, Debug, Clone, Copy)]
enum TraceCollector {
/// Sends traces to Google Cloud Trace.
GoogleCloudTrace,
// TODO: Extend with OTLP receiver
}
#[tokio::main]
async fn main() -> Result<()> {
let args = Args::parse();
let env_filter = EnvFilter::builder()
.with_default_directive(LevelFilter::INFO.into())
.from_env_lossy();
setup_tracing(&args).await?;
if let Some(project_id) = args.google_cloud_project_id {
tracing_subscriber::registry()
.with(
tracing_stackdriver::layer()
.with_cloud_trace(CloudTraceConfiguration { project_id })
.with_filter(env_filter),
)
.with(tracing_opentelemetry::layer().with_tracer(
opentelemetry_sdk::export::trace::stdout::new_pipeline().install_simple(),
))
.init()
} else {
tracing_subscriber::fmt().with_env_filter(env_filter).init()
}
// Must create a root span for traces to be sampled.
let root = tracing::error_span!("root");
let _root = root.enter();
let public_addr = match (args.public_ip4_addr, args.public_ip6_addr) {
(Some(ip4), Some(ip6)) => IpStack::Dual { ip4, ip6 },
@@ -180,6 +187,81 @@ async fn main() -> Result<()> {
Ok(())
}
/// Sets up our tracing infrastructure.
///
/// See [`log_layer`] for details on the base log layer.
///
/// If the user has specified [`TraceCollector::GoogleCloudTrace`], we will attempt to connec to Google Cloud Trace.
/// This requires authentication.
/// Here is how we will attempt to obtain those, for details see <https://docs.rs/gcp_auth/0.9.0/gcp_auth/struct.AuthenticationManager.html#method.new>.
///
/// 1. Check if the `GOOGLE_APPLICATION_CREDENTIALS` environment variable if set; if so, use a custom service account as the token source.
/// 2. Look for credentials in `.config/gcloud/application_default_credentials.json`; if found, use these credentials to request refresh tokens.
/// 3. Send a HTTP request to the internal metadata server to retrieve a token; if it succeeds, use the default service account as the token source.
/// 4. Check if the `gcloud` tool is available on the PATH; if so, use the `gcloud auth print-access-token` command as the token source.
async fn setup_tracing(args: &Args) -> Result<()> {
let registry = tracing_subscriber::registry();
match args.trace_collector {
None => registry.with(log_layer(args, None)).try_init(),
Some(TraceCollector::GoogleCloudTrace) => {
let authorizer = opentelemetry_stackdriver::GcpAuthorizer::new()
.await
.context("Failed to find GCP credentials")?;
let project_id = authorizer.project_id().to_owned();
let (exporter, driver) = opentelemetry_stackdriver::Builder::default()
.build(authorizer)
.await?;
tokio::spawn(driver);
let tracer = TracerProvider::builder()
.with_batch_exporter(exporter, opentelemetry::runtime::Tokio)
.build()
.tracer("relay");
registry
.with(log_layer(args, Some(project_id)))
.with(tracing_opentelemetry::layer().with_tracer(tracer))
.try_init()
}
}
.context("Failed to init tracing")?;
Ok(())
}
/// Constructs the base log layer.
///
/// The user has a choice between:
///
/// - human-centered formatting
/// - JSON-formatting
/// - Google Cloud optimised formatting
fn log_layer<T>(
args: &Args,
google_cloud_trace_project_id: Option<String>,
) -> Box<dyn Layer<T> + Send + Sync>
where
T: Subscriber + for<'a> tracing_subscriber::registry::LookupSpan<'a>,
{
let env_filter = EnvFilter::builder()
.with_default_directive(LevelFilter::INFO.into())
.from_env_lossy();
let log_layer = match (args.log_format, google_cloud_trace_project_id) {
(LogFormat::Human, _) => tracing_subscriber::fmt::layer().boxed(),
(LogFormat::Json, _) => tracing_subscriber::fmt::layer().json().boxed(),
(LogFormat::GoogleCloud, None) => tracing_stackdriver::layer().boxed(),
(LogFormat::GoogleCloud, Some(project_id)) => tracing_stackdriver::layer()
.with_cloud_trace(CloudTraceConfiguration { project_id })
.boxed(),
};
log_layer.with_filter(env_filter).boxed()
}
#[derive(serde::Serialize, PartialEq, Debug)]
struct JoinMessage {
stamp_secret: String,

View File

@@ -23,8 +23,12 @@ locals {
value = var.observability_log_level
},
{
name = "GOOGLE_CLOUD_PROJECT_ID"
value = var.project_id
name = "LOG_FORMAT"
value = "google-cloud"
},
{
name = "TRACE_RECEIVER"
value = "google-cloud-trace"
},
{
name = "METRICS_ADDR"