From 9767bddccaadb6c75279f83b136b6319084ce78d Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Thu, 14 Mar 2024 08:05:21 +1100 Subject: [PATCH] feat(gateway): add HTTP health check (#4120) This adds the same kind of HTTP health-check that is already present in the relay to the gateway. The health-check returns 200 OK for as long as the gateway is active. The gateway automatically shuts down on fatal errors (like authentication failures with the portal). To enable this, I've extracted a crate `http-health-check` that shares this code between the relay and the gateway. Resolves: #2465. --------- Signed-off-by: Thomas Eizinger Co-authored-by: Reactor Scram --- rust/Cargo.lock | 12 +++++++++++- rust/Cargo.toml | 2 ++ rust/gateway/Cargo.toml | 1 + rust/gateway/src/main.rs | 6 ++++++ rust/http-health-check/Cargo.toml | 12 ++++++++++++ rust/http-health-check/src/lib.rs | 27 +++++++++++++++++++++++++++ rust/relay/Cargo.toml | 2 +- rust/relay/src/health_check.rs | 16 ---------------- rust/relay/src/lib.rs | 1 - rust/relay/src/main.rs | 12 ++++++------ 10 files changed, 66 insertions(+), 25 deletions(-) create mode 100644 rust/http-health-check/Cargo.toml create mode 100644 rust/http-health-check/src/lib.rs delete mode 100644 rust/relay/src/health_check.rs diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 44b2a942b..5d3118b94 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -1905,6 +1905,7 @@ dependencies = [ "firezone-tunnel", "futures", "futures-bounded", + "http-health-check", "ip_network", "libc", "phoenix-channel", @@ -1995,7 +1996,6 @@ name = "firezone-relay" version = "1.0.0" dependencies = [ "anyhow", - "axum 0.7.4", "backoff", "base64 0.22.0", "bytecodec", @@ -2007,6 +2007,7 @@ dependencies = [ "futures", "hex", "hex-literal", + "http-health-check", "once_cell", "opentelemetry", "opentelemetry-otlp", @@ -2877,6 +2878,15 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-health-check" +version = "1.0.0" +dependencies = [ + "axum 0.7.4", 
+ "clap", + "tokio", +] + [[package]] name = "http-range" version = "0.1.5" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 2a93b2712..8b8e19aad 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -13,6 +13,7 @@ members = [ "phoenix-channel", "relay", "gui-client/src-tauri", + "http-health-check", ] resolver = "2" @@ -45,6 +46,7 @@ firezone-relay = { path = "relay"} connlib-shared = { path = "connlib/shared"} firezone-tunnel = { path = "connlib/tunnel"} phoenix-channel = { path = "phoenix-channel"} +http-health-check = { path = "http-health-check"} [patch.crates-io] boringtun = { git = "https://github.com/cloudflare/boringtun", branch = "master" } diff --git a/rust/gateway/Cargo.toml b/rust/gateway/Cargo.toml index 25f975145..2a61edcfd 100644 --- a/rust/gateway/Cargo.toml +++ b/rust/gateway/Cargo.toml @@ -32,6 +32,7 @@ ip_network = { version = "0.4", default-features = false } dns-lookup = { workspace = true } libc = { version = "0.2", default-features = false, features = ["std", "const-extern-fn", "extra_traits"] } either = "1" +http-health-check = { workspace = true } [dev-dependencies] serde_json = { version = "1.0", default-features = false, features = ["std"] } diff --git a/rust/gateway/src/main.rs b/rust/gateway/src/main.rs index 901894b0f..ae00a8b9b 100644 --- a/rust/gateway/src/main.rs +++ b/rust/gateway/src/main.rs @@ -54,6 +54,8 @@ async fn try_main() -> Result<()> { let ctrl_c = pin!(ctrl_c().map_err(anyhow::Error::new)); + tokio::spawn(http_health_check::serve(cli.health_check.health_check_addr)); + match future::try_select(task, ctrl_c) .await .map_err(|e| e.factor_first().0)? @@ -127,6 +129,10 @@ impl Callbacks for CallbackHandler { struct Cli { #[command(flatten)] common: CommonArgs, + + #[command(flatten)] + health_check: http_health_check::HealthCheckArgs, + /// Identifier generated by the portal to identify and display the device. 
#[arg(short = 'i', long, env = "FIREZONE_ID")] pub firezone_id: Option<String>, diff --git a/rust/http-health-check/Cargo.toml b/rust/http-health-check/Cargo.toml new file mode 100644 index 000000000..cdf7f2a68 --- /dev/null +++ b/rust/http-health-check/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "http-health-check" +# mark:automatic-version +version = "1.0.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +axum = { version = "0.7.3", default-features = false, features = ["http1", "tokio"] } +tokio = { version = "1.36.0", features = ["net"] } +clap = { version = "4.5.2", features = ["derive", "env"] } diff --git a/rust/http-health-check/src/lib.rs b/rust/http-health-check/src/lib.rs new file mode 100644 index 000000000..796511649 --- /dev/null +++ b/rust/http-health-check/src/lib.rs @@ -0,0 +1,27 @@ +use axum::routing::get; +use axum::Router; +use std::net::SocketAddr; + +/// Runs an HTTP server that always responds to `GET /healthz` with 200 OK. +/// +/// To signal an unhealthy state, simply stop the task. +pub async fn serve(addr: impl Into<SocketAddr>) -> std::io::Result<()> { + let addr = addr.into(); + + let service = Router::new() + .route("/healthz", get(|| async { "" })) + .into_make_service(); + + axum::serve(tokio::net::TcpListener::bind(addr).await?, service).await?; + + Ok(()) +} + +#[derive(clap::Args, Debug, Clone)] +pub struct HealthCheckArgs { + /// The address of the local interface where we should serve our health-check endpoint. + /// + /// The actual health-check endpoint will be at `http://<health_check_addr>/healthz`. 
+ #[arg(long, env, hide = true, default_value = "0.0.0.0:8080")] + pub health_check_addr: SocketAddr, +} diff --git a/rust/relay/Cargo.toml b/rust/relay/Cargo.toml index 483457045..474ed8a13 100644 --- a/rust/relay/Cargo.toml +++ b/rust/relay/Cargo.toml @@ -37,8 +37,8 @@ url = "2.4.1" serde = { version = "1.0.196", features = ["derive"] } trackable = "1.3.0" socket2 = "0.5.6" -axum = { version = "0.7.3", default-features = false, features = ["http1", "tokio"] } backoff = "0.4" +http-health-check = { workspace = true } [dev-dependencies] redis = { version = "0.25.0", default-features = false, features = ["tokio-comp"] } diff --git a/rust/relay/src/health_check.rs b/rust/relay/src/health_check.rs deleted file mode 100644 index 9fb77ec2d..000000000 --- a/rust/relay/src/health_check.rs +++ /dev/null @@ -1,16 +0,0 @@ -use anyhow::Result; -use axum::routing::get; -use axum::Router; -use std::net::SocketAddr; - -pub async fn serve(addr: impl Into<SocketAddr>) -> Result<()> { - let addr = addr.into(); - - let service = Router::new() - .route("/healthz", get(|| async { "" })) - .into_make_service(); - - axum::serve(tokio::net::TcpListener::bind(addr).await?, service).await?; - - Ok(()) -} diff --git a/rust/relay/src/lib.rs b/rust/relay/src/lib.rs index 2c283e28d..764a5f9b6 100644 --- a/rust/relay/src/lib.rs +++ b/rust/relay/src/lib.rs @@ -6,7 +6,6 @@ mod sleep; mod time_events; mod udp_socket; -pub mod health_check; #[cfg(feature = "proptest")] pub mod proptest; diff --git a/rust/relay/src/main.rs b/rust/relay/src/main.rs index 1fecc55c7..4400a9585 100644 --- a/rust/relay/src/main.rs +++ b/rust/relay/src/main.rs @@ -36,11 +36,6 @@ struct Args { /// The public (i.e. internet-reachable) IPv6 address of the relay server. #[arg(long, env)] public_ip6_addr: Option<Ipv6Addr>, - /// The address of the local interface where we should serve our health-check endpoint. - /// - /// The actual health-check endpoint will be at `http://<health_check_addr>/healthz`. 
- #[arg(long, env, hide = true, default_value = "0.0.0.0:8080")] - health_check_addr: SocketAddr, // See https://www.rfc-editor.org/rfc/rfc8656.html#name-allocations /// The lowest port used for TURN allocations. #[arg(long, env, hide = true, default_value = "49152")] @@ -86,6 +81,9 @@ struct Args { /// OTLP is vendor-agnostic but for spans to be correctly recognised by Google Cloud, they need the project ID to be set. #[arg(long, env, hide = true)] google_cloud_project_id: Option<String>, + + #[command(flatten)] + health_check: http_health_check::HealthCheckArgs, } #[derive(clap::ValueEnum, Debug, Clone, Copy)] @@ -134,7 +132,9 @@ async fn main() -> Result<()> { let mut eventloop = Eventloop::new(server, channel, public_addr)?; - tokio::spawn(firezone_relay::health_check::serve(args.health_check_addr)); + tokio::spawn(http_health_check::serve( + args.health_check.health_check_addr, + )); tracing::info!(target: "relay", "Listening for incoming traffic on UDP port 3478");