feat(gateway): add HTTP health check (#4120)

This adds to the gateway the same kind of HTTP health check that is
already present in the relay. The health check returns 200 OK for as
long as the gateway is active. The gateway automatically shuts down on
fatal errors (like authentication failures with the portal).

To enable this, I've extracted a crate `http-health-check` that shares
this code between the relay and the gateway.

Resolves: #2465.

---------

Signed-off-by: Thomas Eizinger <thomas@eizinger.io>
Co-authored-by: Reactor Scram <ReactorScram@users.noreply.github.com>
This commit is contained in:
Thomas Eizinger
2024-03-14 08:05:21 +11:00
committed by GitHub
parent 4c77aae3d2
commit 9767bddcca
10 changed files with 66 additions and 25 deletions

12
rust/Cargo.lock generated
View File

@@ -1905,6 +1905,7 @@ dependencies = [
"firezone-tunnel",
"futures",
"futures-bounded",
"http-health-check",
"ip_network",
"libc",
"phoenix-channel",
@@ -1995,7 +1996,6 @@ name = "firezone-relay"
version = "1.0.0"
dependencies = [
"anyhow",
"axum 0.7.4",
"backoff",
"base64 0.22.0",
"bytecodec",
@@ -2007,6 +2007,7 @@ dependencies = [
"futures",
"hex",
"hex-literal",
"http-health-check",
"once_cell",
"opentelemetry",
"opentelemetry-otlp",
@@ -2877,6 +2878,15 @@ dependencies = [
"pin-project-lite",
]
[[package]]
name = "http-health-check"
version = "1.0.0"
dependencies = [
"axum 0.7.4",
"clap",
"tokio",
]
[[package]]
name = "http-range"
version = "0.1.5"

View File

@@ -13,6 +13,7 @@ members = [
"phoenix-channel",
"relay",
"gui-client/src-tauri",
"http-health-check",
]
resolver = "2"
@@ -45,6 +46,7 @@ firezone-relay = { path = "relay"}
connlib-shared = { path = "connlib/shared"}
firezone-tunnel = { path = "connlib/tunnel"}
phoenix-channel = { path = "phoenix-channel"}
http-health-check = { path = "http-health-check"}
[patch.crates-io]
boringtun = { git = "https://github.com/cloudflare/boringtun", branch = "master" }

View File

@@ -32,6 +32,7 @@ ip_network = { version = "0.4", default-features = false }
dns-lookup = { workspace = true }
libc = { version = "0.2", default-features = false, features = ["std", "const-extern-fn", "extra_traits"] }
either = "1"
http-health-check = { workspace = true }
[dev-dependencies]
serde_json = { version = "1.0", default-features = false, features = ["std"] }

View File

@@ -54,6 +54,8 @@ async fn try_main() -> Result<()> {
let ctrl_c = pin!(ctrl_c().map_err(anyhow::Error::new));
tokio::spawn(http_health_check::serve(cli.health_check.health_check_addr));
match future::try_select(task, ctrl_c)
.await
.map_err(|e| e.factor_first().0)?
@@ -127,6 +129,10 @@ impl Callbacks for CallbackHandler {
struct Cli {
#[command(flatten)]
common: CommonArgs,
#[command(flatten)]
health_check: http_health_check::HealthCheckArgs,
/// Identifier generated by the portal to identify and display the device.
#[arg(short = 'i', long, env = "FIREZONE_ID")]
pub firezone_id: Option<String>,

View File

@@ -0,0 +1,12 @@
[package]
name = "http-health-check"
# mark:automatic-version
version = "1.0.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
axum = { version = "0.7.3", default-features = false, features = ["http1", "tokio"] }
tokio = { version = "1.36.0", features = ["net"] }
clap = { version = "4.5.2", features = ["derive", "env"] }

View File

@@ -0,0 +1,27 @@
use axum::routing::get;
use axum::Router;
use std::net::SocketAddr;
/// Serves a minimal HTTP health-check endpoint on `addr`.
///
/// Every `GET /healthz` request is answered with 200 OK and an empty body for as long as
/// the server is running. There is no dedicated "unhealthy" response: to report an
/// unhealthy state, stop the task that is driving this future.
///
/// # Errors
///
/// Returns an error if binding the TCP listener to `addr` fails, or if the server
/// terminates with an I/O error.
pub async fn serve(addr: impl Into<SocketAddr>) -> std::io::Result<()> {
    let socket: SocketAddr = addr.into();

    // A single route whose handler returns an empty string.
    let healthz = Router::new().route("/healthz", get(|| async { "" }));
    let listener = tokio::net::TcpListener::bind(socket).await?;

    axum::serve(listener, healthz.into_make_service()).await
}
// Command-line arguments that configure the HTTP health-check endpoint.
//
// Meant to be embedded into a binary's own argument struct with `#[command(flatten)]`;
// the parsed `health_check_addr` is then handed to `serve`.
//
// NOTE(review): this is deliberately a plain `//` comment. clap's derive may pick up `///`
// doc comments as help text, so confirm that is harmless before turning this into rustdoc.
#[derive(clap::Args, Debug, Clone)]
pub struct HealthCheckArgs {
/// The address of the local interface where we should serve our health-check endpoint.
///
/// The actual health-check endpoint will be at `http://<health_check_addr>/healthz`.
// `hide = true` keeps the flag out of the generated help output.
// The default `0.0.0.0:8080` listens on every IPv4 interface, port 8080.
// NOTE(review): bare `env` presumably maps to the `HEALTH_CHECK_ADDR` environment
// variable (derived from the field name) — confirm against the clap derive reference.
#[arg(long, env, hide = true, default_value = "0.0.0.0:8080")]
pub health_check_addr: SocketAddr,
}

View File

@@ -37,8 +37,8 @@ url = "2.4.1"
serde = { version = "1.0.196", features = ["derive"] }
trackable = "1.3.0"
socket2 = "0.5.6"
axum = { version = "0.7.3", default-features = false, features = ["http1", "tokio"] }
backoff = "0.4"
http-health-check = { workspace = true }
[dev-dependencies]
redis = { version = "0.25.0", default-features = false, features = ["tokio-comp"] }

View File

@@ -1,16 +0,0 @@
use anyhow::Result;
use axum::routing::get;
use axum::Router;
use std::net::SocketAddr;
/// Runs the relay's HTTP health-check server on `addr`.
///
/// Registers a single `GET /healthz` route whose handler returns an empty body, then
/// serves it on a TCP listener bound to `addr`.
///
/// # Errors
///
/// Fails if the listener cannot be bound to `addr` or if serving ends with an error.
pub async fn serve(addr: impl Into<SocketAddr>) -> Result<()> {
    let bind_addr: SocketAddr = addr.into();
    let router = Router::new().route("/healthz", get(|| async { "" }));

    let listener = tokio::net::TcpListener::bind(bind_addr).await?;
    axum::serve(listener, router.into_make_service()).await?;

    Ok(())
}

View File

@@ -6,7 +6,6 @@ mod sleep;
mod time_events;
mod udp_socket;
pub mod health_check;
#[cfg(feature = "proptest")]
pub mod proptest;

View File

@@ -36,11 +36,6 @@ struct Args {
/// The public (i.e. internet-reachable) IPv6 address of the relay server.
#[arg(long, env)]
public_ip6_addr: Option<Ipv6Addr>,
/// The address of the local interface where we should serve our health-check endpoint.
///
/// The actual health-check endpoint will be at `http://<health_check_addr>/healthz`.
#[arg(long, env, hide = true, default_value = "0.0.0.0:8080")]
health_check_addr: SocketAddr,
// See https://www.rfc-editor.org/rfc/rfc8656.html#name-allocations
/// The lowest port used for TURN allocations.
#[arg(long, env, hide = true, default_value = "49152")]
@@ -86,6 +81,9 @@ struct Args {
/// OTLP is vendor-agnostic but for spans to be correctly recognised by Google Cloud, they need the project ID to be set.
#[arg(long, env, hide = true)]
google_cloud_project_id: Option<String>,
#[command(flatten)]
health_check: http_health_check::HealthCheckArgs,
}
#[derive(clap::ValueEnum, Debug, Clone, Copy)]
@@ -134,7 +132,9 @@ async fn main() -> Result<()> {
let mut eventloop = Eventloop::new(server, channel, public_addr)?;
tokio::spawn(firezone_relay::health_check::serve(args.health_check_addr));
tokio::spawn(http_health_check::serve(
args.health_check.health_check_addr,
));
tracing::info!(target: "relay", "Listening for incoming traffic on UDP port 3478");