fix(rust): Check /proc for health checks (#3250)

Debian slim is slimmer than we could ever have imagined.
This commit is contained in:
Jamil
2024-01-16 08:46:44 -08:00
committed by GitHub
parent 4ff70746c8
commit 36209c7d2d
8 changed files with 26 additions and 8 deletions

12
.github/README_CI.md vendored
View File

@@ -59,3 +59,15 @@ difference using a larger runner.
We maintain a baremetal testbed for running our end-to-end test suite. See
[the `e2e`](../e2e) directory. Please don't target those runners unless you're
specifically trying to run workflows that require a baremetal runner.
## Busting the GCP Docker layer cache
If you find yourself hitting strange Docker image issues like Rust binaries
failing to start inside Docker images, you may need to bust the GCP layer cache.
To do so:
- Log in to [GCP](https://console.cloud.google.com)
- Ensure the `firezone-staging` project is selected
- Navigate to the Artifact Registry service
- Delete all image versions for the appropriate `cache/` image repository

View File

@@ -211,6 +211,7 @@ jobs:
- name: Build and push release Docker images by digest
id: build
uses: docker/build-push-action@v5
target: release
with:
platforms: ${{ matrix.arch.platform }}
build-args: |

View File

@@ -138,7 +138,10 @@ services:
depends_on:
gateway:
condition: "service_healthy"
httpbin:
# Doesn't work on aarch64 Macs
# httpbin:
# condition: "service_healthy"
iperf3:
condition: "service_healthy"
api:
condition: "service_healthy"
@@ -150,7 +153,7 @@ services:
healthcheck:
test: ["CMD-SHELL", "cat /proc/net/dev | grep tun-firezone"]
environment:
FIREZONE_TOKEN: "SFMyNTY.g2gDaAJtAAAAJDNjZWYwNTY2LWFkZmQtNDhmZS1hMGYxLTU4MDY3OTYwOGY2Zm0AAABAamp0enhSRkpQWkdCYy1vQ1o5RHkyRndqd2FIWE1BVWRwenVScjJzUnJvcHg3NS16bmhfeHBfNWJUNU9uby1yYm4GAEC0b0KJAWIAAVGA.9Oirn9t8rvQpfOhW7hwGBFVzeMm9di0xYGTlwf9cFFk"
FIREZONE_TOKEN: "SFMyNTY.g2gDaAJtAAAAJDNjZWYwNTY2LWFkZmQtNDhmZS1hMGYxLTU4MDY3OTYwOGY2Zm0AAABAamp0enhSRkpQWkdCYy1vQ1o5RHkyRndqd2FIWE1BVWRwenVScjJzUnJvcHg3NS16bmhfeHBfNWJUNU9uby1yYm4GAIC98hKNAWIAAVGA.-0Shqu5DAwS2pN9EZ5aIcMK08vSVFqA_kuXsLWxJ__o"
RUST_LOG: firezone_gateway=trace,wire=trace,connlib_gateway_shared=trace,firezone_tunnel=trace,connlib_shared=trace,warn
FIREZONE_ENABLE_MASQUERADE: 1
FIREZONE_API_URL: ws://api:8081
@@ -222,7 +225,8 @@ services:
PACKAGE: firezone-relay
image: us-east1-docker.pkg.dev/firezone-staging/firezone/relay:${VERSION:-main}
healthcheck:
test: ["CMD-SHELL", "lsof -i UDP | grep firezone-relay"]
# Poor man's netstat -- Check for listening on 3478 (D96 in hex)
test: ["CMD-SHELL", "cat /proc/net/udp | grep D96"]
start_period: 3s
interval: 30s
retries: 5

View File

@@ -260,7 +260,7 @@ defmodule Web.RelayGroups.NewToken do
"docker run -d",
"--restart=unless-stopped",
"--pull=always",
"--health-cmd=\"lsof -i UDP | grep firezone-relay\"",
"--health-cmd=\"cat /proc/net/udp | grep D96\"",
"--name=firezone-relay",
"--cap-add=NET_ADMIN",
"--volume /var/lib/firezone",

View File

@@ -170,7 +170,7 @@ defmodule Web.Sites.NewToken do
"docker run -d",
"--restart=unless-stopped",
"--pull=always",
"--health-cmd=\"ip link | grep tun-firezone\"",
"--health-cmd=\"cat /proc/net/dev | grep tun-firezone\"",
"--name=firezone-gateway",
"--cap-add=NET_ADMIN",
"--volume /var/lib/firezone",

View File

@@ -68,7 +68,8 @@ services:
# PACKAGE: firezone-relay
# init: true
# healthcheck:
# test: ["CMD-SHELL", "lsof -i UDP | grep firezone-relay"]
# Poor man's netstat -- Check for listening on 3478 (D96 in hex)
# test: ["CMD-SHELL", "cat /proc/net/udp | grep D96"]
# start_period: 20s
# interval: 30s
# retries: 5

View File

@@ -278,7 +278,7 @@ impl<CB: Callbacks + 'static> ControlPlane<CB> {
match (reply_error.error, reference) {
(ErrorInfo::Offline, Some(reference)) => {
let Ok(resource_id) = reference.parse::<ResourceId>() else {
tracing::warn!("The portal responded with an Offline error. Is the Resource associated with any online Gateways? Reference: {reference}");
tracing::warn!("The portal responded with an Offline error. Is the Resource associated with any online Gateways or Relays?");
return Ok(());
};
// TODO: Rate limit the number of attempts of getting the relays before just trying a local network connection

View File

@@ -30,7 +30,7 @@ do
docker run -d \
--restart=unless-stopped \
--pull=always \
--health-cmd="ip link | grep tun-firezone" \
--health-cmd="cat /proc/net/dev | grep tun-firezone" \
--name="$RUNNING_NAME" \
--cap-add=NET_ADMIN \
--volume /var/lib/firezone \