From 36209c7d2d2c141e53ace9c7ca557bd53bcb4b7b Mon Sep 17 00:00:00 2001 From: Jamil Date: Tue, 16 Jan 2024 08:46:44 -0800 Subject: [PATCH] fix(rust): Check `/proc` for health checks (#3250) Debian slim is slimmer than we could ever have imagined. --- .github/README_CI.md | 12 ++++++++++++ .github/workflows/cd.yml | 1 + docker-compose.yml | 10 +++++++--- .../apps/web/lib/web/live/relay_groups/new_token.ex | 2 +- elixir/apps/web/lib/web/live/sites/new_token.ex | 2 +- rust/connection-tests/docker-compose.lan.yml | 3 ++- rust/connlib/clients/shared/src/control.rs | 2 +- scripts/gateway-docker-upgrade.sh | 2 +- 8 files changed, 26 insertions(+), 8 deletions(-) diff --git a/.github/README_CI.md b/.github/README_CI.md index d85df280a..39cee1e60 100644 --- a/.github/README_CI.md +++ b/.github/README_CI.md @@ -59,3 +59,15 @@ difference using a larger runner. We maintain a baremetal testbed for running our end-to-end test suite. See [the `e2e`](../e2e) directory. Please don't target those runners unless you're specifically trying to run workflows that require a baremetal runner. + +## Busting the GCP Docker layer cache + +If you find yourself hitting strange Docker image issues like Rust binaries +failing to start inside Docker images, you may need to bust the GCP layer cache. 
+ +To do so: + +- Log in to [GCP](https://console.cloud.google.com) +- Ensure `firezone-staging` project is selected +- Navigate to the artifact registry service +- Delete all image versions for the appropriate `cache/` image repository diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index edcf10601..632fa022c 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -211,6 +211,7 @@ jobs: - name: Build and push release Docker images by digest id: build uses: docker/build-push-action@v5 + target: release with: platforms: ${{ matrix.arch.platform }} build-args: | diff --git a/docker-compose.yml b/docker-compose.yml index a0d4469ad..643c672a9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -138,7 +138,10 @@ services: depends_on: gateway: condition: "service_healthy" - httpbin: + # Doesn't work on aarch64 Macs + # httpbin: + # condition: "service_healthy" + iperf3: condition: "service_healthy" api: condition: "service_healthy" @@ -150,7 +153,7 @@ services: healthcheck: test: ["CMD-SHELL", "cat /proc/net/dev | grep tun-firezone"] environment: - FIREZONE_TOKEN: "SFMyNTY.g2gDaAJtAAAAJDNjZWYwNTY2LWFkZmQtNDhmZS1hMGYxLTU4MDY3OTYwOGY2Zm0AAABAamp0enhSRkpQWkdCYy1vQ1o5RHkyRndqd2FIWE1BVWRwenVScjJzUnJvcHg3NS16bmhfeHBfNWJUNU9uby1yYm4GAEC0b0KJAWIAAVGA.9Oirn9t8rvQpfOhW7hwGBFVzeMm9di0xYGTlwf9cFFk" + FIREZONE_TOKEN: "SFMyNTY.g2gDaAJtAAAAJDNjZWYwNTY2LWFkZmQtNDhmZS1hMGYxLTU4MDY3OTYwOGY2Zm0AAABAamp0enhSRkpQWkdCYy1vQ1o5RHkyRndqd2FIWE1BVWRwenVScjJzUnJvcHg3NS16bmhfeHBfNWJUNU9uby1yYm4GAIC98hKNAWIAAVGA.-0Shqu5DAwS2pN9EZ5aIcMK08vSVFqA_kuXsLWxJ__o" RUST_LOG: firezone_gateway=trace,wire=trace,connlib_gateway_shared=trace,firezone_tunnel=trace,connlib_shared=trace,warn FIREZONE_ENABLE_MASQUERADE: 1 FIREZONE_API_URL: ws://api:8081 @@ -222,7 +225,8 @@ services: PACKAGE: firezone-relay image: us-east1-docker.pkg.dev/firezone-staging/firezone/relay:${VERSION:-main} healthcheck: - test: ["CMD-SHELL", "lsof -i UDP | grep firezone-relay"] + # Poor man's netstat -- Check 
for listening on 3478 (D96 in hex) + test: ["CMD-SHELL", "cat /proc/net/udp | grep D96"] start_period: 3s interval: 30s retries: 5 diff --git a/elixir/apps/web/lib/web/live/relay_groups/new_token.ex b/elixir/apps/web/lib/web/live/relay_groups/new_token.ex index c3e57cb47..ef540deaf 100644 --- a/elixir/apps/web/lib/web/live/relay_groups/new_token.ex +++ b/elixir/apps/web/lib/web/live/relay_groups/new_token.ex @@ -260,7 +260,7 @@ defmodule Web.RelayGroups.NewToken do "docker run -d", "--restart=unless-stopped", "--pull=always", - "--health-cmd=\"lsof -i UDP | grep firezone-relay\"", + "--health-cmd=\"cat /proc/net/udp | grep D96\"", "--name=firezone-relay", "--cap-add=NET_ADMIN", "--volume /var/lib/firezone", diff --git a/elixir/apps/web/lib/web/live/sites/new_token.ex b/elixir/apps/web/lib/web/live/sites/new_token.ex index a5260b1a7..6bb8e3800 100644 --- a/elixir/apps/web/lib/web/live/sites/new_token.ex +++ b/elixir/apps/web/lib/web/live/sites/new_token.ex @@ -170,7 +170,7 @@ defmodule Web.Sites.NewToken do "docker run -d", "--restart=unless-stopped", "--pull=always", - "--health-cmd=\"ip link | grep tun-firezone\"", + "--health-cmd=\"cat /proc/net/dev | grep tun-firezone\"", "--name=firezone-gateway", "--cap-add=NET_ADMIN", "--volume /var/lib/firezone", diff --git a/rust/connection-tests/docker-compose.lan.yml b/rust/connection-tests/docker-compose.lan.yml index 594df84a5..145422c2b 100644 --- a/rust/connection-tests/docker-compose.lan.yml +++ b/rust/connection-tests/docker-compose.lan.yml @@ -68,7 +68,8 @@ services: # PACKAGE: firezone-relay # init: true # healthcheck: - # test: ["CMD-SHELL", "lsof -i UDP | grep firezone-relay"] + # Poor man's netstat -- Check for listening on 3478 (D96 in hex) + # test: ["CMD-SHELL", "cat /proc/net/udp | grep D96"] # start_period: 20s # interval: 30s # retries: 5 diff --git a/rust/connlib/clients/shared/src/control.rs b/rust/connlib/clients/shared/src/control.rs index 825b55be3..09eb16ef9 100644 --- 
a/rust/connlib/clients/shared/src/control.rs +++ b/rust/connlib/clients/shared/src/control.rs @@ -278,7 +278,7 @@ impl ControlPlane { match (reply_error.error, reference) { (ErrorInfo::Offline, Some(reference)) => { let Ok(resource_id) = reference.parse::() else { - tracing::warn!("The portal responded with an Offline error. Is the Resource associated with any online Gateways? Reference: {reference}"); + tracing::warn!("The portal responded with an Offline error. Is the Resource associated with any online Gateways or Relays?"); return Ok(()); }; // TODO: Rate limit the number of attempts of getting the relays before just trying a local network connection diff --git a/scripts/gateway-docker-upgrade.sh b/scripts/gateway-docker-upgrade.sh index 09a7a4e2e..e4a581f29 100755 --- a/scripts/gateway-docker-upgrade.sh +++ b/scripts/gateway-docker-upgrade.sh @@ -30,7 +30,7 @@ do docker run -d \ --restart=unless-stopped \ --pull=always \ - --health-cmd="ip link | grep tun-firezone" \ + --health-cmd="cat /proc/net/dev | grep tun-firezone" \ --name="$RUNNING_NAME" \ --cap-add=NET_ADMIN \ --volume /var/lib/firezone \