Files
firezone/.github/workflows/_integration_tests.yml
Thomas Eizinger 4e95dd1cb6 ci: fail fast inside the merge queue (#10746)
Setting `fail-fast: false` unsurprisingly makes our CI fail pretty
slowly. This is especially noticeable in the merge queue where a
long-running job could still hold up the entire queue even though a
different job has failed already and the PR is never going to make it in
anyway.

To avoid this scenario, we set `fail-fast: true` whenever we are in the
merge queue.
2025-10-28 10:42:02 -07:00

275 lines
10 KiB
YAML

# Display name of the workflow, and the per-run title which records the
# triggering event and the actor that caused it.
name: "Integration Tests"
run-name: "Triggered from ${{ github.event_name }} by ${{ github.actor }}"
# Reusable-workflow entry point (`workflow_call`). Every input is an optional
# string: each `*_image` defaults to a repository under ghcr.io/firezone and
# each `*_tag` defaults to the SHA of the commit that triggered the run.
on:
  workflow_call:
    inputs:
      domain_image:
        required: false
        type: string
        default: "ghcr.io/firezone/domain"
      domain_tag:
        required: false
        type: string
        default: ${{ github.sha }}
      api_image:
        required: false
        type: string
        default: "ghcr.io/firezone/api"
      api_tag:
        required: false
        type: string
        default: ${{ github.sha }}
      web_image:
        required: false
        type: string
        default: "ghcr.io/firezone/web"
      web_tag:
        required: false
        type: string
        default: ${{ github.sha }}
      elixir_image:
        required: false
        type: string
        default: "ghcr.io/firezone/elixir"
      elixir_tag:
        required: false
        type: string
        default: ${{ github.sha }}
      # Relay, gateway, client and http-test-server default to the `debug`
      # image variants.
      relay_image:
        required: false
        type: string
        default: "ghcr.io/firezone/debug/relay"
      relay_tag:
        required: false
        type: string
        default: ${{ github.sha }}
      gateway_image:
        required: false
        type: string
        default: "ghcr.io/firezone/debug/gateway"
      gateway_tag:
        required: false
        type: string
        default: ${{ github.sha }}
      client_image:
        required: false
        type: string
        default: "ghcr.io/firezone/debug/client"
      client_tag:
        required: false
        type: string
        default: ${{ github.sha }}
      http_test_server_image:
        required: false
        type: string
        default: "ghcr.io/firezone/debug/http-test-server"
      http_test_server_tag:
        required: false
        type: string
        default: ${{ github.sha }}
# Workflow-level environment, visible to every job and step below.
env:
  COMPOSE_PARALLEL_LIMIT: 1 # Temporary fix for https://github.com/docker/compose/pull/12752 until compose v2.36.0 lands on GitHub actions runners.
jobs:
  # One matrix job per integration test script. Each job logs in to GHCR,
  # brings up the docker compose environment, runs its script and then
  # inspects the logs and relay packet captures.
  integration-tests:
    name: ${{ matrix.test.name || matrix.test.script }}
    runs-on: ubuntu-24.04
    permissions:
      contents: read
      id-token: write
      pull-requests: write
    # Image coordinates from the workflow inputs, exported for every step.
    # NOTE(review): presumably consumed by the repository's docker compose
    # file, which is not visible from here -- confirm.
    env:
      DOMAIN_IMAGE: ${{ inputs.domain_image }}
      DOMAIN_TAG: ${{ inputs.domain_tag }}
      API_IMAGE: ${{ inputs.api_image }}
      API_TAG: ${{ inputs.api_tag }}
      WEB_IMAGE: ${{ inputs.web_image }}
      WEB_TAG: ${{ inputs.web_tag }}
      RELAY_IMAGE: ${{ inputs.relay_image }}
      RELAY_TAG: ${{ inputs.relay_tag }}
      GATEWAY_IMAGE: ${{ inputs.gateway_image }}
      GATEWAY_TAG: ${{ inputs.gateway_tag }}
      CLIENT_IMAGE: ${{ inputs.client_image }}
      CLIENT_TAG: ${{ inputs.client_tag }}
      ELIXIR_IMAGE: ${{ inputs.elixir_image }}
      ELIXIR_TAG: ${{ inputs.elixir_tag }}
      HTTP_TEST_SERVER_IMAGE: ${{ inputs.http_test_server_image }}
      HTTP_TEST_SERVER_TAG: ${{ inputs.http_test_server_tag }}
      FIREZONE_INC_BUF: true
    strategy:
      # Cancel sibling matrix jobs on the first failure only when running in
      # the merge queue; elsewhere let every test run to completion.
      fail-fast: ${{ github.event_name == 'merge_group' }}
      matrix:
        # Per-entry keys read by the steps below:
        #   script                 path under ./scripts/tests/ (without `.sh`)
        #   name                   optional display/artifact name override
        #   min_client_version /
        #   min_gateway_version    the test script is skipped when the image is older
        #   rust_log, client_masquerade, gateway_masquerade
        #                          exported before `docker compose up` when set
        #   single_relay           stops relay-2 after start-up when set
        test:
          - script: create-flow-from-icmp-error
            min_client_version: 1.5.4
          - script: download-rst
            min_gateway_version: 1.4.18
          - script: curl-api-down
          - script: curl-api-restart
          - script: curl-ecn
          - script: dns
          - script: dns-api-down
          - script: dns-nm
          - script: dns-two-resources
          - name: dns-systemd-resolved
            script: systemd/dns-systemd-resolved
          - script: tcp-dns
          - script: download-concurrent
            min_gateway_version: 1.4.18
          - name: download-double-symmetric-nat
            script: download
            # Setting both client and gateway to random masquerade will force relay-relay candidate pair
            client_masquerade: random
            gateway_masquerade: random
            rust_log: debug,flow_logs=trace
            single_relay: true # Force single relay
            min_gateway_version: 1.4.18
          - script: download-packet-loss
            rust_log: debug
          - script: download-roaming-network
            min_gateway_version: 1.4.18
            rust_log: debug,flow_logs=trace # Too noisy can cause flaky tests due to the amount of data
    steps:
      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
      - uses: ./.github/actions/ghcr-docker-login
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
      # The version checks succeed when MIN_VERSION sorts first, i.e. the
      # image reports a version >= the minimum. A failure is tolerated
      # (`continue-on-error`) and only causes the test script step to be
      # skipped.
      - name: Check minimum client version
        id: client_version_check
        if: ${{ matrix.test.min_client_version }}
        continue-on-error: true
        run: |
          # Use the job-level env vars instead of splicing `inputs.*` into the
          # script text, so the values are never parsed as shell syntax.
          ACTUAL_VERSION=$(docker run "${CLIENT_IMAGE}:${CLIENT_TAG}" firezone-headless-client --version | awk '{print $2}')
          MIN_VERSION="${{ matrix.test.min_client_version }}"
          [ "$(printf '%s\n' "$MIN_VERSION" "$ACTUAL_VERSION" | sort --version-sort | head -n1)" == "$MIN_VERSION" ]
      - name: Check minimum gateway version
        id: gateway_version_check
        if: ${{ matrix.test.min_gateway_version }}
        continue-on-error: true
        run: |
          # Use the job-level env vars instead of splicing `inputs.*` into the
          # script text, so the values are never parsed as shell syntax.
          ACTUAL_VERSION=$(docker run "${GATEWAY_IMAGE}:${GATEWAY_TAG}" firezone-gateway --version | awk '{print $2}')
          MIN_VERSION="${{ matrix.test.min_gateway_version }}"
          [ "$(printf '%s\n' "$MIN_VERSION" "$ACTUAL_VERSION" | sort --version-sort | head -n1)" == "$MIN_VERSION" ]
      # We need at least Docker v28.1 which is not yet available on GitHub actions runners
      - uses: docker/setup-docker-action@b60f85385d03ac8acfca6d9996982511d8620a19 # v4.3.0
      - name: Seed database
        run: docker compose run elixir /bin/sh -c 'cd apps/domain && mix ecto.migrate --migrations-path priv/repo/migrations --migrations-path priv/repo/manual_migrations && mix ecto.seed'
      - name: Start docker compose in the background
        run: |
          set -xe
          # Only export the optional per-test overrides when the matrix entry
          # defines them, so unset values stay unset.
          if [[ -n "${{ matrix.test.rust_log }}" ]]; then
            export RUST_LOG="${{ matrix.test.rust_log }}"
          fi
          if [[ -n "${{ matrix.test.client_masquerade }}" ]]; then
            export CLIENT_MASQUERADE="${{ matrix.test.client_masquerade }}"
          fi
          if [[ -n "${{ matrix.test.gateway_masquerade }}" ]]; then
            export GATEWAY_MASQUERADE="${{ matrix.test.gateway_masquerade }}"
          fi
          docker compose build client-router gateway-router relay-1-router relay-2-router api-router
          # Start one-by-one to avoid variability in service startup order
          docker compose up -d dns.httpbin.search.test --no-build
          docker compose up -d httpbin --no-build
          docker compose up -d download.httpbin --no-build
          docker compose up -d api web domain --no-build
          docker compose up -d otel --no-build
          docker compose up -d relay-1 --no-build
          docker compose up -d relay-2 --no-build
          docker compose up -d gateway --no-build
          docker compose up -d client --no-build
          docker compose up -d network-config
          # Capture relay traffic in the background for the checksum checks
          # at the end of the job.
          docker compose exec -d relay-1 /bin/sh -c 'xdpdump -i eth0 -w /tmp/packets.pcap --rx-capture entry,exit'
          docker compose exec -d relay-2 /bin/sh -c 'xdpdump -i eth0 -w /tmp/packets.pcap --rx-capture entry,exit'
          if [[ -n "${{ matrix.test.single_relay }}" ]]; then
            docker compose stop relay-2
          fi
          sleep 3 # Let everything settle for a bit
      - name: Disable checksum offloading
        run: |
          # Force checksum calculation on the host since some tests run on the host
          sudo ethtool -K eth0 tx off
          sudo ethtool -K docker0 tx off
      - run: ./scripts/tests/${{ matrix.test.script }}.sh
        if: ${{ steps.client_version_check.outcome != 'failure' && steps.gateway_version_check.outcome != 'failure' }} # Run the script if version checks succeed or are skipped
      # The remaining steps use `!cancelled()` so that logs and checks are
      # still produced after an earlier step has failed.
      - name: Ensure Client emitted no warnings
        if: "!cancelled()"
        run: |
          # Disabling checksum offloading causes one or two "I/O error (os error 5)" warnings
          docker compose logs client | \
            grep --invert "I/O error (os error 5)" | \
            grep "WARN" && exit 1 || exit 0
      - name: Show Client logs
        if: "!cancelled()"
        run: docker compose logs client
      - name: Show Relay-1 logs
        if: "!cancelled()"
        run: docker compose logs relay-1
      - name: Show Relay-2 logs
        if: "!cancelled()"
        run: docker compose logs relay-2
      - name: Ensure Gateway emitted no warnings
        if: "!cancelled()"
        run: |
          # Disabling checksum offloading causes one or two "I/O error (os error 5)" warnings
          docker compose logs gateway | \
            grep --invert "I/O error (os error 5)" | \
            grep "WARN" && exit 1 || exit 0
      - name: Show Gateway logs
        if: "!cancelled()"
        run: docker compose logs gateway
      - name: Show API logs
        if: "!cancelled()"
        run: docker compose logs api
      - name: Ensure no eBPF checksum errors on relay-1
        if: "!cancelled()"
        run: |
          set -xe
          docker compose exec relay-1 pkill xdpdump
          docker compose cp relay-1:/tmp/packets.pcap ./relay-1-packets.pcap
          # Fails the step when tcpdump reports any bad checksum in the capture.
          ! tcpdump -nnnr ./relay-1-packets.pcap -v | grep "bad \w* cksum"
      # Keep the capture for debugging whenever the job did not fully succeed.
      - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        if: "!success()"
        with:
          overwrite: true
          name: ${{ matrix.test.name || matrix.test.script }}-relay-1-xdpdump
          path: ./relay-1-packets.pcap
      # relay-2 is stopped for single-relay tests, so its checks are skipped there.
      - name: Ensure no eBPF checksum errors on relay-2
        if: "!cancelled() && !matrix.test.single_relay"
        run: |
          set -xe
          docker compose exec relay-2 pkill xdpdump
          docker compose cp relay-2:/tmp/packets.pcap ./relay-2-packets.pcap
          # Fails the step when tcpdump reports any bad checksum in the capture.
          ! tcpdump -nnnr ./relay-2-packets.pcap -v | grep "bad \w* cksum"
      - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        if: "!success() && !matrix.test.single_relay"
        with:
          overwrite: true
          name: ${{ matrix.test.name || matrix.test.script }}-relay-2-xdpdump
          path: ./relay-2-packets.pcap