Files
firezone/.github/workflows/_integration_tests.yml
Thomas Eizinger 3e6094af8d feat(linux): try to set rmem_max and wmem_max on startup (#10349)
The default send and receive buffer sizes on Linux are too small (only
~200 KB). Checking `nstat` after an iperf run revealed that the number
of dropped packets in the first interval directly correlates with the
number of receive buffer errors reported by `nstat`.

We already try to increase the send and receive buffer sizes for our UDP
socket but unfortunately, we cannot increase them beyond what the system
limits them to. To workaround this, we try to set `rmem_max` and
`wmem_max` during startup of the Linux headless client and Gateway. This
behaviour can be disabled by setting `FIREZONE_NO_INC_BUF=true`.

This doesn't work in Docker unfortunately, so we set the values manually
in the CI perf tests and verify after the test that we didn't encounter
any send and receive buffer errors.

It is yet to be determined how we should deal with this problem for all
the GUI clients. See #10350 as an issue tracking that.

Unfortunately, this doesn't fix all packet drops during the first iperf
interval. With this PR, we now see packet drops on the interface itself.
2025-09-17 23:05:01 +00:00

210 lines
7.2 KiB
YAML

name: Integration Tests
run-name: Triggered from ${{ github.event_name }} by ${{ github.actor }}
on:
workflow_call:
inputs:
domain_image:
required: false
type: string
default: "ghcr.io/firezone/domain"
domain_tag:
required: false
type: string
default: ${{ github.sha }}
api_image:
required: false
type: string
default: "ghcr.io/firezone/api"
api_tag:
required: false
type: string
default: ${{ github.sha }}
web_image:
required: false
type: string
default: "ghcr.io/firezone/web"
web_tag:
required: false
type: string
default: ${{ github.sha }}
elixir_image:
required: false
type: string
default: "ghcr.io/firezone/elixir"
elixir_tag:
required: false
type: string
default: ${{ github.sha }}
relay_image:
required: false
type: string
default: "ghcr.io/firezone/debug/relay"
relay_tag:
required: false
type: string
default: ${{ github.sha }}
gateway_image:
required: false
type: string
default: "ghcr.io/firezone/debug/gateway"
gateway_tag:
required: false
type: string
default: ${{ github.sha }}
client_image:
required: false
type: string
default: "ghcr.io/firezone/debug/client"
client_tag:
required: false
type: string
default: ${{ github.sha }}
http_test_server_image:
required: false
type: string
default: "ghcr.io/firezone/debug/http-test-server"
http_test_server_tag:
required: false
type: string
default: ${{ github.sha }}
env:
COMPOSE_PARALLEL_LIMIT: 1 # Temporary fix for https://github.com/docker/compose/pull/12752 until compose v2.36.0 lands on GitHub actions runners.
jobs:
integration-tests:
name: ${{ matrix.test.name || matrix.test.script }}
runs-on: ubuntu-24.04
permissions:
contents: read
id-token: write
pull-requests: write
env:
DOMAIN_IMAGE: ${{ inputs.domain_image }}
DOMAIN_TAG: ${{ inputs.domain_tag }}
API_IMAGE: ${{ inputs.api_image }}
API_TAG: ${{ inputs.api_tag }}
WEB_IMAGE: ${{ inputs.web_image }}
WEB_TAG: ${{ inputs.web_tag }}
RELAY_IMAGE: ${{ inputs.relay_image }}
RELAY_TAG: ${{ inputs.relay_tag }}
GATEWAY_IMAGE: ${{ inputs.gateway_image }}
GATEWAY_TAG: ${{ inputs.gateway_tag }}
CLIENT_IMAGE: ${{ inputs.client_image }}
CLIENT_TAG: ${{ inputs.client_tag }}
ELIXIR_IMAGE: ${{ inputs.elixir_image }}
ELIXIR_TAG: ${{ inputs.elixir_tag }}
HTTP_TEST_SERVER_IMAGE: ${{ inputs.http_test_server_image }}
HTTP_TEST_SERVER_TAG: ${{ inputs.http_test_server_tag }}
FIREZONE_INC_BUF: true
strategy:
fail-fast: false
matrix:
test:
- script: curl-api-down
- script: curl-api-restart
- script: curl-ecn
- script: dns
- script: dns-api-down
- script: dns-nm
- script: dns-two-resources
- script: systemd/dns-systemd-resolved
- script: tcp-dns
# Setting both client and gateway to random masquerade will force relay-relay candidate pair
- name: download-double-symmetric-nat
script: download
client_masquerade: random
gateway_masquerade: random
rust_log: debug
stop_containers: relay-2 # Force single relay
- script: download-packet-loss
rust_log: debug
- script: download-roaming-network
# Too noisy can cause flaky tests due to the amount of data
rust_log: debug
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: ./.github/actions/ghcr-docker-login
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
# We need at least Docker v28.1 which is not yet available on GitHub actions runners
- uses: docker/setup-docker-action@b60f85385d03ac8acfca6d9996982511d8620a19 # v4.3.0
- name: Seed database
run: docker compose run elixir /bin/sh -c 'cd apps/domain && mix ecto.migrate --migrations-path priv/repo/migrations --migrations-path priv/repo/manual_migrations && mix ecto.seed'
- name: Start docker compose in the background
run: |
set -xe
if [[ -n "${{ matrix.test.rust_log }}" ]]; then
export RUST_LOG="${{ matrix.test.rust_log }}"
fi
if [[ -n "${{ matrix.test.client_masquerade }}" ]]; then
export CLIENT_MASQUERADE="${{ matrix.test.client_masquerade }}"
fi
if [[ -n "${{ matrix.test.gateway_masquerade }}" ]]; then
export GATEWAY_MASQUERADE="${{ matrix.test.gateway_masquerade }}"
fi
docker compose build client-router gateway-router relay-1-router relay-2-router api-router
# Start one-by-one to avoid variability in service startup order
docker compose up -d dns.httpbin.search.test --no-build
docker compose up -d httpbin --no-build
docker compose up -d download.httpbin --no-build
docker compose up -d api web domain --no-build
docker compose up -d otel --no-build
docker compose up -d relay-1 --no-build
docker compose up -d relay-2 --no-build
docker compose up -d gateway --no-build
docker compose up -d client --no-build
if [[ -n "${{ matrix.test.stop_containers }}" ]]; then
docker compose stop ${{ matrix.test.stop_containers }}
fi
# Wait a few seconds for the services to fully start. GH runners are
# slow, so this gives the Client enough time to initialize its tun interface,
# for example.
# Intended to mitigate <https://github.com/firezone/firezone/issues/5830>
sleep 3
docker compose up veth-config
- run: ./scripts/tests/${{ matrix.test.script }}.sh
- name: Ensure Client emitted no warnings
if: "!cancelled()"
run: |
# Disabling checksum offloading causes one or two "I/O error (os error 5)" warnings
docker compose logs client | \
grep --invert "I/O error (os error 5)" | \
grep "WARN" && exit 1 || exit 0
- name: Show Client logs
if: "!cancelled()"
run: docker compose logs client
- name: Show Relay-1 logs
if: "!cancelled()"
run: docker compose logs relay-1
- name: Show Relay-2 logs
if: "!cancelled()"
run: docker compose logs relay-2
- name: Ensure Gateway emitted no warnings
if: "!cancelled()"
run: |
# Disabling checksum offloading causes one or two "I/O error (os error 5)" warnings
docker compose logs gateway | \
grep --invert "I/O error (os error 5)" | \
grep "WARN" && exit 1 || exit 0
- name: Show Gateway logs
if: "!cancelled()"
run: docker compose logs gateway
- name: Show API logs
if: "!cancelled()"
run: docker compose logs api