Files
firezone/.github/workflows/ci.yml
Thomas Eizinger 3e6094af8d feat(linux): try to set rmem_max and wmem_max on startup (#10349)
The default send and receive buffer sizes on Linux are too small (only
~200 KB). Checking `nstat` after an iperf run revealed that the number
of dropped packets in the first interval directly correlates with the
number of receive buffer errors reported by `nstat`.

We already try to increase the send and receive buffer sizes for our UDP
socket but unfortunately, we cannot increase them beyond what the system
limits them to. To workaround this, we try to set `rmem_max` and
`wmem_max` during startup of the Linux headless client and Gateway. This
behaviour can be disabled by setting `FIREZONE_NO_INC_BUF=true`.

This doesn't work in Docker unfortunately, so we set the values manually
in the CI perf tests and verify after the test that we didn't encounter
any send and receive buffer errors.

It is yet to be determined how we should deal with this problem for all
the GUI clients. See #10350 as an issue tracking that.

Unfortunately, this doesn't fix all packet drops during the first iperf
interval. With this PR, we now see packet drops on the interface itself.
2025-09-17 23:05:01 +00:00

436 lines
16 KiB
YAML

name: Continuous Integration
run-name: Triggered by ${{ github.actor }} on ${{ github.event_name }}
on:
pull_request:
merge_group:
types: [checks_requested]
workflow_dispatch:
workflow_call:
inputs:
stage:
required: true
type: string
profile:
required: true
type: string
# Cancel old workflow runs if new code is pushed
concurrency:
group: "ci-${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }}"
cancel-in-progress: ${{ github.event_name != 'workflow_call' }}
jobs:
planner:
runs-on: ubuntu-latest
outputs:
jobs_to_run: ${{ steps.plan.outputs.jobs_to_run }}
steps:
- uses: actions/checkout@v4
- name: Plan jobs to run
id: plan
run: |
set -e
jobs="static-analysis,elixir,rust,tauri,kotlin,swift,codeql,build-artifacts,build-perf-artifacts";
# For workflow_dispatch or workflow_call, run all jobs
if [ "${{ github.event_name }}" = "workflow_dispatch" ] || [ "${{ github.event_name }}" = "workflow_call" ]; then
echo "jobs_to_run=$jobs" >> "$GITHUB_OUTPUT"
exit 0;
fi
# For main branch runs, run all jobs
if [ "${{ github.event_name }}" = "push" ] && [ "${{ github.ref_name }}" = "main" ]; then
echo "jobs_to_run=$jobs" >> "$GITHUB_OUTPUT"
exit 0;
fi
# Fetch base ref for PRs
if [ "${{ github.event_name }}" = "pull_request" ]; then
git fetch origin ${{ github.base_ref }} --depth=1
git diff --name-only origin/${{ github.base_ref }} ${{ github.sha }} > changed_files.txt
echo "Changed files:"
cat changed_files.txt
fi
# Fetch base ref for merge_group
if [ "${{ github.event_name }}" = "merge_group" ]; then
# Base ref could be a few commits away, so fetch a few commits in case the queue is long
git fetch origin ${{ github.event.merge_group.base_ref }} --depth=20
git diff --name-only ${{ github.event.merge_group.base_sha }} ${{ github.sha }} > changed_files.txt
echo "Changed files:"
cat changed_files.txt
fi
# Run all jobs if CI configuration changes
if grep -q '^\.github/' changed_files.txt; then
echo "jobs_to_run=$jobs" >> "$GITHUB_OUTPUT"
exit 0;
fi
# Run all jobs if tool versions change
if grep -q '^\.tool-versions' changed_files.txt; then
echo "jobs_to_run=$jobs" >> "$GITHUB_OUTPUT"
exit 0;
fi
# Run all jobs if docker-compose changes
if grep -q '^docker-compose.yml' changed_files.txt; then
echo "jobs_to_run=$jobs" >> "$GITHUB_OUTPUT"
exit 0;
fi
jobs="static-analysis" # Always run static-analysis
if grep -q '^rust/' changed_files.txt; then
jobs="${jobs},rust,kotlin,swift,build-artifacts,build-perf-artifacts"
fi
if grep -q '^rust/gui-client/' changed_files.txt; then
jobs="${jobs},tauri"
fi
if grep -q '^rust/tests/gui-smoke-test/' changed_files.txt; then
jobs="${jobs},tauri"
fi
if grep -q '^elixir/' changed_files.txt; then
jobs="${jobs},elixir,codeql,build-artifacts"
fi
if grep -q '^kotlin/' changed_files.txt; then
jobs="${jobs},kotlin"
fi
if grep -q '^swift/' changed_files.txt; then
jobs="${jobs},swift"
fi
if grep -q '^website/' changed_files.txt; then
jobs="${jobs},codeql"
fi
if grep -q '^scripts/tests/' changed_files.txt; then
jobs="${jobs},build-artifacts,build-perf-artifacts"
fi
echo "jobs_to_run=$jobs" >> "$GITHUB_OUTPUT"
required-check:
name: required-check
needs: planner
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Wait for all jobs to complete
timeout-minutes: 60
env:
GH_TOKEN: ${{ github.token }}
run: |
set -e
while true; do
jobs_json=$(gh run view ${{ github.run_id }} --json jobs --jq '.jobs | map(select((.name | contains("required-check") | not) and (.name | contains("upload-bencher") | not)))')
total_jobs=$(echo "$jobs_json" | jq 'length')
failed_jobs=$(echo "$jobs_json" | jq -r '[.[] | select(.conclusion == "failure")] | length')
completed_jobs=$(echo "$jobs_json" | jq '[.[] | select(.status == "completed")] | length')
if [ "$failed_jobs" -gt 0 ]; then
echo "At least one job has failed."
exit 1
fi
echo "Completed: $completed_jobs/$total_jobs"
if [ "$completed_jobs" -eq "$total_jobs" ]; then
break
fi
echo "Jobs not yet completed:"
echo "$jobs_json" | jq -r '.[] | select(.status != "completed") | "- " + .name + " (Status: " + .status + ")" '
sleep 10
done
kotlin:
needs: planner
if: contains(needs.planner.outputs.jobs_to_run, 'kotlin')
uses: ./.github/workflows/_kotlin.yml
secrets: inherit
swift:
needs: planner
if: contains(needs.planner.outputs.jobs_to_run, 'swift')
uses: ./.github/workflows/_swift.yml
secrets: inherit
elixir:
needs: planner
if: contains(needs.planner.outputs.jobs_to_run, 'elixir')
uses: ./.github/workflows/_elixir.yml
secrets: inherit
rust:
needs: planner
if: contains(needs.planner.outputs.jobs_to_run, 'rust')
uses: ./.github/workflows/_rust.yml
secrets: inherit
tauri:
needs: planner
if: contains(needs.planner.outputs.jobs_to_run, 'tauri')
uses: ./.github/workflows/_tauri.yml
secrets: inherit
static-analysis:
needs: planner
if: contains(needs.planner.outputs.jobs_to_run, 'static-analysis')
uses: ./.github/workflows/_static-analysis.yml
secrets: inherit
codeql:
needs: planner
if: contains(needs.planner.outputs.jobs_to_run, 'codeql')
uses: ./.github/workflows/_codeql.yml
secrets: inherit
update-release-draft:
name: update-release-draft-${{ matrix.config_name }}
runs-on: ubuntu-24.04
strategy:
fail-fast: false
matrix:
include:
# mark:next-gateway-version
- release_name: gateway-1.4.17
config_name: release-drafter-gateway.yml
# mark:next-headless-version
- release_name: headless-client-1.5.4
config_name: release-drafter-headless-client.yml
steps:
- uses: release-drafter/release-drafter@b1476f6e6eb133afa41ed8589daba6dc69b4d3f5 # v6.1.0
# Only draft releases on merges to main
if: ${{ github.ref_name == 'main' }}
id: update-release-draft
with:
config-name: ${{ matrix.config_name }}
tag: ${{ matrix.release_name }}
version: ${{ matrix.release_name }}
name: ${{ matrix.release_name }}
commitish: ${{ github.sha }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
build-artifacts:
needs: [update-release-draft, planner]
if: contains(needs.planner.outputs.jobs_to_run, 'build-artifacts')
uses: ./.github/workflows/_build_artifacts.yml
secrets: inherit
with:
# Build debug/ on PRs and merge group, no prefix for production release images
image_prefix: ${{ ((github.event_name == 'pull_request' || github.event_name == 'merge_group') && 'debug') || '' }}
profile: ${{ inputs.profile || 'debug' }}
stage: ${{ inputs.stage || 'debug' }}
build-perf-artifacts:
needs: [update-release-draft, planner]
if: contains(needs.planner.outputs.jobs_to_run, 'build-perf-artifacts')
uses: ./.github/workflows/_build_artifacts.yml
secrets: inherit
with:
sha: ${{ github.sha }}
image_prefix: "perf"
profile: "release"
stage: "debug" # Only the debug images have perf tooling
integration-tests:
uses: ./.github/workflows/_integration_tests.yml
needs: build-artifacts
secrets: inherit
with:
gateway_image: ${{ needs.build-artifacts.outputs.gateway_image }}
client_image: ${{ needs.build-artifacts.outputs.client_image }}
relay_image: ${{ needs.build-artifacts.outputs.relay_image }}
http_test_server_image: ${{ needs.build-artifacts.outputs.http_test_server_image }}
compatibility-tests:
strategy:
fail-fast: false
matrix:
client:
- image: "ghcr.io/firezone/client"
tag: "latest"
gateway:
- image: ${{ needs.build-artifacts.outputs.gateway_image }}
tag: ${{ github.sha }}
ci-name: sha
- image: "ghcr.io/firezone/gateway"
tag: "latest"
ci-name: latest
# Don't run compatibility tests when called from hotfix.yml or publish.yml on `main` because
# it'll be red if there was a breaking change we're trying to publish,
# and the deploy_production workflow checks for main to be green.
if: ${{ github.event_name == 'pull_request' || github.event_name == 'merge_group' }}
name: compatibility-tests-client(${{ matrix.client.tag }})-gateway(${{ matrix.gateway.ci-name }})
uses: ./.github/workflows/_integration_tests.yml
needs: build-artifacts
secrets: inherit
with:
gateway_image: ${{ matrix.gateway.image }}
gateway_tag: ${{ matrix.gateway.tag }}
client_image: ${{ matrix.client.image }}
client_tag: ${{ matrix.client.tag }}
perf-tests:
name: perf-tests
needs: build-perf-artifacts
runs-on: ubuntu-24.04
permissions:
contents: read
id-token: write
pull-requests: write
env:
API_IMAGE: "ghcr.io/firezone/api"
API_TAG: ${{ github.sha }}
WEB_IMAGE: "ghcr.io/firezone/web"
WEB_TAG: ${{ github.sha }}
ELIXIR_IMAGE: "ghcr.io/firezone/elixir"
ELIXIR_TAG: ${{ github.sha }}
GATEWAY_IMAGE: "ghcr.io/firezone/perf/gateway"
GATEWAY_TAG: ${{ github.sha }}
CLIENT_IMAGE: "ghcr.io/firezone/perf/client"
CLIENT_TAG: ${{ github.sha }}
RELAY_IMAGE: "ghcr.io/firezone/perf/relay"
RELAY_TAG: ${{ github.sha }}
FIREZONE_INC_BUF: true
strategy:
fail-fast: false
matrix:
test:
- tcp-client2server
- tcp-server2client
- udp-client2server
- udp-server2client
flavour:
- direct
- relayed
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: ./.github/actions/ghcr-docker-login
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
# We need at least Docker v28.1 which is not yet available on GitHub actions runners
- uses: docker/setup-docker-action@b60f85385d03ac8acfca6d9996982511d8620a19 # v4.3.0
- name: Seed database
run: docker compose run elixir /bin/sh -c 'cd apps/domain && mix ecto.seed --migrations-path priv/repo/migrations --migrations-path priv/repo/manual_migrations'
- name: Increase max UDP buffer sizes
run: |
sudo sysctl -w net.core.wmem_max=16777216 # 16 MB
sudo sysctl -w net.core.rmem_max=134217728 # 128 MB
- name: Start docker compose in the background
run: |
# We need to increase the log level to make sure that they don't hold off storm of packets
# generated by UDP tests. Wire is especially chatty.
sed -i 's/^\(\s*\)RUST_LOG:.*$/\1RUST_LOG: wire=error,opentelemetry_sdk=error,debug/' docker-compose.yml
grep RUST_LOG docker-compose.yml
if [ "${{ matrix.flavour }}" = "relayed" ]; then
echo "CLIENT_MASQUERADE=random" >> "$GITHUB_ENV"
echo "UDP_BITRATE=300M" >> "$GITHUB_ENV"
fi
docker compose build client-router gateway-router relay-1-router relay-2-router api-router
# Start services in the same order each time for the tests
docker compose up -d iperf3
docker compose up -d api web domain --no-build
docker compose up -d relay-1 relay-2 --no-build
docker compose up -d gateway --no-build
docker compose up -d client --no-build
docker compose up veth-config
- name: "Performance test: ${{ matrix.flavour }}-${{ matrix.test }}"
timeout-minutes: 5
env:
TEST_NAME: ${{ matrix.flavour }}-${{ matrix.test }}
run: |
./scripts/tests/perf/${{ matrix.test }}.sh
jq '{ "${{ matrix.flavour }}-${{ matrix.test }}": { "retransmits": { "value": (.end.sum_sent.retransmits // -1) }, "throughput": { "value": .end.sum_received.bits_per_second } } }' ./${{ matrix.flavour }}-${{ matrix.test }}.json > ./${{ matrix.flavour }}-${{ matrix.test }}.bmf.json
- name: "Save performance test results: ${{ matrix.flavour }}-${{ matrix.test }}"
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
overwrite: true
name: ${{ matrix.flavour }}-${{ matrix.test }}-${{ github.sha }}-iperf3results
path: ./${{ matrix.flavour }}-${{ matrix.test }}.bmf.json
- name: Show Client logs
if: "!cancelled()"
run: docker compose logs client
- name: Show Relay-1 logs
if: "!cancelled()"
run: docker compose logs relay-1
- name: Show Relay-2 logs
if: "!cancelled()"
run: docker compose logs relay-2
- name: Show Gateway logs
if: "!cancelled()"
run: docker compose logs gateway
- name: Show API logs
if: "!cancelled()"
run: docker compose logs api
- name: Show iperf3 logs
if: "!cancelled()"
run: docker compose logs iperf3
- name: Ensure no warnings are logged
if: "!cancelled()"
run: |
docker compose logs client |
grep "WARN" && exit 1 || exit 0
docker compose logs gateway |
grep "WARN" && exit 1 || exit 0
# BTF doesn't load for veth interfaces
docker compose logs relay-1 | \
grep --invert "Object BTF couldn't be loaded in the kernel: the BPF_BTF_LOAD syscall failed." | \
grep "WARN" && exit 1 || exit 0
docker compose logs relay-2 | \
grep --invert "Object BTF couldn't be loaded in the kernel: the BPF_BTF_LOAD syscall failed." | \
grep "WARN" && exit 1 || exit 0
- name: Ensure no UDP socket errors
if: "!cancelled() && startsWith(matrix.test, 'tcp')"
run: |
docker compose exec client /bin/sh -c 'nstat -s' |
grep -i "error" && exit 1 || exit 0
docker compose exec gateway /bin/sh -c 'nstat -s' |
grep -i "error" && exit 1 || exit 0
upload-bencher:
continue-on-error: true
needs: perf-tests
runs-on: ubuntu-24.04
permissions:
contents: read
id-token: write
pull-requests: write
checks: write
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: bencherdev/bencher@main
- name: Download performance test results
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
pattern: "*-${{ github.sha }}-iperf3results"
merge-multiple: true
path: ./${{ github.sha }}
- name: Merge benchmarks results into one report
run: jq -s 'reduce .[] as $item ({}; . * $item)' ./${{ github.sha }}/*.bmf.json > bmf.json
- name: Report results to bencher
run: |
bencher run \
--project firezone-1l75jv1z \
--testbed github-actions \
--file bmf.json \
--adapter json \
--branch "${{ env.BRANCH }}" \
--branch-start-point "${{ github.base_ref }}" \
--github-actions ${{ secrets.GITHUB_TOKEN }} \
--ci-only-on-alert
env:
BENCHER_API_TOKEN: ${{ secrets.BENCHER_API_TOKEN }}
BRANCH: "${{ github.head_ref || github.ref_name }}"