# firezone/.github/workflows/ci.yml

name: Continuous Integration
run-name: Triggered by ${{ github.actor }} on ${{ github.event_name }}

# Entry points: pull requests, merge-queue checks, manual dispatch, and calls
# from other workflows (callers must supply both `stage` and `profile`).
on:
  pull_request:
  merge_group:
    types:
      - checks_requested
  workflow_dispatch:
  workflow_call:
    inputs:
      profile:
        type: string
        required: true
      stage:
        type: string
        required: true

# Cancel old workflow runs if new code is pushed
# NOTE(review): in a called workflow the `github` context is documented to
# reflect the caller, so `github.event_name` may never equal 'workflow_call'
# and `cancel-in-progress` may always evaluate to true - confirm.
concurrency:
  cancel-in-progress: ${{ github.event_name != 'workflow_call' }}
  group: "ci-${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }}"

jobs:
  # Works out which downstream jobs need to run for this event and publishes
  # them as the comma-separated `jobs_to_run` output. Downstream jobs gate
  # themselves with `contains(needs.planner.outputs.jobs_to_run, '<job>')`.
  planner:
    runs-on: ubuntu-latest
    outputs:
      jobs_to_run: ${{ steps.plan.outputs.jobs_to_run }}
    steps:
      # Pinned to the same commit as the other checkout steps in this workflow.
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - name: Plan jobs to run
        id: plan
        # Context values are handed over as environment variables rather than
        # expanded inside the script, so ref names are never parsed as shell code.
        env:
          EVENT_NAME: ${{ github.event_name }}
          HEAD_SHA: ${{ github.sha }}
          PR_BASE_REF: ${{ github.base_ref }}
          MERGE_GROUP_BASE_REF: ${{ github.event.merge_group.base_ref }}
          MERGE_GROUP_BASE_SHA: ${{ github.event.merge_group.base_sha }}
        run: |
          set -e

          all_jobs="static-analysis,elixir,rust,tauri,kotlin,swift,codeql,build-artifacts,build-perf-artifacts"

          # Publish the complete job list and stop planning.
          run_all_jobs() {
            echo "jobs_to_run=$all_jobs" >> "$GITHUB_OUTPUT"
            exit 0
          }

          # Succeeds when at least one changed path matches the given regex.
          changed() {
            grep -q -e "$1" changed_files.txt
          }

          # Build the list of changed files. Only pull requests and merge groups
          # have a base to diff against; every other event (manual dispatch,
          # pushes to main, calls from other workflows, ...) runs all jobs.
          case "$EVENT_NAME" in
            pull_request)
              git fetch origin "$PR_BASE_REF" --depth=1
              git diff --name-only "origin/$PR_BASE_REF" "$HEAD_SHA" > changed_files.txt
              ;;
            merge_group)
              # Base ref could be a few commits away, so fetch a few commits in case the queue is long
              git fetch origin "$MERGE_GROUP_BASE_REF" --depth=20
              git diff --name-only "$MERGE_GROUP_BASE_SHA" "$HEAD_SHA" > changed_files.txt
              ;;
            *)
              run_all_jobs
              ;;
          esac

          echo "Changed files:"
          cat changed_files.txt

          # Run all jobs if CI configuration, tool versions or docker-compose change
          if changed '^\.github/' || changed '^\.tool-versions' || changed '^docker-compose\.yml'; then
            run_all_jobs
          fi

          jobs="static-analysis" # Always run static-analysis

          if changed '^rust/'; then
            jobs="${jobs},rust,kotlin,swift,build-artifacts,build-perf-artifacts"
          fi
          if changed '^rust/gui-client/' || changed '^rust/tests/gui-smoke-test/'; then
            jobs="${jobs},tauri"
          fi
          if changed '^elixir/'; then
            jobs="${jobs},elixir,codeql,build-artifacts"
          fi
          if changed '^kotlin/'; then
            jobs="${jobs},kotlin"
          fi
          if changed '^swift/'; then
            jobs="${jobs},swift"
          fi
          if changed '^website/'; then
            jobs="${jobs},codeql"
          fi
          if changed '^scripts/tests/'; then
            jobs="${jobs},build-artifacts,build-perf-artifacts"
          fi

          echo "jobs_to_run=$jobs" >> "$GITHUB_OUTPUT"

  # Aggregates the outcome of the other jobs in this run into one job: it polls
  # the run until every other job has completed and fails as soon as one of
  # them reports a failure. Jobs whose name contains "required-check" or
  # "upload-bencher" are left out of the tally.
  required-check:
    name: required-check
    needs: planner
    runs-on: ubuntu-latest
    steps:
      # Pinned to the same commit as the other checkout steps in this workflow.
      # Presumably present so `gh` can infer the repository - confirm.
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - name: Wait for all jobs to complete
        timeout-minutes: 60
        env:
          GH_TOKEN: ${{ github.token }}
          RUN_ID: ${{ github.run_id }}
        run: |
          set -e

          while true; do
            jobs_json=$(gh run view "$RUN_ID" --json jobs --jq '.jobs | map(select((.name | contains("required-check") | not) and (.name | contains("upload-bencher") | not)))')

            total_jobs=$(echo "$jobs_json" | jq 'length')
            # NOTE(review): only the "failure" conclusion fails this check; jobs that
            # complete with any other conclusion count as done - confirm this is intended.
            failed_jobs=$(echo "$jobs_json" | jq -r '[.[] | select(.conclusion == "failure")] | length')
            completed_jobs=$(echo "$jobs_json" | jq '[.[] | select(.status == "completed")] | length')

            if [ "$failed_jobs" -gt 0 ]; then
              echo "At least one job has failed."
              exit 1
            fi

            echo "Completed: $completed_jobs/$total_jobs"

            if [ "$completed_jobs" -eq "$total_jobs" ]; then
              break
            fi

            echo "Jobs not yet completed:"
            echo "$jobs_json" | jq -r '.[] | select(.status != "completed") | "- " + .name + " (Status: " + .status + ")" '

            # Poll every 10 seconds; the step-level timeout bounds the total wait.
            sleep 10
          done

  # Per-component reusable workflows. Each one waits for the planner and runs
  # only when its own name appears in the planner's `jobs_to_run` output.
  kotlin:
    uses: ./.github/workflows/_kotlin.yml
    needs: planner
    if: ${{ contains(needs.planner.outputs.jobs_to_run, 'kotlin') }}
    secrets: inherit
  swift:
    uses: ./.github/workflows/_swift.yml
    needs: planner
    if: ${{ contains(needs.planner.outputs.jobs_to_run, 'swift') }}
    secrets: inherit
  elixir:
    uses: ./.github/workflows/_elixir.yml
    needs: planner
    if: ${{ contains(needs.planner.outputs.jobs_to_run, 'elixir') }}
    secrets: inherit
  rust:
    uses: ./.github/workflows/_rust.yml
    needs: planner
    if: ${{ contains(needs.planner.outputs.jobs_to_run, 'rust') }}
    secrets: inherit
  tauri:
    uses: ./.github/workflows/_tauri.yml
    needs: planner
    if: ${{ contains(needs.planner.outputs.jobs_to_run, 'tauri') }}
    secrets: inherit
  static-analysis:
    uses: ./.github/workflows/_static-analysis.yml
    needs: planner
    if: ${{ contains(needs.planner.outputs.jobs_to_run, 'static-analysis') }}
    secrets: inherit
  codeql:
    uses: ./.github/workflows/_codeql.yml
    needs: planner
    if: ${{ contains(needs.planner.outputs.jobs_to_run, 'codeql') }}
    secrets: inherit

  # Refreshes the draft releases, one matrix job per release-drafter config.
  # The `main` condition sits on the step, not the job, so on other refs the
  # job still completes (with nothing to do) instead of being skipped.
  update-release-draft:
    name: update-release-draft-${{ matrix.config_name }}
    runs-on: ubuntu-24.04
    strategy:
      fail-fast: false
      matrix:
        # The `mark:` comments are presumably anchors for version-bump tooling;
        # keep each one directly above its `release_name` line - confirm.
        include:
          # mark:next-gateway-version
          - release_name: gateway-1.4.17
            config_name: release-drafter-gateway.yml
          # mark:next-headless-version
          - release_name: headless-client-1.5.4
            config_name: release-drafter-headless-client.yml

    steps:
      # Only draft releases on merges to main
      - id: update-release-draft
        if: github.ref_name == 'main'
        uses: release-drafter/release-drafter@b1476f6e6eb133afa41ed8589daba6dc69b4d3f5 # v6.1.0
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          config-name: ${{ matrix.config_name }}
          name: ${{ matrix.release_name }}
          tag: ${{ matrix.release_name }}
          version: ${{ matrix.release_name }}
          commitish: ${{ github.sha }}

  # Builds the images used by the integration and compatibility tests.
  # `profile` and `stage` come from the workflow inputs when present and
  # default to 'debug' otherwise.
  build-artifacts:
    uses: ./.github/workflows/_build_artifacts.yml
    needs:
      - update-release-draft
      - planner
    if: ${{ contains(needs.planner.outputs.jobs_to_run, 'build-artifacts') }}
    secrets: inherit
    with:
      profile: ${{ inputs.profile || 'debug' }}
      stage: ${{ inputs.stage || 'debug' }}
      # Build debug/ on PRs and merge group, no prefix for production release images
      image_prefix: ${{ ((github.event_name == 'pull_request' || github.event_name == 'merge_group') && 'debug') || '' }}

  # Builds the `perf`-prefixed images for the current commit with the release
  # profile; these are the images the perf-tests job pulls.
  build-perf-artifacts:
    uses: ./.github/workflows/_build_artifacts.yml
    needs:
      - update-release-draft
      - planner
    if: ${{ contains(needs.planner.outputs.jobs_to_run, 'build-perf-artifacts') }}
    secrets: inherit
    with:
      sha: ${{ github.sha }}
      image_prefix: perf
      profile: release
      # Only the debug images have perf tooling
      stage: debug

  # Runs the integration test workflow against the images reported by the
  # build-artifacts job.
  integration-tests:
    needs: build-artifacts
    uses: ./.github/workflows/_integration_tests.yml
    secrets: inherit
    with:
      client_image: ${{ needs.build-artifacts.outputs.client_image }}
      gateway_image: ${{ needs.build-artifacts.outputs.gateway_image }}
      relay_image: ${{ needs.build-artifacts.outputs.relay_image }}
      http_test_server_image: ${{ needs.build-artifacts.outputs.http_test_server_image }}

  # Runs the integration test workflow with the latest published client against
  # two gateways: the one built for this commit and the latest published one.
  compatibility-tests:
    name: compatibility-tests-client(${{ matrix.client.tag }})-gateway(${{ matrix.gateway.ci-name }})
    needs: build-artifacts
    # Don't run compatibility tests when called from hotfix.yml or publish.yml on `main` because
    # it'll be red if there was a breaking change we're trying to publish,
    # and the deploy_production workflow checks for main to be green.
    if: github.event_name == 'pull_request' || github.event_name == 'merge_group'
    uses: ./.github/workflows/_integration_tests.yml
    secrets: inherit
    strategy:
      fail-fast: false
      matrix:
        client:
          - image: 'ghcr.io/firezone/client'
            tag: 'latest'
        gateway:
          - ci-name: sha
            image: ${{ needs.build-artifacts.outputs.gateway_image }}
            tag: ${{ github.sha }}
          - ci-name: latest
            image: 'ghcr.io/firezone/gateway'
            tag: 'latest'
    with:
      client_image: ${{ matrix.client.image }}
      client_tag: ${{ matrix.client.tag }}
      gateway_image: ${{ matrix.gateway.image }}
      gateway_tag: ${{ matrix.gateway.tag }}

  # Runs one performance test per (test, flavour) combination against the
  # docker compose environment and uploads the result as a workflow artifact.
  perf-tests:
    name: perf-tests
    needs: build-perf-artifacts
    runs-on: ubuntu-24.04
    permissions:
      contents: read
      id-token: write
      pull-requests: write
    # Image names and tags for this commit; gateway, client and relay use the
    # `perf/` images. Presumably consumed by docker-compose.yml - confirm.
    env:
      API_IMAGE: "ghcr.io/firezone/api"
      API_TAG: ${{ github.sha }}
      WEB_IMAGE: "ghcr.io/firezone/web"
      WEB_TAG: ${{ github.sha }}
      ELIXIR_IMAGE: "ghcr.io/firezone/elixir"
      ELIXIR_TAG: ${{ github.sha }}
      GATEWAY_IMAGE: "ghcr.io/firezone/perf/gateway"
      GATEWAY_TAG: ${{ github.sha }}
      CLIENT_IMAGE: "ghcr.io/firezone/perf/client"
      CLIENT_TAG: ${{ github.sha }}
      RELAY_IMAGE: "ghcr.io/firezone/perf/relay"
      RELAY_TAG: ${{ github.sha }}
      FIREZONE_INC_BUF: true
    # 4 tests x 2 flavours = 8 jobs; fail-fast is off so one failing
    # combination does not cancel the others.
    strategy:
      fail-fast: false
      matrix:
        test:
          - tcp-client2server
          - tcp-server2client
          - udp-client2server
          - udp-server2client
        flavour:
          - direct
          - relayed
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - uses: ./.github/actions/ghcr-docker-login
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
      # We need at least Docker v28.1 which is not yet available on GitHub actions runners
      - uses: docker/setup-docker-action@b60f85385d03ac8acfca6d9996982511d8620a19 # v4.3.0
      # Runs `mix ecto.seed` inside the `elixir` compose service, with both
      # the regular and the manual migrations paths.
      - name: Seed database
        run: docker compose run elixir /bin/sh -c 'cd apps/domain && mix ecto.seed --migrations-path priv/repo/migrations --migrations-path priv/repo/manual_migrations'
      # Raises the kernel's maximum socket send/receive buffer sizes on the runner.
      - name: Increase max UDP buffer sizes
        run: |
          sudo sysctl -w net.core.wmem_max=16777216 # 16 MB
          sudo sysctl -w net.core.rmem_max=134217728 # 128 MB
      # Rewrites every RUST_LOG entry in docker-compose.yml, exports
      # CLIENT_MASQUERADE and UDP_BITRATE to later steps for the `relayed`
      # flavour, then starts the services in a fixed order. The final
      # `docker compose up veth-config` runs without `-d`, i.e. in the foreground.
      - name: Start docker compose in the background
        run: |
          # We need to increase the log level to make sure that they don't hold off storm of packets
          # generated by UDP tests. Wire is especially chatty.
          sed -i 's/^\(\s*\)RUST_LOG:.*$/\1RUST_LOG: wire=error,opentelemetry_sdk=error,debug/' docker-compose.yml
          grep RUST_LOG docker-compose.yml

          if [ "${{ matrix.flavour }}" = "relayed" ]; then
            echo "CLIENT_MASQUERADE=random" >> "$GITHUB_ENV"
            echo "UDP_BITRATE=300M" >> "$GITHUB_ENV"
          fi

          docker compose build client-router gateway-router relay-1-router relay-2-router api-router

          # Start services in the same order each time for the tests
          docker compose up -d iperf3
          docker compose up -d api web domain --no-build
          docker compose up -d relay-1 relay-2 --no-build
          docker compose up -d gateway --no-build
          docker compose up -d client --no-build
          docker compose up veth-config
      # Runs the test script, then uses jq to reduce `<flavour>-<test>.json` to a
      # `<flavour>-<test>.bmf.json` holding `retransmits` (-1 when the field is
      # absent) and `throughput`, keyed by `<flavour>-<test>`.
      - name: "Performance test: ${{ matrix.flavour }}-${{ matrix.test }}"
        timeout-minutes: 5
        env:
          TEST_NAME: ${{ matrix.flavour }}-${{ matrix.test }}
        run: |
          ./scripts/tests/perf/${{ matrix.test }}.sh
          jq '{ "${{ matrix.flavour }}-${{ matrix.test }}": { "retransmits": { "value": (.end.sum_sent.retransmits // -1) }, "throughput": { "value": .end.sum_received.bits_per_second } } }' ./${{ matrix.flavour }}-${{ matrix.test }}.json > ./${{ matrix.flavour }}-${{ matrix.test }}.bmf.json
      # The artifact name embeds the commit SHA and ends in `-iperf3results`,
      # which is the pattern the upload-bencher job downloads.
      - name: "Save performance test results: ${{ matrix.flavour }}-${{ matrix.test }}"
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          overwrite: true
          name: ${{ matrix.flavour }}-${{ matrix.test }}-${{ github.sha }}-iperf3results
          path: ./${{ matrix.flavour }}-${{ matrix.test }}.bmf.json
      # The log dumps below use `!cancelled()`, so they also run after an
      # earlier step has failed.
      - name: Show Client logs
        if: "!cancelled()"
        run: docker compose logs client
      - name: Show Relay-1 logs
        if: "!cancelled()"
        run: docker compose logs relay-1
      - name: Show Relay-2 logs
        if: "!cancelled()"
        run: docker compose logs relay-2
      - name: Show Gateway logs
        if: "!cancelled()"
        run: docker compose logs gateway
      - name: Show API logs
        if: "!cancelled()"
        run: docker compose logs api
      - name: Show iperf3 logs
        if: "!cancelled()"
        run: docker compose logs iperf3

      # Fails the job when the client, gateway or either relay logged a line
      # containing "WARN". Matching lines are printed by grep.
      - name: Ensure no warnings are logged
        if: "!cancelled()"
        run: |
          # Every service is checked with its own `if`. The previous
          # `grep ... && exit 1 || exit 0` form always exited after the first
          # check, so only the client logs were ever inspected.
          status=0

          for service in client gateway; do
            if docker compose logs "$service" | grep "WARN"; then
              echo "Warnings found in $service logs"
              status=1
            fi
          done

          # BTF doesn't load for veth interfaces
          btf_warning="Object BTF couldn't be loaded in the kernel: the BPF_BTF_LOAD syscall failed."
          for relay in relay-1 relay-2; do
            if docker compose logs "$relay" | grep --invert-match "$btf_warning" | grep "WARN"; then
              echo "Warnings found in $relay logs"
              status=1
            fi
          done

          exit "$status"

      # For the tcp-* tests only: fails the job when `nstat -s` inside the
      # client or gateway container prints a line containing "error"
      # (case-insensitive). Matching lines are printed by grep.
      - name: Ensure no UDP socket errors
        if: "!cancelled() && startsWith(matrix.test, 'tcp')"
        run: |
          # Both services are checked. The previous
          # `grep ... && exit 1 || exit 0` form always exited after the
          # client check, so the gateway was never inspected.
          status=0

          for service in client gateway; do
            if docker compose exec "$service" /bin/sh -c 'nstat -s' | grep -i "error"; then
              echo "Socket errors reported by $service"
              status=1
            fi
          done

          exit "$status"

  # Collects the per-test results uploaded by perf-tests, merges them into one
  # report and submits it with `bencher run`. `continue-on-error` keeps a
  # failure here from failing the workflow run.
  upload-bencher:
    continue-on-error: true
    needs: perf-tests
    runs-on: ubuntu-24.04
    permissions:
      contents: read
      id-token: write
      pull-requests: write
      checks: write
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      # NOTE(review): this tracks a mutable branch while the other third-party
      # actions in this workflow are pinned to a commit SHA - consider pinning.
      - uses: bencherdev/bencher@main
      - name: Download performance test results
        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
        with:
          pattern: "*-${{ github.sha }}-iperf3results"
          merge-multiple: true
          path: ./${{ github.sha }}
      # Deep-merges every downloaded `*.bmf.json` object into a single object.
      - name: Merge benchmarks results into one report
        run: jq -s 'reduce .[] as $item ({}; . * $item)' ./${{ github.sha }}/*.bmf.json > bmf.json
      - name: Report results to bencher
        # Branch names come from the pull request and are therefore not trusted:
        # they are passed as environment variables and read as shell variables,
        # never expanded into the script text.
        env:
          BENCHER_API_TOKEN: ${{ secrets.BENCHER_API_TOKEN }}
          BRANCH: "${{ github.head_ref || github.ref_name }}"
          BRANCH_START_POINT: "${{ github.base_ref }}"
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          bencher run \
            --project firezone-1l75jv1z \
            --testbed github-actions \
            --file bmf.json \
            --adapter json \
            --branch "$BRANCH" \
            --branch-start-point "$BRANCH_START_POINT" \
            --github-actions "$GITHUB_TOKEN" \
            --ci-only-on-alert