From 9865e033438dc8224cc76a0afbb88977869a4052 Mon Sep 17 00:00:00 2001
From: Thomas Eizinger <thomas@eizinger.io>
Date: Thu, 25 Sep 2025 07:53:17 +0000
Subject: [PATCH] ci: fix double symmetric NAT test failure (#10410)

As it turns out, the flaky test was caused by a bug in the eBPF kernel where we read the old channel data header from the wrong offset. This made us essentially read garbage data for the channel number, causing us to:

a. Compute a bad checksum
b. Send the packet on a completely wrong channel

The reason this caused a flaky test is that it requires on side to pick IPv4 to talk to the relay and the other side IPv6. The happy-eyeballs approach of the `allocation` module made that non-deterministic, only exposing this bug occasionally.

To ensure these kind of things are detected earlier in the future, I am adding an additional CI step that checks all packets emitted by the eBPF kernel for checksum errors.

Fixes: #10404

Co-authored-by: Jamil Bou Kheir <jamilbk@users.noreply.github.com>
---
 .github/workflows/_integration_tests.yml      | 62 +++++++++++++++----
 .github/workflows/ci.yml                      |  2 +-
 docker-compose.yml                            | 24 ++++++-
 .../from_ipv4_channel/to_ipv6_channel.rs      |  7 ++-
 scripts/tests/download-roaming-network.sh     |  3 +
 5 files changed, 80 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/_integration_tests.yml b/.github/workflows/_integration_tests.yml
index 590b33286..97a0c682d 100644
--- a/.github/workflows/_integration_tests.yml
+++ b/.github/workflows/_integration_tests.yml
@@ -108,7 +108,8 @@ jobs:
           - script: dns-api-down
           - script: dns-nm
           - script: dns-two-resources
-          - script: systemd/dns-systemd-resolved
+          - name: dns-systemd-resolved
+            script: systemd/dns-systemd-resolved
           - script: tcp-dns
             # Setting both client and gateway to random masquerade will force relay-relay candidate pair
           - name: download-double-symmetric-nat
@@ -116,8 +117,7 @@ jobs:
             client_masquerade: random
             gateway_masquerade: random
             rust_log: debug
-            stop_containers: relay-2 # Force single relay
-            skip: true
+            single_relay: true # Force single relay
           - script: download-packet-loss
             rust_log: debug
           - script: download-roaming-network
@@ -160,22 +160,24 @@ jobs:
           docker compose up -d relay-2 --no-build
           docker compose up -d gateway --no-build
           docker compose up -d client --no-build
+          docker compose up -d network-config
 
-          if [[ -n "${{ matrix.test.stop_containers }}" ]]; then
-            docker compose stop ${{ matrix.test.stop_containers }}
+          docker compose exec -d relay-1 /bin/sh -c 'xdpdump -i eth0 -w /tmp/packets.pcap --rx-capture entry,exit'
+          docker compose exec -d relay-2 /bin/sh -c 'xdpdump -i eth0 -w /tmp/packets.pcap --rx-capture entry,exit'
+
+          if [[ -n "${{ matrix.test.single_relay }}" ]]; then
+            docker compose stop relay-2
           fi
 
-          # Wait a few seconds for the services to fully start. GH runners are
-          # slow, so this gives the Client enough time to initialize its tun interface,
-          # for example.
-          # Intended to mitigate <https://github.com/firezone/firezone/issues/5830>
-          sleep 3
+          sleep 3 # Let everything settle for a bit
 
-          docker compose up veth-config
+      - name: Disable checksum offloading
+        run: |
+          # Force checksum calculation on the host since some tests run on the host
+          sudo ethtool -K eth0 tx off
+          sudo ethtool -K docker0 tx off
 
       - run: ./scripts/tests/${{ matrix.test.script }}.sh
-        if: ${{ matrix.test.skip != 'true' }}
-
       - name: Ensure Client emitted no warnings
         if: "!cancelled()"
         run: |
@@ -209,3 +211,37 @@ jobs:
       - name: Show API logs
         if: "!cancelled()"
         run: docker compose logs api
+
+      - name: Ensure no eBPF checksum errors on relay-1
+        if: "!cancelled()"
+        run: |
+          set -xe
+
+          docker compose exec relay-1 pkill xdpdump
+          docker compose cp relay-1:/tmp/packets.pcap ./relay-1-packets.pcap
+
+          ! tcpdump -nnnr ./relay-1-packets.pcap -v | grep "bad \w* cksum"
+
+      - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        if: "!success()"
+        with:
+          overwrite: true
+          name: ${{ matrix.test.name || matrix.test.script }}-relay-1-xdpdump
+          path: ./relay-1-packets.pcap
+
+      - name: Ensure no eBPF checksum errors on relay-2
+        if: "!cancelled() && !matrix.test.single_relay"
+        run: |
+          set -xe
+
+          docker compose exec relay-2 pkill xdpdump
+          docker compose cp relay-2:/tmp/packets.pcap ./relay-2-packets.pcap
+
+          ! tcpdump -nnnr ./relay-2-packets.pcap -v | grep "bad \w* cksum"
+
+      - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        if: "!success() && !matrix.test.single_relay"
+        with:
+          overwrite: true
+          name: ${{ matrix.test.name || matrix.test.script }}-relay-2-xdpdump
+          path: ./relay-2-packets.pcap
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b59405ef5..41f02222d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -335,7 +335,7 @@ jobs:
           docker compose up -d relay-1 relay-2 --no-build
           docker compose up -d gateway --no-build
           docker compose up -d client --no-build
-          docker compose up veth-config
+          docker compose up -d network-config
       - name: "Performance test: ${{ matrix.flavour }}-${{ matrix.test }}"
         timeout-minutes: 5
         env:
diff --git a/docker-compose.yml b/docker-compose.yml
index 9732b5f24..433909006 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -326,6 +326,9 @@ services:
         ip -4 route add 203.0.113.0/24 via 172.29.1.254
         ip -6 route add 203:0:113::/64 via 172:29:1::254
 
+        apk add --no-cache ethtool
+        ethtool -K eth0 tx off
+
         firezone-relay
     depends_on:
       relay-1-router:
@@ -373,6 +376,9 @@ services:
         ip -4 route add 203.0.113.0/24 via 172.29.2.254
         ip -6 route add 203:0:113::/64 via 172:29:2::254
 
+        apk add --no-cache ethtool
+        ethtool -K eth0 tx off
+
         firezone-relay
     depends_on:
       relay-2-router:
@@ -411,7 +417,10 @@ services:
   # The "recommended" way to do this is to set both veth interfaces' GRO to on, or attach an XDP program
   # that does XDP_PASS to the host side veth interface. The GRO method is not reliable and was shown to
   # only pass packets in large bursts every 15-20 seconds which breaks ICE setup, so we use the XDP method.
-  veth-config:
+  #
+  # For correct behaviour, we also disable any kind of offloading for all veth and bridge devices.
+  # This forces the kernel to calculate all checksums in software.
+  network-config:
     image: ghcr.io/firezone/xdp-pass
     pid: host
     network_mode: host
@@ -423,16 +432,25 @@ services:
       - |
         set -e
 
-        VETHS=$$(ip link show type veth | grep '^[0-9]' | awk '{print $$2}' | cut -d: -f1 | cut -d@ -f1)
+        VETHS=$$(ip -json link show type veth | jq -r '.[].ifname')
 
-        # Safe to attach to all veth interfaces on the host
         for dev in $$VETHS; do
           echo "Attaching XDP to: $$dev"
           ip link set dev $$dev xdpdrv off # Clear any existing XDP program.
           ip link set dev $$dev xdpdrv obj /xdp/xdp_pass.o sec xdp
+
+          ethtool -K $$dev tx off # Disable offloading.
         done
 
         echo "Done configuring $$(echo "$$VETHS" | wc -w) veth interfaces"
+
+        BRIDGES=$$(ip -json link show type bridge | jq -r '.[].ifname')
+
+        for dev in $$BRIDGES; do
+          ethtool -K $$dev tx off # Disable offloading.
+        done
+
+        echo "Done configuring $$(echo "$$BRIDGES" | wc -w) bridge interfaces"
     depends_on:
       relay-1:
         condition: "service_healthy"
diff --git a/rust/relay/ebpf-turn-router/src/try_handle_turn/from_ipv4_channel/to_ipv6_channel.rs b/rust/relay/ebpf-turn-router/src/try_handle_turn/from_ipv4_channel/to_ipv6_channel.rs
index 96e31cf62..47a3f24d1 100644
--- a/rust/relay/ebpf-turn-router/src/try_handle_turn/from_ipv4_channel/to_ipv6_channel.rs
+++ b/rust/relay/ebpf-turn-router/src/try_handle_turn/from_ipv4_channel/to_ipv6_channel.rs
@@ -55,7 +55,12 @@ pub fn to_ipv6_channel(
     };
 
     let (old_channel_number, old_channel_data_length) = {
-        let old_cd = unsafe { ref_mut_at::<CdHdr>(ctx, EthHdr::LEN + Ipv4Hdr::LEN + UdpHdr::LEN)? };
+        let old_cd = unsafe {
+            ref_mut_at::<CdHdr>(
+                ctx,
+                old_data_offset + EthHdr::LEN + Ipv4Hdr::LEN + UdpHdr::LEN,
+            )?
+        };
 
         (old_cd.number(), old_cd.length())
     };
diff --git a/scripts/tests/download-roaming-network.sh b/scripts/tests/download-roaming-network.sh
index bdeace270..e45de6beb 100755
--- a/scripts/tests/download-roaming-network.sh
+++ b/scripts/tests/download-roaming-network.sh
@@ -25,6 +25,9 @@ docker network connect firezone_client-internal firezone-client-1 --ip 172.30.0.
 client ip -4 route add 203.0.113.0/24 via 172.30.0.254
 client ip -6 route add 203:0:113::/64 via 172:30:0::254
 
+# Disable checksum offload again to calculate checksums in software so that checksum verification passes
+client ethtool -K eth0 tx off
+
 # Send SIGHUP, triggering `reconnect` internally
 sudo kill -s HUP "$(ps -C firezone-headless-client -o pid=)"