diff --git a/.github/workflows/_integration_tests.yml b/.github/workflows/_integration_tests.yml index 590b33286..97a0c682d 100644 --- a/.github/workflows/_integration_tests.yml +++ b/.github/workflows/_integration_tests.yml @@ -108,7 +108,8 @@ jobs: - script: dns-api-down - script: dns-nm - script: dns-two-resources - - script: systemd/dns-systemd-resolved + - name: dns-systemd-resolved + script: systemd/dns-systemd-resolved - script: tcp-dns # Setting both client and gateway to random masquerade will force relay-relay candidate pair - name: download-double-symmetric-nat @@ -116,8 +117,7 @@ jobs: client_masquerade: random gateway_masquerade: random rust_log: debug - stop_containers: relay-2 # Force single relay - skip: true + single_relay: true # Force single relay - script: download-packet-loss rust_log: debug - script: download-roaming-network @@ -160,22 +160,24 @@ jobs: docker compose up -d relay-2 --no-build docker compose up -d gateway --no-build docker compose up -d client --no-build + docker compose up -d network-config - if [[ -n "${{ matrix.test.stop_containers }}" ]]; then - docker compose stop ${{ matrix.test.stop_containers }} + docker compose exec -d relay-1 /bin/sh -c 'xdpdump -i eth0 -w /tmp/packets.pcap --rx-capture entry,exit' + docker compose exec -d relay-2 /bin/sh -c 'xdpdump -i eth0 -w /tmp/packets.pcap --rx-capture entry,exit' + + if [[ -n "${{ matrix.test.single_relay }}" ]]; then + docker compose stop relay-2 fi - # Wait a few seconds for the services to fully start. GH runners are - # slow, so this gives the Client enough time to initialize its tun interface, - # for example. - # Intended to mitigate - sleep 3 + sleep 3 # Let everything settle for a bit - docker compose up veth-config + - name: Disable checksum offloading + run: | + # Force checksum calculation on the host since some tests run on the host + sudo ethtool -K eth0 tx off + sudo ethtool -K docker0 tx off - run: ./scripts/tests/${{ matrix.test.script }}.sh - if: ${{ matrix.test.skip != 'true' }} - - name: Ensure Client emitted no warnings if: "!cancelled()" run: | @@ -209,3 +211,37 @@ jobs: - name: Show API logs if: "!cancelled()" run: docker compose logs api + + - name: Ensure no eBPF checksum errors on relay-1 + if: "!cancelled()" + run: | + set -xe + + docker compose exec relay-1 pkill xdpdump + docker compose cp relay-1:/tmp/packets.pcap ./relay-1-packets.pcap + + ! tcpdump -nnnr ./relay-1-packets.pcap -v | grep "bad \w* cksum" + + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + if: "!success()" + with: + overwrite: true + name: ${{ matrix.test.name || matrix.test.script }}-relay-1-xdpdump + path: ./relay-1-packets.pcap + + - name: Ensure no eBPF checksum errors on relay-2 + if: "!cancelled() && !matrix.test.single_relay" + run: | + set -xe + + docker compose exec relay-2 pkill xdpdump + docker compose cp relay-2:/tmp/packets.pcap ./relay-2-packets.pcap + + ! tcpdump -nnnr ./relay-2-packets.pcap -v | grep "bad \w* cksum" + + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + if: "!success() && !matrix.test.single_relay" + with: + overwrite: true + name: ${{ matrix.test.name || matrix.test.script }}-relay-2-xdpdump + path: ./relay-2-packets.pcap diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b59405ef5..41f02222d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -335,7 +335,7 @@ jobs: docker compose up -d relay-1 relay-2 --no-build docker compose up -d gateway --no-build docker compose up -d client --no-build - docker compose up veth-config + docker compose up -d network-config - name: "Performance test: ${{ matrix.flavour }}-${{ matrix.test }}" timeout-minutes: 5 env: diff --git a/docker-compose.yml b/docker-compose.yml index 9732b5f24..433909006 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -326,6 +326,9 @@ services: ip -4 route add 203.0.113.0/24 via 172.29.1.254 ip -6 route add 203:0:113::/64 via 172:29:1::254 + apk add --no-cache ethtool + ethtool -K eth0 tx off + firezone-relay depends_on: relay-1-router: @@ -373,6 +376,9 @@ services: ip -4 route add 203.0.113.0/24 via 172.29.2.254 ip -6 route add 203:0:113::/64 via 172:29:2::254 + apk add --no-cache ethtool + ethtool -K eth0 tx off + firezone-relay depends_on: relay-2-router: @@ -411,7 +417,10 @@ services: # The "recommended" way to do this is to set both veth interfaces' GRO to on, or attach an XDP program # that does XDP_PASS to the host side veth interface. The GRO method is not reliable and was shown to # only pass packets in large bursts every 15-20 seconds which breaks ICE setup, so we use the XDP method. - veth-config: + # + # For correct behaviour, we also disable any kind of offloading for all veth and bridge devices. + # This forces the kernel to calculate all checksums in software. + network-config: image: ghcr.io/firezone/xdp-pass pid: host network_mode: host @@ -423,16 +432,25 @@ services: - | set -e - VETHS=$$(ip link show type veth | grep '^[0-9]' | awk '{print $$2}' | cut -d: -f1 | cut -d@ -f1) + VETHS=$$(ip -json link show type veth | jq -r '.[].ifname') - # Safe to attach to all veth interfaces on the host for dev in $$VETHS; do echo "Attaching XDP to: $$dev" ip link set dev $$dev xdpdrv off # Clear any existing XDP program. ip link set dev $$dev xdpdrv obj /xdp/xdp_pass.o sec xdp + + ethtool -K $$dev tx off # Disable offloading. done echo "Done configuring $$(echo "$$VETHS" | wc -w) veth interfaces" + + BRIDGES=$$(ip -json link show type bridge | jq -r '.[].ifname') + + for dev in $$BRIDGES; do + ethtool -K $$dev tx off # Disable offloading. + done + + echo "Done configuring $$(echo "$$BRIDGES" | wc -w) bridge interfaces" depends_on: relay-1: condition: "service_healthy" diff --git a/rust/relay/ebpf-turn-router/src/try_handle_turn/from_ipv4_channel/to_ipv6_channel.rs b/rust/relay/ebpf-turn-router/src/try_handle_turn/from_ipv4_channel/to_ipv6_channel.rs index 96e31cf62..47a3f24d1 100644 --- a/rust/relay/ebpf-turn-router/src/try_handle_turn/from_ipv4_channel/to_ipv6_channel.rs +++ b/rust/relay/ebpf-turn-router/src/try_handle_turn/from_ipv4_channel/to_ipv6_channel.rs @@ -55,7 +55,12 @@ pub fn to_ipv6_channel( }; let (old_channel_number, old_channel_data_length) = { - let old_cd = unsafe { ref_mut_at::(ctx, EthHdr::LEN + Ipv4Hdr::LEN + UdpHdr::LEN)? }; + let old_cd = unsafe { + ref_mut_at::( + ctx, + old_data_offset + EthHdr::LEN + Ipv4Hdr::LEN + UdpHdr::LEN, + )? + }; (old_cd.number(), old_cd.length()) }; diff --git a/scripts/tests/download-roaming-network.sh b/scripts/tests/download-roaming-network.sh index bdeace270..e45de6beb 100755 --- a/scripts/tests/download-roaming-network.sh +++ b/scripts/tests/download-roaming-network.sh @@ -25,6 +25,9 @@ docker network connect firezone_client-internal firezone-client-1 --ip 172.30.0. client ip -4 route add 203.0.113.0/24 via 172.30.0.254 client ip -6 route add 203:0:113::/64 via 172:30:0::254 +# Disable checksum offload again to calculate checksums in software so that checksum verification passes +client ethtool -K eth0 tx off + # Send SIGHUP, triggering `reconnect` internally sudo kill -s HUP "$(ps -C firezone-headless-client -o pid=)"