From d1d46fdfb43819c2a4d3b504d45249f3c7176989 Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Wed, 10 Sep 2025 23:37:16 +0000 Subject: [PATCH] ci: create a more realistic network setup (#10301) Currently, the setup we have in docker-compose does not reflect real-world scenarios very well because most components share the same subnet. In reality, Clients, Gateways, relays and the backend are all in separate subnets, connected via multiple routers on the Internet. The current setup makes it hard to properly test relayed connections. To fix this, we move all components into their own subnet with a dedicated router container that performs source and destination NAT as well as acts as a firewall for the client and gateway containers to not allow inbound traffic. This setup will allow us to more easily test #10286 which requires port randomization for outgoing traffic on the Client and Gateway side. --- .github/workflows/_integration_tests.yml | 4 + .github/workflows/ci.yml | 48 +-- docker-compose.yml | 312 ++++++++++++------ scripts/router/Dockerfile | 12 + scripts/router/README.md | 15 + scripts/router/router.nft | 36 ++ scripts/router/router.sh | 78 +++++ .../tests/direct-download-roaming-network.sh | 10 +- scripts/tests/lib.sh | 49 --- .../tests/perf/direct-udp-client2server.sh | 15 - .../tests/perf/relayed-tcp-client2server.sh | 16 - .../tests/perf/relayed-tcp-server2client.sh | 17 - .../tests/perf/relayed-udp-client2server.sh | 18 - .../tests/perf/relayed-udp-server2client.sh | 19 -- ...-client2server.sh => tcp-client2server.sh} | 0 ...-server2client.sh => tcp-server2client.sh} | 0 scripts/tests/perf/udp-client2server.sh | 15 + ...-server2client.sh => udp-server2client.sh} | 6 +- scripts/tests/systemd/dns-systemd-resolved.sh | 3 +- 19 files changed, 413 insertions(+), 260 deletions(-) create mode 100644 scripts/router/Dockerfile create mode 100644 scripts/router/README.md create mode 100644 scripts/router/router.nft create mode 100644 scripts/router/router.sh 
delete mode 100755 scripts/tests/perf/direct-udp-client2server.sh delete mode 100755 scripts/tests/perf/relayed-tcp-client2server.sh delete mode 100755 scripts/tests/perf/relayed-tcp-server2client.sh delete mode 100755 scripts/tests/perf/relayed-udp-client2server.sh delete mode 100755 scripts/tests/perf/relayed-udp-server2client.sh rename scripts/tests/perf/{direct-tcp-client2server.sh => tcp-client2server.sh} (100%) rename scripts/tests/perf/{direct-tcp-server2client.sh => tcp-server2client.sh} (100%) create mode 100755 scripts/tests/perf/udp-client2server.sh rename scripts/tests/perf/{direct-udp-server2client.sh => udp-server2client.sh} (61%) diff --git a/.github/workflows/_integration_tests.yml b/.github/workflows/_integration_tests.yml index 784c77315..211418196 100644 --- a/.github/workflows/_integration_tests.yml +++ b/.github/workflows/_integration_tests.yml @@ -118,6 +118,8 @@ jobs: - uses: ./.github/actions/ghcr-docker-login with: github_token: ${{ secrets.GITHUB_TOKEN }} + # We need at least Docker v28.1 which is not yet available on GitHub actions runners + - uses: docker/setup-docker-action@b60f85385d03ac8acfca6d9996982511d8620a19 # v4.3.0 - name: Seed database run: docker compose run elixir /bin/sh -c 'cd apps/domain && mix ecto.migrate --migrations-path priv/repo/migrations --migrations-path priv/repo/manual_migrations && mix ecto.seed' - name: Start docker compose in the background @@ -128,6 +130,8 @@ jobs: export RUST_LOG="${{ matrix.test.rust_log }}" fi + docker compose build client-router gateway-router relay-1-router relay-2-router api-router + # Start one-by-one to avoid variability in service startup order docker compose up -d dns.httpbin.search.test --no-build docker compose up -d httpbin --no-build diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 759a8ed22..71b630aca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -295,27 +295,24 @@ jobs: CLIENT_TAG: ${{ github.sha }} RELAY_IMAGE: 
"ghcr.io/firezone/perf/relay" RELAY_TAG: ${{ github.sha }} - RELAY_1_PUBLIC_IP4_ADDR: 172.29.0.101 - RELAY_1_PUBLIC_IP6_ADDR: 172:29:0::101 - RELAY_2_PUBLIC_IP4_ADDR: 172.29.0.102 - RELAY_2_PUBLIC_IP6_ADDR: 172:29:0::102 strategy: fail-fast: false matrix: - test_name: - - direct-tcp-client2server - - direct-tcp-server2client - - direct-udp-client2server - - direct-udp-server2client - - relayed-tcp-client2server - - relayed-tcp-server2client - - relayed-udp-client2server - - relayed-udp-server2client + test: + - tcp-client2server + - tcp-server2client + - udp-client2server + - udp-server2client + flavour: + - direct + - relayed steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: ./.github/actions/ghcr-docker-login with: github_token: ${{ secrets.GITHUB_TOKEN }} + # We need at least Docker v28.1 which is not yet available on GitHub actions runners + - uses: docker/setup-docker-action@b60f85385d03ac8acfca6d9996982511d8620a19 # v4.3.0 - name: Seed database run: docker compose run elixir /bin/sh -c 'cd apps/domain && mix ecto.seed --migrations-path priv/repo/migrations --migrations-path priv/repo/manual_migrations' - name: Start docker compose in the background @@ -325,11 +322,17 @@ jobs: sed -i 's/^\(\s*\)RUST_LOG:.*$/\1RUST_LOG: wire=error,opentelemetry_sdk=error,debug/' docker-compose.yml grep RUST_LOG docker-compose.yml + if [ "${{ matrix.flavour }}" = "relayed" ]; then + echo "CLIENT_MASQUERADE=random" >> "$GITHUB_ENV" + echo "UDP_BITRATE=300M" >> "$GITHUB_ENV" + fi + + docker compose build client-router gateway-router relay-1-router relay-2-router api-router + # Start services in the same order each time for the tests docker compose up -d iperf3 docker compose up -d api web domain --no-build - docker compose up -d relay-1 --no-build - docker compose up -d relay-2 --no-build + docker compose up -d relay-1 relay-2 --no-build docker compose up -d gateway --no-build docker compose up -d client --no-build docker compose up 
veth-config @@ -338,20 +341,19 @@ jobs: docker compose exec -T client tc qdisc add dev eth0 root netem delay 10ms docker compose exec -T gateway tc qdisc add dev eth0 root netem delay 10ms docker compose exec -T relay-1 tc qdisc add dev eth0 root netem delay 10ms - docker compose exec -T relay-2 tc qdisc add dev eth0 root netem delay 10ms - - name: "Performance test: ${{ matrix.test_name }}" + - name: "Performance test: ${{ matrix.flavour }}-${{ matrix.test }}" timeout-minutes: 5 env: - TEST_NAME: ${{ matrix.test_name }} + TEST_NAME: ${{ matrix.flavour }}-${{ matrix.test }} run: | - ./scripts/tests/perf/${{ matrix.test_name }}.sh - jq '{ "${{ matrix.test_name }}": { "retransmits": { "value": (.end.sum_sent.retransmits // -1) }, "throughput": { "value": .end.sum_received.bits_per_second } } }' ./${{ matrix.test_name }}.json > ./${{ matrix.test_name }}.bmf.json - - name: "Save performance test results: ${{ matrix.test_name }}" + ./scripts/tests/perf/${{ matrix.test }}.sh + jq '{ "${{ matrix.flavour }}-${{ matrix.test }}": { "retransmits": { "value": (.end.sum_sent.retransmits // -1) }, "throughput": { "value": .end.sum_received.bits_per_second } } }' ./${{ matrix.flavour }}-${{ matrix.test }}.json > ./${{ matrix.flavour }}-${{ matrix.test }}.bmf.json + - name: "Save performance test results: ${{ matrix.flavour }}-${{ matrix.test }}" uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: overwrite: true - name: ${{ matrix.test_name }}-${{ github.sha }}-iperf3results - path: ./${{ matrix.test_name }}.bmf.json + name: ${{ matrix.flavour }}-${{ matrix.test }}-${{ github.sha }}-iperf3results + path: ./${{ matrix.flavour }}-${{ matrix.test }}.bmf.json - name: Show Client logs if: "!cancelled()" run: docker compose logs client diff --git a/docker-compose.yml b/docker-compose.yml index 151170973..e3723e70a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,12 @@ # Run with DOCKER_BUILD_TARGET=dev to build Rust inside Docker + 
+x-ip-forwarding: &ip-forwarding + - net.ipv4.ip_forward=1 + - net.ipv6.conf.all.disable_ipv6=0 + - net.ipv6.conf.default.disable_ipv6=0 + - net.ipv6.conf.all.forwarding=1 + - net.ipv6.conf.default.forwarding=1 + services: # Dependencies postgres: @@ -21,7 +29,7 @@ services: ports: - 5432:5432/tcp networks: - - app + - app-internal vault: image: vault:1.13.3 @@ -34,7 +42,7 @@ services: cap_add: - IPC_LOCK networks: - - app + - app-internal healthcheck: test: [ @@ -50,7 +58,6 @@ services: retries: 10 start_period: 5s - # Firezone Components web: build: context: elixir @@ -118,8 +125,10 @@ services: condition: "service_healthy" postgres: condition: "service_healthy" + api-router: + condition: "service_healthy" networks: - - app + - app-internal api: build: @@ -186,9 +195,9 @@ services: - | set -e - # Add static route to relay subnet via router - ip route add 172.29.0.0/24 via 172.28.0.254 - ip -6 route add 172:29:0::/64 via 172:28:0::254 + # Add static route to internet subnet via router + ip -4 route add 203.0.113.0/24 via 172.28.0.254 + ip -6 route add 203:0:113::/64 via 172:28:0::254 exec su default -c "bin/server" depends_on: @@ -196,6 +205,8 @@ services: condition: "service_healthy" postgres: condition: "service_healthy" + api-router: + condition: "service_healthy" healthcheck: test: ["CMD-SHELL", "curl -f localhost:8081/healthz"] start_period: 10s @@ -203,9 +214,30 @@ services: retries: 5 timeout: 5s networks: - app: - ipv4_address: 172.28.0.10 - ipv6_address: 172:28:0::10 + app-internal: + ipv4_address: 172.28.0.100 + ipv6_address: 172:28:0::100 + + api-router: + build: + context: scripts/router + cap_add: + - NET_ADMIN + sysctls: *ip-forwarding + environment: + PORT_FORWARDS: | + 8081 172.28.0.100 tcp + 8081 172:28:0::100 tcp + MASQUERADE_TYPE: "" + networks: + app-internal: + ipv4_address: 172.28.0.254 + ipv6_address: 172:28:0::254 + interface_name: internal + internet: + ipv4_address: 203.0.113.10 + ipv6_address: 203:0:113::10 + interface_name: internet 
domain: build: @@ -267,7 +299,7 @@ services: postgres: condition: "service_healthy" networks: - - app + - app-internal # This is a service container which allows to run mix tasks for local development # without having to install Elixir and Erlang on the host machine. @@ -328,7 +360,7 @@ services: postgres: condition: "service_healthy" networks: - - app + - app-internal # Run with DOCKER_BUILD_TARGET=dev to build Rust inside Docker client: @@ -344,9 +376,9 @@ services: - | set -e - # Add static route to relay subnet via router - ip route add 172.29.0.0/24 via 172.28.0.254 - ip -6 route add 172:29:0::/64 via 172:28:0::254 + # Add static route to internet subnet via router + ip -4 route add 203.0.113.0/24 via 172.30.0.254 + ip -6 route add 203:0:113::/64 via 172:30:0::254 # Disable checksum offloading so that checksums are correct when they reach the relay apk add --no-cache ethtool @@ -370,14 +402,34 @@ services: devices: - "/dev/net/tun:/dev/net/tun" depends_on: - router: - condition: "service_started" + client-router: + condition: "service_healthy" api: condition: "service_healthy" networks: - app: - ipv4_address: 172.28.0.100 - ipv6_address: 172:28:0::100 + client-internal: + ipv4_address: 172.30.0.100 + ipv6_address: 172:30:0::100 + extra_hosts: + - "api:203.0.113.10" + - "api:203:0:113::10" + + # Client Router (NAT only, symmetric NAT) + client-router: + build: + context: scripts/router + cap_add: + - NET_ADMIN + sysctls: *ip-forwarding + environment: + MASQUERADE_TYPE: ${CLIENT_MASQUERADE:-} + networks: + client-internal: + ipv4_address: 172.30.0.254 + ipv6_address: 172:30:0::254 + interface_name: internal + internet: + interface_name: internet gateway: healthcheck: @@ -385,7 +437,6 @@ services: environment: FIREZONE_TOKEN: 
".SFMyNTY.g2gDaANtAAAAJGM4OWJjYzhjLTkzOTItNGRhZS1hNDBkLTg4OGFlZjZkMjhlMG0AAAAkMjI3NDU2MGItZTk3Yi00NWU0LThiMzQtNjc5Yzc2MTdlOThkbQAAADhPMDJMN1VTMkozVklOT01QUjlKNklMODhRSVFQNlVPOEFRVk82VTVJUEwwVkpDMjJKR0gwPT09PW4GAAH8sImUAWIAAVGA.tAm2O9FcyF67VAF3rZdwQpeADrYOIs3S2l2K51G26OM" RUST_LOG: ${RUST_LOG:-phoenix_channel=trace,firezone_gateway=trace,wire=trace,connlib_gateway_shared=trace,firezone_tunnel=trace,connlib_shared=trace,phoenix_channel=debug,boringtun=debug,snownet=debug,str0m=debug,info} - FIREZONE_ENABLE_MASQUERADE: 1 # FIXME: NOOP in latest version. Remove after next release. FIREZONE_API_URL: ws://api:8081 FIREZONE_ID: 4694E56C-7643-4A15-9DF3-638E5B05F570 command: @@ -394,9 +445,9 @@ services: - | set -e - # Add static route to relay subnet via router - ip route add 172.29.0.0/24 via 172.28.0.254 - ip -6 route add 172:29:0::/64 via 172:28:0::254 + # Add static route to internet subnet via router + ip -4 route add 203.0.113.0/24 via 172.31.0.254 + ip -6 route add 203:0:113::/64 via 172:31:0::254 # Disable checksum offloading so that checksums are correct when they reach the relay apk add --no-cache ethtool @@ -415,26 +466,40 @@ services: image: ${GATEWAY_IMAGE:-ghcr.io/firezone/debug/gateway}:${GATEWAY_TAG:-main} cap_add: - NET_ADMIN - sysctls: - - net.ipv4.ip_forward=1 - - net.ipv4.conf.all.src_valid_mark=1 - - net.ipv6.conf.all.disable_ipv6=0 - - net.ipv6.conf.default.disable_ipv6=0 - - net.ipv6.conf.all.forwarding=1 - - net.ipv6.conf.default.forwarding=1 + sysctls: *ip-forwarding devices: - "/dev/net/tun:/dev/net/tun" depends_on: - router: - condition: "service_started" + gateway-router: + condition: "service_healthy" api: condition: "service_healthy" networks: - app: - ipv4_address: 172.28.0.105 - ipv6_address: 172:28:0::105 + gateway-internal: + ipv4_address: 172.31.0.100 + ipv6_address: 172:31:0::100 dns_resources: resources: + extra_hosts: + - "api:203.0.113.10" + - "api:203:0:113::10" + + # Gateway Router (NAT only, cone NAT) + gateway-router: + build: + 
context: scripts/router + cap_add: + - NET_ADMIN + sysctls: *ip-forwarding + environment: + MASQUERADE_TYPE: ${GATEWAY_MASQUERADE:-} + networks: + gateway-internal: + ipv4_address: 172.31.0.254 + ipv6_address: 172:31:0::254 + interface_name: internal + internet: + interface_name: internet httpbin: image: kennethreitz/httpbin @@ -485,8 +550,8 @@ services: relay-1: environment: - PUBLIC_IP4_ADDR: ${RELAY_1_PUBLIC_IP4_ADDR:-172.29.0.101} - PUBLIC_IP6_ADDR: ${RELAY_1_PUBLIC_IP6_ADDR:-172:29:0::101} + PUBLIC_IP4_ADDR: 203.0.113.101 + PUBLIC_IP6_ADDR: 203:0:113::101 # LOWEST_PORT: 55555 # HIGHEST_PORT: 55666 # Token for self-hosted Relay @@ -495,7 +560,7 @@ services: FIREZONE_TOKEN: ".SFMyNTY.g2gDaAN3A25pbG0AAAAkZTgyZmNkYzEtMDU3YS00MDE1LWI5MGItM2IxOGYwZjI4MDUzbQAAADhDMTROR0E4N0VKUlIwM0c0UVBSMDdBOUM2Rzc4NFRTU1RIU0Y0VEk1VDBHRDhENkwwVlJHPT09PW4GAOb7sImUAWIAAVGA.e_k2YXxBOSmqVSu5RRscjZJBkZ7OAGzkpr5X2ge1MNo" RUST_LOG: ${RUST_LOG:-debug} RUST_BACKTRACE: 1 - FIREZONE_API_URL: ws://172.28.0.10:8081 + FIREZONE_API_URL: ws://api:8081 OTLP_GRPC_ENDPOINT: otel:4317 EBPF_OFFLOADING: eth0 command: @@ -504,9 +569,9 @@ services: - | set -e - # Add static route to app subnet via router - ip route add 172.28.0.0/24 via 172.29.0.254 - ip -6 route add 172:28:0::/64 via 172:29:0::254 + # Add static route to internet subnet via router + ip -4 route add 203.0.113.0/24 via 172.29.1.254 + ip -6 route add 203:0:113::/64 via 172:29:1::254 firezone-relay privileged: true @@ -528,8 +593,8 @@ services: retries: 5 timeout: 5s depends_on: - router: - condition: "service_started" + relay-1-router: + condition: "service_healthy" api: condition: "service_healthy" # ports: @@ -540,21 +605,46 @@ services: # - "55555-55666:55555-55666/udp" # - 3478:3478/udp networks: - relays: - ipv4_address: ${RELAY_1_PUBLIC_IP4_ADDR:-172.29.0.101} - ipv6_address: ${RELAY_1_PUBLIC_IP6_ADDR:-172:29:0::101} + relay-1-internal: + ipv4_address: 172.29.1.100 + ipv6_address: 172:29:1::100 + extra_hosts: + - "api:203.0.113.10" + - 
"api:203:0:113::10" + + relay-1-router: + build: + context: scripts/router + cap_add: + - NET_ADMIN + sysctls: *ip-forwarding + environment: + PORT_FORWARDS: | + 3478 172.29.1.100 udp + 49152-65535 172.29.1.100 udp + 3478 172:29:1::100 udp + 49152-65535 172:29:1::100 udp + networks: + relay-1-internal: + ipv4_address: 172.29.1.254 + ipv6_address: 172:29:1::254 + interface_name: internal + internet: + ipv4_address: 203.0.113.101 + ipv6_address: 203:0:113::101 + interface_name: internet relay-2: environment: - PUBLIC_IP4_ADDR: ${RELAY_2_PUBLIC_IP4_ADDR:-172.29.0.102} - PUBLIC_IP6_ADDR: ${RELAY_2_PUBLIC_IP6_ADDR:-172:29:0::102} + PUBLIC_IP4_ADDR: 203.0.113.102 + PUBLIC_IP6_ADDR: 203:0:113::102 # Token for self-hosted Relay # FIREZONE_TOKEN: ".SFMyNTY.g2gDaANtAAAAJGM4OWJjYzhjLTkzOTItNGRhZS1hNDBkLTg4OGFlZjZkMjhlMG0AAAAkNTQ5YzQxMDctMTQ5Mi00ZjhmLWE0ZWMtYTlkMmE2NmQ4YWE5bQAAADhQVTVBSVRFMU84VkRWTk1ITU9BQzc3RElLTU9HVERJQTY3MlM2RzFBQjAyT1MzNEg1TUUwPT09PW4GAEngLBONAWIAAVGA.E-f2MFdGMX7JTL2jwoHBdWcUd2G3UNz2JRZLbQrlf0k" # Token for global Relay FIREZONE_TOKEN: ".SFMyNTY.g2gDaAN3A25pbG0AAAAkZTgyZmNkYzEtMDU3YS00MDE1LWI5MGItM2IxOGYwZjI4MDUzbQAAADhDMTROR0E4N0VKUlIwM0c0UVBSMDdBOUM2Rzc4NFRTU1RIU0Y0VEk1VDBHRDhENkwwVlJHPT09PW4GAOb7sImUAWIAAVGA.e_k2YXxBOSmqVSu5RRscjZJBkZ7OAGzkpr5X2ge1MNo" RUST_LOG: ${RUST_LOG:-debug} RUST_BACKTRACE: 1 - FIREZONE_API_URL: ws://172.28.0.10:8081 + FIREZONE_API_URL: ws://api:8081 OTLP_GRPC_ENDPOINT: otel:4317 EBPF_OFFLOADING: eth0 command: @@ -563,9 +653,9 @@ services: - | set -e - # Add static route to app subnet via router - ip route add 172.28.0.0/24 via 172.29.0.254 - ip -6 route add 172:28:0::/64 via 172:29:0::254 + # Add static route to internet subnet via router + ip -4 route add 203.0.113.0/24 via 172.29.2.254 + ip -6 route add 203:0:113::/64 via 172:29:2::254 firezone-relay privileged: true @@ -587,36 +677,39 @@ services: retries: 5 timeout: 5s depends_on: - router: - condition: "service_started" + relay-2-router: + condition: "service_healthy" api: 
condition: "service_healthy" networks: - relays: - ipv4_address: ${RELAY_2_PUBLIC_IP4_ADDR:-172.29.0.102} - ipv6_address: ${RELAY_2_PUBLIC_IP6_ADDR:-172:29:0::102} + relay-2-internal: + ipv4_address: 172.29.2.100 + ipv6_address: 172:29:2::100 + extra_hosts: + - "api:203.0.113.10" + - "api:203:0:113::10" - # Relays in prod always talk to a router to reach the Internet. We leverage this to avoid a map lookup and simply swap the - # MACs for all relayed traffic. So we mimic this setup for local dev and CI to ensure this eBPF code path is getting exercised. - # For this to work, we need to ensure the relays and client/gateway are *not* connected to the same Docker network, otherwise - # they will learn each other's MAC addresses via ARP and the next-hop MAC swap will not be valid. - router: - image: alpine:3.22 - sysctls: - - net.ipv4.ip_forward=1 - - net.ipv6.conf.all.forwarding=1 - - net.ipv6.conf.default.forwarding=1 - - net.ipv6.conf.all.disable_ipv6=0 - - net.ipv6.conf.default.disable_ipv6=0 - command: ["sleep", "infinity"] - init: true + relay-2-router: + build: + context: scripts/router + cap_add: + - NET_ADMIN + sysctls: *ip-forwarding + environment: + PORT_FORWARDS: | + 3478 172.29.2.100 udp + 49152-65535 172.29.2.100 udp + 3478 172:29:2::100 udp + 49152-65535 172:29:2::100 udp networks: - app: - ipv4_address: 172.28.0.254 - ipv6_address: 172:28:0::254 - relays: - ipv4_address: 172.29.0.254 - ipv6_address: 172:29:0::254 + relay-2-internal: + ipv4_address: 172.29.2.254 + ipv6_address: 172:29:2::254 + interface_name: internal + internet: + ipv4_address: 203.0.113.102 + ipv6_address: 203:0:113::102 + interface_name: internet # The veth driver uses a pair of interfaces to connect the docker bridge to the container namespace. 
# For containers that have an eBPF program attached and do XDP_TX, we need to attach a dummy program @@ -655,7 +748,7 @@ services: otel: image: otel/opentelemetry-collector:latest networks: - app: + app-internal: # EdgeShark is useful for attaching wireshark to TUN devices within containers. It is reachable at http://localhost:5001 # You'll also need the extcap plugin: https://github.com/siemens/cshargextcap @@ -741,32 +834,63 @@ services: 99-ghost-in-da-edge: priority: 100 -# IPv6 is currently causing flakiness with GH actions and on our testbed. -# Disabling until there's more time to debug. networks: - # Using a separate subnet here so that the CIDR resource for 172.20.0.0 won't catch DNS resources + # Internet network - where all public IPs live + internet: + enable_ipv6: true + ipam: + config: + - subnet: 203.0.113.0/24 + - subnet: 203:0:113::/64 + + app-internal: + enable_ipv6: true + ipam: + config: + - subnet: 172.28.0.0/24 + - subnet: 172:28:0::/64 + + relay-1-internal: + enable_ipv6: true + ipam: + config: + - subnet: 172.29.1.0/24 + - subnet: 172:29:1::/64 + + relay-2-internal: + enable_ipv6: true + ipam: + config: + - subnet: 172.29.2.0/24 + - subnet: 172:29:2::/64 + + client-internal: + enable_ipv6: true + ipam: + config: + - subnet: 172.30.0.0/24 + - subnet: 172:30:0::/64 + + gateway-internal: + enable_ipv6: true + ipam: + config: + - subnet: 172.31.0.0/24 + - subnet: 172:31:0::/64 + dns_resources: ipam: config: - subnet: 172.21.0.0/24 + resources: enable_ipv6: true ipam: config: - subnet: 172.20.0.0/24 - subnet: 172:20:0::/64 - app: - enable_ipv6: true - ipam: - config: - - subnet: 172.28.0.0/24 - - subnet: 172:28:0::/64 - relays: - enable_ipv6: true - ipam: - config: - - subnet: 172.29.0.0/24 - - subnet: 172:29:0::/64 + + # Monitoring network 99-ghost-in-da-edge: name: ghost-in-da-edge internal: false diff --git a/scripts/router/Dockerfile b/scripts/router/Dockerfile new file mode 100644 index 000000000..61e71b89e --- /dev/null +++ 
b/scripts/router/Dockerfile @@ -0,0 +1,12 @@ +FROM alpine:3.22 + +RUN apk add --no-cache iproute2 nftables jq bash + +HEALTHCHECK --interval=1s --timeout=1s --retries=5 CMD [ "sh", "-c", "test $(cat /tmp/setup_done) = 1" ] + +WORKDIR /bin +COPY ./router.sh /bin/router.sh +COPY ./router.nft /bin/router.nft +RUN chmod +x /bin/router.sh + +CMD ["router.sh"] diff --git a/scripts/router/README.md b/scripts/router/README.md new file mode 100644 index 000000000..0186f3a32 --- /dev/null +++ b/scripts/router/README.md @@ -0,0 +1,15 @@ +# Router + +This container acts as a simple router, like those found on the public Internet. +By default, no inbound traffic is allowed, except for: + +- responses to previously established outgoing connections +- explicit port forwarding + +The router uses `nftables` to enforce these rules. + +We also make several assumptions about the docker-compose setup that we are running in: + +- The network interface between the router and its container must be called `internal` +- The public network interface on the other side must be called `internet` +- IPv4 and IPv6 must be available on both interfaces diff --git a/scripts/router/router.nft b/scripts/router/router.nft new file mode 100644 index 000000000..37493aa98 --- /dev/null +++ b/scripts/router/router.nft @@ -0,0 +1,36 @@ +table inet router { + # Input chain - drop by default, allow established connections + chain input { + type filter hook input priority filter; policy drop; + + # Allow loopback + iif "lo" accept + + # Allow established and related connections + ct state established,related accept + + # Allow ICMP/ICMPv6 for basic connectivity + ip protocol icmp accept + ip6 nexthdr ipv6-icmp accept + } + + # Forward chain - accept by default for router functionality + chain forward { + type filter hook forward priority filter; policy accept; + } + + # Output chain - accept by default + chain output { + type filter hook output priority filter; policy accept; + } + + # Prerouting chain for DNAT + chain 
prerouting { + type nat hook prerouting priority dstnat; + } + + # Postrouting chain for SNAT/masquerading + chain postrouting { + type nat hook postrouting priority srcnat; + } +} diff --git a/scripts/router/router.sh b/scripts/router/router.sh new file mode 100644 index 000000000..64f93fdd8 --- /dev/null +++ b/scripts/router/router.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +set -euo pipefail + +# Get network configuration +INTERNAL_NET_V4=$(ip -4 --json route | jq -r '.[] | select(.dev == "internal") | select(.dst == "default" | not) | .dst') +INTERNAL_NET_V6=$(ip -6 --json route | jq -r '.[] | select(.dev == "internal") | select(.dst | startswith("fe80") | not) | select(.dst == "default" | not) | .dst') +PUBLIC_IPV4=$(ip -4 -json addr show internet | jq -r '.[0].addr_info[0].local') +PUBLIC_IPV6=$(ip -6 -json addr show internet | jq -r '.[0].addr_info[0].local') + +# Validate required configuration +if [ -z "$INTERNAL_NET_V4" ]; then + echo "Error: Failed to identify internal IPv4 subnet" + exit 1 +fi + +if [ -z "$INTERNAL_NET_V6" ]; then + echo "Error: Failed to identify internal IPv6 subnet" + exit 1 +fi + +if [ -z "$PUBLIC_IPV4" ]; then + echo "Error: Failed to get public IPv4" + exit 1 +fi + +if [ -z "$PUBLIC_IPV6" ]; then + echo "Error: Failed to get public IPv6" + exit 1 +fi + +echo "INTERNAL_NET_V4 = $INTERNAL_NET_V4" +echo "INTERNAL_NET_V6 = $INTERNAL_NET_V6" +echo "PUBLIC_IPV4 = $PUBLIC_IPV4" +echo "PUBLIC_IPV6 = $PUBLIC_IPV6" + +TEMPLATE_FILE="router.nft" +CONFIG_FILE="/tmp/router.nft" + +# Copy template file to working config +cp "$TEMPLATE_FILE" "$CONFIG_FILE" + +echo "add rule inet router postrouting ip saddr $INTERNAL_NET_V4 oifname \"internet\" masquerade ${MASQUERADE_TYPE:-}" >>"$CONFIG_FILE" +echo "add rule inet router postrouting ip6 saddr $INTERNAL_NET_V6 oifname \"internet\" masquerade ${MASQUERADE_TYPE:-}" >>"$CONFIG_FILE" + +# Add port forwarding rules if specified +if [ -n "${PORT_FORWARDS:-}" ]; then + echo "$PORT_FORWARDS" | tr ',' '\n' | while 
IFS=' ' read -r port internal_ip protocol; do + if [ -z "$port" ] || [ -z "$internal_ip" ] || [ -z "$protocol" ]; then + continue + fi + + # Determine if internal IP is IPv4 or IPv6 and append rules to config file + case "$internal_ip" in + *:*) # IPv6 address + echo "add rule inet router prerouting ip6 daddr $PUBLIC_IPV6 $protocol dport $port dnat to [$internal_ip]:$port" >>"$CONFIG_FILE" + echo "add rule inet router input ip6 daddr $internal_ip $protocol dport $port accept" >>"$CONFIG_FILE" + ;; + *) # IPv4 address + echo "add rule inet router prerouting ip daddr $PUBLIC_IPV4 $protocol dport $port dnat to $internal_ip:$port" >>"$CONFIG_FILE" + echo "add rule inet router input ip daddr $internal_ip $protocol dport $port accept" >>"$CONFIG_FILE" + ;; + esac + done +fi + +echo "-----------------------------------------------------------------------------------------------" +cat "$CONFIG_FILE" +echo "-----------------------------------------------------------------------------------------------" + +nft -f "$CONFIG_FILE" + +rm "$CONFIG_FILE" + +echo "1" >/tmp/setup_done # Health check marker + +# Keep container running +exec tail -f /dev/null diff --git a/scripts/tests/direct-download-roaming-network.sh b/scripts/tests/direct-download-roaming-network.sh index 5b595cc5b..bdeace270 100755 --- a/scripts/tests/direct-download-roaming-network.sh +++ b/scripts/tests/direct-download-roaming-network.sh @@ -17,13 +17,13 @@ DOWNLOAD_PID=$! 
sleep 3 # Download a bit -docker network disconnect firezone_app firezone-client-1 # Disconnect the client +docker network disconnect firezone_client-internal firezone-client-1 # Disconnect the client sleep 3 -docker network connect firezone_app firezone-client-1 --ip 172.28.0.200 # Reconnect client with a different IP +docker network connect firezone_client-internal firezone-client-1 --ip 172.30.0.200 --ip6 172:30::200 # Reconnect client with a different IP -# Re-add static route to relays through router -client ip route add 172.29.0.0/24 via 172.28.0.254 dev eth0 -client ip -6 route add 172:29:0::/64 via 172:28:0::254 dev eth0 +# Re-add the static routes to the internet subnet via the router; they are removed when the network interface disappears +client ip -4 route add 203.0.113.0/24 via 172.30.0.254 +client ip -6 route add 203:0:113::/64 via 172:30:0::254 # Send SIGHUP, triggering `reconnect` internally sudo kill -s HUP "$(ps -C firezone-headless-client -o pid=)" diff --git a/scripts/tests/lib.sh b/scripts/tests/lib.sh index 8704dc261..aee76cbc3 100755 --- a/scripts/tests/lib.sh +++ b/scripts/tests/lib.sh @@ -18,55 +18,6 @@ function relay2() { docker compose exec -T relay-2 "$@" } -# Takes two optional arguments to force the client and gateway to use a specific IP stack. -# 1. client_stack: "ipv4", "ipv6" -# 2. gateway_stack: "ipv4", "ipv6" -# -# By default, the client and gateway will use happy eyeballs to use pick the first working IP stack. -function force_relayed_connections() { - # Install `iptables` to have it available in the compatibility tests - client apk add --no-cache iptables - - # Execute within the client container because doing so from the host is not reliable in CI. 
- client iptables -A OUTPUT -d 172.28.0.105 -j DROP - client ip6tables -A OUTPUT -d 172:28:0::105 -j DROP - - local client_stack="${1:-}" - local gateway_stack="${2:-}" - - # If both are empty, we don't care which stack they use; just return - if [[ -z "$client_stack" && -z "$gateway_stack" ]]; then - return - fi - - gateway apk add --no-cache iptables - - if [[ "$client_stack" == "ipv4" && "$gateway_stack" == "ipv4" ]]; then - client ip6tables -A OUTPUT -d $RELAY_1_PUBLIC_IP6_ADDR -j DROP - client ip6tables -A OUTPUT -d $RELAY_2_PUBLIC_IP6_ADDR -j DROP - gateway ip6tables -A OUTPUT -d $RELAY_1_PUBLIC_IP6_ADDR -j DROP - gateway ip6tables -A OUTPUT -d $RELAY_2_PUBLIC_IP6_ADDR -j DROP - elif [[ "$client_stack" == "ipv4" && "$gateway_stack" == "ipv6" ]]; then - client ip6tables -A OUTPUT -d $RELAY_1_PUBLIC_IP6_ADDR -j DROP - client ip6tables -A OUTPUT -d $RELAY_2_PUBLIC_IP6_ADDR -j DROP - gateway iptables -A OUTPUT -d $RELAY_1_PUBLIC_IP4_ADDR -j DROP - gateway iptables -A OUTPUT -d $RELAY_2_PUBLIC_IP4_ADDR -j DROP - elif [[ "$client_stack" == "ipv6" && "$gateway_stack" == "ipv4" ]]; then - client iptables -A OUTPUT -d $RELAY_1_PUBLIC_IP4_ADDR -j DROP - client iptables -A OUTPUT -d $RELAY_2_PUBLIC_IP4_ADDR -j DROP - gateway ip6tables -A OUTPUT -d $RELAY_1_PUBLIC_IP6_ADDR -j DROP - gateway ip6tables -A OUTPUT -d $RELAY_2_PUBLIC_IP6_ADDR -j DROP - elif [[ "$client_stack" == "ipv6" && "$gateway_stack" == "ipv6" ]]; then - client iptables -A OUTPUT -d $RELAY_1_PUBLIC_IP4_ADDR -j DROP - client iptables -A OUTPUT -d $RELAY_2_PUBLIC_IP4_ADDR -j DROP - gateway iptables -A OUTPUT -d $RELAY_1_PUBLIC_IP4_ADDR -j DROP - gateway iptables -A OUTPUT -d $RELAY_2_PUBLIC_IP4_ADDR -j DROP - else - echo "Invalid stack combination: client_stack=$client_stack, gateway_stack=$gateway_stack" - exit 1 - fi -} - function client_curl_resource() { client curl --connect-timeout 30 --fail "$1" >/dev/null } diff --git a/scripts/tests/perf/direct-udp-client2server.sh 
b/scripts/tests/perf/direct-udp-client2server.sh deleted file mode 100755 index 4dc5f1ea1..000000000 --- a/scripts/tests/perf/direct-udp-client2server.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash - -set -euox pipefail - -source "./scripts/tests/lib.sh" - -docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ - --time 30 \ - --udp \ - --bandwidth 600M \ - --client 172.20.0.110 \ - --json' >>"${TEST_NAME}.json" - -assert_process_state "gateway" "S" -assert_process_state "client" "S" diff --git a/scripts/tests/perf/relayed-tcp-client2server.sh b/scripts/tests/perf/relayed-tcp-client2server.sh deleted file mode 100755 index d92614808..000000000 --- a/scripts/tests/perf/relayed-tcp-client2server.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -set -euox pipefail - -source "./scripts/tests/lib.sh" -force_relayed_connections ipv4 ipv4 - -docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ - --time 30 \ - --client 172.20.0.110 \ - --json' >>"${TEST_NAME}.json" - -assert_process_state "relay-1" "S" -assert_process_state "relay-2" "S" -assert_process_state "gateway" "S" -assert_process_state "client" "S" diff --git a/scripts/tests/perf/relayed-tcp-server2client.sh b/scripts/tests/perf/relayed-tcp-server2client.sh deleted file mode 100755 index 466fad2a2..000000000 --- a/scripts/tests/perf/relayed-tcp-server2client.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env bash - -set -euox pipefail - -source "./scripts/tests/lib.sh" -force_relayed_connections ipv6 ipv4 - -docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ - --time 30 \ - --reverse \ - --client 172.20.0.110 \ - --json' >>"${TEST_NAME}.json" - -assert_process_state "relay-1" "S" -assert_process_state "relay-2" "S" -assert_process_state "gateway" "S" -assert_process_state "client" "S" diff --git a/scripts/tests/perf/relayed-udp-client2server.sh b/scripts/tests/perf/relayed-udp-client2server.sh deleted file mode 100755 index 
2b8c48eba..000000000 --- a/scripts/tests/perf/relayed-udp-client2server.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash - -set -euox pipefail - -source "./scripts/tests/lib.sh" -force_relayed_connections ipv6 ipv6 - -docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ - --time 30 \ - --udp \ - --bandwidth 300M \ - --client 172.20.0.110 \ - --json' >>"${TEST_NAME}.json" - -assert_process_state "relay-1" "S" -assert_process_state "relay-2" "S" -assert_process_state "gateway" "S" -assert_process_state "client" "S" diff --git a/scripts/tests/perf/relayed-udp-server2client.sh b/scripts/tests/perf/relayed-udp-server2client.sh deleted file mode 100755 index fc06e9d40..000000000 --- a/scripts/tests/perf/relayed-udp-server2client.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash - -set -euox pipefail - -source "./scripts/tests/lib.sh" -force_relayed_connections ipv4 ipv6 - -docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ - --time 30 \ - --reverse \ - --udp \ - --bandwidth 300M \ - --client 172.20.0.110 \ - --json' >>"${TEST_NAME}.json" - -assert_process_state "relay-1" "S" -assert_process_state "relay-2" "S" -assert_process_state "gateway" "S" -assert_process_state "client" "S" diff --git a/scripts/tests/perf/direct-tcp-client2server.sh b/scripts/tests/perf/tcp-client2server.sh similarity index 100% rename from scripts/tests/perf/direct-tcp-client2server.sh rename to scripts/tests/perf/tcp-client2server.sh diff --git a/scripts/tests/perf/direct-tcp-server2client.sh b/scripts/tests/perf/tcp-server2client.sh similarity index 100% rename from scripts/tests/perf/direct-tcp-server2client.sh rename to scripts/tests/perf/tcp-server2client.sh diff --git a/scripts/tests/perf/udp-client2server.sh b/scripts/tests/perf/udp-client2server.sh new file mode 100755 index 000000000..9496e9c81 --- /dev/null +++ b/scripts/tests/perf/udp-client2server.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +set -euox pipefail + +source 
"./scripts/tests/lib.sh" + +docker compose exec --env RUST_LOG=info -it client /bin/sh -c "iperf3 \ + --time 30 \ + --udp \ + --bandwidth ${UDP_BITRATE:-450M} \ + --client 172.20.0.110 \ + --json" >>"${TEST_NAME}.json" + +assert_process_state "gateway" "S" +assert_process_state "client" "S" diff --git a/scripts/tests/perf/direct-udp-server2client.sh b/scripts/tests/perf/udp-server2client.sh similarity index 61% rename from scripts/tests/perf/direct-udp-server2client.sh rename to scripts/tests/perf/udp-server2client.sh index c74ac4d96..1d37c48b4 100755 --- a/scripts/tests/perf/direct-udp-server2client.sh +++ b/scripts/tests/perf/udp-server2client.sh @@ -4,13 +4,13 @@ set -euox pipefail source "./scripts/tests/lib.sh" -docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ +docker compose exec --env RUST_LOG=info -it client /bin/sh -c "iperf3 \ --time 30 \ --reverse \ --udp \ - --bandwidth 600M \ + --bandwidth ${UDP_BITRATE:-450M} \ --client 172.20.0.110 \ - --json' >>"${TEST_NAME}.json" + --json" >>"${TEST_NAME}.json" assert_process_state "gateway" "S" assert_process_state "client" "S" diff --git a/scripts/tests/systemd/dns-systemd-resolved.sh b/scripts/tests/systemd/dns-systemd-resolved.sh index c3fe7c47e..7e5332909 100755 --- a/scripts/tests/systemd/dns-systemd-resolved.sh +++ b/scripts/tests/systemd/dns-systemd-resolved.sh @@ -13,6 +13,7 @@ debug_exit() { docker compose ps -a resolvectl dns tun-firezone || true systemctl status "$SERVICE_NAME" || true + journalctl -eu "$SERVICE_NAME" || true exit 1 } @@ -52,7 +53,7 @@ resolvectl query "$HTTPBIN" || debug_exit # Accessing a resource should succeed after the client is up # Block off Docker's DNS. sudo resolvectl dns "$DOCKER_IFACE" "" -curl -v $HTTPBIN/get +curl -v $HTTPBIN/get || debug_exit # Make sure it's going through the tunnel nslookup "$HTTPBIN" | grep "100\\.96\\.0\\."