mirror of
https://github.com/outbackdingo/firezone.git
synced 2026-01-27 18:18:55 +00:00
As it turns out, the flaky test was caused by a bug in the eBPF kernel where we read the old channel data header from the wrong offset. This made us essentially read garbage data for the channel number, causing us to: a. Compute a bad checksum b. Send the packet on a completely wrong channel The reason this caused a flaky test is that it requires on side to pick IPv4 to talk to the relay and the other side IPv6. The happy-eyeballs approach of the `allocation` module made that non-deterministic, only exposing this bug occasionally. To ensure these kind of things are detected earlier in the future, I am adding an additional CI step that checks all packets emitted by the eBPF kernel for checksum errors. Fixes: #10404 Co-authored-by: Jamil Bou Kheir <jamilbk@users.noreply.github.com>
520 lines
14 KiB
YAML
520 lines
14 KiB
YAML
# Run with DOCKER_BUILD_TARGET=dev to build Rust inside Docker
|
|
include:
|
|
- scripts/compose/resources.yml
|
|
- scripts/compose/edgeshark.yml
|
|
- scripts/compose/portal.yml
|
|
|
|
x-erlang-cluster: &erlang-cluster
|
|
ERLANG_CLUSTER_ADAPTER: "Elixir.Cluster.Strategy.Epmd"
|
|
ERLANG_CLUSTER_ADAPTER_CONFIG: '{"hosts":["api@api.cluster.local","web@web.cluster.local","domain@domain.cluster.local"]}'
|
|
|
|
x-portal-urls: &portal-urls
|
|
WEB_EXTERNAL_URL: http://localhost:8080/
|
|
API_EXTERNAL_URL: http://localhost:8081/
|
|
|
|
x-phoenix-config: &phoenix-config
|
|
PHOENIX_HTTP_WEB_PORT: "8080"
|
|
PHOENIX_HTTP_API_PORT: "8081"
|
|
PHOENIX_SECURE_COOKIES: "false"
|
|
|
|
x-health-check: &health-check
|
|
interval: 1s
|
|
retries: 15
|
|
timeout: 1s
|
|
|
|
services:
|
|
web:
|
|
extends:
|
|
file: scripts/compose/portal.yml
|
|
service: common
|
|
build:
|
|
context: elixir
|
|
args:
|
|
APPLICATION_NAME: web
|
|
image: ${WEB_IMAGE:-ghcr.io/firezone/web}:${WEB_TAG:-main}
|
|
hostname: web.cluster.local
|
|
ports:
|
|
- 8080:8080/tcp
|
|
environment:
|
|
<<: [*portal-urls, *erlang-cluster, *phoenix-config]
|
|
RELEASE_HOSTNAME: "web.cluster.local"
|
|
RELEASE_NAME: "web"
|
|
LOG_LEVEL: "debug"
|
|
healthcheck:
|
|
test: ["CMD-SHELL", "curl -f localhost:8080/healthz"]
|
|
<<: *health-check
|
|
depends_on:
|
|
vault:
|
|
condition: "service_healthy"
|
|
postgres:
|
|
condition: "service_healthy"
|
|
|
|
api:
|
|
extends:
|
|
file: scripts/compose/portal.yml
|
|
service: common
|
|
build:
|
|
context: elixir
|
|
args:
|
|
APPLICATION_NAME: api
|
|
image: ${API_IMAGE:-ghcr.io/firezone/api}:${API_TAG:-main}
|
|
hostname: api.cluster.local
|
|
ports:
|
|
- 8081:8081/tcp
|
|
environment:
|
|
<<: [*portal-urls, *erlang-cluster, *phoenix-config]
|
|
RELEASE_HOSTNAME: "api.cluster.local"
|
|
RELEASE_NAME: "api"
|
|
LOG_LEVEL: "debug"
|
|
user: root # Needed to run `ip route` commands
|
|
cap_add:
|
|
- NET_ADMIN # Needed to run `tc` commands to add simulated delay
|
|
command:
|
|
- sh
|
|
- -c
|
|
- |
|
|
set -e
|
|
|
|
# Add static route to internet subnet via router
|
|
ip -4 route add 203.0.113.0/24 via 172.28.0.254
|
|
ip -6 route add 203:0:113::/64 via 172:28:0::254
|
|
|
|
exec su default -c "bin/server"
|
|
depends_on:
|
|
vault:
|
|
condition: "service_healthy"
|
|
postgres:
|
|
condition: "service_healthy"
|
|
api-router:
|
|
condition: "service_healthy"
|
|
healthcheck:
|
|
test: ["CMD-SHELL", "curl -f localhost:8081/healthz"]
|
|
<<: *health-check
|
|
networks:
|
|
app-internal:
|
|
ipv4_address: 172.28.0.100
|
|
ipv6_address: 172:28:0::100
|
|
|
|
api-router:
|
|
extends:
|
|
file: scripts/compose/router.yml
|
|
service: router
|
|
environment:
|
|
PORT_FORWARDS: |
|
|
8081 172.28.0.100 tcp
|
|
8081 172:28:0::100 tcp
|
|
MASQUERADE_TYPE: ""
|
|
NETWORK_LATENCY_MS: 50
|
|
networks:
|
|
app-internal:
|
|
ipv4_address: 172.28.0.254
|
|
ipv6_address: 172:28:0::254
|
|
interface_name: internal
|
|
internet:
|
|
ipv4_address: 203.0.113.10
|
|
ipv6_address: 203:0:113::10
|
|
interface_name: internet
|
|
|
|
domain:
|
|
extends:
|
|
file: scripts/compose/portal.yml
|
|
service: common
|
|
build:
|
|
context: elixir
|
|
args:
|
|
APPLICATION_NAME: domain
|
|
image: ${DOMAIN_IMAGE:-ghcr.io/firezone/domain}:${DOMAIN_TAG:-main}
|
|
hostname: domain.cluster.local
|
|
environment:
|
|
<<: *erlang-cluster
|
|
RELEASE_HOSTNAME: "domain.cluster.local"
|
|
RELEASE_NAME: "domain"
|
|
LOG_LEVEL: "debug"
|
|
healthcheck:
|
|
test: ["CMD-SHELL", "curl -f localhost:4000/healthz"]
|
|
<<: *health-check
|
|
depends_on:
|
|
vault:
|
|
condition: "service_healthy"
|
|
postgres:
|
|
condition: "service_healthy"
|
|
|
|
# This is a service container which allows to run mix tasks for local development
|
|
# without having to install Elixir and Erlang on the host machine.
|
|
elixir:
|
|
extends:
|
|
file: scripts/compose/portal.yml
|
|
service: common
|
|
build:
|
|
context: elixir
|
|
target: compiler
|
|
args:
|
|
APPLICATION_NAME: api
|
|
image: ${ELIXIR_IMAGE:-ghcr.io/firezone/elixir}:${ELIXIR_TAG:-main}
|
|
hostname: elixir
|
|
environment:
|
|
<<: *portal-urls
|
|
RELEASE_HOSTNAME: "mix.cluster.local"
|
|
RELEASE_NAME: "mix"
|
|
# Higher log level not to make seeds output too verbose
|
|
LOG_LEVEL: "info"
|
|
# Mix env should be set to prod to use secrets declared above,
|
|
# otherwise seeds will generate invalid tokens
|
|
MIX_ENV: "prod"
|
|
depends_on:
|
|
postgres:
|
|
condition: "service_healthy"
|
|
|
|
# Run with DOCKER_BUILD_TARGET=dev to build Rust inside Docker
|
|
client:
|
|
healthcheck:
|
|
test: ["CMD-SHELL", "ip link | grep tun-firezone"]
|
|
<<: *health-check
|
|
environment:
|
|
FIREZONE_DNS_CONTROL: "${FIREZONE_DNS_CONTROL:-etc-resolv-conf}"
|
|
FIREZONE_TOKEN: "n.SFMyNTY.g2gDaANtAAAAJGM4OWJjYzhjLTkzOTItNGRhZS1hNDBkLTg4OGFlZjZkMjhlMG0AAAAkN2RhN2QxY2QtMTExYy00NGE3LWI1YWMtNDAyN2I5ZDIzMGU1bQAAACtBaUl5XzZwQmstV0xlUkFQenprQ0ZYTnFJWktXQnMyRGR3XzJ2Z0lRdkZnbgYAR_ywiZQBYgABUYA.PLNlzyqMSgZlbQb1QX5EzZgYNuY9oeOddP0qDkTwtGg"
|
|
RUST_LOG: ${RUST_LOG:-wire=trace,debug}
|
|
FIREZONE_API_URL: ws://api:8081
|
|
FIREZONE_ID: EFC7A9E3-3576-4633-B633-7D47BA9E14AC
|
|
command:
|
|
- sh
|
|
- -c
|
|
- |
|
|
set -e
|
|
|
|
# Add static route to internet subnet via router
|
|
ip -4 route add 203.0.113.0/24 via 172.30.0.254
|
|
ip -6 route add 203:0:113::/64 via 172:30:0::254
|
|
|
|
# Disable checksum offloading so that checksums are correct when they reach the relay
|
|
apk add --no-cache ethtool
|
|
ethtool -K eth0 tx off
|
|
|
|
exec firezone-headless-client
|
|
init: true
|
|
build:
|
|
target: ${DOCKER_BUILD_TARGET:-debug}
|
|
context: rust
|
|
dockerfile: Dockerfile
|
|
args:
|
|
PACKAGE: firezone-headless-client
|
|
image: ${CLIENT_IMAGE:-ghcr.io/firezone/debug/client}:${CLIENT_TAG:-main}
|
|
privileged: true # Needed to tune `sysctl` inside container.
|
|
cap_add:
|
|
- NET_ADMIN
|
|
sysctls:
|
|
- net.ipv6.conf.all.disable_ipv6=0
|
|
- net.ipv6.conf.default.disable_ipv6=0
|
|
devices:
|
|
- "/dev/net/tun:/dev/net/tun"
|
|
depends_on:
|
|
client-router:
|
|
condition: "service_healthy"
|
|
api:
|
|
condition: "service_healthy"
|
|
networks:
|
|
client-internal:
|
|
ipv4_address: 172.30.0.100
|
|
ipv6_address: 172:30:0::100
|
|
extra_hosts:
|
|
- "api:203.0.113.10"
|
|
- "api:203:0:113::10"
|
|
|
|
client-router:
|
|
extends:
|
|
file: scripts/compose/router.yml
|
|
service: router
|
|
environment:
|
|
MASQUERADE_TYPE: ${CLIENT_MASQUERADE:-}
|
|
NETWORK_LATENCY_MS: 10
|
|
networks:
|
|
client-internal:
|
|
ipv4_address: 172.30.0.254
|
|
ipv6_address: 172:30:0::254
|
|
interface_name: internal
|
|
internet:
|
|
interface_name: internet
|
|
|
|
gateway:
|
|
healthcheck:
|
|
test: ["CMD-SHELL", "ip link | grep tun-firezone"]
|
|
<<: *health-check
|
|
environment:
|
|
FIREZONE_TOKEN: ".SFMyNTY.g2gDaANtAAAAJGM4OWJjYzhjLTkzOTItNGRhZS1hNDBkLTg4OGFlZjZkMjhlMG0AAAAkMjI3NDU2MGItZTk3Yi00NWU0LThiMzQtNjc5Yzc2MTdlOThkbQAAADhPMDJMN1VTMkozVklOT01QUjlKNklMODhRSVFQNlVPOEFRVk82VTVJUEwwVkpDMjJKR0gwPT09PW4GAAH8sImUAWIAAVGA.tAm2O9FcyF67VAF3rZdwQpeADrYOIs3S2l2K51G26OM"
|
|
RUST_LOG: ${RUST_LOG:-wire=trace,debug}
|
|
FIREZONE_API_URL: ws://api:8081
|
|
FIREZONE_ID: 4694E56C-7643-4A15-9DF3-638E5B05F570
|
|
command:
|
|
- sh
|
|
- -c
|
|
- |
|
|
set -e
|
|
|
|
# Add static route to internet subnet via router
|
|
ip -4 route add 203.0.113.0/24 via 172.31.0.254
|
|
ip -6 route add 203:0:113::/64 via 172:31:0::254
|
|
|
|
# Disable checksum offloading so that checksums are correct when they reach the relay
|
|
apk add --no-cache ethtool
|
|
ethtool -K eth0 tx off
|
|
ethtool -K eth1 tx off
|
|
ethtool -K eth2 tx off
|
|
|
|
exec firezone-gateway
|
|
init: true
|
|
build:
|
|
target: ${DOCKER_BUILD_TARGET:-debug}
|
|
context: rust
|
|
dockerfile: Dockerfile
|
|
args:
|
|
PACKAGE: firezone-gateway
|
|
image: ${GATEWAY_IMAGE:-ghcr.io/firezone/debug/gateway}:${GATEWAY_TAG:-main}
|
|
cap_add:
|
|
- NET_ADMIN
|
|
sysctls:
|
|
- net.ipv4.ip_forward=1
|
|
- net.ipv6.conf.all.disable_ipv6=0
|
|
- net.ipv6.conf.default.disable_ipv6=0
|
|
- net.ipv6.conf.all.forwarding=1
|
|
- net.ipv6.conf.default.forwarding=1
|
|
devices:
|
|
- "/dev/net/tun:/dev/net/tun"
|
|
depends_on:
|
|
gateway-router:
|
|
condition: "service_healthy"
|
|
api:
|
|
condition: "service_healthy"
|
|
networks:
|
|
gateway-internal:
|
|
ipv4_address: 172.31.0.100
|
|
ipv6_address: 172:31:0::100
|
|
dns_resources:
|
|
resources:
|
|
extra_hosts:
|
|
- "api:203.0.113.10"
|
|
- "api:203:0:113::10"
|
|
|
|
gateway-router:
|
|
extends:
|
|
file: scripts/compose/router.yml
|
|
service: router
|
|
environment:
|
|
MASQUERADE_TYPE: ${GATEWAY_MASQUERADE:-}
|
|
NETWORK_LATENCY_MS: 10
|
|
networks:
|
|
gateway-internal:
|
|
ipv4_address: 172.31.0.254
|
|
ipv6_address: 172:31:0::254
|
|
interface_name: internal
|
|
internet:
|
|
interface_name: internet
|
|
|
|
relay-1:
|
|
extends:
|
|
file: scripts/compose/relay.yml
|
|
service: relay
|
|
environment:
|
|
PUBLIC_IP4_ADDR: 203.0.113.101
|
|
PUBLIC_IP6_ADDR: 203:0:113::101
|
|
command:
|
|
- sh
|
|
- -c
|
|
- |
|
|
set -e
|
|
|
|
# Add static route to internet subnet via router
|
|
ip -4 route add 203.0.113.0/24 via 172.29.1.254
|
|
ip -6 route add 203:0:113::/64 via 172:29:1::254
|
|
|
|
apk add --no-cache ethtool
|
|
ethtool -K eth0 tx off
|
|
|
|
firezone-relay
|
|
depends_on:
|
|
relay-1-router:
|
|
condition: "service_healthy"
|
|
networks:
|
|
relay-1-internal:
|
|
ipv4_address: 172.29.1.100
|
|
ipv6_address: 172:29:1::100
|
|
|
|
relay-1-router:
|
|
extends:
|
|
file: scripts/compose/router.yml
|
|
service: router
|
|
environment:
|
|
PORT_FORWARDS: |
|
|
3478 172.29.1.100 udp
|
|
49152-65535 172.29.1.100 udp
|
|
3478 172:29:1::100 udp
|
|
49152-65535 172:29:1::100 udp
|
|
NETWORK_LATENCY_MS: 30
|
|
networks:
|
|
relay-1-internal:
|
|
ipv4_address: 172.29.1.254
|
|
ipv6_address: 172:29:1::254
|
|
interface_name: internal
|
|
internet:
|
|
ipv4_address: 203.0.113.101
|
|
ipv6_address: 203:0:113::101
|
|
interface_name: internet
|
|
|
|
relay-2:
|
|
extends:
|
|
file: scripts/compose/relay.yml
|
|
service: relay
|
|
environment:
|
|
PUBLIC_IP4_ADDR: 203.0.113.102
|
|
PUBLIC_IP6_ADDR: 203:0:113::102
|
|
command:
|
|
- sh
|
|
- -c
|
|
- |
|
|
set -e
|
|
|
|
# Add static route to internet subnet via router
|
|
ip -4 route add 203.0.113.0/24 via 172.29.2.254
|
|
ip -6 route add 203:0:113::/64 via 172:29:2::254
|
|
|
|
apk add --no-cache ethtool
|
|
ethtool -K eth0 tx off
|
|
|
|
firezone-relay
|
|
depends_on:
|
|
relay-2-router:
|
|
condition: "service_healthy"
|
|
networks:
|
|
relay-2-internal:
|
|
ipv4_address: 172.29.2.100
|
|
ipv6_address: 172:29:2::100
|
|
|
|
relay-2-router:
|
|
extends:
|
|
file: scripts/compose/router.yml
|
|
service: router
|
|
environment:
|
|
PORT_FORWARDS: |
|
|
3478 172.29.2.100 udp
|
|
49152-65535 172.29.2.100 udp
|
|
3478 172:29:2::100 udp
|
|
49152-65535 172:29:2::100 udp
|
|
NETWORK_LATENCY_MS: 30
|
|
networks:
|
|
relay-2-internal:
|
|
ipv4_address: 172.29.2.254
|
|
ipv6_address: 172:29:2::254
|
|
interface_name: internal
|
|
internet:
|
|
ipv4_address: 203.0.113.102
|
|
ipv6_address: 203:0:113::102
|
|
interface_name: internet
|
|
|
|
# The veth driver uses a pair of interfaces to connect the docker bridge to the container namespace.
|
|
# For containers that have an eBPF program attached and do XDP_TX, we need to attach a dummy program
|
|
# to the corresponding veth interface on the host to be able to receive the XDP_TX traffic and pass
|
|
# it up to the docker bridge successfully.
|
|
#
|
|
# The "recommended" way to do this is to set both veth interfaces' GRO to on, or attach an XDP program
|
|
# that does XDP_PASS to the host side veth interface. The GRO method is not reliable and was shown to
|
|
# only pass packets in large bursts every 15-20 seconds which breaks ICE setup, so we use the XDP method.
|
|
#
|
|
# For correct behaviour, we also disable any kind of offloading for all veth and bridge devices.
|
|
# This forces the kernel to calculate all checksums in software.
|
|
network-config:
|
|
image: ghcr.io/firezone/xdp-pass
|
|
pid: host
|
|
network_mode: host
|
|
privileged: true
|
|
restart: on-failure
|
|
command:
|
|
- sh
|
|
- -c
|
|
- |
|
|
set -e
|
|
|
|
VETHS=$$(ip -json link show type veth | jq -r '.[].ifname')
|
|
|
|
for dev in $$VETHS; do
|
|
echo "Attaching XDP to: $$dev"
|
|
ip link set dev $$dev xdpdrv off # Clear any existing XDP program.
|
|
ip link set dev $$dev xdpdrv obj /xdp/xdp_pass.o sec xdp
|
|
|
|
ethtool -K $$dev tx off # Disable offloading.
|
|
done
|
|
|
|
echo "Done configuring $$(echo "$$VETHS" | wc -w) veth interfaces"
|
|
|
|
BRIDGES=$$(ip -json link show type bridge | jq -r '.[].ifname')
|
|
|
|
for dev in $$BRIDGES; do
|
|
ethtool -K $$dev tx off # Disable offloading.
|
|
done
|
|
|
|
echo "Done configuring $$(echo "$$BRIDGES" | wc -w) bridge interfaces"
|
|
depends_on:
|
|
relay-1:
|
|
condition: "service_healthy"
|
|
relay-2:
|
|
condition: "service_healthy"
|
|
relay-1-router:
|
|
condition: "service_healthy"
|
|
relay-2-router:
|
|
condition: "service_healthy"
|
|
gateway-router:
|
|
condition: "service_healthy"
|
|
client-router:
|
|
condition: "service_healthy"
|
|
gateway:
|
|
condition: "service_healthy"
|
|
client:
|
|
condition: "service_healthy"
|
|
|
|
otel:
|
|
image: otel/opentelemetry-collector:latest
|
|
networks:
|
|
app-internal:
|
|
|
|
networks:
|
|
# Internet network - where all public IPs live
|
|
internet:
|
|
enable_ipv6: true
|
|
ipam:
|
|
config:
|
|
- subnet: 203.0.113.0/24
|
|
- subnet: 203:0:113::/64
|
|
|
|
app-internal:
|
|
enable_ipv6: true
|
|
ipam:
|
|
config:
|
|
- subnet: 172.28.0.0/24
|
|
- subnet: 172:28:0::/64
|
|
|
|
relay-1-internal:
|
|
enable_ipv6: true
|
|
ipam:
|
|
config:
|
|
- subnet: 172.29.1.0/24
|
|
- subnet: 172:29:1::/64
|
|
|
|
relay-2-internal:
|
|
enable_ipv6: true
|
|
ipam:
|
|
config:
|
|
- subnet: 172.29.2.0/24
|
|
- subnet: 172:29:2::/64
|
|
|
|
client-internal:
|
|
enable_ipv6: true
|
|
ipam:
|
|
config:
|
|
- subnet: 172.30.0.0/24
|
|
- subnet: 172:30:0::/64
|
|
|
|
gateway-internal:
|
|
enable_ipv6: true
|
|
ipam:
|
|
config:
|
|
- subnet: 172.31.0.0/24
|
|
- subnet: 172:31:0::/64
|