Files
firezone/docker-compose.yml
Thomas Eizinger ed2bc0bd25 feat(gateway): revise handling of DNS resolution errors (#10623)
Even prior to #10373, failures in resolving a name on the Gateway for a
DNS resource resulted in a failure of setting up the DNS resource NAT.
Without the DNS resource NAT, packets for that resource bounced on the
Gateway because we didn't have any traffic filters.

A non-existent filter is being treated as a "traffic not allowed" error
and we respond with an ICMP permission denied error. For domains where
both the A and AAAA query result in NXDOMAIN, that isn't necessarily
appropriate. Instead, I am proposing that for such cases, we want to
return a regular "address/host unreachable" ICMP error instead of the
more specific "permission denied" variant.

To achieve that, we refactor the Gateway's peer state to be able to hold
an `Option<IpAddr>` inside the `TranslationState`. This allows us to
always insert an entry for each proxy IP, even if we did not resolve any
IPs for it. Then, when receiving traffic for a proxy IP where the
resolved IP is `None`, we reply with the appropriate ICMP error.

As part of this, we also simplify the assignment of the proxy IPs. With
the NAT64 module removed, there is no more reason to cross-assign IPv4
and IPv6 addresses. We can simply leave the mappings for e.g. IPv6 proxy
addresses empty if the AAAA query didn't resolve anything.

From the Client's perspective, not much changes. The DNS resource NAT
setup will now succeed, even for domains that don't resolve to anything.
This doesn't change any behaviour though as we are currently already
passing packets through for failed DNS resource NAT setups. The main
change is that we now send back a different ICMP error. Most
importantly, the "address/host unreachable" variant does not trigger
#10462.
2025-10-22 19:14:45 +00:00

522 lines
15 KiB
YAML

# Run with DOCKER_BUILD_TARGET=dev to build Rust inside Docker
include:
  - scripts/compose/resources.yml
  - scripts/compose/edgeshark.yml
  - scripts/compose/portal.yml
# Shared environment fragments, merged into services below via `<<:` merge keys.
x-erlang-cluster: &erlang-cluster
  ERLANG_CLUSTER_ADAPTER: "Elixir.Cluster.Strategy.Epmd"
  ERLANG_CLUSTER_ADAPTER_CONFIG: '{"hosts":["api@api.cluster.local","web@web.cluster.local","domain@domain.cluster.local"]}'

x-portal-urls: &portal-urls
  WEB_EXTERNAL_URL: http://localhost:8080/
  API_EXTERNAL_URL: http://localhost:8081/

x-phoenix-config: &phoenix-config
  PHOENIX_HTTP_WEB_PORT: "8080"
  PHOENIX_HTTP_API_PORT: "8081"
  PHOENIX_SECURE_COOKIES: "false"

x-health-check: &health-check
  interval: 1s
  retries: 15
  timeout: 1s
services:
  web:
    extends:
      file: scripts/compose/portal.yml
      service: common
    build:
      context: elixir
      args:
        APPLICATION_NAME: web
    image: ${WEB_IMAGE:-ghcr.io/firezone/web}:${WEB_TAG:-main}
    hostname: web.cluster.local
    ports:
      # Quoted: colon-separated port mappings hit YAML 1.1 implicit typing otherwise.
      - "8080:8080/tcp"
    environment:
      <<: [*portal-urls, *erlang-cluster, *phoenix-config]
      RELEASE_HOSTNAME: "web.cluster.local"
      RELEASE_NAME: "web"
      LOG_LEVEL: "debug"
    healthcheck:
      test: ["CMD-SHELL", "curl -f localhost:8080/healthz"]
      <<: *health-check
    depends_on:
      vault:
        condition: "service_healthy"
      postgres:
        condition: "service_healthy"

  api:
    extends:
      file: scripts/compose/portal.yml
      service: common
    build:
      context: elixir
      args:
        APPLICATION_NAME: api
    image: ${API_IMAGE:-ghcr.io/firezone/api}:${API_TAG:-main}
    hostname: api.cluster.local
    ports:
      - "8081:8081/tcp"
    environment:
      <<: [*portal-urls, *erlang-cluster, *phoenix-config]
      RELEASE_HOSTNAME: "api.cluster.local"
      RELEASE_NAME: "api"
      LOG_LEVEL: "debug"
    user: root # Needed to run `ip route` commands
    cap_add:
      - NET_ADMIN # Needed to run `tc` commands to add simulated delay
    command:
      - sh
      - -c
      - |
        set -e
        # Add static route to internet subnet via router
        ip -4 route add 203.0.113.0/24 via 172.28.0.254
        ip -6 route add 203:0:113::/64 via 172:28:0::254
        exec su default -c "bin/server"
    depends_on:
      vault:
        condition: "service_healthy"
      postgres:
        condition: "service_healthy"
      api-router:
        condition: "service_healthy"
    healthcheck:
      test: ["CMD-SHELL", "curl -f localhost:8081/healthz"]
      <<: *health-check
    networks:
      app-internal:
        ipv4_address: 172.28.0.100
        # IPv6 literals quoted defensively: colon-heavy plain scalars are fragile.
        ipv6_address: "172:28:0::100"

  api-router:
    extends:
      file: scripts/compose/router.yml
      service: router
    environment:
      PORT_FORWARDS: |
        8081 172.28.0.100 tcp
        8081 172:28:0::100 tcp
      MASQUERADE_TYPE: ""
      # Env values are strings per the Compose spec; quoted to avoid int coercion.
      NETWORK_LATENCY_MS: "50"
    networks:
      app-internal:
        ipv4_address: 172.28.0.254
        ipv6_address: "172:28:0::254"
        interface_name: internal
      internet:
        ipv4_address: 203.0.113.10
        ipv6_address: "203:0:113::10"
        interface_name: internet

  domain:
    extends:
      file: scripts/compose/portal.yml
      service: common
    build:
      context: elixir
      args:
        APPLICATION_NAME: domain
    image: ${DOMAIN_IMAGE:-ghcr.io/firezone/domain}:${DOMAIN_TAG:-main}
    hostname: domain.cluster.local
    environment:
      <<: *erlang-cluster
      RELEASE_HOSTNAME: "domain.cluster.local"
      RELEASE_NAME: "domain"
      LOG_LEVEL: "debug"
    healthcheck:
      test: ["CMD-SHELL", "curl -f localhost:4000/healthz"]
      <<: *health-check
    depends_on:
      vault:
        condition: "service_healthy"
      postgres:
        condition: "service_healthy"

  # This is a service container which allows to run mix tasks for local development
  # without having to install Elixir and Erlang on the host machine.
  elixir:
    extends:
      file: scripts/compose/portal.yml
      service: common
    build:
      context: elixir
      target: compiler
      args:
        APPLICATION_NAME: api
    image: ${ELIXIR_IMAGE:-ghcr.io/firezone/elixir}:${ELIXIR_TAG:-main}
    hostname: elixir
    environment:
      <<: *portal-urls
      RELEASE_HOSTNAME: "mix.cluster.local"
      RELEASE_NAME: "mix"
      # Higher log level not to make seeds output too verbose
      LOG_LEVEL: "info"
      # Mix env should be set to prod to use secrets declared above,
      # otherwise seeds will generate invalid tokens
      MIX_ENV: "prod"
    depends_on:
      postgres:
        condition: "service_healthy"

  # Run with DOCKER_BUILD_TARGET=dev to build Rust inside Docker
  client:
    healthcheck:
      test: ["CMD-SHELL", "ip link | grep tun-firezone"]
      <<: *health-check
    environment:
      FIREZONE_DNS_CONTROL: "${FIREZONE_DNS_CONTROL:-etc-resolv-conf}"
      FIREZONE_TOKEN: "n.SFMyNTY.g2gDaANtAAAAJGM4OWJjYzhjLTkzOTItNGRhZS1hNDBkLTg4OGFlZjZkMjhlMG0AAAAkN2RhN2QxY2QtMTExYy00NGE3LWI1YWMtNDAyN2I5ZDIzMGU1bQAAACtBaUl5XzZwQmstV0xlUkFQenprQ0ZYTnFJWktXQnMyRGR3XzJ2Z0lRdkZnbgYAR_ywiZQBYgABUYA.PLNlzyqMSgZlbQb1QX5EzZgYNuY9oeOddP0qDkTwtGg"
      RUST_LOG: ${RUST_LOG:-wire=trace,debug}
      FIREZONE_API_URL: ws://api:8081
      FIREZONE_ID: EFC7A9E3-3576-4633-B633-7D47BA9E14AC
      # Quoted: Compose env values are strings; a bare `true` is a YAML boolean.
      FZFF_ICMP_ERROR_UNREACHABLE_PROHIBITED_CREATE_NEW_FLOW: "true"
    command:
      - sh
      - -c
      - |
        set -e
        # Add static route to internet subnet via router
        ip -4 route add 203.0.113.0/24 via 172.30.0.254
        ip -6 route add 203:0:113::/64 via 172:30:0::254
        # Disable checksum offloading so that checksums are correct when they reach the relay
        apk add --no-cache ethtool
        ethtool -K eth0 tx off
        exec firezone-headless-client
    init: true
    build:
      target: ${DOCKER_BUILD_TARGET:-debug}
      context: rust
      dockerfile: Dockerfile
      args:
        PACKAGE: firezone-headless-client
    image: ${CLIENT_IMAGE:-ghcr.io/firezone/debug/client}:${CLIENT_TAG:-main}
    privileged: true # Needed to tune `sysctl` inside container.
    cap_add:
      - NET_ADMIN
    sysctls:
      - net.ipv6.conf.all.disable_ipv6=0
      - net.ipv6.conf.default.disable_ipv6=0
    devices:
      - "/dev/net/tun:/dev/net/tun"
    depends_on:
      client-router:
        condition: "service_healthy"
      api:
        condition: "service_healthy"
    networks:
      client-internal:
        ipv4_address: 172.30.0.100
        ipv6_address: "172:30:0::100"
    extra_hosts:
      - "api:203.0.113.10"
      - "api:203:0:113::10"

  client-router:
    extends:
      file: scripts/compose/router.yml
      service: router
    environment:
      MASQUERADE_TYPE: ${CLIENT_MASQUERADE:-}
      NETWORK_LATENCY_MS: "10"
    networks:
      client-internal:
        ipv4_address: 172.30.0.254
        ipv6_address: "172:30:0::254"
        interface_name: internal
      internet:
        interface_name: internet

  gateway:
    healthcheck:
      test: ["CMD-SHELL", "ip link | grep tun-firezone"]
      <<: *health-check
    environment:
      FIREZONE_TOKEN: ".SFMyNTY.g2gDaANtAAAAJGM4OWJjYzhjLTkzOTItNGRhZS1hNDBkLTg4OGFlZjZkMjhlMG0AAAAkMjI3NDU2MGItZTk3Yi00NWU0LThiMzQtNjc5Yzc2MTdlOThkbQAAADhPMDJMN1VTMkozVklOT01QUjlKNklMODhRSVFQNlVPOEFRVk82VTVJUEwwVkpDMjJKR0gwPT09PW4GAAH8sImUAWIAAVGA.tAm2O9FcyF67VAF3rZdwQpeADrYOIs3S2l2K51G26OM"
      RUST_LOG: ${RUST_LOG:-wire=trace,debug,flow_logs=trace}
      FIREZONE_API_URL: ws://api:8081
      FIREZONE_ID: 4694E56C-7643-4A15-9DF3-638E5B05F570
      # Quoted: Compose env values are strings; a bare `true` is a YAML boolean.
      FZFF_GATEWAY_USERSPACE_DNS_A_AAAA_RECORDS: "true"
    command:
      - sh
      - -c
      - |
        set -e
        # Add static route to internet subnet via router
        ip -4 route add 203.0.113.0/24 via 172.31.0.254
        ip -6 route add 203:0:113::/64 via 172:31:0::254
        # Disable checksum offloading so that checksums are correct when they reach the relay
        apk add --no-cache ethtool
        ethtool -K eth0 tx off
        ethtool -K eth1 tx off
        ethtool -K eth2 tx off
        exec firezone-gateway
    init: true
    build:
      target: ${DOCKER_BUILD_TARGET:-debug}
      context: rust
      dockerfile: Dockerfile
      args:
        PACKAGE: firezone-gateway
    image: ${GATEWAY_IMAGE:-ghcr.io/firezone/debug/gateway}:${GATEWAY_TAG:-main}
    cap_add:
      - NET_ADMIN
    sysctls:
      - net.ipv4.ip_forward=1
      - net.ipv6.conf.all.disable_ipv6=0
      - net.ipv6.conf.default.disable_ipv6=0
      - net.ipv6.conf.all.forwarding=1
      - net.ipv6.conf.default.forwarding=1
    devices:
      - "/dev/net/tun:/dev/net/tun"
    depends_on:
      gateway-router:
        condition: "service_healthy"
      api:
        condition: "service_healthy"
    networks:
      gateway-internal:
        ipv4_address: 172.31.0.100
        ipv6_address: "172:31:0::100"
      # Attach with default settings; presumably defined in scripts/compose/resources.yml — TODO confirm.
      dns_resources:
      resources:
    extra_hosts:
      - "api:203.0.113.10"
      - "api:203:0:113::10"

  gateway-router:
    extends:
      file: scripts/compose/router.yml
      service: router
    environment:
      MASQUERADE_TYPE: ${GATEWAY_MASQUERADE:-}
      NETWORK_LATENCY_MS: "10"
    networks:
      gateway-internal:
        ipv4_address: 172.31.0.254
        ipv6_address: "172:31:0::254"
        interface_name: internal
      internet:
        interface_name: internet

  relay-1:
    extends:
      file: scripts/compose/relay.yml
      service: relay
    environment:
      PUBLIC_IP4_ADDR: 203.0.113.101
      PUBLIC_IP6_ADDR: "203:0:113::101"
    command:
      - sh
      - -c
      - |
        set -e
        # Add static route to internet subnet via router
        ip -4 route add 203.0.113.0/24 via 172.29.1.254
        ip -6 route add 203:0:113::/64 via 172:29:1::254
        apk add --no-cache ethtool
        ethtool -K eth0 tx off
        firezone-relay
    depends_on:
      relay-1-router:
        condition: "service_healthy"
    networks:
      relay-1-internal:
        ipv4_address: 172.29.1.100
        ipv6_address: "172:29:1::100"

  relay-1-router:
    extends:
      file: scripts/compose/router.yml
      service: router
    environment:
      PORT_FORWARDS: |
        3478 172.29.1.100 udp
        49152-65535 172.29.1.100 udp
        3478 172:29:1::100 udp
        49152-65535 172:29:1::100 udp
      NETWORK_LATENCY_MS: "30"
    networks:
      relay-1-internal:
        ipv4_address: 172.29.1.254
        ipv6_address: "172:29:1::254"
        interface_name: internal
      internet:
        ipv4_address: 203.0.113.101
        ipv6_address: "203:0:113::101"
        interface_name: internet

  relay-2:
    extends:
      file: scripts/compose/relay.yml
      service: relay
    environment:
      PUBLIC_IP4_ADDR: 203.0.113.102
      PUBLIC_IP6_ADDR: "203:0:113::102"
    command:
      - sh
      - -c
      - |
        set -e
        # Add static route to internet subnet via router
        ip -4 route add 203.0.113.0/24 via 172.29.2.254
        ip -6 route add 203:0:113::/64 via 172:29:2::254
        apk add --no-cache ethtool
        ethtool -K eth0 tx off
        firezone-relay
    depends_on:
      relay-2-router:
        condition: "service_healthy"
    networks:
      relay-2-internal:
        ipv4_address: 172.29.2.100
        ipv6_address: "172:29:2::100"

  relay-2-router:
    extends:
      file: scripts/compose/router.yml
      service: router
    environment:
      PORT_FORWARDS: |
        3478 172.29.2.100 udp
        49152-65535 172.29.2.100 udp
        3478 172:29:2::100 udp
        49152-65535 172:29:2::100 udp
      NETWORK_LATENCY_MS: "30"
    networks:
      relay-2-internal:
        ipv4_address: 172.29.2.254
        ipv6_address: "172:29:2::254"
        interface_name: internal
      internet:
        ipv4_address: 203.0.113.102
        ipv6_address: "203:0:113::102"
        interface_name: internet

  # The veth driver uses a pair of interfaces to connect the docker bridge to the container namespace.
  # For containers that have an eBPF program attached and do XDP_TX, we need to attach a dummy program
  # to the corresponding veth interface on the host to be able to receive the XDP_TX traffic and pass
  # it up to the docker bridge successfully.
  #
  # The "recommended" way to do this is to set both veth interfaces' GRO to on, or attach an XDP program
  # that does XDP_PASS to the host side veth interface. The GRO method is not reliable and was shown to
  # only pass packets in large bursts every 15-20 seconds which breaks ICE setup, so we use the XDP method.
  #
  # For correct behaviour, we also disable any kind of offloading for all veth and bridge devices.
  # This forces the kernel to calculate all checksums in software.
  network-config:
    image: ghcr.io/firezone/xdp-pass
    pid: host
    network_mode: host
    privileged: true
    restart: on-failure
    command:
      - sh
      - -c
      # `$$` escapes `$` for Compose so the shell sees a single `$`.
      - |
        set -e
        VETHS=$$(ip -json link show type veth | jq -r '.[].ifname')
        for dev in $$VETHS; do
          echo "Attaching XDP to: $$dev"
          ip link set dev $$dev xdpdrv off # Clear any existing XDP program.
          ip link set dev $$dev xdpdrv obj /xdp/xdp_pass.o sec xdp
          ethtool -K $$dev tx off # Disable offloading.
        done
        echo "Done configuring $$(echo "$$VETHS" | wc -w) veth interfaces"
        BRIDGES=$$(ip -json link show type bridge | jq -r '.[].ifname')
        for dev in $$BRIDGES; do
          ethtool -K $$dev tx off # Disable offloading.
        done
        echo "Done configuring $$(echo "$$BRIDGES" | wc -w) bridge interfaces"
    depends_on:
      relay-1:
        condition: "service_healthy"
      relay-2:
        condition: "service_healthy"
      relay-1-router:
        condition: "service_healthy"
      relay-2-router:
        condition: "service_healthy"
      gateway-router:
        condition: "service_healthy"
      client-router:
        condition: "service_healthy"
      gateway:
        condition: "service_healthy"
      client:
        condition: "service_healthy"

  otel:
    image: otel/opentelemetry-collector:latest
    networks:
      app-internal:
networks:
  # Internet network - where all public IPs live
  internet:
    enable_ipv6: true
    ipam:
      config:
        - subnet: 203.0.113.0/24
        # IPv6 subnets quoted defensively: colon-heavy plain scalars are fragile.
        - subnet: "203:0:113::/64"
  app-internal:
    enable_ipv6: true
    ipam:
      config:
        - subnet: 172.28.0.0/24
        - subnet: "172:28:0::/64"
  relay-1-internal:
    enable_ipv6: true
    ipam:
      config:
        - subnet: 172.29.1.0/24
        - subnet: "172:29:1::/64"
  relay-2-internal:
    enable_ipv6: true
    ipam:
      config:
        - subnet: 172.29.2.0/24
        - subnet: "172:29:2::/64"
  client-internal:
    enable_ipv6: true
    ipam:
      config:
        - subnet: 172.30.0.0/24
        - subnet: "172:30:0::/64"
  gateway-internal:
    enable_ipv6: true
    ipam:
      config:
        - subnet: 172.31.0.0/24
        - subnet: "172:31:0::/64"