Files
firezone/docker-compose.yml
Thomas Eizinger b11adfcfe4 feat(connlib): create flow on ICMP error "prohibited" (#10462)
In Firezone, a Client requests an "access authorization" for a Resource
on the fly when it sees the first packet for said Resource going through
the tunnel. If we don't have a connection to the Gateway yet, this is
also where we will establish a connection and create the WireGuard
tunnel.

In order for this to work, the access authorization state between the
Client and the Gateway MUST NOT get out of sync. If the Client thinks it
has access to a Resource, it will just route the traffic to the Gateway.
If the access authorization on the Gateway has expired or vanished
otherwise, the packets will be black-holed.

Starting with #9816, the Gateway sends ICMP errors back to the
application whenever it filters a packet. This can happen either because
the access authorization is gone or because the traffic wasn't allowed
by the specific filter rules on the Resource.

With this patch, the Client will attempt to create a new flow (i.e.
re-authorize traffic) for this resource whenever it sees such an ICMP
error, therefore acting as a way of synchronizing the view of the world
between Client and Gateway should they ever get out of sync.

Testing turned out to be a bit tricky. If we let the authorization on
the Gateway lapse naturally, the portal will also toggle the Resource off
and on on the Client, resulting in "flushing" the current
authorizations. Additionally, if the Client had access to only one
Resource, then the Gateway will gracefully close the connection, also
resulting in the Client creating a new flow for the next packet.

To actually trigger this new behaviour we need to:

- Access at least two resources via the same Gateway
- Directly send `reject_access` to the Gateway for this particular
resource

To achieve this, we dynamically eval some code on the API node and
instruct the Gateway channel to send `reject_access`. The connection
stays intact because there is still another active access authorization
but packets for the other resource are answered with ICMP errors.

To achieve a safe roll-out, the new behaviour is feature-flagged. In
order to still test it, we now also allow feature flags to be set via
env variables.

Resolves: #10074

---------

Co-authored-by: Mariusz Klochowicz <mariusz@klochowicz.com>
2025-09-30 08:23:39 +00:00

521 lines
14 KiB
YAML

# Run with DOCKER_BUILD_TARGET=dev to build Rust inside Docker
include:
  - scripts/compose/resources.yml
  - scripts/compose/edgeshark.yml
  - scripts/compose/portal.yml

# Shared environment fragments, merged into services via `<<:` below.
x-erlang-cluster: &erlang-cluster
  ERLANG_CLUSTER_ADAPTER: "Elixir.Cluster.Strategy.Epmd"
  ERLANG_CLUSTER_ADAPTER_CONFIG: '{"hosts":["api@api.cluster.local","web@web.cluster.local","domain@domain.cluster.local"]}'

x-portal-urls: &portal-urls
  WEB_EXTERNAL_URL: http://localhost:8080/
  API_EXTERNAL_URL: http://localhost:8081/

x-phoenix-config: &phoenix-config
  PHOENIX_HTTP_WEB_PORT: "8080"
  PHOENIX_HTTP_API_PORT: "8081"
  PHOENIX_SECURE_COOKIES: "false"

# Aggressive healthcheck timings shared by all services.
x-health-check: &health-check
  interval: 1s
  retries: 15
  timeout: 1s
services:
  web:
    extends:
      file: scripts/compose/portal.yml
      service: common
    build:
      context: elixir
      args:
        APPLICATION_NAME: web
    image: ${WEB_IMAGE:-ghcr.io/firezone/web}:${WEB_TAG:-main}
    hostname: web.cluster.local
    ports:
      - "8080:8080/tcp"
    environment:
      <<: [*portal-urls, *erlang-cluster, *phoenix-config]
      RELEASE_HOSTNAME: "web.cluster.local"
      RELEASE_NAME: "web"
      LOG_LEVEL: "debug"
    healthcheck:
      test: ["CMD-SHELL", "curl -f localhost:8080/healthz"]
      <<: *health-check
    depends_on:
      vault:
        condition: "service_healthy"
      postgres:
        condition: "service_healthy"

  api:
    extends:
      file: scripts/compose/portal.yml
      service: common
    build:
      context: elixir
      args:
        APPLICATION_NAME: api
    image: ${API_IMAGE:-ghcr.io/firezone/api}:${API_TAG:-main}
    hostname: api.cluster.local
    ports:
      - "8081:8081/tcp"
    environment:
      <<: [*portal-urls, *erlang-cluster, *phoenix-config]
      RELEASE_HOSTNAME: "api.cluster.local"
      RELEASE_NAME: "api"
      LOG_LEVEL: "debug"
    user: root # Needed to run `ip route` commands
    cap_add:
      - NET_ADMIN # Needed to run `tc` commands to add simulated delay
    command:
      - sh
      - -c
      - |
        set -e
        # Add static route to internet subnet via router
        ip -4 route add 203.0.113.0/24 via 172.28.0.254
        ip -6 route add 203:0:113::/64 via 172:28:0::254
        exec su default -c "bin/server"
    depends_on:
      vault:
        condition: "service_healthy"
      postgres:
        condition: "service_healthy"
      api-router:
        condition: "service_healthy"
    healthcheck:
      test: ["CMD-SHELL", "curl -f localhost:8081/healthz"]
      <<: *health-check
    networks:
      app-internal:
        ipv4_address: 172.28.0.100
        ipv6_address: 172:28:0::100

  api-router:
    extends:
      file: scripts/compose/router.yml
      service: router
    environment:
      PORT_FORWARDS: |
        8081 172.28.0.100 tcp
        8081 172:28:0::100 tcp
      MASQUERADE_TYPE: ""
      NETWORK_LATENCY_MS: "50"
    networks:
      app-internal:
        ipv4_address: 172.28.0.254
        ipv6_address: 172:28:0::254
        interface_name: internal
      internet:
        ipv4_address: 203.0.113.10
        ipv6_address: 203:0:113::10
        interface_name: internet

  domain:
    extends:
      file: scripts/compose/portal.yml
      service: common
    build:
      context: elixir
      args:
        APPLICATION_NAME: domain
    image: ${DOMAIN_IMAGE:-ghcr.io/firezone/domain}:${DOMAIN_TAG:-main}
    hostname: domain.cluster.local
    environment:
      <<: *erlang-cluster
      RELEASE_HOSTNAME: "domain.cluster.local"
      RELEASE_NAME: "domain"
      LOG_LEVEL: "debug"
    healthcheck:
      test: ["CMD-SHELL", "curl -f localhost:4000/healthz"]
      <<: *health-check
    depends_on:
      vault:
        condition: "service_healthy"
      postgres:
        condition: "service_healthy"

  # This is a service container which allows to run mix tasks for local development
  # without having to install Elixir and Erlang on the host machine.
  elixir:
    extends:
      file: scripts/compose/portal.yml
      service: common
    build:
      context: elixir
      target: compiler
      args:
        APPLICATION_NAME: api
    image: ${ELIXIR_IMAGE:-ghcr.io/firezone/elixir}:${ELIXIR_TAG:-main}
    hostname: elixir
    environment:
      <<: *portal-urls
      RELEASE_HOSTNAME: "mix.cluster.local"
      RELEASE_NAME: "mix"
      # Higher log level not to make seeds output too verbose
      LOG_LEVEL: "info"
      # Mix env should be set to prod to use secrets declared above,
      # otherwise seeds will generate invalid tokens
      MIX_ENV: "prod"
    depends_on:
      postgres:
        condition: "service_healthy"

  # Run with DOCKER_BUILD_TARGET=dev to build Rust inside Docker
  client:
    healthcheck:
      test: ["CMD-SHELL", "ip link | grep tun-firezone"]
      <<: *health-check
    environment:
      FIREZONE_DNS_CONTROL: "${FIREZONE_DNS_CONTROL:-etc-resolv-conf}"
      FIREZONE_TOKEN: "n.SFMyNTY.g2gDaANtAAAAJGM4OWJjYzhjLTkzOTItNGRhZS1hNDBkLTg4OGFlZjZkMjhlMG0AAAAkN2RhN2QxY2QtMTExYy00NGE3LWI1YWMtNDAyN2I5ZDIzMGU1bQAAACtBaUl5XzZwQmstV0xlUkFQenprQ0ZYTnFJWktXQnMyRGR3XzJ2Z0lRdkZnbgYAR_ywiZQBYgABUYA.PLNlzyqMSgZlbQb1QX5EzZgYNuY9oeOddP0qDkTwtGg"
      RUST_LOG: ${RUST_LOG:-wire=trace,debug}
      FIREZONE_API_URL: ws://api:8081
      FIREZONE_ID: EFC7A9E3-3576-4633-B633-7D47BA9E14AC
      # Quoted: Compose `environment` maps require string/number/null values;
      # a bare YAML boolean is rejected by the Compose schema.
      FZFF_ICMP_ERROR_UNREACHABLE_PROHIBITED_CREATE_NEW_FLOW: "true"
    command:
      - sh
      - -c
      - |
        set -e
        # Add static route to internet subnet via router
        ip -4 route add 203.0.113.0/24 via 172.30.0.254
        ip -6 route add 203:0:113::/64 via 172:30:0::254
        # Disable checksum offloading so that checksums are correct when they reach the relay
        apk add --no-cache ethtool
        ethtool -K eth0 tx off
        exec firezone-headless-client
    init: true
    build:
      target: ${DOCKER_BUILD_TARGET:-debug}
      context: rust
      dockerfile: Dockerfile
      args:
        PACKAGE: firezone-headless-client
    image: ${CLIENT_IMAGE:-ghcr.io/firezone/debug/client}:${CLIENT_TAG:-main}
    privileged: true # Needed to tune `sysctl` inside container.
    cap_add:
      - NET_ADMIN
    sysctls:
      - net.ipv6.conf.all.disable_ipv6=0
      - net.ipv6.conf.default.disable_ipv6=0
    devices:
      - "/dev/net/tun:/dev/net/tun"
    depends_on:
      client-router:
        condition: "service_healthy"
      api:
        condition: "service_healthy"
    networks:
      client-internal:
        ipv4_address: 172.30.0.100
        ipv6_address: 172:30:0::100
    extra_hosts:
      - "api:203.0.113.10"
      - "api:203:0:113::10"

  client-router:
    extends:
      file: scripts/compose/router.yml
      service: router
    environment:
      MASQUERADE_TYPE: ${CLIENT_MASQUERADE:-}
      NETWORK_LATENCY_MS: "10"
    networks:
      client-internal:
        ipv4_address: 172.30.0.254
        ipv6_address: 172:30:0::254
        interface_name: internal
      internet:
        interface_name: internet

  gateway:
    healthcheck:
      test: ["CMD-SHELL", "ip link | grep tun-firezone"]
      <<: *health-check
    environment:
      FIREZONE_TOKEN: ".SFMyNTY.g2gDaANtAAAAJGM4OWJjYzhjLTkzOTItNGRhZS1hNDBkLTg4OGFlZjZkMjhlMG0AAAAkMjI3NDU2MGItZTk3Yi00NWU0LThiMzQtNjc5Yzc2MTdlOThkbQAAADhPMDJMN1VTMkozVklOT01QUjlKNklMODhRSVFQNlVPOEFRVk82VTVJUEwwVkpDMjJKR0gwPT09PW4GAAH8sImUAWIAAVGA.tAm2O9FcyF67VAF3rZdwQpeADrYOIs3S2l2K51G26OM"
      RUST_LOG: ${RUST_LOG:-wire=trace,debug}
      FIREZONE_API_URL: ws://api:8081
      FIREZONE_ID: 4694E56C-7643-4A15-9DF3-638E5B05F570
    command:
      - sh
      - -c
      - |
        set -e
        # Add static route to internet subnet via router
        ip -4 route add 203.0.113.0/24 via 172.31.0.254
        ip -6 route add 203:0:113::/64 via 172:31:0::254
        # Disable checksum offloading so that checksums are correct when they reach the relay
        apk add --no-cache ethtool
        ethtool -K eth0 tx off
        ethtool -K eth1 tx off
        ethtool -K eth2 tx off
        exec firezone-gateway
    init: true
    build:
      target: ${DOCKER_BUILD_TARGET:-debug}
      context: rust
      dockerfile: Dockerfile
      args:
        PACKAGE: firezone-gateway
    image: ${GATEWAY_IMAGE:-ghcr.io/firezone/debug/gateway}:${GATEWAY_TAG:-main}
    cap_add:
      - NET_ADMIN
    sysctls:
      - net.ipv4.ip_forward=1
      - net.ipv6.conf.all.disable_ipv6=0
      - net.ipv6.conf.default.disable_ipv6=0
      - net.ipv6.conf.all.forwarding=1
      - net.ipv6.conf.default.forwarding=1
    devices:
      - "/dev/net/tun:/dev/net/tun"
    depends_on:
      gateway-router:
        condition: "service_healthy"
      api:
        condition: "service_healthy"
    networks:
      gateway-internal:
        ipv4_address: 172.31.0.100
        ipv6_address: 172:31:0::100
      # Networks declared in the included resource compose files; default attachment.
      dns_resources:
      resources:
    extra_hosts:
      - "api:203.0.113.10"
      - "api:203:0:113::10"

  gateway-router:
    extends:
      file: scripts/compose/router.yml
      service: router
    environment:
      MASQUERADE_TYPE: ${GATEWAY_MASQUERADE:-}
      NETWORK_LATENCY_MS: "10"
    networks:
      gateway-internal:
        ipv4_address: 172.31.0.254
        ipv6_address: 172:31:0::254
        interface_name: internal
      internet:
        interface_name: internet

  relay-1:
    extends:
      file: scripts/compose/relay.yml
      service: relay
    environment:
      PUBLIC_IP4_ADDR: 203.0.113.101
      PUBLIC_IP6_ADDR: 203:0:113::101
    command:
      - sh
      - -c
      - |
        set -e
        # Add static route to internet subnet via router
        ip -4 route add 203.0.113.0/24 via 172.29.1.254
        ip -6 route add 203:0:113::/64 via 172:29:1::254
        apk add --no-cache ethtool
        ethtool -K eth0 tx off
        firezone-relay
    depends_on:
      relay-1-router:
        condition: "service_healthy"
    networks:
      relay-1-internal:
        ipv4_address: 172.29.1.100
        ipv6_address: 172:29:1::100

  relay-1-router:
    extends:
      file: scripts/compose/router.yml
      service: router
    environment:
      PORT_FORWARDS: |
        3478 172.29.1.100 udp
        49152-65535 172.29.1.100 udp
        3478 172:29:1::100 udp
        49152-65535 172:29:1::100 udp
      NETWORK_LATENCY_MS: "30"
    networks:
      relay-1-internal:
        ipv4_address: 172.29.1.254
        ipv6_address: 172:29:1::254
        interface_name: internal
      internet:
        ipv4_address: 203.0.113.101
        ipv6_address: 203:0:113::101
        interface_name: internet

  relay-2:
    extends:
      file: scripts/compose/relay.yml
      service: relay
    environment:
      PUBLIC_IP4_ADDR: 203.0.113.102
      PUBLIC_IP6_ADDR: 203:0:113::102
    command:
      - sh
      - -c
      - |
        set -e
        # Add static route to internet subnet via router
        ip -4 route add 203.0.113.0/24 via 172.29.2.254
        ip -6 route add 203:0:113::/64 via 172:29:2::254
        apk add --no-cache ethtool
        ethtool -K eth0 tx off
        firezone-relay
    depends_on:
      relay-2-router:
        condition: "service_healthy"
    networks:
      relay-2-internal:
        ipv4_address: 172.29.2.100
        ipv6_address: 172:29:2::100

  relay-2-router:
    extends:
      file: scripts/compose/router.yml
      service: router
    environment:
      PORT_FORWARDS: |
        3478 172.29.2.100 udp
        49152-65535 172.29.2.100 udp
        3478 172:29:2::100 udp
        49152-65535 172:29:2::100 udp
      NETWORK_LATENCY_MS: "30"
    networks:
      relay-2-internal:
        ipv4_address: 172.29.2.254
        ipv6_address: 172:29:2::254
        interface_name: internal
      internet:
        ipv4_address: 203.0.113.102
        ipv6_address: 203:0:113::102
        interface_name: internet

  # The veth driver uses a pair of interfaces to connect the docker bridge to the container namespace.
  # For containers that have an eBPF program attached and do XDP_TX, we need to attach a dummy program
  # to the corresponding veth interface on the host to be able to receive the XDP_TX traffic and pass
  # it up to the docker bridge successfully.
  #
  # The "recommended" way to do this is to set both veth interfaces' GRO to on, or attach an XDP program
  # that does XDP_PASS to the host side veth interface. The GRO method is not reliable and was shown to
  # only pass packets in large bursts every 15-20 seconds which breaks ICE setup, so we use the XDP method.
  #
  # For correct behaviour, we also disable any kind of offloading for all veth and bridge devices.
  # This forces the kernel to calculate all checksums in software.
  network-config:
    image: ghcr.io/firezone/xdp-pass
    pid: host
    network_mode: host
    privileged: true
    restart: on-failure
    command:
      - sh
      - -c
      - |
        set -e
        VETHS=$$(ip -json link show type veth | jq -r '.[].ifname')
        for dev in $$VETHS; do
          echo "Attaching XDP to: $$dev"
          ip link set dev $$dev xdpdrv off # Clear any existing XDP program.
          ip link set dev $$dev xdpdrv obj /xdp/xdp_pass.o sec xdp
          ethtool -K $$dev tx off # Disable offloading.
        done
        echo "Done configuring $$(echo "$$VETHS" | wc -w) veth interfaces"
        BRIDGES=$$(ip -json link show type bridge | jq -r '.[].ifname')
        for dev in $$BRIDGES; do
          ethtool -K $$dev tx off # Disable offloading.
        done
        echo "Done configuring $$(echo "$$BRIDGES" | wc -w) bridge interfaces"
    depends_on:
      relay-1:
        condition: "service_healthy"
      relay-2:
        condition: "service_healthy"
      relay-1-router:
        condition: "service_healthy"
      relay-2-router:
        condition: "service_healthy"
      gateway-router:
        condition: "service_healthy"
      client-router:
        condition: "service_healthy"
      gateway:
        condition: "service_healthy"
      client:
        condition: "service_healthy"

  otel:
    image: otel/opentelemetry-collector:latest
    networks:
      app-internal:
networks:
  # Internet network - where all public IPs live
  internet:
    enable_ipv6: true
    ipam:
      config:
        - subnet: 203.0.113.0/24
        - subnet: 203:0:113::/64
  app-internal:
    enable_ipv6: true
    ipam:
      config:
        - subnet: 172.28.0.0/24
        - subnet: 172:28:0::/64
  relay-1-internal:
    enable_ipv6: true
    ipam:
      config:
        - subnet: 172.29.1.0/24
        - subnet: 172:29:1::/64
  relay-2-internal:
    enable_ipv6: true
    ipam:
      config:
        - subnet: 172.29.2.0/24
        - subnet: 172:29:2::/64
  client-internal:
    enable_ipv6: true
    ipam:
      config:
        - subnet: 172.30.0.0/24
        - subnet: 172:30:0::/64
  gateway-internal:
    enable_ipv6: true
    ipam:
      config:
        - subnet: 172.31.0.0/24
        - subnet: 172:31:0::/64