diff --git a/.github/workflows/_integration_tests.yml b/.github/workflows/_integration_tests.yml index b130b7e6e..9d0839126 100644 --- a/.github/workflows/_integration_tests.yml +++ b/.github/workflows/_integration_tests.yml @@ -139,6 +139,7 @@ jobs: docker compose up -d relay-2 --no-build docker compose up -d gateway --no-build docker compose up -d client --no-build + docker compose up veth-config # Wait a few seconds for the services to fully start. GH runners are # slow, so this gives the Client enough time to initialize its tun interface, @@ -161,10 +162,10 @@ jobs: - name: Ensure Client emitted no warnings if: "!cancelled()" - # Remove the error filter once headless-client 1.5.2 is released. run: | + # Disabling checksum offloading causes one or two "I/O error (os error 5)" warnings docker compose logs client | \ - grep "Operation not permitted (os error 1)" --invert | \ + grep --invert "I/O error (os error 5)" | \ grep "WARN" && exit 1 || exit 0 - name: Show Client logs if: "!cancelled()" @@ -180,7 +181,11 @@ jobs: - name: Ensure Gateway emitted no warnings if: "!cancelled()" - run: docker compose logs gateway | grep "WARN" && exit 1 || exit 0 + run: | + # Disabling checksum offloading causes one or two "I/O error (os error 5)" warnings + docker compose logs gateway | \ + grep --invert "I/O error (os error 5)" | \ + grep "WARN" && exit 1 || exit 0 - name: Show Gateway logs if: "!cancelled()" run: docker compose logs gateway diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 372ba60a5..ba255d15f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -295,6 +295,10 @@ jobs: CLIENT_TAG: ${{ github.sha }} RELAY_IMAGE: "ghcr.io/firezone/perf/relay" RELAY_TAG: ${{ github.sha }} + RELAY_1_PUBLIC_IP4_ADDR: 172.29.0.101 + RELAY_1_PUBLIC_IP6_ADDR: 172:29:0::101 + RELAY_2_PUBLIC_IP4_ADDR: 172.29.0.102 + RELAY_2_PUBLIC_IP6_ADDR: 172:29:0::102 strategy: fail-fast: false matrix: @@ -328,12 +332,13 @@ jobs: docker compose up -d relay-2 --no-build docker compose up -d gateway --no-build docker compose up -d client --no-build + docker compose up veth-config - name: Add 10ms simulated latency run: | - docker compose exec -d client tc qdisc add dev eth0 root netem delay 10ms - docker compose exec -d gateway tc qdisc add dev eth0 root netem delay 10ms - docker compose exec -d relay-1 tc qdisc add dev eth0 root netem delay 10ms - docker compose exec -d relay-2 tc qdisc add dev eth0 root netem delay 10ms + docker compose exec -T client tc qdisc add dev eth0 root netem delay 10ms + docker compose exec -T gateway tc qdisc add dev eth0 root netem delay 10ms + docker compose exec -T relay-1 tc qdisc add dev eth0 root netem delay 10ms + docker compose exec -T relay-2 tc qdisc add dev eth0 root netem delay 10ms - name: "Performance test: ${{ matrix.test_name }}" timeout-minutes: 5 env: @@ -374,16 +379,32 @@ jobs: - name: Ensure Client emitted no warnings if: "!cancelled()" - run: docker compose logs client | grep "WARN" && exit 1 || exit 0 + run: | + # Disabling checksum offloading causes one or two "I/O error (os error 5)" warnings + docker compose logs client | \ + grep --invert "I/O error (os error 5)" | \ + grep "WARN" && exit 1 || exit 0 - name: Ensure Relay-1 emitted no warnings if: "!cancelled()" - run: docker compose logs relay-1 | grep "WARN" && exit 1 || exit 0 + run: | + # BTF doesn't load for veth interfaces + docker compose logs relay-1 | \ + grep --invert "Object BTF couldn't be loaded in the kernel: the BPF_BTF_LOAD syscall failed." | \ + grep "WARN" && exit 1 || exit 0 - name: Ensure Relay-2 emitted no warnings if: "!cancelled()" - run: docker compose logs relay-2 | grep "WARN" && exit 1 || exit 0 + run: | + # BTF doesn't load for veth interfaces + docker compose logs relay-2 | \ + grep --invert "Object BTF couldn't be loaded in the kernel: the BPF_BTF_LOAD syscall failed." | \ + grep "WARN" && exit 1 || exit 0 - name: Ensure Gateway emitted no warnings if: "!cancelled()" - run: docker compose logs gateway | grep "WARN" && exit 1 || exit 0 + run: | + # Disabling checksum offloading causes one or two "I/O error (os error 5)" warnings + docker compose logs gateway | \ + grep --invert "I/O error (os error 5)" | \ + grep "WARN" && exit 1 || exit 0 upload-bencher: continue-on-error: true diff --git a/docker-compose.yml b/docker-compose.yml index f4580b462..3cde00666 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -176,8 +176,20 @@ services: FEATURE_IDP_SYNC_ENABLED: "true" FEATURE_REST_API_ENABLED: "true" FEATURE_INTERNET_RESOURCE_ENABLED: "true" + user: root # Needed to run `ip route` commands cap_add: - NET_ADMIN # Needed to run `tc` commands to add simulated delay + command: + - sh + - -c + - | + set -e + + # Add static route to relay subnet via router + ip route add 172.29.0.0/24 via 172.28.0.254 + ip -6 route add 172:29:0::/64 via 172:28:0::254 + + exec su default -c "bin/server" depends_on: vault: condition: "service_healthy" @@ -190,7 +202,9 @@ services: retries: 5 timeout: 5s networks: - - app + app: + ipv4_address: 172.28.0.10 + ipv6_address: 172:28:0::10 domain: build: @@ -322,6 +336,21 @@ services: RUST_LOG: ${RUST_LOG:-firezone_linux_client=trace,wire=trace,connlib_client_shared=trace,firezone_tunnel=trace,connlib_shared=trace,boringtun=debug,snownet=debug,str0m=debug,phoenix_channel=debug,info} FIREZONE_API_URL: ws://api:8081 FIREZONE_ID: EFC7A9E3-3576-4633-B633-7D47BA9E14AC + command: + - sh + - -c + - | + set -e + + # Add static route to relay subnet via router + ip route add 172.29.0.0/24 via 172.28.0.254 + ip -6 route add 172:29:0::/64 via 172:28:0::254 + + # Disable checksum offloading so that checksums are correct when they reach the relay + apk add --no-cache ethtool + ethtool -K eth0 tx off + + firezone-headless-client init: true build: target: debug @@ -339,6 +368,8 @@ services: devices: - "/dev/net/tun:/dev/net/tun" depends_on: + router: + condition: "service_started" api: condition: "service_healthy" networks: @@ -355,6 +386,23 @@ services: FIREZONE_ENABLE_MASQUERADE: 1 # FIXME: NOOP in latest version. Remove after next release. FIREZONE_API_URL: ws://api:8081 FIREZONE_ID: 4694E56C-7643-4A15-9DF3-638E5B05F570 + command: + - sh + - -c + - | + set -e + + # Add static route to relay subnet via router + ip route add 172.29.0.0/24 via 172.28.0.254 + ip -6 route add 172:29:0::/64 via 172:28:0::254 + + # Disable checksum offloading so that checksums are correct when they reach the relay + apk add --no-cache ethtool + ethtool -K eth0 tx off + ethtool -K eth1 tx off + ethtool -K eth2 tx off + + firezone-gateway init: true build: target: debug @@ -375,6 +423,8 @@ services: devices: - "/dev/net/tun:/dev/net/tun" depends_on: + router: + condition: "service_started" api: condition: "service_healthy" networks: @@ -433,9 +483,8 @@ services: relay-1: environment: - PUBLIC_IP4_ADDR: ${RELAY_1_PUBLIC_IP4_ADDR:-172.28.0.101} - PUBLIC_IP6_ADDR: ${RELAY_1_PUBLIC_IP6_ADDR:-172:28:0::101} - # PUBLIC_IP6_ADDR: fcff:3990:3990::101 + PUBLIC_IP4_ADDR: ${RELAY_1_PUBLIC_IP4_ADDR:-172.29.0.101} + PUBLIC_IP6_ADDR: ${RELAY_1_PUBLIC_IP6_ADDR:-172:29:0::101} # LOWEST_PORT: 55555 # HIGHEST_PORT: 55666 # Token for self-hosted Relay @@ -444,8 +493,22 @@ services: FIREZONE_TOKEN: ".SFMyNTY.g2gDaAN3A25pbG0AAAAkZTgyZmNkYzEtMDU3YS00MDE1LWI5MGItM2IxOGYwZjI4MDUzbQAAADhDMTROR0E4N0VKUlIwM0c0UVBSMDdBOUM2Rzc4NFRTU1RIU0Y0VEk1VDBHRDhENkwwVlJHPT09PW4GAOb7sImUAWIAAVGA.e_k2YXxBOSmqVSu5RRscjZJBkZ7OAGzkpr5X2ge1MNo" RUST_LOG: ${RUST_LOG:-debug} RUST_BACKTRACE: 1 - FIREZONE_API_URL: ws://api:8081 + FIREZONE_API_URL: ws://172.28.0.10:8081 OTLP_GRPC_ENDPOINT: otel:4317 + EBPF_OFFLOADING: eth0 + command: + - sh + - -c + - | + set -e + + # Add static route to app subnet via router + ip route add 172.28.0.0/24 via 172.29.0.254 + ip -6 route add 172:28:0::/64 via 172:29:0::254 + + firezone-relay + privileged: true + init: true build: target: debug context: rust @@ -463,6 +526,8 @@ services: retries: 5 timeout: 5s depends_on: + router: + condition: "service_started" api: condition: "service_healthy" # ports: @@ -473,23 +538,36 @@ services: # - "55555-55666:55555-55666/udp" # - 3478:3478/udp networks: - app: - ipv4_address: ${RELAY_1_PUBLIC_IP4_ADDR:-172.28.0.101} - ipv6_address: ${RELAY_1_PUBLIC_IP6_ADDR:-172:28:0::101} + relays: + ipv4_address: ${RELAY_1_PUBLIC_IP4_ADDR:-172.29.0.101} + ipv6_address: ${RELAY_1_PUBLIC_IP6_ADDR:-172:29:0::101} relay-2: environment: - PUBLIC_IP4_ADDR: ${RELAY_2_PUBLIC_IP4_ADDR:-172.28.0.201} - PUBLIC_IP6_ADDR: ${RELAY_2_PUBLIC_IP6_ADDR:-172:28:0::201} - # PUBLIC_IP6_ADDR: fcff:3990:3990::101 + PUBLIC_IP4_ADDR: ${RELAY_2_PUBLIC_IP4_ADDR:-172.29.0.102} + PUBLIC_IP6_ADDR: ${RELAY_2_PUBLIC_IP6_ADDR:-172:29:0::102} # Token for self-hosted Relay # FIREZONE_TOKEN: ".SFMyNTY.g2gDaANtAAAAJGM4OWJjYzhjLTkzOTItNGRhZS1hNDBkLTg4OGFlZjZkMjhlMG0AAAAkNTQ5YzQxMDctMTQ5Mi00ZjhmLWE0ZWMtYTlkMmE2NmQ4YWE5bQAAADhQVTVBSVRFMU84VkRWTk1ITU9BQzc3RElLTU9HVERJQTY3MlM2RzFBQjAyT1MzNEg1TUUwPT09PW4GAEngLBONAWIAAVGA.E-f2MFdGMX7JTL2jwoHBdWcUd2G3UNz2JRZLbQrlf0k" # Token for global Relay FIREZONE_TOKEN: ".SFMyNTY.g2gDaAN3A25pbG0AAAAkZTgyZmNkYzEtMDU3YS00MDE1LWI5MGItM2IxOGYwZjI4MDUzbQAAADhDMTROR0E4N0VKUlIwM0c0UVBSMDdBOUM2Rzc4NFRTU1RIU0Y0VEk1VDBHRDhENkwwVlJHPT09PW4GAOb7sImUAWIAAVGA.e_k2YXxBOSmqVSu5RRscjZJBkZ7OAGzkpr5X2ge1MNo" RUST_LOG: ${RUST_LOG:-debug} RUST_BACKTRACE: 1 - FIREZONE_API_URL: ws://api:8081 + FIREZONE_API_URL: ws://172.28.0.10:8081 OTLP_GRPC_ENDPOINT: otel:4317 + EBPF_OFFLOADING: eth0 + command: + - sh + - -c + - | + set -e + + # Add static route to app subnet via router + ip route add 172.28.0.0/24 via 172.29.0.254 + ip -6 route add 172:28:0::/64 via 172:29:0::254 + + firezone-relay + privileged: true + init: true build: target: debug context: rust @@ -507,12 +585,70 @@ services: retries: 5 timeout: 5s depends_on: + router: + condition: "service_started" api: condition: "service_healthy" + networks: + relays: + ipv4_address: ${RELAY_2_PUBLIC_IP4_ADDR:-172.29.0.102} + ipv6_address: ${RELAY_2_PUBLIC_IP6_ADDR:-172:29:0::102} + + # Relays in prod always talk to a router to reach the Internet. We leverage this to avoid a map lookup and simply swap the + # MACs for all relayed traffic. So we mimic this setup for local dev and CI to ensure this eBPF code path is getting exercised. + # For this to work, we need to ensure the relays and client/gateway are *not* connected to the same Docker network, otherwise + # they will learn each other's MAC addresses via ARP and the next-hop MAC swap will not be valid. + router: + image: alpine:3.22 + sysctls: + - net.ipv4.ip_forward=1 + - net.ipv6.conf.all.forwarding=1 + - net.ipv6.conf.default.forwarding=1 + - net.ipv6.conf.all.disable_ipv6=0 + - net.ipv6.conf.default.disable_ipv6=0 + command: ["sleep", "infinity"] + init: true networks: app: - ipv4_address: ${RELAY_2_PUBLIC_IP4_ADDR:-172.28.0.201} - ipv6_address: ${RELAY_2_PUBLIC_IP6_ADDR:-172:28:0::201} + ipv4_address: 172.28.0.254 + ipv6_address: 172:28:0::254 + relays: + ipv4_address: 172.29.0.254 + ipv6_address: 172:29:0::254 + + # The veth driver uses a pair of interfaces to connect the docker bridge to the container namespace. + # For containers that have an eBPF program attached and do XDP_TX, we need to attach a dummy program + # to the corresponding veth interface on the host to be able to receive the XDP_TX traffic and pass + # it up to the docker bridge successfully. + # + # The "recommended" way to do this is to set both veth interfaces' GRO to on, or attach an XDP program + # that does XDP_PASS to the host side veth interface. The GRO method is not reliable and was shown to + # only pass packets in large bursts every 15-20 seconds which breaks ICE setup, so we use the XDP method. + veth-config: + image: ghcr.io/firezone/xdp-pass + pid: host + network_mode: host + privileged: true + command: + - sh + - -c + - | + set -e + + VETHS=$$(ip link show type veth | grep '^[0-9]' | awk '{print $$2}' | cut -d: -f1 | cut -d@ -f1) + + # Safe to attach to all veth interfaces on the host + for dev in $$VETHS; do + echo "Attaching XDP to: $$dev" + ip link set dev $$dev xdpdrv obj /xdp/xdp_pass.o sec xdp 2>/dev/null + done + + echo "Done configuring $$(echo "$$VETHS" | wc -w) veth interfaces" + depends_on: + relay-1: + condition: "service_started" + relay-2: + condition: "service_started" otel: image: otel/opentelemetry-collector:latest @@ -623,6 +759,12 @@ networks: config: - subnet: 172.28.0.0/24 - subnet: 172:28:0::/64 + relays: + enable_ipv6: true + ipam: + config: + - subnet: 172.29.0.0/24 + - subnet: 172:29:0::/64 99-ghost-in-da-edge: name: ghost-in-da-edge internal: false diff --git a/rust/docker-init-relay.sh b/rust/docker-init-relay.sh index 1b4eee9e2..b9d46327c 100755 --- a/rust/docker-init-relay.sh +++ b/rust/docker-init-relay.sh @@ -43,9 +43,9 @@ fi if [ "${OTEL_METADATA_DISCOVERY_METHOD}" = "gce_metadata" ]; then echo "Using GCE metadata to set OTEL metadata" - instance_id=$(curl "http://metadata.google.internal/computeMetadata/v1/instance/id" -H "Metadata-Flavor: Google" -s) # i.e. 5832583187537235075 - instance_name=$(curl "http://metadata.google.internal/computeMetadata/v1/instance/name" -H "Metadata-Flavor: Google" -s) # i.e. relay-m5k7 - zone=$(curl "http://metadata.google.internal/computeMetadata/v1/instance/zone" -H "Metadata-Flavor: Google" -s | cut -d/ -f4) # i.e. us-east-1 + instance_id=$(curl "http://metadata.google.internal/computeMetadata/v1/instance/id" -H "Metadata-Flavor: Google" -s) # i.e. 5832583187537235075 + instance_name=$(curl "http://metadata.google.internal/computeMetadata/v1/instance/name" -H "Metadata-Flavor: Google" -s) # i.e. relay-m5k7 + zone=$(curl "http://metadata.google.internal/computeMetadata/v1/instance/zone" -H "Metadata-Flavor: Google" -s | cut -d/ -f4) # i.e. us-east-1 # Source for attribute names: # - https://opentelemetry.io/docs/specs/semconv/attributes-registry/service/ @@ -54,4 +54,23 @@ if [ "${OTEL_METADATA_DISCOVERY_METHOD}" = "gce_metadata" ]; then echo "Discovered OTEL metadata: ${OTEL_RESOURCE_ATTRIBUTES}" fi +# If eBPF offloading is enabled, we need the source address to use for cross-stack relaying +if [ -n "${EBPF_OFFLOADING}" ]; then + if [ -z "${EBPF_INT4_ADDR}" ]; then + # Get the address of the EBPF_OFFLOADING interface used to reach the default gw + EBPF_INT4_ADDR=$(ip -4 addr show dev "${EBPF_OFFLOADING}" | awk '/inet / {print $2}' | cut -d/ -f1) + export EBPF_INT4_ADDR + fi + if [ -z "${EBPF_INT6_ADDR}" ]; then + # Get the address of the EBPF_OFFLOADING interface used to reach the default gw + EBPF_INT6_ADDR=$(ip -6 addr show dev "${EBPF_OFFLOADING}" scope global | awk '/inet6 / {print $2; exit}' | cut -d/ -f1) + export EBPF_INT6_ADDR + fi + + if [ -z "${EBPF_INT4_ADDR}" ] && [ -z "${EBPF_INT6_ADDR}" ]; then + echo "Failed to determine IP address(es) of interface ${EBPF_OFFLOADING}" + exit 1 + fi +fi + exec "$@" diff --git a/rust/relay/ebpf-shared/src/lib.rs b/rust/relay/ebpf-shared/src/lib.rs index 00b9e061a..001daefec 100644 --- a/rust/relay/ebpf-shared/src/lib.rs +++ b/rust/relay/ebpf-shared/src/lib.rs @@ -149,60 +149,6 @@ impl PortAndPeerV6 { } } -#[repr(C)] -#[derive(Clone, Copy, PartialEq, Eq)] -#[cfg_attr(feature = "std", derive(Debug))] -pub struct Config { - udp_checksum_enabled: bool, - lowest_allocation_port: [u8; 2], - highest_allocation_port: [u8; 2], -} - -impl Config { - pub fn udp_checksum_enabled(&self) -> bool { - self.udp_checksum_enabled - } - - pub fn with_udp_checksum(self, enabled: bool) -> Self { - Self { - udp_checksum_enabled: enabled, - ..self - } - } - - pub fn lowest_allocation_port(&self) -> u16 { - u16::from_be_bytes(self.lowest_allocation_port) - } - - pub fn with_lowest_allocation_port(self, port: u16) -> Self { - Self { - lowest_allocation_port: port.to_be_bytes(), - ..self - } - } - - pub fn highest_allocation_port(&self) -> u16 { - u16::from_be_bytes(self.highest_allocation_port) - } - - pub fn with_highest_allocation_port(self, port: u16) -> Self { - Self { - highest_allocation_port: port.to_be_bytes(), - ..self - } - } -} - -impl Default for Config { - fn default() -> Self { - Self { - udp_checksum_enabled: true, - lowest_allocation_port: 49152_u16.to_be_bytes(), - highest_allocation_port: 65535_u16.to_be_bytes(), - } - } -} - #[repr(C)] #[derive(Clone, Copy, Default)] #[cfg_attr(feature = "std", derive(Debug))] @@ -278,8 +224,6 @@ mod userspace { unsafe impl aya::Pod for PortAndPeerV6 {} - unsafe impl aya::Pod for Config {} - unsafe impl aya::Pod for InterfaceAddressV4 {} unsafe impl aya::Pod for InterfaceAddressV6 {} diff --git a/rust/relay/ebpf-turn-router/src/config.rs b/rust/relay/ebpf-turn-router/src/config.rs deleted file mode 100644 index f3e73c6de..000000000 --- a/rust/relay/ebpf-turn-router/src/config.rs +++ /dev/null @@ -1,14 +0,0 @@ -use aya_ebpf::{macros::map, maps::Array}; -use ebpf_shared::Config; - -/// Dynamic configuration of the eBPF program. -#[map] -static CONFIG: Array = Array::with_max_entries(1, 0); - -pub fn udp_checksum_enabled() -> bool { - config().udp_checksum_enabled() -} - -fn config() -> Config { - CONFIG.get(0).copied().unwrap_or_default() -} diff --git a/rust/relay/ebpf-turn-router/src/error.rs b/rust/relay/ebpf-turn-router/src/error.rs index 5e62dae6c..ac2c95db1 100644 --- a/rust/relay/ebpf-turn-router/src/error.rs +++ b/rust/relay/ebpf-turn-router/src/error.rs @@ -3,9 +3,9 @@ use core::num::NonZeroUsize; #[derive(Debug, Clone, Copy)] pub enum Error { InterfaceIpv4AddressAccessFailed, - InterfaceIpv4AddressNotLearned, + InterfaceIpv4AddressNotConfigured, InterfaceIpv6AddressAccessFailed, - InterfaceIpv6AddressNotLearned, + InterfaceIpv6AddressNotConfigured, UdpChecksumMissing, PacketTooShort, NotUdp, @@ -16,6 +16,7 @@ pub enum Error { BadChannelDataLength, NoEntry(SupportedChannel), XdpAdjustHeadFailed(i64), + PacketLoop, } #[derive(Debug, Clone, Copy)] @@ -34,11 +35,11 @@ impl aya_log_ebpf::WriteToBuf for Error { Error::InterfaceIpv4AddressAccessFailed => { "Failed to get pointer to interface IPv4 address map" } - Error::InterfaceIpv4AddressNotLearned => "Interface IPv4 address not learned", + Error::InterfaceIpv4AddressNotConfigured => "Interface IPv4 address not configured", Error::InterfaceIpv6AddressAccessFailed => { "Failed to get pointer to interface IPv6 address map" } - Error::InterfaceIpv6AddressNotLearned => "Interface IPv6 address not learned", + Error::InterfaceIpv6AddressNotConfigured => "Interface IPv6 address not configured", Error::UdpChecksumMissing => "UDP checksum is missing", Error::PacketTooShort => "Packet is too short", Error::NotUdp => "Not a UDP packet", @@ -47,6 +48,7 @@ impl aya_log_ebpf::WriteToBuf for Error { Error::Ipv4PacketWithOptions => "IPv4 packet has options", Error::NotAChannelDataMessage => "Not a channel data message", Error::BadChannelDataLength => "Channel data length does not match packet length", + Error::PacketLoop => "Packet loop detected", Error::NoEntry(ch) => match ch { SupportedChannel::Udp4ToChan => "No entry in UDPv4 to channel IPv4 or IPv6 map", SupportedChannel::Chan4ToUdp => "No entry in channel IPv4 to UDPv4 or UDPv6 map", diff --git a/rust/relay/ebpf-turn-router/src/main.rs b/rust/relay/ebpf-turn-router/src/main.rs index aa11f5205..66e0a8fb2 100644 --- a/rust/relay/ebpf-turn-router/src/main.rs +++ b/rust/relay/ebpf-turn-router/src/main.rs @@ -27,7 +27,6 @@ use ref_mut_at::ref_mut_at; mod channel_data; mod checksum; -mod config; mod error; mod ref_mut_at; mod stats; @@ -68,7 +67,7 @@ static CHAN_TO_UDP_64: HashMap = static UDP_TO_CHAN_64: HashMap = HashMap::with_max_entries(NUM_ENTRIES, 0); -// Per-CPU data structures to learn relay interface addresses +// Per-CPU data structures to store relay interface addresses (configured from userspace) #[map] static INT_ADDR_V4: PerCpuArray = PerCpuArray::with_max_entries(1, 0); #[map] @@ -80,12 +79,9 @@ pub fn handle_turn(ctx: XdpContext) -> u32 { Error::NotIp | Error::NotUdp => xdp_action::XDP_PASS, Error::InterfaceIpv4AddressAccessFailed - | Error::InterfaceIpv4AddressNotLearned | Error::InterfaceIpv6AddressAccessFailed - | Error::InterfaceIpv6AddressNotLearned | Error::PacketTooShort | Error::NotTurn - | Error::NoEntry(_) | Error::NotAChannelDataMessage | Error::UdpChecksumMissing | Error::Ipv4PacketWithOptions => { @@ -94,6 +90,15 @@ pub fn handle_turn(ctx: XdpContext) -> u32 { xdp_action::XDP_PASS } + Error::InterfaceIpv4AddressNotConfigured + | Error::PacketLoop + | Error::NoEntry(_) + | Error::InterfaceIpv6AddressNotConfigured => { + debug!(&ctx, "Dropping packet: {}", e); + + xdp_action::XDP_DROP + } + Error::BadChannelDataLength | Error::XdpAdjustHeadFailed(_) => { warn!(&ctx, "Dropping packet: {}", e); @@ -122,8 +127,6 @@ fn try_handle_turn_ipv4(ctx: &XdpContext) -> Result<(), Error> { // SAFETY: The offset must point to the start of a valid `Ipv4Hdr`. let ipv4 = unsafe { ref_mut_at::(ctx, EthHdr::LEN)? }; - learn_interface_ipv4_address(ipv4)?; - if ipv4.proto != IpProto::Udp { return Err(Error::NotUdp); } @@ -169,8 +172,6 @@ fn try_handle_turn_ipv6(ctx: &XdpContext) -> Result<(), Error> { // SAFETY: The offset must point to the start of a valid `Ipv6Hdr`. let ipv6 = unsafe { ref_mut_at::(ctx, EthHdr::LEN)? }; - learn_interface_ipv6_address(ipv6)?; - if ipv6.next_hdr != IpProto::Udp { return Err(Error::NotUdp); } @@ -206,42 +207,6 @@ fn try_handle_turn_ipv6(ctx: &XdpContext) -> Result<(), Error> { Err(Error::NotTurn) } -#[inline(always)] -fn learn_interface_ipv4_address(ipv4: &Ipv4Hdr) -> Result<(), Error> { - let interface_addr = INT_ADDR_V4 - .get_ptr_mut(0) - .ok_or(Error::InterfaceIpv4AddressAccessFailed)?; - - let dst_ip = ipv4.dst_addr(); - - // SAFETY: These are per-cpu maps so we don't need to worry about thread safety. - unsafe { - if (*interface_addr).get().is_none() { - (*interface_addr).set(dst_ip); - } - } - - Ok(()) -} - -#[inline(always)] -fn learn_interface_ipv6_address(ipv6: &Ipv6Hdr) -> Result<(), Error> { - let interface_addr = INT_ADDR_V6 - .get_ptr_mut(0) - .ok_or(Error::InterfaceIpv6AddressAccessFailed)?; - - let dst_ip = ipv6.dst_addr(); - - // SAFETY: These are per-cpu maps so we don't need to worry about thread safety. - unsafe { - if (*interface_addr).get().is_none() { - (*interface_addr).set(dst_ip); - } - } - - Ok(()) -} - #[inline(always)] fn try_handle_ipv4_udp_to_channel_data(ctx: &XdpContext) -> Result<(), Error> { // SAFETY: The offset must point to the start of a valid `Ipv4Hdr`. @@ -451,6 +416,11 @@ fn handle_ipv4_udp_to_ipv4_channel( let new_ipv4_dst = client_and_channel.client_ip(); let new_ipv4_len = old_ipv4_len + CdHdr::LEN as u16; + // Check for packet loop - would we be sending to ourselves? + if new_ipv4_src == new_ipv4_dst { + return Err(Error::PacketLoop); + } + // SAFETY: The offset must point to the start of a valid `Ipv4Hdr`. let ipv4 = unsafe { ref_mut_at::(ctx, EthHdr::LEN)? }; ipv4.set_version(4); // IPv4 @@ -490,8 +460,7 @@ fn handle_ipv4_udp_to_ipv4_channel( // Incrementally update UDP checksum - // TODO: Remove conditional checksums once we can test this fully in CI - if old_udp_check == 0 || !crate::config::udp_checksum_enabled() { + if old_udp_check == 0 { // No checksum is valid for UDP IPv4 - we didn't write it, but maybe a middlebox did udp.set_check(0); } else { @@ -570,6 +539,12 @@ fn handle_ipv4_udp_to_ipv6_channel( ) }; + // Refuse to compute full UDP checksum. + // We forged these packets, so something's wrong if this is zero. + if old_udp_check == 0 { + return Err(Error::UdpChecksumMissing); + } + // // 1. Ethernet header // @@ -588,6 +563,11 @@ fn handle_ipv4_udp_to_ipv6_channel( let new_ipv6_dst = client_and_channel.client_ip(); let new_ipv6_len = old_ipv4_len - Ipv4Hdr::LEN as u16 + CdHdr::LEN as u16; + // Check for packet loop - would we be sending to ourselves? + if new_ipv6_dst == new_ipv6_src { + return Err(Error::PacketLoop); + } + // SAFETY: The offset must point to the start of a valid `Ipv6Hdr`. let ipv6 = unsafe { ref_mut_at::(ctx, EthHdr::LEN)? }; ipv6.set_version(6); @@ -618,29 +598,24 @@ fn handle_ipv4_udp_to_ipv6_channel( // Incrementally update UDP checksum - // TODO: Remove conditional checksums once we can test this fully in CI - if !crate::config::udp_checksum_enabled() { - udp.set_check(0); - } else { - udp.set_check( - ChecksumUpdate::new(old_udp_check) - .remove_u32(u32::from_be_bytes(old_ipv4_src.octets())) - .remove_u32(u32::from_be_bytes(old_ipv4_dst.octets())) - .remove_u16(old_udp_src) - .remove_u16(old_udp_dst) - .remove_u16(old_udp_len) - .remove_u16(old_udp_len) - .add_u128(u128::from_be_bytes(new_ipv6_src.octets())) - .add_u128(u128::from_be_bytes(new_ipv6_dst.octets())) - .add_u16(new_udp_src) - .add_u16(new_udp_dst) - .add_u16(new_udp_len) - .add_u16(new_udp_len) - .add_u16(channel_number) - .add_u16(channel_data_length) - .into_udp_checksum(), - ); - } + udp.set_check( + ChecksumUpdate::new(old_udp_check) + .remove_u32(u32::from_be_bytes(old_ipv4_src.octets())) + .remove_u32(u32::from_be_bytes(old_ipv4_dst.octets())) + .remove_u16(old_udp_src) + .remove_u16(old_udp_dst) + .remove_u16(old_udp_len) + .remove_u16(old_udp_len) + .add_u128(u128::from_be_bytes(new_ipv6_src.octets())) + .add_u128(u128::from_be_bytes(new_ipv6_dst.octets())) + .add_u16(new_udp_src) + .add_u16(new_udp_dst) + .add_u16(new_udp_len) + .add_u16(new_udp_len) + .add_u16(channel_number) + .add_u16(channel_data_length) + .into_udp_checksum(), + ); // // 4. Channel data header @@ -704,12 +679,6 @@ fn handle_ipv4_channel_to_ipv4_udp( ) }; - // Refuse to compute full UDP checksum. - // We forged these packets, so something's wrong if this is zero. - if old_udp_check == 0 { - return Err(Error::UdpChecksumMissing); - } - let (channel_number, channel_data_length) = { // SAFETY: The offset must point to the start of a valid `CdHdr`. let old_cd = unsafe { ref_mut_at::(ctx, EthHdr::LEN + Ipv4Hdr::LEN + UdpHdr::LEN)? }; @@ -737,6 +706,11 @@ fn handle_ipv4_channel_to_ipv4_udp( let new_ipv4_dst = port_and_peer.peer_ip(); let new_ipv4_len = old_ipv4_len - CdHdr::LEN as u16; + // Check for packet loop - would we be sending to ourselves? + if new_ipv4_src == new_ipv4_dst { + return Err(Error::PacketLoop); + } + // SAFETY: The offset must point to the start of a valid `Ipv4Hdr`. let ipv4 = unsafe { ref_mut_at::(ctx, NET_SHRINK as usize + EthHdr::LEN)? }; ipv4.set_version(4); // IPv4 @@ -775,8 +749,7 @@ fn handle_ipv4_channel_to_ipv4_udp( // Incrementally update UDP checksum - // TODO: Remove conditional checksums once we can test this fully in CI - if old_udp_check == 0 || !crate::config::udp_checksum_enabled() { + if old_udp_check == 0 { // No checksum is valid for UDP IPv4 - we didn't write it, but maybe a middlebox did udp.set_check(0); } else { @@ -884,6 +857,11 @@ fn handle_ipv4_channel_to_ipv6_udp( let new_ipv6_dst = port_and_peer.peer_ip(); let new_udp_len = old_udp_len - CdHdr::LEN as u16; + // Check for packet loop - would we be sending to ourselves? + if new_ipv6_src == new_ipv6_dst { + return Err(Error::PacketLoop); + } + // SAFETY: The offset must point to the start of a valid `Ipv6Hdr`. let ipv6 = unsafe { ref_mut_at::(ctx, EthHdr::LEN)? }; ipv6.set_version(6); // IPv6 @@ -910,29 +888,24 @@ fn handle_ipv4_channel_to_ipv6_udp( // Incrementally update UDP checksum - // TODO: Remove conditional checksums once we can test this fully in CI - if !crate::config::udp_checksum_enabled() { - udp.set_check(0); - } else { - udp.set_check( - ChecksumUpdate::new(old_udp_check) - .remove_u32(u32::from_be_bytes(old_ipv4_src.octets())) - .remove_u32(u32::from_be_bytes(old_ipv4_dst.octets())) - .remove_u16(old_udp_src) - .remove_u16(old_udp_dst) - .remove_u16(old_udp_len) - .remove_u16(old_udp_len) - .remove_u16(channel_number) - .remove_u16(channel_data_length) - .add_u128(u128::from_be_bytes(new_ipv6_src.octets())) - .add_u128(u128::from_be_bytes(new_ipv6_dst.octets())) - .add_u16(new_udp_src) - .add_u16(new_udp_dst) - .add_u16(new_udp_len) - .add_u16(new_udp_len) - .into_udp_checksum(), - ); - } + udp.set_check( + ChecksumUpdate::new(old_udp_check) + .remove_u32(u32::from_be_bytes(old_ipv4_src.octets())) + .remove_u32(u32::from_be_bytes(old_ipv4_dst.octets())) + .remove_u16(old_udp_src) + .remove_u16(old_udp_dst) + .remove_u16(old_udp_len) + .remove_u16(old_udp_len) + .remove_u16(channel_number) + .remove_u16(channel_data_length) + .add_u128(u128::from_be_bytes(new_ipv6_src.octets())) + .add_u128(u128::from_be_bytes(new_ipv6_dst.octets())) + .add_u16(new_udp_src) + .add_u16(new_udp_dst) + .add_u16(new_udp_len) + .add_u16(new_udp_len) + .into_udp_checksum(), + ); Ok(()) } @@ -1008,6 +981,11 @@ fn handle_ipv6_udp_to_ipv6_channel( let new_ipv6_dst = client_and_channel.client_ip(); let new_ipv6_len = old_ipv6_len + CdHdr::LEN as u16; + // Check for packet loop - would we be sending to ourselves? + if new_ipv6_src == new_ipv6_dst { + return Err(Error::PacketLoop); + } + // SAFETY: The offset must point to the start of a valid `Ipv6Hdr`. let ipv6 = unsafe { ref_mut_at::(ctx, EthHdr::LEN)? }; // Set fields explicitly to avoid reading potentially corrupted memory @@ -1038,27 +1016,22 @@ fn handle_ipv6_udp_to_ipv6_channel( // Incrementally update UDP checksum - // TODO: Remove conditional checksums once we can test this fully in CI - if !crate::config::udp_checksum_enabled() { - udp.set_check(0); - } else { - udp.set_check( - ChecksumUpdate::new(old_udp_check) - .remove_u128(u128::from_be_bytes(old_ipv6_src.octets())) - .remove_u16(old_udp_src) - .remove_u16(old_udp_dst) - .remove_u16(old_udp_len) - .remove_u16(old_udp_len) - .add_u128(u128::from_be_bytes(new_ipv6_dst.octets())) - .add_u16(new_udp_src) - .add_u16(new_udp_dst) - .add_u16(new_udp_len) - .add_u16(new_udp_len) - .add_u16(channel_number) - .add_u16(channel_data_length) - .into_udp_checksum(), - ); - } + udp.set_check( + ChecksumUpdate::new(old_udp_check) + .remove_u128(u128::from_be_bytes(old_ipv6_src.octets())) + .remove_u16(old_udp_src) + .remove_u16(old_udp_dst) + .remove_u16(old_udp_len) + .remove_u16(old_udp_len) + .add_u128(u128::from_be_bytes(new_ipv6_dst.octets())) + .add_u16(new_udp_src) + .add_u16(new_udp_dst) + .add_u16(new_udp_len) + .add_u16(new_udp_len) + .add_u16(channel_number) + .add_u16(channel_data_length) + .into_udp_checksum(), + ); // // 4. Channel data header @@ -1129,6 +1102,11 @@ fn handle_ipv6_udp_to_ipv4_channel( let new_udp_len = old_udp_len + CdHdr::LEN as u16; let new_ipv4_len = Ipv4Hdr::LEN as u16 + new_udp_len; + // Check for packet loop - would we be sending to ourselves? + if new_ipv4_dst == new_ipv4_src { + return Err(Error::PacketLoop); + } + // SAFETY: The offset must point to the start of a valid `Ipv4Hdr`. let ipv4 = unsafe { ref_mut_at::(ctx, NET_SHRINK as usize + EthHdr::LEN)? }; ipv4.set_version(4); @@ -1164,29 +1142,24 @@ fn handle_ipv6_udp_to_ipv4_channel( // Incrementally update UDP checksum - // TODO: Remove conditional checksums once we can test this fully in CI - if !crate::config::udp_checksum_enabled() { - udp.set_check(0); - } else { - udp.set_check( - ChecksumUpdate::new(old_udp_check) - .remove_u128(u128::from_be_bytes(old_ipv6_src.octets())) - .remove_u128(u128::from_be_bytes(old_ipv6_dst.octets())) - .remove_u16(old_udp_src) - .remove_u16(old_udp_dst) - .remove_u16(old_udp_len) - .remove_u16(old_udp_len) - .add_u32(u32::from_be_bytes(new_ipv4_src.octets())) - .add_u32(u32::from_be_bytes(new_ipv4_dst.octets())) - .add_u16(new_udp_src) - .add_u16(new_udp_dst) - .add_u16(new_udp_len) - .add_u16(new_udp_len) - .add_u16(channel_number) - .add_u16(channel_data_length) - .into_udp_checksum(), - ); - } + udp.set_check( + ChecksumUpdate::new(old_udp_check) + .remove_u128(u128::from_be_bytes(old_ipv6_src.octets())) + .remove_u128(u128::from_be_bytes(old_ipv6_dst.octets())) + .remove_u16(old_udp_src) + .remove_u16(old_udp_dst) + .remove_u16(old_udp_len) + .remove_u16(old_udp_len) + .add_u32(u32::from_be_bytes(new_ipv4_src.octets())) + .add_u32(u32::from_be_bytes(new_ipv4_dst.octets())) + .add_u16(new_udp_src) + .add_u16(new_udp_dst) + .add_u16(new_udp_len) + .add_u16(new_udp_len) + .add_u16(channel_number) + .add_u16(channel_data_length) + .into_udp_checksum(), + ); // // 4. Channel data header @@ -1280,6 +1253,11 @@ fn handle_ipv6_channel_to_ipv6_udp( let new_ipv6_dst = port_and_peer.peer_ip(); let new_ipv6_len = old_ipv6_len - CdHdr::LEN as u16; + // Check for packet loop - would we be sending to ourselves? + if new_ipv6_src == new_ipv6_dst { + return Err(Error::PacketLoop); + } + // SAFETY: The offset must point to the start of a valid `Ipv6Hdr`. let ipv6 = unsafe { ref_mut_at::(ctx, NET_SHRINK as usize + EthHdr::LEN)? }; ipv6.set_version(6); // IPv6 @@ -1308,27 +1286,22 @@ fn handle_ipv6_channel_to_ipv6_udp( // Incrementally update UDP checksum - // TODO: Remove conditional checksums once we can test this fully in CI - if !crate::config::udp_checksum_enabled() { - udp.set_check(0); - } else { - udp.set_check( - ChecksumUpdate::new(old_udp_check) - .remove_u128(u128::from_be_bytes(old_ipv6_src.octets())) - .remove_u16(old_udp_src) - .remove_u16(old_udp_dst) - .remove_u16(old_udp_len) - .remove_u16(old_udp_len) - .remove_u16(channel_number) - .remove_u16(channel_data_length) - .add_u128(u128::from_be_bytes(new_ipv6_dst.octets())) - .add_u16(new_udp_src) - .add_u16(new_udp_dst) - .add_u16(new_udp_len) - .add_u16(new_udp_len) - .into_udp_checksum(), - ); - } + udp.set_check( + ChecksumUpdate::new(old_udp_check) + .remove_u128(u128::from_be_bytes(old_ipv6_src.octets())) + .remove_u16(old_udp_src) + .remove_u16(old_udp_dst) + .remove_u16(old_udp_len) + .remove_u16(old_udp_len) + .remove_u16(channel_number) + .remove_u16(channel_data_length) + .add_u128(u128::from_be_bytes(new_ipv6_dst.octets())) + .add_u16(new_udp_src) + .add_u16(new_udp_dst) + .add_u16(new_udp_len) + .add_u16(new_udp_len) + .into_udp_checksum(), + ); adjust_head(ctx, NET_SHRINK)?; @@ -1400,6 +1373,11 @@ fn handle_ipv6_channel_to_ipv4_udp( let new_ipv4_dst = port_and_peer.peer_ip(); let new_ipv4_len = old_udp_len - CdHdr::LEN as u16 + Ipv4Hdr::LEN as u16; + // Check for packet loop - would we be sending to ourselves? + if new_ipv4_src == new_ipv4_dst { + return Err(Error::PacketLoop); + } + // SAFETY: The offset must point to the start of a valid `Ipv4Hdr`. let ipv4 = unsafe { ref_mut_at::(ctx, NET_SHRINK as usize + EthHdr::LEN)? }; ipv4.set_version(4); @@ -1434,29 +1412,24 @@ fn handle_ipv6_channel_to_ipv4_udp( // Incrementally update UDP checksum - // TODO: Remove conditional checksums once we can test this fully in CI - if !crate::config::udp_checksum_enabled() { - udp.set_check(0); - } else { - udp.set_check( - ChecksumUpdate::new(old_udp_check) - .remove_u128(u128::from_be_bytes(old_ipv6_src.octets())) - .remove_u128(u128::from_be_bytes(old_ipv6_dst.octets())) - .remove_u16(old_udp_src) - .remove_u16(old_udp_dst) - .remove_u16(old_udp_len) - .remove_u16(old_udp_len) - .remove_u16(channel_number) - .remove_u16(channel_data_length) - .add_u32(u32::from_be_bytes(new_ipv4_src.octets())) - .add_u32(u32::from_be_bytes(new_ipv4_dst.octets())) - .add_u16(new_udp_src) - .add_u16(new_udp_dst) - .add_u16(new_udp_len) - .add_u16(new_udp_len) - .into_udp_checksum(), - ); - } + udp.set_check( + ChecksumUpdate::new(old_udp_check) + .remove_u128(u128::from_be_bytes(old_ipv6_src.octets())) + .remove_u128(u128::from_be_bytes(old_ipv6_dst.octets())) + .remove_u16(old_udp_src) + .remove_u16(old_udp_dst) + .remove_u16(old_udp_len) + .remove_u16(old_udp_len) + .remove_u16(channel_number) + .remove_u16(channel_data_length) + .add_u32(u32::from_be_bytes(new_ipv4_src.octets())) + .add_u32(u32::from_be_bytes(new_ipv4_dst.octets())) + .add_u16(new_udp_src) + .add_u16(new_udp_dst) + .add_u16(new_udp_len) + .add_u16(new_udp_len) + .into_udp_checksum(), + ); adjust_head(ctx, NET_SHRINK)?; @@ -1483,7 +1456,7 @@ fn get_interface_ipv4_address() -> Result { // SAFETY: This comes from a per-cpu data structure so we can safely access it. let addr = unsafe { *interface_addr }; - addr.get().ok_or(Error::InterfaceIpv4AddressNotLearned) + addr.get().ok_or(Error::InterfaceIpv4AddressNotConfigured) } fn get_interface_ipv6_address() -> Result { @@ -1494,7 +1467,7 @@ fn get_interface_ipv6_address() -> Result { // SAFETY: This comes from a per-cpu data structure so we can safely access it. let addr = unsafe { *interface_addr }; - addr.get().ok_or(Error::InterfaceIpv6AddressNotLearned) + addr.get().ok_or(Error::InterfaceIpv6AddressNotConfigured) } /// Defines our panic handler. diff --git a/rust/relay/server/Cargo.toml b/rust/relay/server/Cargo.toml index d8af1cbef..3ecc71859 100644 --- a/rust/relay/server/Cargo.toml +++ b/rust/relay/server/Cargo.toml @@ -16,7 +16,6 @@ bytecodec = { workspace = true } bytes = { workspace = true } clap = { workspace = true, features = ["derive", "env"] } derive_more = { workspace = true, features = ["from"] } -ebpf-shared = { workspace = true, features = ["std"] } firezone-bin-shared = { workspace = true } firezone-logging = { workspace = true } firezone-telemetry = { workspace = true } @@ -53,6 +52,7 @@ uuid = { workspace = true, features = ["v4"] } [target.'cfg(target_os = "linux")'.dependencies] aya = { workspace = true, features = ["tokio"] } aya-log = { workspace = true } +ebpf-shared = { workspace = true, features = ["std"] } [target.'cfg(target_os = "linux")'.build-dependencies] anyhow = "1" diff --git a/rust/relay/server/src/ebpf/linux.rs b/rust/relay/server/src/ebpf/linux.rs index 5cb3699e1..749d7051a 100644 --- a/rust/relay/server/src/ebpf/linux.rs +++ b/rust/relay/server/src/ebpf/linux.rs @@ -1,15 +1,16 @@ -use std::net::SocketAddr; +use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr}; use anyhow::{Context as _, Result}; use aya::{ Pod, - maps::{Array, AsyncPerfEventArray, HashMap, MapData}, + maps::{AsyncPerfEventArray, HashMap, MapData, PerCpuArray, PerCpuValues}, programs::{Xdp, XdpFlags}, }; use aya_log::EbpfLogger; use bytes::BytesMut; use ebpf_shared::{ - ClientAndChannelV4, ClientAndChannelV6, Config, PortAndPeerV4, PortAndPeerV6, StatsEvent, + ClientAndChannelV4, ClientAndChannelV6, InterfaceAddressV4, InterfaceAddressV6, PortAndPeerV4, + PortAndPeerV6, StatsEvent, }; use stun_codec::rfc5766::attributes::ChannelNumber; @@ -32,7 +33,12 @@ pub struct Program { } impl Program { - pub fn try_load(interface: &str, attach_mode: AttachMode) -> Result { + pub fn try_load( + interface: &str, + attach_mode: AttachMode, + ipv4_addr: Option, + ipv6_addr: Option, + ) -> Result { let mut ebpf = aya::Ebpf::load(aya::include_bytes_aligned!(concat!( env!("OUT_DIR"), "/ebpf-turn-router-main" @@ -95,7 +101,7 @@ impl Program { tracing::warn!(%cpu_id, num_lost = %events.lost, "Lost perf events"); } - tracing::debug!(%cpu_id, num_read = %events.read, "Read perf events from eBPF kernel"); + tracing::trace!(%cpu_id, num_read = %events.read, "Read perf events from eBPF kernel"); for bytes in buffers.iter().take(events.read) { let Some(stats) = StatsEvent::from_bytes(bytes) else { @@ -114,6 +120,14 @@ impl Program { }); } + // Set interface addresses if provided + if let Some(ipv4) = ipv4_addr { + set_interface_ipv4_address(&mut ebpf, ipv4)?; + } + if let Some(ipv6) = ipv6_addr { + set_interface_ipv6_address(&mut ebpf, ipv6)?; + } + tracing::info!("eBPF TURN router loaded and attached to interface {interface}"); Ok(Self { ebpf, stats }) @@ -223,12 +237,6 @@ impl Program { Ok(()) } - pub fn set_config(&mut self, config: Config) -> Result<()> { - self.config_array_mut()?.set(0, config, 0)?; - - Ok(()) - } - fn chan_to_udp_44_map_mut( &mut self, ) -> Result> { @@ -277,10 +285,6 @@ impl Program { self.hash_map_mut("UDP_TO_CHAN_64") } - fn config_array_mut(&mut self) -> Result> { - self.array_mut("CONFIG") - } - fn hash_map_mut(&mut self, name: &'static str) -> Result> where K: Pod, @@ -294,17 +298,48 @@ impl Program { Ok(map) } - - fn array_mut(&mut self, name: &'static str) -> Result> - where - T: Pod, - { - let map = self - .ebpf - .map_mut(name) - .with_context(|| format!("Array `{name}` not found"))?; - let map = Array::<_, T>::try_from(map).context("Failed to convert array")?; - - Ok(map) - } +} + +fn set_interface_ipv4_address(ebpf: &mut aya::Ebpf, addr: Ipv4Addr) -> Result<()> { + let mut interface_addr = InterfaceAddressV4::default(); + interface_addr.set(addr); + + set_per_cpu_map(ebpf, "INT_ADDR_V4", interface_addr) + .context("Failed to set IPv4 interface address")?; + + tracing::info!(%addr, "Set eBPF interface IPv4 address"); + Ok(()) +} + +fn set_interface_ipv6_address(ebpf: &mut aya::Ebpf, addr: Ipv6Addr) -> Result<()> { + let mut interface_addr = InterfaceAddressV6::default(); + interface_addr.set(addr); + + set_per_cpu_map(ebpf, "INT_ADDR_V6", interface_addr) + .context("Failed to set IPv6 interface address")?; + + tracing::info!(%addr, "Set eBPF interface IPv6 address"); + Ok(()) +} + +fn set_per_cpu_map(ebpf: &mut aya::Ebpf, map_name: &str, value: T) -> Result<()> +where + T: Pod + Clone, +{ + let map = ebpf + .map_mut(map_name) + .with_context(|| format!("{map_name} map not found"))?; + let mut per_cpu_map: PerCpuArray<&mut MapData, T> = PerCpuArray::try_from(map)?; + + // Get the number of CPUs and create a value for each CPU + let num_cpus = + aya::util::nr_cpus().map_err(|(_, e)| anyhow::anyhow!("Failed to get CPU count: {}", e))?; + let values = vec![value; num_cpus]; + + per_cpu_map + .set(0, PerCpuValues::try_from(values)?, 0) + .with_context(|| format!("Failed to set per-CPU values in {map_name}"))?; + + tracing::debug!(%map_name, %num_cpus, "Set per-CPU map with value"); + Ok(()) } diff --git a/rust/relay/server/src/ebpf/stub.rs b/rust/relay/server/src/ebpf/stub.rs index 9a3c350c0..0ae6f0a93 100644 --- a/rust/relay/server/src/ebpf/stub.rs +++ b/rust/relay/server/src/ebpf/stub.rs @@ -4,7 +4,7 @@ )] use anyhow::Result; -use ebpf_shared::Config; +use std::net::{Ipv4Addr, Ipv6Addr}; use stun_codec::rfc5766::attributes::ChannelNumber; use crate::ebpf::AttachMode; @@ -13,7 +13,12 @@ use crate::{AllocationPort, ClientSocket, PeerSocket}; pub struct Program {} impl Program { - pub fn try_load(_: &str, _: AttachMode) -> Result { + pub fn try_load( + _: &str, + _: AttachMode, + _: Option, + _: Option, + ) -> Result { Err(anyhow::anyhow!("Platform not supported")) } @@ -36,12 +41,4 @@ impl Program { ) -> Result<()> { Ok(()) } - - pub fn set_config(&mut self, _: Config) -> Result<()> { - Ok(()) - } - - pub fn config(&self) -> Config { - Config::default() - } } diff --git a/rust/relay/server/src/main.rs b/rust/relay/server/src/main.rs index 65a5ebb82..364111cea 100644 --- a/rust/relay/server/src/main.rs +++ b/rust/relay/server/src/main.rs @@ -3,7 +3,6 @@ use anyhow::{Context, Result, bail}; use backoff::ExponentialBackoffBuilder; use clap::Parser; -use ebpf_shared::Config; use firezone_bin_shared::http_health_check; use firezone_logging::{FilterReloadHandle, err_with_src, sentry_layer}; use firezone_relay::sockets::Sockets; @@ -94,6 +93,16 @@ struct Args { #[arg(long, env, hide = true, default_value = "driver")] ebpf_attach_mode: ebpf::AttachMode, + /// IPv4 address of the interface where eBPF is attached. + /// Required when ebpf_offloading is set. + #[arg(long, env)] + ebpf_int4_addr: Option, + + /// IPv6 address of the interface where eBPF is attached. + /// Required when ebpf_offloading is set. + #[arg(long, env)] + ebpf_int6_addr: Option, + #[command(flatten)] health_check: http_health_check::HealthCheckArgs, @@ -154,22 +163,41 @@ fn main() { async fn try_main(args: Args) -> Result<()> { let filter_reload_handle = setup_tracing(&args)?; - let mut ebpf = args - .ebpf_offloading - .as_deref() - .map(|interface| ebpf::Program::try_load(interface, args.ebpf_attach_mode)) - .transpose() - .context("Failed to load eBPF TURN router")?; + let ebpf = if let Some(interface) = args.ebpf_offloading.as_deref() { + if args.ebpf_int4_addr.is_none() { + tracing::warn!( + "eBPF offloading enabled with but EBPF_INT4_ADDR not set. IPv6 to IPv4 relaying will not work." + ); + } - if let Some(ebpf) = ebpf.as_mut() { - ebpf.set_config( - Config::default() - .with_udp_checksum(true) - .with_lowest_allocation_port(args.lowest_port) - .with_highest_allocation_port(args.highest_port), + if args.ebpf_int6_addr.is_none() { + tracing::warn!( + "eBPF offloading enabled with but EBPF_INT6_ADDR not set. IPv4 to IPv6 relaying will not work." + ); + } + + if let Some(ipv4) = args.ebpf_int4_addr + && let Some(ipv6) = args.ebpf_int6_addr + { + tracing::info!( + "eBPF offloading enabled with IPv4 address {} and IPv6 address {}", + ipv4, + ipv6 + ); + } + + Some( + ebpf::Program::try_load( + interface, + args.ebpf_attach_mode, + args.ebpf_int4_addr, + args.ebpf_int6_addr, + ) + .context("Failed to load eBPF TURN router")?, ) - .context("Failed to set config of eBPF program")?; - } + } else { + None + }; let public_addr = match (args.public_ip4_addr, args.public_ip6_addr) { (Some(ip4), Some(ip6)) => IpStack::Dual { ip4, ip6 }, diff --git a/rust/relay/server/src/server.rs b/rust/relay/server/src/server.rs index 36608b0d6..a41a0c689 100644 --- a/rust/relay/server/src/server.rs +++ b/rust/relay/server/src/server.rs @@ -1038,6 +1038,14 @@ where debug_assert_eq!(&existing_n, number, "internal state should be consistent"); } + self.pending_commands + .push_back(Command::DeleteChannelBinding { + client: *cs, + channel_number: *number, + peer: c.peer_address, + allocation_port: c.allocation, + }); + tracing::info!(%peer, %number, allocation = %port, "Deleted channel binding"); false diff --git a/rust/relay/server/tests/ebpf_ipv4.rs b/rust/relay/server/tests/ebpf_ipv4.rs deleted file mode 100644 index d2f5bb93a..000000000 --- a/rust/relay/server/tests/ebpf_ipv4.rs +++ /dev/null @@ -1,129 +0,0 @@ -#![allow(clippy::unwrap_used)] - -use firezone_relay::{AllocationPort, ClientSocket, PeerSocket, ebpf}; -use opentelemetry::global; -use opentelemetry_sdk::metrics::{ - InMemoryMetricExporter, PeriodicReader, SdkMeterProvider, - data::{AggregatedMetrics, MetricData}, -}; -use std::time::Duration; -use tokio::net::UdpSocket; - -use ebpf_shared::Config; -use stun_codec::rfc5766::attributes::ChannelNumber; - -#[tokio::test] -#[ignore = "Needs root"] -async fn ping_pong() { - let _guard = firezone_logging::test("trace,mio=off"); - - let (_meter_provider, exporter) = init_meter_provider(); - - let mut program = ebpf::Program::try_load("lo", ebpf::AttachMode::Generic).unwrap(); - - // Linux does not set the correct UDP checksum when sending the packet, so our updated checksum in the eBPF code will be wrong and later dropped. - // To make the test work, we therefore need to tell the eBPF program to disable UDP checksumming by just setting it to 0. - program - .set_config(Config::default().with_udp_checksum(false)) - .unwrap(); - - let client = UdpSocket::bind("127.0.0.1:0").await.unwrap(); - let peer = UdpSocket::bind("127.0.0.1:0").await.unwrap(); - - let client_socket = client.local_addr().unwrap(); - let peer_socket = peer.local_addr().unwrap(); - - let channel_number = ChannelNumber::new(0x4000).unwrap(); - let allocation_port = 50000; - - program - .add_channel_binding( - ClientSocket::new(client_socket), - channel_number, - PeerSocket::new(peer_socket), - AllocationPort::new(allocation_port), - ) - .unwrap(); - - { - let msg = b"ping"; - let msg_len = msg.len(); - let mut buf = [0u8; 512]; - - let (header, payload) = buf.split_at_mut(4); - payload[..msg_len].copy_from_slice(msg); - - let len = firezone_relay::ChannelData::encode_header_to_slice( - channel_number, - msg_len as u16, - header, - ); - - client.send_to(&buf[..len], "127.0.0.1:3478").await.unwrap(); - - let mut recv_buf = [0u8; 512]; - - let (len, from) = - tokio::time::timeout(Duration::from_secs(1), peer.recv_from(&mut recv_buf)) - .await - .unwrap() - .unwrap(); - - assert_eq!(from.port(), allocation_port); - assert_eq!(&recv_buf[..len], msg); - } - - { - let msg = b"pong"; - - peer.send_to(msg, format!("127.0.0.1:{allocation_port}")) - .await - .unwrap(); - - let mut recv_buf = [0u8; 512]; - - let (len, from) = - tokio::time::timeout(Duration::from_secs(1), client.recv_from(&mut recv_buf)) - .await - .unwrap() - .unwrap(); - let channel_data = firezone_relay::ChannelData::parse(&recv_buf[..len]).unwrap(); - - assert_eq!(from.port(), 3478); - assert_eq!(channel_data.data(), msg); - } - - tokio::time::sleep(Duration::from_millis(10)).await; // Wait for metrics to be exported. - - let metrics = exporter.get_finished_metrics().unwrap(); - - assert!(!metrics.is_empty()); - - let metric = &metrics - .iter() - .last() - .and_then(|m| m.scope_metrics().next()) - .and_then(|m| m.metrics().next()) - .unwrap(); - let AggregatedMetrics::U64(MetricData::Sum(sum)) = metric.data() else { - panic!("Not an u64 sum"); - }; - - assert_eq!(metric.name(), "data_relayed_ebpf_bytes"); - assert_eq!(sum.data_points().next().unwrap().value(), 4 + 4); // "ping" and "pong" are both 4 bytes. -} - -fn init_meter_provider() -> (SdkMeterProvider, InMemoryMetricExporter) { - let exporter = InMemoryMetricExporter::default(); - - let provider = SdkMeterProvider::builder() - .with_reader( - PeriodicReader::builder(exporter.clone()) - .with_interval(Duration::from_millis(1)) - .build(), - ) - .build(); - global::set_meter_provider(provider.clone()); - - (provider, exporter) -} diff --git a/scripts/tests/direct-download-roaming-network.sh b/scripts/tests/direct-download-roaming-network.sh index d5aeb2dd3..5b595cc5b 100755 --- a/scripts/tests/direct-download-roaming-network.sh +++ b/scripts/tests/direct-download-roaming-network.sh @@ -2,11 +2,12 @@ source "./scripts/tests/lib.sh" -# Download 10MB at a max rate of 1MB/s. Shouldn't take longer than 13 seconds (allows for 3s of restablishing) +# Download 10MB at a max rate of 1MB/s. The first two UDP socket writes will fail as checksum offload is disabled. +# This means it will take 13 seconds + the resent STUN binding request round trip time. client sh -c \ "curl \ --fail \ - --max-time 13 \ + --max-time 16 \ --keepalive-time 1 \ --limit-rate 1000000 \ --output download.file \ @@ -20,6 +21,10 @@ docker network disconnect firezone_app firezone-client-1 # Disconnect the client sleep 3 docker network connect firezone_app firezone-client-1 --ip 172.28.0.200 # Reconnect client with a different IP +# Re-add static route to relays through router +client ip route add 172.29.0.0/24 via 172.28.0.254 dev eth0 +client ip -6 route add 172:29:0::/64 via 172:28:0::254 dev eth0 + # Send SIGHUP, triggering `reconnect` internally sudo kill -s HUP "$(ps -C firezone-headless-client -o pid=)" diff --git a/scripts/tests/lib.sh b/scripts/tests/lib.sh index 4162548c6..8704dc261 100755 --- a/scripts/tests/lib.sh +++ b/scripts/tests/lib.sh @@ -18,13 +18,53 @@ function relay2() { docker compose exec -T relay-2 "$@" } -function install_iptables_drop_rules() { +# Takes two optional arguments to force the client and gateway to use a specific IP stack. +# 1. client_stack: "ipv4", "ipv6" +# 2. gateway_stack: "ipv4", "ipv6" +# +# By default, the client and gateway will use happy eyeballs to use pick the first working IP stack. +function force_relayed_connections() { # Install `iptables` to have it available in the compatibility tests - client apk add --update --no-cache iptables + client apk add --no-cache iptables # Execute within the client container because doing so from the host is not reliable in CI. client iptables -A OUTPUT -d 172.28.0.105 -j DROP client ip6tables -A OUTPUT -d 172:28:0::105 -j DROP + + local client_stack="${1:-}" + local gateway_stack="${2:-}" + + # If both are empty, we don't care which stack they use; just return + if [[ -z "$client_stack" && -z "$gateway_stack" ]]; then + return + fi + + gateway apk add --no-cache iptables + + if [[ "$client_stack" == "ipv4" && "$gateway_stack" == "ipv4" ]]; then + client ip6tables -A OUTPUT -d $RELAY_1_PUBLIC_IP6_ADDR -j DROP + client ip6tables -A OUTPUT -d $RELAY_2_PUBLIC_IP6_ADDR -j DROP + gateway ip6tables -A OUTPUT -d $RELAY_1_PUBLIC_IP6_ADDR -j DROP + gateway ip6tables -A OUTPUT -d $RELAY_2_PUBLIC_IP6_ADDR -j DROP + elif [[ "$client_stack" == "ipv4" && "$gateway_stack" == "ipv6" ]]; then + client ip6tables -A OUTPUT -d $RELAY_1_PUBLIC_IP6_ADDR -j DROP + client ip6tables -A OUTPUT -d $RELAY_2_PUBLIC_IP6_ADDR -j DROP + gateway iptables -A OUTPUT -d $RELAY_1_PUBLIC_IP4_ADDR -j DROP + gateway iptables -A OUTPUT -d $RELAY_2_PUBLIC_IP4_ADDR -j DROP + elif [[ "$client_stack" == "ipv6" && "$gateway_stack" == "ipv4" ]]; then + client iptables -A OUTPUT -d $RELAY_1_PUBLIC_IP4_ADDR -j DROP + client iptables -A OUTPUT -d $RELAY_2_PUBLIC_IP4_ADDR -j DROP + gateway ip6tables -A OUTPUT -d $RELAY_1_PUBLIC_IP6_ADDR -j DROP + gateway ip6tables -A OUTPUT -d $RELAY_2_PUBLIC_IP6_ADDR -j DROP + elif [[ "$client_stack" == "ipv6" && "$gateway_stack" == "ipv6" ]]; then + client iptables -A OUTPUT -d $RELAY_1_PUBLIC_IP4_ADDR -j DROP + client iptables -A OUTPUT -d $RELAY_2_PUBLIC_IP4_ADDR -j DROP + gateway iptables -A OUTPUT -d $RELAY_1_PUBLIC_IP4_ADDR -j DROP + gateway iptables -A OUTPUT -d $RELAY_2_PUBLIC_IP4_ADDR -j DROP + else + echo "Invalid stack combination: client_stack=$client_stack, gateway_stack=$gateway_stack" + exit 1 + fi } function client_curl_resource() { diff --git a/scripts/tests/perf/relayed-tcp-client2server.sh b/scripts/tests/perf/relayed-tcp-client2server.sh index 150b1ceb1..d92614808 100755 --- a/scripts/tests/perf/relayed-tcp-client2server.sh +++ b/scripts/tests/perf/relayed-tcp-client2server.sh @@ -3,7 +3,7 @@ set -euox pipefail source "./scripts/tests/lib.sh" -install_iptables_drop_rules +force_relayed_connections ipv4 ipv4 docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ --time 30 \ diff --git a/scripts/tests/perf/relayed-tcp-server2client.sh b/scripts/tests/perf/relayed-tcp-server2client.sh index b1e988e00..466fad2a2 100755 --- a/scripts/tests/perf/relayed-tcp-server2client.sh +++ b/scripts/tests/perf/relayed-tcp-server2client.sh @@ -3,7 +3,7 @@ set -euox pipefail source "./scripts/tests/lib.sh" -install_iptables_drop_rules +force_relayed_connections ipv6 ipv4 docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ --time 30 \ diff --git a/scripts/tests/perf/relayed-udp-client2server.sh b/scripts/tests/perf/relayed-udp-client2server.sh index 6985b9444..2b8c48eba 100755 --- a/scripts/tests/perf/relayed-udp-client2server.sh +++ b/scripts/tests/perf/relayed-udp-client2server.sh @@ -3,7 +3,7 @@ set -euox pipefail source "./scripts/tests/lib.sh" -install_iptables_drop_rules +force_relayed_connections ipv6 ipv6 docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ --time 30 \ diff --git a/scripts/tests/perf/relayed-udp-server2client.sh b/scripts/tests/perf/relayed-udp-server2client.sh index 64e0f73a0..fc06e9d40 100755 --- a/scripts/tests/perf/relayed-udp-server2client.sh +++ b/scripts/tests/perf/relayed-udp-server2client.sh @@ -3,7 +3,7 @@ set -euox pipefail source "./scripts/tests/lib.sh" -install_iptables_drop_rules +force_relayed_connections ipv4 ipv6 docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ --time 30 \ diff --git a/scripts/tests/relay-graceful-shutdown.sh b/scripts/tests/relay-graceful-shutdown.sh index 788921ced..72dd5fc85 100755 --- a/scripts/tests/relay-graceful-shutdown.sh +++ b/scripts/tests/relay-graceful-shutdown.sh @@ -3,7 +3,7 @@ source "./scripts/tests/lib.sh" # Arrange: Setup a relayed connection -install_iptables_drop_rules +force_relayed_connections client_curl_resource "172.20.0.100/get" client_curl_resource "[172:20:0::100]/get"