From b07fa341cf27a4bb8aee57c0248f4e47a057b99f Mon Sep 17 00:00:00 2001 From: Jamil Date: Sun, 17 Aug 2025 11:04:19 -0400 Subject: [PATCH] feat(relay): XDP driver (native) mode for gVNIC (#10177) This updates our eBPF module to use DRV_MODE for less CPU overhead and better performance for all same-stack TURN relaying. Notably, gVNIC does not seem to support the `bpf_xdp_adjust_head` helper, so unfortunately we need to extend / shrink the packet tail and move the payload instead. Comprehensive benchmarks have not been performed, but early results show that we can saturate about 1 Gbps per E2 core on GCP: ``` [SUM] 0.00-30.04 sec 3.16 GBytes 904 Mbits/sec 12088 sender [SUM] 0.00-30.00 sec 3.12 GBytes 894 Mbits/sec receiver ``` This is with 64 TCP streams. More streams will better utilize all available RX queues, and lead to better performance. Related: #10138 Fixes: #8633 --- rust/relay/ebpf-turn-router/src/error.rs | 133 ++++---- rust/relay/ebpf-turn-router/src/main.rs | 21 +- .../ebpf-turn-router/src/move_headers.rs | 90 ------ .../ebpf-turn-router/src/move_payload.rs | 304 ++++++++++++++++++ rust/relay/server/src/ebpf.rs | 8 + rust/relay/server/src/ebpf/linux.rs | 12 +- rust/relay/server/src/ebpf/stub.rs | 3 +- rust/relay/server/src/main.rs | 8 +- rust/relay/server/tests/ebpf_ipv4.rs | 5 +- 9 files changed, 411 insertions(+), 173 deletions(-) delete mode 100644 rust/relay/ebpf-turn-router/src/move_headers.rs create mode 100644 rust/relay/ebpf-turn-router/src/move_payload.rs diff --git a/rust/relay/ebpf-turn-router/src/error.rs b/rust/relay/ebpf-turn-router/src/error.rs index 4302f817e..6a705389d 100644 --- a/rust/relay/ebpf-turn-router/src/error.rs +++ b/rust/relay/ebpf-turn-router/src/error.rs @@ -11,9 +11,7 @@ pub enum Error { BadChannelDataLength, NoEntry(SupportedChannel), UnsupportedChannel(UnsupportedChannel), - XdpLoadBytesFailed(i64), - XdpAdjustHeadFailed(i64), - XdpStoreBytesFailed(i64), + XdpAdjustTailFailed(i64), } #[derive(Debug, Clone, Copy)] @@ 
-35,66 +33,83 @@ pub enum UnsupportedChannel { impl aya_log_ebpf::WriteToBuf for Error { #[inline(always)] fn write(self, buf: &mut [u8]) -> Option { - match self { - Error::PacketTooShort => "Packet is too short".write(buf), - Error::NotUdp => "Not a UDP packet".write(buf), - Error::NotTurn => "Not TURN traffic".write(buf), - Error::NotIp => "Not an IP packet".write(buf), - Error::Ipv4PacketWithOptions => "IPv4 packet has options".write(buf), - Error::NotAChannelDataMessage => "Not a channel data message".write(buf), - Error::BadChannelDataLength => { - "Channel data length does not match packet length".write(buf) - } - Error::NoEntry(SupportedChannel::UdpToChan44) => { - "No entry in UDPv4 to channel IPv4 map".write(buf) - } - Error::NoEntry(SupportedChannel::ChanToUdp44) => { - "No entry in channel IPv4 to UDPv4 map".write(buf) - } - Error::NoEntry(SupportedChannel::UdpToChan66) => { - "No entry in UDPv6 to channel IPv6 map".write(buf) - } - Error::NoEntry(SupportedChannel::ChanToUdp66) => { - "No entry in channel IPv6 to UDPv6 map".write(buf) - } - Error::UnsupportedChannel(UnsupportedChannel::UdpToChan46) => { - "Relaying UDPv4 to channel IPv6 is not supported".write(buf) - } - Error::UnsupportedChannel(UnsupportedChannel::ChanToUdp46) => { - "Relaying channel IPv4 to UDPv6 is not supported".write(buf) - } - Error::UnsupportedChannel(UnsupportedChannel::UdpToChan64) => { - "Relaying UDPv6 to channel IPv4 is not supported".write(buf) - } - Error::UnsupportedChannel(UnsupportedChannel::ChanToUdp64) => { - "Relaying channel IPv6 to UDPv4 is not supported".write(buf) - } - Error::XdpLoadBytesFailed(ret) => { + // Use a simpler match structure to help the verifier + let msg = match self { + Error::PacketTooShort => "Packet is too short", + Error::NotUdp => "Not a UDP packet", + Error::NotTurn => "Not TURN traffic", + Error::NotIp => "Not an IP packet", + Error::Ipv4PacketWithOptions => "IPv4 packet has options", + Error::NotAChannelDataMessage => "Not a channel 
data message", + Error::BadChannelDataLength => "Channel data length does not match packet length", + Error::NoEntry(ch) => match ch { + SupportedChannel::UdpToChan44 => "No entry in UDPv4 to channel IPv4 map", + SupportedChannel::ChanToUdp44 => "No entry in channel IPv4 to UDPv4 map", + SupportedChannel::UdpToChan66 => "No entry in UDPv6 to channel IPv6 map", + SupportedChannel::ChanToUdp66 => "No entry in channel IPv6 to UDPv6 map", + }, + Error::UnsupportedChannel(ch) => match ch { + UnsupportedChannel::UdpToChan46 => { + "Relaying UDPv4 to channel IPv6 is not supported" + } + UnsupportedChannel::ChanToUdp46 => { + "Relaying channel IPv4 to UDPv6 is not supported" + } + UnsupportedChannel::UdpToChan64 => { + "Relaying UDPv6 to channel IPv4 is not supported" + } + UnsupportedChannel::ChanToUdp64 => { + "Relaying channel IPv6 to UDPv4 is not supported" + } + }, + Error::XdpAdjustTailFailed(ret) => { + // Handle this case separately to avoid complex control flow let mut written = 0; - - written += "Failed to load bytes: ".write(buf)?.get(); - written += ret.write(buf)?.get(); - - NonZeroUsize::new(written) + written += "Failed to adjust tail: ".write(buf)?.get(); + written += errno_to_str(ret).write(buf)?.get(); + return NonZeroUsize::new(written); } - Error::XdpAdjustHeadFailed(ret) => { - let mut written = 0; + }; - written += "Failed to adjust head: ".write(buf)?.get(); - written += ret.write(buf)?.get(); - - NonZeroUsize::new(written) - } - Error::XdpStoreBytesFailed(ret) => { - let mut written = 0; - - written += "Failed to store bytes: ".write(buf)?.get(); - written += ret.write(buf)?.get(); - - NonZeroUsize::new(written) - } - } + msg.write(buf) } } impl aya_log_ebpf::macro_support::DefaultFormatter for Error {} + +/// Helper function to map Linux/eBPF error codes to human-readable strings +/// This avoids integer formatting which can cause pointer arithmetic verifier issues +#[inline(always)] +fn errno_to_str(errno: i64) -> &'static str { + match errno { + 
-1 => "EPERM (Operation not permitted)", + -2 => "ENOENT (No such file or directory)", + -3 => "ESRCH (No such process)", + -4 => "EINTR (Interrupted system call)", + -5 => "EIO (I/O error)", + -6 => "ENXIO (No such device or address)", + -7 => "E2BIG (Argument list too long)", + -8 => "ENOEXEC (Exec format error)", + -9 => "EBADF (Bad file number)", + -10 => "ECHILD (No child processes)", + -11 => "EAGAIN (Try again)", + -12 => "ENOMEM (Out of memory)", + -13 => "EACCES (Permission denied)", + -14 => "EFAULT (Bad address)", + -16 => "EBUSY (Device or resource busy)", + -17 => "EEXIST (File exists)", + -19 => "ENODEV (No such device)", + -22 => "EINVAL (Invalid argument)", + -24 => "EMFILE (Too many open files)", + -28 => "ENOSPC (No space left on device)", + -32 => "EPIPE (Broken pipe)", + -34 => "ERANGE (Math result not representable)", + -61 => "ENODATA (No data available)", + -75 => "EOVERFLOW (Value too large for defined data type)", + -84 => "EILSEQ (Illegal byte sequence)", + -90 => "EMSGSIZE (Message too long)", + -95 => "ENOTSUP (Operation not supported)", + -105 => "ENOBUFS (No buffer space available)", + _ => "Unknown error", + } +} diff --git a/rust/relay/ebpf-turn-router/src/main.rs b/rust/relay/ebpf-turn-router/src/main.rs index e52e66f12..c99d5ffdb 100644 --- a/rust/relay/ebpf-turn-router/src/main.rs +++ b/rust/relay/ebpf-turn-router/src/main.rs @@ -15,7 +15,7 @@ use error::{SupportedChannel, UnsupportedChannel}; use eth::Eth; use ip4::Ip4; use ip6::Ip6; -use move_headers::{ +use move_payload::{ add_channel_data_header_ipv4, add_channel_data_header_ipv6, remove_channel_data_header_ipv4, remove_channel_data_header_ipv6, }; @@ -33,7 +33,7 @@ mod error; mod eth; mod ip4; mod ip6; -mod move_headers; +mod move_payload; mod ref_mut_at; mod stats; mod udp; @@ -72,18 +72,6 @@ static UDP_TO_CHAN_64: HashMap = #[xdp] pub fn handle_turn(ctx: XdpContext) -> u32 { - trace!( - &ctx, - "udp-checksumming = {}, allocation-range = {}..={}", - if 
config::udp_checksum_enabled() { - "true" - } else { - "false" - }, - *config::allocation_range().start(), - *config::allocation_range().end(), - ); - try_handle_turn(&ctx).unwrap_or_else(|e| match e { Error::NotIp | Error::NotUdp => xdp_action::XDP_PASS, @@ -98,10 +86,7 @@ pub fn handle_turn(ctx: XdpContext) -> u32 { xdp_action::XDP_PASS } - Error::BadChannelDataLength - | Error::XdpStoreBytesFailed(_) - | Error::XdpAdjustHeadFailed(_) - | Error::XdpLoadBytesFailed(_) => { + Error::BadChannelDataLength | Error::XdpAdjustTailFailed(_) => { warn!(&ctx, "Dropping packet: {}", e); xdp_action::XDP_DROP diff --git a/rust/relay/ebpf-turn-router/src/move_headers.rs b/rust/relay/ebpf-turn-router/src/move_headers.rs deleted file mode 100644 index 7c98472a5..000000000 --- a/rust/relay/ebpf-turn-router/src/move_headers.rs +++ /dev/null @@ -1,90 +0,0 @@ -use aya_ebpf::{ - cty::c_void, - helpers::{bpf_xdp_adjust_head, bpf_xdp_load_bytes, bpf_xdp_store_bytes}, - programs::XdpContext, -}; -use network_types::{ - eth::EthHdr, - ip::{Ipv4Hdr, Ipv6Hdr}, - udp::UdpHdr, -}; - -use crate::{channel_data::CdHdr, error::Error}; - -#[inline(always)] -pub fn remove_channel_data_header_ipv4(ctx: &XdpContext) -> Result<(), Error> { - move_headers::<{ CdHdr::LEN as i32 }, { Ipv4Hdr::LEN }>(ctx) -} - -#[inline(always)] -pub fn add_channel_data_header_ipv4(ctx: &XdpContext, mut header: CdHdr) -> Result<(), Error> { - move_headers::<{ -(CdHdr::LEN as i32) }, { Ipv4Hdr::LEN }>(ctx)?; - let offset = (EthHdr::LEN + Ipv4Hdr::LEN + UdpHdr::LEN) as u32; - - let header_ptr = &mut header as *mut _ as *mut c_void; - let header_len = core::mem::size_of_val(&header) as u32; - - let ret = unsafe { bpf_xdp_store_bytes(ctx.ctx, offset, header_ptr, header_len) }; - if ret < 0 { - return Err(Error::XdpStoreBytesFailed(ret)); - } - - Ok(()) -} - -#[inline(always)] -pub fn remove_channel_data_header_ipv6(ctx: &XdpContext) -> Result<(), Error> { - move_headers::<{ CdHdr::LEN as i32 }, { Ipv6Hdr::LEN }>(ctx) -} - 
-#[inline(always)] -pub fn add_channel_data_header_ipv6(ctx: &XdpContext, mut header: CdHdr) -> Result<(), Error> { - move_headers::<{ -(CdHdr::LEN as i32) }, { Ipv6Hdr::LEN }>(ctx)?; - let offset = (EthHdr::LEN + Ipv6Hdr::LEN + UdpHdr::LEN) as u32; - - let header_ptr = &mut header as *mut _ as *mut c_void; - let header_len = core::mem::size_of_val(&header) as u32; - - let ret = unsafe { bpf_xdp_store_bytes(ctx.ctx, offset, header_ptr, header_len) }; - if ret < 0 { - return Err(Error::XdpStoreBytesFailed(ret)); - } - - Ok(()) -} - -#[inline(always)] -fn move_headers( - ctx: &XdpContext, -) -> Result<(), Error> { - // Scratch space for our headers. - // IPv6 headers are always 40 bytes long. - // IPv4 headers are between 20 and 60 bytes long. - // We restrict the eBPF program to only handle 20 byte long IPv4 headers. - // Therefore, we only need to reserver space for IPv6 headers. - // - // Ideally, we would just use the const-generic argument here but that is not yet supported ... - let mut headers = [0u8; EthHdr::LEN + Ipv6Hdr::LEN + UdpHdr::LEN]; - - let headers_ptr = headers.as_mut_ptr() as *mut c_void; - let headers_len = (EthHdr::LEN + IP_HEADER_LEN + UdpHdr::LEN) as u32; - - // Copy headers into buffer. - let ret = unsafe { bpf_xdp_load_bytes(ctx.ctx, 0, headers_ptr, headers_len) }; - if ret < 0 { - return Err(Error::XdpLoadBytesFailed(ret)); - } - - let ret = unsafe { bpf_xdp_adjust_head(ctx.ctx, DELTA) }; - if ret < 0 { - return Err(Error::XdpAdjustHeadFailed(ret)); - } - - // Copy the headers back. - let ret = unsafe { bpf_xdp_store_bytes(ctx.ctx, 0, headers_ptr, headers_len) }; - if ret < 0 { - return Err(Error::XdpStoreBytesFailed(ret)); - } - - Ok(()) -} diff --git a/rust/relay/ebpf-turn-router/src/move_payload.rs b/rust/relay/ebpf-turn-router/src/move_payload.rs new file mode 100644 index 000000000..e05bb3ead --- /dev/null +++ b/rust/relay/ebpf-turn-router/src/move_payload.rs @@ -0,0 +1,304 @@ +//! 
Helpers for moving the UDP payload forward or backward. +//! +//! ## Overview +//! +//! This module shifts the UDP payload forward or backward in order to add or remove the 4-byte +//! TURN channel data header to or from the front of the UDP payload. +//! +//! How this works: +//! +//! +//! ### Adding Channel Data Header (`extend_and_add_header`) +//! +//! Original packet: +//! ┌─────────┬──────┬───────┬─────────────────┐ +//! │ ETH HDR │ IP │ UDP │ PAYLOAD │ +//! └─────────┴──────┴───────┴─────────────────┘ +//! ↑ ↑ ↑ +//! data payload_offset data_end +//! +//! +//! Step 1: Extend packet tail by 4 bytes (bpf_xdp_adjust_tail) +//! ┌─────────┬──────┬───────┬─────────────────┬────┐ +//! │ ETH HDR │ IP │ UDP │ PAYLOAD │new │ +//! └─────────┴──────┴───────┴─────────────────┴────┘ +//! ↑ ↑ +//! data data_end (new) +//! +//! +//! Step 2: Copy payload backward by 4 bytes +//! ┌─────────┬──────┬───────┬────┬─────────────────┐ +//! │ ETH HDR │ IP │ UDP │????│ PAYLOAD │ +//! └─────────┴──────┴───────┴────┴─────────────────┘ +//! ↑ ────→ copy direction ↑ +//! data data_end +//! +//! +//! Step 3: Write channel data header +//! ┌─────────┬──────┬───────┬────┬─────────────────┐ +//! │ ETH HDR │ IP │ UDP │CDH │ PAYLOAD │ +//! └─────────┴──────┴───────┴────┴─────────────────┘ +//! ↑ ↑ +//! data data_end +//! +//! +//! ### Removing Channel Data Header (`remove_header_and_shrink`) +//! +//! Original packet: +//! ┌─────────┬──────┬───────┬────┬─────────────────┐ +//! │ ETH HDR │ IP │ UDP │CDH │ PAYLOAD │ +//! └─────────┴──────┴───────┴────┴─────────────────┘ +//! ↑ ↑ ↑ +//! data payload_offset data_end +//! +//! +//! Step 1: Copy payload forward by 4 bytes (overwriting CDH) +//! ┌─────────┬──────┬───────┬─────────────────┬────┐ +//! │ ETH HDR │ IP │ UDP │ PAYLOAD │junk│ +//! └─────────┴──────┴───────┴─────────────────┴────┘ +//! ↑ ←──── copy direction ↑ +//! data data_end +//! +//! +//! Step 2: Shrink packet tail by 4 bytes (bpf_xdp_adjust_tail) +//! 
┌─────────┬──────┬───────┬─────────────────┐ +//! │ ETH HDR │ IP │ UDP │ PAYLOAD │ +//! └─────────┴──────┴───────┴─────────────────┘ +//! ↑ ↑ +//! data data_end (new) +//! +//! +//! ## Approach +//! +//! Generally there are two approaches to achieve the above: +//! 1. Head adjustment + shift packet headers (42-62 bytes) + add/remove channel data header +//! 2. Tail adjustment + shift payload (0-1454 bytes) + add/remove channel data header +//! +//! Unfortunately, we can't use the first approach because the `gve` driver on GCP does not support +//! `bpf_xdp_adjust_head`, which is required to shift the packet headers forward. +//! +//! Therefore, we use the second approach. +//! +//! To perform the actual shifting, we avoid the use of `bpf_xdp_load_bytes` and +//! `bpf_xdp_store_bytes` because these helpers can often be slower for large byte copies due to +//! the overhead of kernel function calls. Instead, we do a more efficient manual byte copy using raw +//! pointers, keeping in mind the verifier constraints listed below. +//! +//! +//! ## eBPF Verifier Gotchas +//! +//! The eBPF verifier imposes strict constraints that require consideration when manipulating +//! packet data in this module: +//! +//! 1. **No arithmetic on end pointers**: The verifier doesn't allow arithmetic on `data_end` +//! pointers, so we can't do `data_end - offset`. Instead, we use tracking variables +//! (`remaining`, `copied`) to index from the start. +//! +//! 2. **Bounded loops**: The verifier needs to prove loops terminate. We use the `MAX_PAYLOAD` +//! constant to provide an upper bound, preventing the verifier from tracking too many +//! states (which would exceed the 1M instruction limit). +//! +//! 3. **Backward copying**: When extending the packet, we must copy from the end to avoid +//! overwriting data we haven't read yet. We find the payload end by counting up, then +//! copy backward using the `remaining` counter. +//! +//! 4. 
**Forward copying**: When shrinking, we copy from the beginning forward, using a +//! `copied` counter to track progress. +//! +//! 5. **Pointer invalidation**: After `bpf_xdp_adjust_tail`, all cached pointers become +//! invalid and must be re-fetched from the XDP context. +//! +//! 6. **Inline hints**: Functions marked `#[inline(never)]` prevent excessive inlining that +//! could blow up the instruction count. Functions marked `#[inline(always)]` ensure +//! critical paths are optimized. +//! +use crate::{channel_data::CdHdr, error::Error}; +use aya_ebpf::{helpers::bpf_xdp_adjust_tail, programs::XdpContext}; +use network_types::{ + eth::EthHdr, + ip::{Ipv4Hdr, Ipv6Hdr}, + udp::UdpHdr, +}; + +// Set an upper limit for bounds checks +const MAX_MTU: usize = 1500; // does not include Ethernet header +const MAX_PAYLOAD: usize = MAX_MTU - Ipv4Hdr::LEN - UdpHdr::LEN - CdHdr::LEN; + +#[inline(always)] +pub fn add_channel_data_header_ipv4(ctx: &XdpContext, header: CdHdr) -> Result<(), Error> { + extend_and_add_header::<{ Ipv4Hdr::LEN }>(ctx, &header) +} + +#[inline(always)] +pub fn add_channel_data_header_ipv6(ctx: &XdpContext, header: CdHdr) -> Result<(), Error> { + extend_and_add_header::<{ Ipv6Hdr::LEN }>(ctx, &header) +} + +#[inline(always)] +pub fn remove_channel_data_header_ipv4(ctx: &XdpContext) -> Result<(), Error> { + remove_header_and_shrink::<{ Ipv4Hdr::LEN }>(ctx) +} + +#[inline(always)] +pub fn remove_channel_data_header_ipv6(ctx: &XdpContext) -> Result<(), Error> { + remove_header_and_shrink::<{ Ipv6Hdr::LEN }>(ctx) +} + +/// Extend the packet by `CdHdr::LEN` bytes and add the channel data header at the front of the +/// payload. +#[inline(never)] +fn extend_and_add_header( + ctx: &XdpContext, + header: &CdHdr, +) -> Result<(), Error> { + let payload_offset = EthHdr::LEN + IP_HEADER_LEN + UdpHdr::LEN; + + // 1. 
Extend the packet by `CdHdr::LEN` bytes + let ret = unsafe { bpf_xdp_adjust_tail(ctx.ctx, CdHdr::LEN as i32) }; + if ret < 0 { + return Err(Error::XdpAdjustTailFailed(ret)); + } + + // 2. Get the new packet pointers as they have changed + let data_start = ctx.data(); + let data_end = ctx.data_end(); + + // 3. Copy the payload back by `CdHdr::LEN` bytes to make space for the header + copy_bytes_backward(data_start, data_end, payload_offset, CdHdr::LEN); + + // 4. Copy header + let hdr_dst = data_start + payload_offset; + let hdr_src = header as *const CdHdr as *const u8; + + for i in 0..CdHdr::LEN { + if hdr_dst + i < data_end { + let dst_ptr = (hdr_dst + i) as *mut u8; + unsafe { + *dst_ptr = *hdr_src.add(i); + } + } + } + + Ok(()) +} + +/// Remove the channel data header by shifting the payload forward `CdHdr::LEN` bytes, then +/// shrink the packet by the same amount. +#[inline(never)] +fn remove_header_and_shrink(ctx: &XdpContext) -> Result<(), Error> { + let payload_offset = EthHdr::LEN + IP_HEADER_LEN + UdpHdr::LEN; + + let data_start = ctx.data(); + let data_end = ctx.data_end(); + + // 1. Copy the payload forward by `CdHdr::LEN` bytes, overwriting the header + copy_bytes_forward(data_start, data_end, payload_offset, CdHdr::LEN); + + // 2. Shrink the packet by `CdHdr::LEN` bytes + let ret = unsafe { bpf_xdp_adjust_tail(ctx.ctx, -(CdHdr::LEN as i32)) }; + if ret < 0 { + return Err(Error::XdpAdjustTailFailed(ret)); + } + + Ok(()) +} + +/// Copy bytes forward from src_offset to dst_offset by `delta` bytes. +/// Optimized to copy 4 bytes at a time when possible. 
+#[inline(never)] +fn copy_bytes_forward(data_start: usize, data_end: usize, offset: usize, delta: usize) { + let mut src_offset = offset + delta; + let mut dst_offset = offset; + let mut copied: usize = 0; + + loop { + // Bounds check to prevent verifier from exploding + if copied >= MAX_PAYLOAD { + break; + } + + // Try to copy 4 bytes if we have enough remaining + if copied + 4 <= MAX_PAYLOAD && data_start + src_offset + 3 < data_end { + let src_ptr = (data_start + src_offset) as *const u8; + let dst_ptr = (data_start + dst_offset) as *mut u8; + + // SAFETY: We verified we have at least 4 bytes available + unsafe { + let value = (src_ptr as *const u32).read_unaligned(); + (dst_ptr as *mut u32).write_unaligned(value); + } + src_offset += 4; + dst_offset += 4; + copied += 4; + } else if data_start + src_offset < data_end { + // Fall back to single byte copy + let src_ptr = (data_start + src_offset) as *const u8; + let dst_ptr = (data_start + dst_offset) as *mut u8; + + // SAFETY: We verified the bounds above + unsafe { + *dst_ptr = *src_ptr; + } + src_offset += 1; + dst_offset += 1; + copied += 1; + } else { + break; + } + } +} + +/// Copy bytes backward from src_offset to dst_offset by `delta` bytes. +/// Optimized to copy 4 bytes at a time when possible. 
+#[inline(never)] +fn copy_bytes_backward(data_start: usize, data_end: usize, offset: usize, delta: usize) { + let mut remaining: usize = 0; + + // Calculate total bytes to copy + loop { + if remaining >= MAX_PAYLOAD { + break; + } + if data_start + offset + delta + remaining >= data_end { + break; + } + remaining += 1; + } + + // Single loop that handles both 4-byte and 1-byte copies + while remaining > 0 { + if remaining >= 4 { + let src_offset = offset + remaining - 4; + let dst_offset = src_offset + delta; + + // Check bounds for 4-byte access + if data_start + src_offset + 3 < data_end && data_start + dst_offset + 3 < data_end { + let src_ptr = (data_start + src_offset) as *const u8; + let dst_ptr = (data_start + dst_offset) as *mut u8; + + // SAFETY: We verified we have at least 4 bytes available + unsafe { + let value = (src_ptr as *const u32).read_unaligned(); + (dst_ptr as *mut u32).write_unaligned(value); + } + remaining -= 4; + continue; + } + } + + // Fall back to single byte + let src_offset = offset + remaining - 1; + let dst_offset = src_offset + delta; + + if data_start + src_offset >= data_end || data_start + dst_offset >= data_end { + break; + } + + let src_ptr = (data_start + src_offset) as *const u8; + let dst_ptr = (data_start + dst_offset) as *mut u8; + unsafe { + *dst_ptr = *src_ptr; + } + remaining -= 1; + } +} diff --git a/rust/relay/server/src/ebpf.rs b/rust/relay/server/src/ebpf.rs index 6db0d39d5..9237b308e 100644 --- a/rust/relay/server/src/ebpf.rs +++ b/rust/relay/server/src/ebpf.rs @@ -6,3 +6,11 @@ mod platform; mod platform; pub use platform::Program; + +#[derive(clap::ValueEnum, Debug, Clone, Copy)] +pub enum AttachMode { + /// Attach in generic mode (SKB_MODE) + Generic, + /// Attach in driver mode (DRV_MODE) + Driver, +} diff --git a/rust/relay/server/src/ebpf/linux.rs b/rust/relay/server/src/ebpf/linux.rs index 2a9d374da..5cb3699e1 100644 --- a/rust/relay/server/src/ebpf/linux.rs +++ b/rust/relay/server/src/ebpf/linux.rs @@ -13,6 
+13,8 @@ use ebpf_shared::{ }; use stun_codec::rfc5766::attributes::ChannelNumber; +use crate::ebpf::AttachMode; + use crate::{AllocationPort, ClientSocket, PeerSocket}; /// How many [`StatsEvent`]s we will at most read in one batch. @@ -30,7 +32,7 @@ pub struct Program { } impl Program { - pub fn try_load(interface: &str) -> Result { + pub fn try_load(interface: &str, attach_mode: AttachMode) -> Result { let mut ebpf = aya::Ebpf::load(aya::include_bytes_aligned!(concat!( env!("OUT_DIR"), "/ebpf-turn-router-main" @@ -41,8 +43,14 @@ impl Program { .context("No program")? .try_into()?; program.load().context("Failed to load program")?; + + let xdp_flags = match attach_mode { + AttachMode::Generic => XdpFlags::SKB_MODE, + AttachMode::Driver => XdpFlags::DRV_MODE, + }; + program - .attach(interface, XdpFlags::SKB_MODE) + .attach(interface, xdp_flags) .with_context(|| format!("Failed to attached to interface {interface}"))?; let mut stats = AsyncPerfEventArray::try_from( diff --git a/rust/relay/server/src/ebpf/stub.rs b/rust/relay/server/src/ebpf/stub.rs index c86b2ddc1..9a3c350c0 100644 --- a/rust/relay/server/src/ebpf/stub.rs +++ b/rust/relay/server/src/ebpf/stub.rs @@ -7,12 +7,13 @@ use anyhow::Result; use ebpf_shared::Config; use stun_codec::rfc5766::attributes::ChannelNumber; +use crate::ebpf::AttachMode; use crate::{AllocationPort, ClientSocket, PeerSocket}; pub struct Program {} impl Program { - pub fn try_load(_: &str) -> Result { + pub fn try_load(_: &str, _: AttachMode) -> Result { Err(anyhow::anyhow!("Platform not supported")) } diff --git a/rust/relay/server/src/main.rs b/rust/relay/server/src/main.rs index 2151c64ee..a465a61e3 100644 --- a/rust/relay/server/src/main.rs +++ b/rust/relay/server/src/main.rs @@ -88,6 +88,12 @@ struct Args { #[arg(long, env, hide = true)] ebpf_offloading: Option, + /// eBPF attachment mode: "generic" for SKB_MODE or "driver" for DRV_MODE. + /// + /// Only relevant when ebpf_offloading is enabled. 
+ #[arg(long, env, hide = true, default_value = "driver")] + ebpf_attach_mode: ebpf::AttachMode, + #[command(flatten)] health_check: http_health_check::HealthCheckArgs, @@ -146,7 +152,7 @@ async fn try_main(args: Args) -> Result<()> { let mut ebpf = args .ebpf_offloading .as_deref() - .map(ebpf::Program::try_load) + .map(|interface| ebpf::Program::try_load(interface, args.ebpf_attach_mode)) .transpose() .context("Failed to load eBPF TURN router")?; diff --git a/rust/relay/server/tests/ebpf_ipv4.rs b/rust/relay/server/tests/ebpf_ipv4.rs index 2d0ef2e36..95d1aec51 100644 --- a/rust/relay/server/tests/ebpf_ipv4.rs +++ b/rust/relay/server/tests/ebpf_ipv4.rs @@ -1,6 +1,6 @@ #![allow(clippy::unwrap_used)] -use firezone_relay::{AllocationPort, ClientSocket, PeerSocket}; +use firezone_relay::{AllocationPort, ClientSocket, PeerSocket, ebpf}; use opentelemetry::global; use opentelemetry_sdk::metrics::{ InMemoryMetricExporter, PeriodicReader, SdkMeterProvider, data::Sum, @@ -18,7 +18,8 @@ async fn ping_pong() { let (_meter_provider, exporter) = init_meter_provider(); - let mut program = firezone_relay::ebpf::Program::try_load("lo").unwrap(); + let mut program = + firezone_relay::ebpf::Program::try_load("lo", ebpf::AttachMode::Generic).unwrap(); // Linux does not set the correct UDP checksum when sending the packet, so our updated checksum in the eBPF code will be wrong and later dropped. // To make the test work, we therefore need to tell the eBPF program to disable UDP checksumming by just setting it to 0.