feat(relay): introduce feature-flag for toggling eBPF program (#8650)

This PR implements a feature-flag in PostHog that we can use to toggle
the use of the eBPF data plane at runtime. At every tick of the
event-loop, the relay will compare the (cached) configuration of the
eBPF program with the (cached) value of the feature-flag. If they
differ, the flag will be updated and upon the next packet, the eBPF
program will act accordingly.

Feature-flags are re-evaluated every 5 minutes, meaning there is some
delay until this gets applied.

The default value of all our feature-flags is `false`, meaning if
there is some problem with evaluating them, we'd turn the eBPF data
plane off. Performing routing in userspace is slower but it is a safer
default.

Resolves: #8548
This commit is contained in:
Thomas Eizinger
2025-04-04 02:51:52 +00:00
committed by GitHub
parent f7fbabf692
commit 941ef6c668
7 changed files with 61 additions and 3 deletions

View File

@@ -201,9 +201,10 @@ impl PortAndPeerV6 {
}
#[repr(C)]
#[derive(Clone, Copy)]
#[derive(Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "std", derive(Debug))]
pub struct Config {
pub relaying_enabled: bool,
pub udp_checksum_enabled: bool,
pub lowest_allocation_port: u16,
pub highest_allocation_port: u16,
@@ -212,6 +213,7 @@ pub struct Config {
impl Default for Config {
fn default() -> Self {
Self {
relaying_enabled: true,
udp_checksum_enabled: true,
lowest_allocation_port: 49152,
highest_allocation_port: 65535,

View File

@@ -11,6 +11,10 @@ pub fn udp_checksum_enabled() -> bool {
config().udp_checksum_enabled
}
/// Reports whether the `relaying_enabled` flag is set in the current config.
///
/// The value is taken from whatever `config()` returns at the time of the call.
pub fn relaying_enabled() -> bool {
    let current = config();

    current.relaying_enabled
}
pub fn allocation_range() -> RangeInclusive<u16> {
let config = config();

View File

@@ -97,6 +97,10 @@ pub fn handle_turn(ctx: XdpContext) -> u32 {
#[inline(always)]
fn try_handle_turn(ctx: &XdpContext) -> Result<u32, Error> {
if !config::relaying_enabled() {
return Ok(xdp_action::XDP_PASS);
}
let eth = Eth::parse(ctx)?;
match eth.ether_type() {

View File

@@ -25,6 +25,11 @@ const PAGE_COUNT: usize = 0x1000;
/// Userspace handle for the eBPF program, bundled with the state we keep alongside it.
pub struct Program {
/// The `aya` handle for the eBPF object.
ebpf: aya::Ebpf,
/// A cached version of our current config.
///
/// Allows for faster access without issuing sys-calls to read it from the map.
config: Config,
/// Perf-event array that is never read through this field; it is stored only so it is not
/// dropped while the program is in use (see the `expect` reason below).
#[expect(dead_code, reason = "We are just keeping it alive.")]
stats: AsyncPerfEventArray<MapData>,
}
@@ -108,7 +113,11 @@ impl Program {
tracing::info!("eBPF TURN router loaded and attached to interface {interface}");
Ok(Self { ebpf, stats })
Ok(Self {
ebpf,
stats,
config: Config::default(),
})
}
pub fn add_channel_binding(
@@ -216,11 +225,21 @@ impl Program {
}
/// Writes `config` into the eBPF config array and refreshes the cached copy.
///
/// If `config` equals the cached value, nothing is written and `Ok(())` is returned
/// immediately.
///
/// # Errors
///
/// Returns an error if the config array cannot be obtained or if writing slot 0 fails.
/// In both cases the cached config is left unchanged.
pub fn set_config(&mut self, config: Config) -> Result<()> {
    if config == self.config {
        tracing::debug!(config = ?self.config, "No change to config, skipping update");

        return Ok(());
    }

    self.config_array_mut()?.set(0, config, 0)?;

    // Only update the cache once the write succeeded. Without this, `config()` would keep
    // returning the initial value, the equality check above would never reflect the real
    // state, and callers building a new config via `..program.config()` would silently
    // revert previously applied fields.
    self.config = config;

    Ok(())
}
pub fn config(&self) -> Config {
self.config
}
fn chan_to_udp_44_map_mut(
&mut self,
) -> Result<HashMap<&mut MapData, ClientAndChannelV4, PortAndPeerV4>> {

View File

@@ -39,4 +39,8 @@ impl Program {
/// Stub implementation: accepts a config, ignores it and always succeeds.
pub fn set_config(&mut self, _config: Config) -> Result<()> {
    Ok(())
}
pub fn config(&self) -> Config {
Config::default()
}
}

View File

@@ -142,6 +142,7 @@ async fn try_main(args: Args) -> Result<()> {
if let Some(ebpf) = ebpf.as_mut() {
ebpf.set_config(Config {
relaying_enabled: true,
udp_checksum_enabled: true,
lowest_allocation_port: args.lowest_port,
highest_allocation_port: args.highest_port,
@@ -642,6 +643,21 @@ where
ready = true;
}
if let Some(ebpf) = self.ebpf.as_mut() {
let is_enabled = ebpf.config().relaying_enabled;
let should_be_enabled =
firezone_telemetry::feature_flags::ebpf_turn_router_enabled();
if is_enabled != should_be_enabled {
tracing::info!(%is_enabled, %should_be_enabled, "eBPF router feature-flag changed");
ebpf.set_config(Config {
relaying_enabled: should_be_enabled,
..ebpf.config()
})?;
}
}
if !ready {
break Poll::Pending;
}

View File

@@ -24,6 +24,10 @@ pub fn drop_llmnr_nxdomain_responses() -> bool {
FEATURE_FLAGS.read().drop_llmnr_nxdomain_responses
}
/// Returns the currently stored value of the `ebpf_turn_router_enabled` feature flag.
///
/// The value is read from the shared `FEATURE_FLAGS` state.
pub fn ebpf_turn_router_enabled() -> bool {
    let flags = FEATURE_FLAGS.read();

    flags.ebpf_turn_router_enabled
}
pub(crate) fn reevaluate(user_id: String, env: &str) {
let api_key = match env {
crate::env::PRODUCTION => POSTHOG_API_KEY_PROD,
@@ -130,6 +134,8 @@ struct FeatureFlags {
icmp_unreachable_instead_of_nat64: bool,
#[serde(default)]
drop_llmnr_nxdomain_responses: bool,
#[serde(default)]
ebpf_turn_router_enabled: bool,
}
fn sentry_flag_context(flags: FeatureFlags) -> sentry::protocol::Context {
@@ -138,12 +144,14 @@ fn sentry_flag_context(flags: FeatureFlags) -> sentry::protocol::Context {
enum SentryFlag {
IcmpUnreachableInsteadOfNat64 { result: bool },
DropLlmnrNxdomainResponses { result: bool },
EbpfTurnRouterEnabled { result: bool },
}
// Exhaustive destruction so we don't forget to update this when we add a flag.
let FeatureFlags {
icmp_unreachable_instead_of_nat64,
drop_llmnr_nxdomain_responses,
ebpf_turn_router_enabled,
} = flags;
let value = serde_json::json!({
@@ -151,7 +159,8 @@ fn sentry_flag_context(flags: FeatureFlags) -> sentry::protocol::Context {
SentryFlag::IcmpUnreachableInsteadOfNat64 {
result: icmp_unreachable_instead_of_nat64,
},
SentryFlag::DropLlmnrNxdomainResponses { result: drop_llmnr_nxdomain_responses }
SentryFlag::DropLlmnrNxdomainResponses { result: drop_llmnr_nxdomain_responses },
SentryFlag::EbpfTurnRouterEnabled { result: ebpf_turn_router_enabled }
]
});