mirror of
https://github.com/outbackdingo/firezone.git
synced 2026-01-27 10:18:54 +00:00
fix(snownet): fail connection on handshake decryption errors (#9850)
As per the WireGuard paper, `boringtun` tries to handshake with the remote peer for 90s before it gives up. This timeout is important because when a session is discarded due to e.g. missing replies, WireGuard attempts to handshake a new session. Without this timeout, we would then try to handshake a session forever. Unfortunately, `boringtun` does not distinguish a missing handshake response from a bad one. Decryption errors whilst decoding a handshake response are simply passed up to the upper layer, in our case `snownet`. I am not sure how we can actually fail to decrypt a handshake but the pattern we are seeing in customer logs is that this happens over and over again, so there is no point in having `boringtun` retry the handshake. Therefore, we immediately fail the connection when this happens. Failed connections are immediately removed, triggering the client send a new connection-intent to the portal. Such a new connection intent will then sync-up the state between Client and Gateway so both of them use the most recent public key. Resolves: #9845
This commit is contained in:
1
rust/Cargo.lock
generated
1
rust/Cargo.lock
generated
@@ -6826,6 +6826,7 @@ dependencies = [
|
||||
"bytes",
|
||||
"derive_more 2.0.1",
|
||||
"firezone-logging",
|
||||
"firezone-telemetry",
|
||||
"hex",
|
||||
"hex-display",
|
||||
"ip-packet",
|
||||
|
||||
@@ -12,6 +12,7 @@ bytecodec = { workspace = true }
|
||||
bytes = { workspace = true }
|
||||
derive_more = { workspace = true, features = ["debug"] }
|
||||
firezone-logging = { workspace = true }
|
||||
firezone-telemetry = { workspace = true }
|
||||
hex = { workspace = true }
|
||||
hex-display = { workspace = true }
|
||||
ip-packet = { workspace = true }
|
||||
|
||||
@@ -24,3 +24,10 @@ pub use stats::{ConnectionStats, NodeStats};
|
||||
pub fn is_wireguard(payload: &[u8]) -> bool {
|
||||
boringtun::noise::Tunn::parse_incoming_packet(payload).is_ok()
|
||||
}
|
||||
|
||||
pub(crate) fn is_handshake(payload: &[u8]) -> bool {
|
||||
use boringtun::noise::Packet;
|
||||
|
||||
boringtun::noise::Tunn::parse_incoming_packet(payload)
|
||||
.is_ok_and(|p| matches!(p, Packet::HandshakeInit(_) | Packet::HandshakeResponse(_)))
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ use boringtun::{noise::rate_limiter::RateLimiter, x25519::StaticSecret};
|
||||
use bufferpool::{Buffer, BufferPool};
|
||||
use core::fmt;
|
||||
use firezone_logging::err_with_src;
|
||||
use firezone_telemetry::{analytics, feature_flags};
|
||||
use hex_display::HexDisplayExt;
|
||||
use ip_packet::{ConvertibleIpv4Packet, ConvertibleIpv6Packet, IpPacket, IpPacketBuf};
|
||||
use itertools::Itertools;
|
||||
@@ -2178,6 +2179,17 @@ where
|
||||
.decapsulate_at(Some(src), packet, ip_packet.buf(), now)
|
||||
{
|
||||
TunnResult::Done => ControlFlow::Break(Ok(())),
|
||||
TunnResult::Err(e @ WireGuardError::InvalidAeadTag)
|
||||
if crate::is_handshake(packet)
|
||||
&& feature_flags::fail_handshake_on_decryption_errors() =>
|
||||
{
|
||||
analytics::feature_flag_called("fail-handshake-on-decryption-errors");
|
||||
|
||||
tracing::info!("Connection handshake failed ({e})");
|
||||
self.state = ConnectionState::Failed;
|
||||
|
||||
ControlFlow::Break(Ok(())) // Return `Ok` because we are already logging the error.
|
||||
}
|
||||
TunnResult::Err(e) => ControlFlow::Break(Err(anyhow::Error::new(e))),
|
||||
|
||||
// For WriteToTunnel{V4,V6}, boringtun returns the source IP of the packet that was tunneled to us.
|
||||
|
||||
@@ -43,6 +43,10 @@ pub fn export_metrics() -> bool {
|
||||
false // Placeholder until we actually deploy an OTEL collector.
|
||||
}
|
||||
|
||||
pub fn fail_handshake_on_decryption_errors() -> bool {
|
||||
FEATURE_FLAGS.fail_handshake_on_decryption_errors()
|
||||
}
|
||||
|
||||
pub(crate) async fn evaluate_now(user_id: String, env: Env) {
|
||||
if user_id.is_empty() {
|
||||
return;
|
||||
@@ -156,6 +160,8 @@ struct FeatureFlagsResponse {
|
||||
stream_logs: bool,
|
||||
#[serde(default)]
|
||||
map_enobufs_to_wouldblock: bool,
|
||||
#[serde(default)]
|
||||
fail_handshake_on_decryption_errors: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
@@ -164,6 +170,7 @@ struct FeatureFlags {
|
||||
drop_llmnr_nxdomain_responses: AtomicBool,
|
||||
stream_logs: AtomicBool,
|
||||
map_enobufs_to_wouldblock: AtomicBool,
|
||||
fail_handshake_on_decryption_errors: AtomicBool,
|
||||
}
|
||||
|
||||
/// Accessors to the actual feature flags.
|
||||
@@ -180,6 +187,7 @@ impl FeatureFlags {
|
||||
drop_llmnr_nxdomain_responses,
|
||||
stream_logs,
|
||||
map_enobufs_to_wouldblock,
|
||||
fail_handshake_on_decryption_errors,
|
||||
}: FeatureFlagsResponse,
|
||||
) {
|
||||
self.icmp_unreachable_instead_of_nat64
|
||||
@@ -189,6 +197,8 @@ impl FeatureFlags {
|
||||
self.stream_logs.store(stream_logs, Ordering::Relaxed);
|
||||
self.map_enobufs_to_wouldblock
|
||||
.store(map_enobufs_to_wouldblock, Ordering::Relaxed);
|
||||
self.fail_handshake_on_decryption_errors
|
||||
.store(fail_handshake_on_decryption_errors, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
fn icmp_unreachable_instead_of_nat64(&self) -> bool {
|
||||
@@ -207,6 +217,11 @@ impl FeatureFlags {
|
||||
fn map_enobufs_to_wouldblock(&self) -> bool {
|
||||
self.map_enobufs_to_wouldblock.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
fn fail_handshake_on_decryption_errors(&self) -> bool {
|
||||
self.fail_handshake_on_decryption_errors
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
}
|
||||
|
||||
fn sentry_flag_context(flags: FeatureFlagsResponse) -> sentry::protocol::Context {
|
||||
@@ -217,6 +232,7 @@ fn sentry_flag_context(flags: FeatureFlagsResponse) -> sentry::protocol::Context
|
||||
DropLlmnrNxdomainResponses { result: bool },
|
||||
StreamLogs { result: bool },
|
||||
MapENOBUFSToWouldBlock { result: bool },
|
||||
FailHandshakeOnDecryptionErrors { result: bool },
|
||||
}
|
||||
|
||||
// Exhaustive destruction so we don't forget to update this when we add a flag.
|
||||
@@ -225,6 +241,7 @@ fn sentry_flag_context(flags: FeatureFlagsResponse) -> sentry::protocol::Context
|
||||
drop_llmnr_nxdomain_responses,
|
||||
stream_logs,
|
||||
map_enobufs_to_wouldblock,
|
||||
fail_handshake_on_decryption_errors,
|
||||
} = flags;
|
||||
|
||||
let value = serde_json::json!({
|
||||
@@ -235,6 +252,7 @@ fn sentry_flag_context(flags: FeatureFlagsResponse) -> sentry::protocol::Context
|
||||
SentryFlag::DropLlmnrNxdomainResponses { result: drop_llmnr_nxdomain_responses },
|
||||
SentryFlag::StreamLogs { result: stream_logs },
|
||||
SentryFlag::MapENOBUFSToWouldBlock { result: map_enobufs_to_wouldblock },
|
||||
SentryFlag::FailHandshakeOnDecryptionErrors { result: fail_handshake_on_decryption_errors },
|
||||
]
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user