diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index e649eed7e..41c826f0a 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -951,7 +951,7 @@ dependencies = [
[[package]]
name = "boringtun"
version = "0.6.1"
-source = "git+https://github.com/firezone/boringtun?branch=master#5b1892f0616f97d82599b6e324356aa9ea629c33"
+source = "git+https://github.com/firezone/boringtun?branch=master#3d5df9c2a6f55424e02671374f835cc7db1d7a44"
dependencies = [
"aead",
"base64 0.22.1",
diff --git a/rust/connlib/snownet/src/node.rs b/rust/connlib/snownet/src/node.rs
index 8a9959ba7..00d0aa45f 100644
--- a/rust/connlib/snownet/src/node.rs
+++ b/rust/connlib/snownet/src/node.rs
@@ -11,7 +11,6 @@ use boringtun::{noise::rate_limiter::RateLimiter, x25519::StaticSecret};
use bufferpool::{Buffer, BufferPool};
use core::fmt;
use firezone_logging::err_with_src;
-use firezone_telemetry::{analytics, feature_flags};
use hex_display::HexDisplayExt;
use ip_packet::{ConvertibleIpv4Packet, ConvertibleIpv6Packet, IpPacket, IpPacketBuf};
use itertools::Itertools;
@@ -747,18 +746,31 @@ where
tracing::warn!("No TURN servers connected; connection may fail to establish");
}
+ let mut tunnel = Tunn::new_at(
+ self.private_key.clone(),
+ remote,
+ Some(key),
+ None,
+ self.index.next(),
+ Some(self.rate_limiter.clone()),
+ self.rng.next_u64(),
+ now,
+ );
+ // By default, boringtun has a rekey attempt time of 90(!) seconds.
+ // In case of a state de-sync or other issues, this means we try for
+ // 90s to make a handshake, all whilst our ICE layer thinks the connection
+ // is working perfectly fine.
+ // This results in a bad UX as the user has to essentially wait for 90s
+ // before Firezone can fix the state and make a new connection.
+ //
+ // By aligning the rekey-attempt-time roughly with our ICE timeout, we ensure
+ // that even if the hole-punch was successful, it will take at most 15s
+ // until we have a WireGuard tunnel to send packets into.
+ tunnel.set_rekey_attempt_time(Duration::from_secs(15));
+
Connection {
agent,
- tunnel: Tunn::new_at(
- self.private_key.clone(),
- remote,
- Some(key),
- None,
- self.index.next(),
- Some(self.rate_limiter.clone()),
- self.rng.next_u64(),
- now,
- ),
+ tunnel,
next_wg_timer_update: now,
stats: Default::default(),
buffer: vec![0; ip_packet::MAX_FZ_PAYLOAD],
@@ -2214,16 +2226,8 @@ where
.decapsulate_at(Some(src), packet, ip_packet.buf(), now)
{
TunnResult::Done => ControlFlow::Break(Ok(())),
- TunnResult::Err(e @ WireGuardError::InvalidAeadTag)
- if crate::is_handshake(packet)
- && feature_flags::fail_handshake_on_decryption_errors() =>
- {
- analytics::feature_flag_called("fail-handshake-on-decryption-errors");
-
- tracing::info!("Connection handshake failed ({e})");
- self.state = ConnectionState::Failed;
-
- ControlFlow::Break(Ok(())) // Return `Ok` because we are already logging the error.
+ TunnResult::Err(e) if crate::is_handshake(packet) => {
+ ControlFlow::Break(Err(anyhow::Error::new(e).context("handshake packet")))
}
TunnResult::Err(e) => ControlFlow::Break(Err(anyhow::Error::new(e))),
diff --git a/rust/telemetry/src/feature_flags.rs b/rust/telemetry/src/feature_flags.rs
index ff4190cc6..e3b9be0a4 100644
--- a/rust/telemetry/src/feature_flags.rs
+++ b/rust/telemetry/src/feature_flags.rs
@@ -43,10 +43,6 @@ pub fn export_metrics() -> bool {
false // Placeholder until we actually deploy an OTEL collector.
}
-pub fn fail_handshake_on_decryption_errors() -> bool {
- FEATURE_FLAGS.fail_handshake_on_decryption_errors()
-}
-
pub(crate) async fn evaluate_now(user_id: String, env: Env) {
if user_id.is_empty() {
return;
@@ -160,8 +156,6 @@ struct FeatureFlagsResponse {
stream_logs: bool,
#[serde(default)]
map_enobufs_to_wouldblock: bool,
- #[serde(default)]
- fail_handshake_on_decryption_errors: bool,
}
#[derive(Debug, Default)]
@@ -170,7 +164,6 @@ struct FeatureFlags {
drop_llmnr_nxdomain_responses: AtomicBool,
stream_logs: AtomicBool,
map_enobufs_to_wouldblock: AtomicBool,
- fail_handshake_on_decryption_errors: AtomicBool,
}
/// Accessors to the actual feature flags.
@@ -187,7 +180,6 @@ impl FeatureFlags {
drop_llmnr_nxdomain_responses,
stream_logs,
map_enobufs_to_wouldblock,
- fail_handshake_on_decryption_errors,
}: FeatureFlagsResponse,
) {
self.icmp_unreachable_instead_of_nat64
@@ -197,8 +189,6 @@ impl FeatureFlags {
self.stream_logs.store(stream_logs, Ordering::Relaxed);
self.map_enobufs_to_wouldblock
.store(map_enobufs_to_wouldblock, Ordering::Relaxed);
- self.fail_handshake_on_decryption_errors
- .store(fail_handshake_on_decryption_errors, Ordering::Relaxed);
}
fn icmp_unreachable_instead_of_nat64(&self) -> bool {
@@ -217,11 +207,6 @@ impl FeatureFlags {
fn map_enobufs_to_wouldblock(&self) -> bool {
self.map_enobufs_to_wouldblock.load(Ordering::Relaxed)
}
-
- fn fail_handshake_on_decryption_errors(&self) -> bool {
- self.fail_handshake_on_decryption_errors
- .load(Ordering::Relaxed)
- }
}
fn sentry_flag_context(flags: FeatureFlagsResponse) -> sentry::protocol::Context {
@@ -232,7 +217,6 @@ fn sentry_flag_context(flags: FeatureFlagsResponse) -> sentry::protocol::Context
DropLlmnrNxdomainResponses { result: bool },
StreamLogs { result: bool },
MapENOBUFSToWouldBlock { result: bool },
- FailHandshakeOnDecryptionErrors { result: bool },
}
// Exhaustive destruction so we don't forget to update this when we add a flag.
@@ -241,7 +225,6 @@ fn sentry_flag_context(flags: FeatureFlagsResponse) -> sentry::protocol::Context
drop_llmnr_nxdomain_responses,
stream_logs,
map_enobufs_to_wouldblock,
- fail_handshake_on_decryption_errors,
} = flags;
let value = serde_json::json!({
@@ -252,7 +235,6 @@ fn sentry_flag_context(flags: FeatureFlagsResponse) -> sentry::protocol::Context
SentryFlag::DropLlmnrNxdomainResponses { result: drop_llmnr_nxdomain_responses },
SentryFlag::StreamLogs { result: stream_logs },
SentryFlag::MapENOBUFSToWouldBlock { result: map_enobufs_to_wouldblock },
- SentryFlag::FailHandshakeOnDecryptionErrors { result: fail_handshake_on_decryption_errors },
]
});
diff --git a/website/src/components/Changelog/Android.tsx b/website/src/components/Changelog/Android.tsx
index e69f30986..f0e164fb0 100644
--- a/website/src/components/Changelog/Android.tsx
+++ b/website/src/components/Changelog/Android.tsx
@@ -25,6 +25,10 @@ export default function Android() {
Fixes an issue where Firezone failed to sign-in on systems with
non-ASCII characters in their kernel build name.
+
+ Fixes an issue where connections would sometimes take up to 90s to
+ establish.
+
diff --git a/website/src/components/Changelog/Apple.tsx b/website/src/components/Changelog/Apple.tsx
index b6de6e506..d25091ad7 100644
--- a/website/src/components/Changelog/Apple.tsx
+++ b/website/src/components/Changelog/Apple.tsx
@@ -24,7 +24,12 @@ export default function Apple() {
return (
{/* When you cut a release, remove any solved issues from the "known issues" lists over in `client-apps`. This must not be done when the issue's PR merges. */}
-
+
+
+ Fixes an issue where connections would sometimes take up to 90s to
+ establish.
+
+
Fixes an issue where certain log files would not be recreated after
diff --git a/website/src/components/Changelog/GUI.tsx b/website/src/components/Changelog/GUI.tsx
index 300fd106a..23d722220 100644
--- a/website/src/components/Changelog/GUI.tsx
+++ b/website/src/components/Changelog/GUI.tsx
@@ -10,7 +10,12 @@ export default function GUI({ os }: { os: OS }) {
return (
{/* When you cut a release, remove any solved issues from the "known issues" lists over in `client-apps`. This must not be done when the issue's PR merges. */}
-
+
+
+ Fixes an issue where connections would sometimes take up to 90s to
+ establish.
+
+
Fixes a rare crash during sign-in.
diff --git a/website/src/components/Changelog/Gateway.tsx b/website/src/components/Changelog/Gateway.tsx
index 75be45c86..e4293b784 100644
--- a/website/src/components/Changelog/Gateway.tsx
+++ b/website/src/components/Changelog/Gateway.tsx
@@ -34,6 +34,10 @@ export default function Gateway() {
Adds support for translating Time-Exceeded ICMP errors in the DNS
resource NAT, allowing `tracepath` to work through a Firezone tunnel.
+
+ Fixes an issue where connections would sometimes take up to 90s to
+ establish.
+
diff --git a/website/src/components/Changelog/Headless.tsx b/website/src/components/Changelog/Headless.tsx
index 57e5933ca..5e3847109 100644
--- a/website/src/components/Changelog/Headless.tsx
+++ b/website/src/components/Changelog/Headless.tsx
@@ -9,7 +9,12 @@ export default function Headless({ os }: { os: OS }) {
return (
{/* When you cut a release, remove any solved issues from the "known issues" lists over in `client-apps`. This must not be done when the issue's PR merges. */}
-
+
+
+ Fixes an issue where connections would sometimes take up to 90s to
+ establish.
+
+
Fixes an issue where connections would fail to establish if both