From ce06996a1411e871adef44df7a375954906b5bca Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Thu, 15 May 2025 12:08:53 +1000 Subject: [PATCH] fix(connlib): allow more than one host candidate per IP version (#9147) Currently, one machines that have multiple routable egress interfaces, `connlib` may bounce between the two instead of settling on one. This happens because we have a dedicated `CandidateSet` that we use to filter out "duplicate" candidates of the same type. Doing that is important because if the other party is behind a symmetric NAT, they will send us many server-reflexive candidates that all only differ by their port, none of them will actually be routable though. To prevent sending many of these candidates to the remote, we first gather them locally in our `CandidateSet` and de-duplicate them. --- rust/connlib/snownet/src/candidate_set.rs | 81 +++++++++++++------ website/src/components/Changelog/Android.tsx | 4 + website/src/components/Changelog/Apple.tsx | 4 + website/src/components/Changelog/GUI.tsx | 7 +- website/src/components/Changelog/Gateway.tsx | 7 +- website/src/components/Changelog/Headless.tsx | 7 +- 6 files changed, 82 insertions(+), 28 deletions(-) diff --git a/rust/connlib/snownet/src/candidate_set.rs b/rust/connlib/snownet/src/candidate_set.rs index 609e8a79d..1b57aed86 100644 --- a/rust/connlib/snownet/src/candidate_set.rs +++ b/rust/connlib/snownet/src/candidate_set.rs @@ -1,12 +1,16 @@ use std::collections::HashSet; use itertools::Itertools; -use str0m::Candidate; +use str0m::{Candidate, CandidateKind}; /// Custom "set" implementation for [`Candidate`]s based on a [`HashSet`] with an enforced ordering when iterating. +/// +/// The set only allows host and server-reflexive candidates as only those need to be de-duplicated in order to avoid +/// spamming the remote with duplicate candidates. #[derive(Debug, Default)] pub struct CandidateSet { - inner: HashSet, + host: HashSet, + server_reflexive: HashSet, } impl CandidateSet { @@ -15,32 +19,43 @@ impl CandidateSet { reason = "We don't care about the ordering." )] pub fn insert(&mut self, new: Candidate) -> bool { - // Hashing a `Candidate` takes longer than checking a handful of entries using their `PartialEq` implementation. - // This function is in the hot-path so it needs to be fast ... - if self.inner.iter().any(|c| c == &new) { - return false; + match new.kind() { + CandidateKind::PeerReflexive | CandidateKind::Relayed => { + debug_assert!(false); + tracing::warn!( + "CandidateSet is not meant to be used with candidates of kind {}", + new.kind() + ); + + false + } + CandidateKind::Host => self.host.insert(new), + CandidateKind::ServerReflexive => { + // Hashing a `Candidate` takes longer than checking a handful of entries using their `PartialEq` implementation. + // This function is in the hot-path so it needs to be fast ... + if self.server_reflexive.iter().any(|c| c == &new) { + return false; + } + + self.server_reflexive.retain(|current| { + let is_ip_version_different = current.addr().is_ipv4() != new.addr().is_ipv4(); + + if !is_ip_version_different { + tracing::debug!(%current, %new, "Replacing server-reflexive candidate"); + } + + // Candidates of different IP version are also kept. + is_ip_version_different + }); + + self.server_reflexive.insert(new) + } } - - self.inner.retain(|current| { - if current.kind() != new.kind() { - return true; // Don't evict candidates of different kinds. - } - - let is_ip_version_different = current.addr().is_ipv4() != new.addr().is_ipv4(); - - if !is_ip_version_different { - tracing::debug!(%current, %new, "Replacing server-reflexive candidate"); - } - - // Candidates of different IP version are also kept. - is_ip_version_different - }); - - self.inner.insert(new) } pub fn clear(&mut self) { - self.inner.clear() + self.host.clear(); + self.server_reflexive.clear(); } #[expect( @@ -48,7 +63,10 @@ impl CandidateSet { reason = "We are guaranteeing a stable ordering" )] pub fn iter(&self) -> impl Iterator { - self.inner.iter().sorted_by_key(|c| c.prio()) + std::iter::empty() + .chain(self.host.iter()) + .chain(self.server_reflexive.iter()) + .sorted_by(|l, r| l.prio().cmp(&r.prio()).then(l.addr().cmp(&r.addr()))) } } @@ -95,4 +113,17 @@ mod tests { vec![c2, c4, host1, host2] ); } + + #[test] + fn allows_multiple_host_candidates_of_same_ip_base() { + let mut set = CandidateSet::default(); + + let host1 = Candidate::host(SOCK_ADDR1, Protocol::Udp).unwrap(); + let host2 = Candidate::host(SOCK_ADDR2, Protocol::Udp).unwrap(); + + assert!(set.insert(host1.clone())); + assert!(set.insert(host2.clone())); + + assert_eq!(set.iter().cloned().collect::>(), vec![host1, host2]); + } } diff --git a/website/src/components/Changelog/Android.tsx b/website/src/components/Changelog/Android.tsx index 78984ab47..ddbe9b0a0 100644 --- a/website/src/components/Changelog/Android.tsx +++ b/website/src/components/Changelog/Android.tsx @@ -32,6 +32,10 @@ export default function Android() { Fixes a rare panic when the DNS servers on the system would change while Firezone is connected. + + Fixes an issue where connections failed to establish on machines + with multiple valid egress IPs. + diff --git a/website/src/components/Changelog/Apple.tsx b/website/src/components/Changelog/Apple.tsx index 4ca0df675..278d6110e 100644 --- a/website/src/components/Changelog/Apple.tsx +++ b/website/src/components/Changelog/Apple.tsx @@ -41,6 +41,10 @@ export default function Apple() { Fixes a rare panic when the DNS servers on the system would change while Firezone is connected. + + Fixes an issue where connections failed to establish on machines + with multiple valid egress IPs. + diff --git a/website/src/components/Changelog/GUI.tsx b/website/src/components/Changelog/GUI.tsx index 78e5852d5..822b380ee 100644 --- a/website/src/components/Changelog/GUI.tsx +++ b/website/src/components/Changelog/GUI.tsx @@ -8,7 +8,12 @@ export default function GUI({ os }: { os: OS }) { return ( {/* When you cut a release, remove any solved issues from the "known issues" lists over in `client-apps`. This must not be done when the issue's PR merges. */} - + + + Fixes an issue where connections failed to establish on machines + with multiple valid egress IPs. + + Fixes an issue where idle connections would be slow (~60s) in diff --git a/website/src/components/Changelog/Gateway.tsx b/website/src/components/Changelog/Gateway.tsx index f66cb0adb..c852f73c3 100644 --- a/website/src/components/Changelog/Gateway.tsx +++ b/website/src/components/Changelog/Gateway.tsx @@ -22,7 +22,12 @@ export default function Gateway() { return ( - + + + Fixes an issue where connections failed to establish on machines + with multiple valid egress IPs. + + Fixes an issue where ICMP unreachable errors for large packets would diff --git a/website/src/components/Changelog/Headless.tsx b/website/src/components/Changelog/Headless.tsx index 1aaa47198..d4643da44 100644 --- a/website/src/components/Changelog/Headless.tsx +++ b/website/src/components/Changelog/Headless.tsx @@ -9,7 +9,12 @@ export default function Headless({ os }: { os: OS }) { return ( {/* When you cut a release, remove any solved issues from the "known issues" lists over in `client-apps`. This must not be done when the issue's PR merges. */} - + + + Fixes an issue where connections failed to establish on machines + with multiple valid egress IPs. + + Fixes an issue where idle connections would be slow (~60s) in