fix(connlib): allow more than one host candidate per IP version (#9147)

Currently, one machines that have multiple routable egress interfaces,
`connlib` may bounce between the two instead of settling on one. This
happens because we have a dedicated `CandidateSet` that we use to filter
out "duplicate" candidates of the same type. Doing that is important
because if the other party is behind a symmetric NAT, they will send us
many server-reflexive candidates that all only differ by their port,
none of them will actually be routable though.

To prevent sending many of these candidates to the remote, we first
gather them locally in our `CandidateSet` and de-duplicate them.
This commit is contained in:
Thomas Eizinger
2025-05-15 12:08:53 +10:00
committed by GitHub
parent 14cabafa6a
commit ce06996a14
6 changed files with 82 additions and 28 deletions

View File

@@ -1,12 +1,16 @@
use std::collections::HashSet;
use itertools::Itertools;
use str0m::Candidate;
use str0m::{Candidate, CandidateKind};
/// Custom "set" implementation for [`Candidate`]s based on a [`HashSet`] with an enforced ordering when iterating.
///
/// The set only allows host and server-reflexive candidates as only those need to be de-duplicated in order to avoid
/// spamming the remote with duplicate candidates.
#[derive(Debug, Default)]
pub struct CandidateSet {
inner: HashSet<Candidate>,
host: HashSet<Candidate>,
server_reflexive: HashSet<Candidate>,
}
impl CandidateSet {
@@ -15,32 +19,43 @@ impl CandidateSet {
reason = "We don't care about the ordering."
)]
pub fn insert(&mut self, new: Candidate) -> bool {
// Hashing a `Candidate` takes longer than checking a handful of entries using their `PartialEq` implementation.
// This function is in the hot-path so it needs to be fast ...
if self.inner.iter().any(|c| c == &new) {
return false;
match new.kind() {
CandidateKind::PeerReflexive | CandidateKind::Relayed => {
debug_assert!(false);
tracing::warn!(
"CandidateSet is not meant to be used with candidates of kind {}",
new.kind()
);
false
}
CandidateKind::Host => self.host.insert(new),
CandidateKind::ServerReflexive => {
// Hashing a `Candidate` takes longer than checking a handful of entries using their `PartialEq` implementation.
// This function is in the hot-path so it needs to be fast ...
if self.server_reflexive.iter().any(|c| c == &new) {
return false;
}
self.server_reflexive.retain(|current| {
let is_ip_version_different = current.addr().is_ipv4() != new.addr().is_ipv4();
if !is_ip_version_different {
tracing::debug!(%current, %new, "Replacing server-reflexive candidate");
}
// Candidates of different IP version are also kept.
is_ip_version_different
});
self.server_reflexive.insert(new)
}
}
self.inner.retain(|current| {
if current.kind() != new.kind() {
return true; // Don't evict candidates of different kinds.
}
let is_ip_version_different = current.addr().is_ipv4() != new.addr().is_ipv4();
if !is_ip_version_different {
tracing::debug!(%current, %new, "Replacing server-reflexive candidate");
}
// Candidates of different IP version are also kept.
is_ip_version_different
});
self.inner.insert(new)
}
pub fn clear(&mut self) {
self.inner.clear()
self.host.clear();
self.server_reflexive.clear();
}
#[expect(
@@ -48,7 +63,10 @@ impl CandidateSet {
reason = "We are guaranteeing a stable ordering"
)]
pub fn iter(&self) -> impl Iterator<Item = &Candidate> {
self.inner.iter().sorted_by_key(|c| c.prio())
std::iter::empty()
.chain(self.host.iter())
.chain(self.server_reflexive.iter())
.sorted_by(|l, r| l.prio().cmp(&r.prio()).then(l.addr().cmp(&r.addr())))
}
}
@@ -95,4 +113,17 @@ mod tests {
vec![c2, c4, host1, host2]
);
}
#[test]
fn allows_multiple_host_candidates_of_same_ip_base() {
let mut set = CandidateSet::default();
let host1 = Candidate::host(SOCK_ADDR1, Protocol::Udp).unwrap();
let host2 = Candidate::host(SOCK_ADDR2, Protocol::Udp).unwrap();
assert!(set.insert(host1.clone()));
assert!(set.insert(host2.clone()));
assert_eq!(set.iter().cloned().collect::<Vec<_>>(), vec![host1, host2]);
}
}

View File

@@ -32,6 +32,10 @@ export default function Android() {
Fixes a rare panic when the DNS servers on the system would change
while Firezone is connected.
</ChangeItem>
<ChangeItem pull="9147">
Fixes an issue where connections failed to establish on machines
with multiple valid egress IPs.
</ChangeItem>
</Unreleased>
<Entry version="1.4.8" date={new Date("2025-04-30")}>
<ChangeItem pull="8920">

View File

@@ -41,6 +41,10 @@ export default function Apple() {
Fixes a rare panic when the DNS servers on the system would change
while Firezone is connected.
</ChangeItem>
<ChangeItem pull="9147">
Fixes an issue where connections failed to establish on machines
with multiple valid egress IPs.
</ChangeItem>
</Unreleased>
<Entry version="1.4.14" date={new Date("2025-05-02")}>
<ChangeItem pull="9005">

View File

@@ -8,7 +8,12 @@ export default function GUI({ os }: { os: OS }) {
return (
<Entries downloadLinks={downloadLinks(os)} title={title(os)}>
{/* When you cut a release, remove any solved issues from the "known issues" lists over in `client-apps`. This must not be done when the issue's PR merges. */}
<Unreleased></Unreleased>
<Unreleased>
<ChangeItem pull="9147">
Fixes an issue where connections failed to establish on machines
with multiple valid egress IPs.
</ChangeItem>
</Unreleased>
<Entry version="1.4.13" date={new Date("2025-05-14")}>
<ChangeItem pull="9014">
Fixes an issue where idle connections would be slow (~60s) in

View File

@@ -22,7 +22,12 @@ export default function Gateway() {
return (
<Entries downloadLinks={downloadLinks} title="Gateway">
<Unreleased></Unreleased>
<Unreleased>
<ChangeItem pull="9147">
Fixes an issue where connections failed to establish on machines
with multiple valid egress IPs.
</ChangeItem>
</Unreleased>
<Entry version="1.4.9" date={new Date("2025-05-14")}>
<ChangeItem pull="9059">
Fixes an issue where ICMP unreachable errors for large packets would

View File

@@ -9,7 +9,12 @@ export default function Headless({ os }: { os: OS }) {
return (
<Entries downloadLinks={downloadLinks(os)} title={title(os)}>
{/* When you cut a release, remove any solved issues from the "known issues" lists over in `client-apps`. This must not be done when the issue's PR merges. */}
<Unreleased></Unreleased>
<Unreleased>
<ChangeItem pull="9147">
Fixes an issue where connections failed to establish on machines
with multiple valid egress IPs.
</ChangeItem>
</Unreleased>
<Entry version="1.4.8" date={new Date("2025-05-14")}>
<ChangeItem pull="9014">
Fixes an issue where idle connections would be slow (~60s) in