From aebfcd56ebcd47bddad9e7a5d8be68a51439010a Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Fri, 25 Jul 2025 07:01:50 +1000 Subject: [PATCH] fix(connlib): resend candidates on connection upsert (#9986) Due to network partitions between the Client and the Portal, it is possible that a Client requests a new connection, then disconnects from the Portal and re-requests the connection once it is reconnected. On the Gateway, we would have already authorized the first request and initialised our ICE agents with our local candidates. The second time around, the connection would be reused. The Client, however, has lost its state and therefore we need to tell it our candidates again. --------- Signed-off-by: Thomas Eizinger --- rust/connlib/snownet/src/node.rs | 31 +++++++++++++++----- website/src/components/Changelog/Gateway.tsx | 4 +++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/rust/connlib/snownet/src/node.rs b/rust/connlib/snownet/src/node.rs index d46affc49..9ad65f493 100644 --- a/rust/connlib/snownet/src/node.rs +++ b/rust/connlib/snownet/src/node.rs @@ -257,9 +257,23 @@ where .is_some_and(|c| c == &remote_creds) && c.tunnel.remote_static_public() == remote { + tracing::info!(local = ?local_creds, "Reusing existing connection"); + c.state.on_upsert(cid, &mut c.agent, now); - tracing::info!(local = ?local_creds, "Reusing existing connection"); + for candidate in c.agent.local_candidates() { + signal_candidate_to_remote(cid, candidate, &mut self.pending_events); + } + + // Server-reflexive candidates are not in the local candidates of the ICE agent so those need special handling. + for candidate in self + .shared_candidates + .iter() + .filter(|c| c.kind() == CandidateKind::ServerReflexive) + { + signal_candidate_to_remote(cid, candidate, &mut self.pending_events); + } + return Ok(()); } @@ -1398,12 +1412,7 @@ fn add_local_candidate( { // srflx candidates don't need to be added to the local agent because we always send from the `base` anyway. 
if candidate.kind() == CandidateKind::ServerReflexive { - tracing::info!(?candidate, "Signalling candidate to remote"); - - pending_events.push_back(Event::NewIceCandidate { - connection: id, - candidate: candidate.to_sdp_string(), - }); + signal_candidate_to_remote(id, &candidate, pending_events); return; } @@ -1411,6 +1420,14 @@ fn add_local_candidate( return; }; + signal_candidate_to_remote(id, candidate, pending_events); +} + +fn signal_candidate_to_remote( + id: TId, + candidate: &Candidate, + pending_events: &mut VecDeque>, +) { tracing::info!(?candidate, "Signalling candidate to remote"); pending_events.push_back(Event::NewIceCandidate { diff --git a/website/src/components/Changelog/Gateway.tsx b/website/src/components/Changelog/Gateway.tsx index 1d056054e..d7886c209 100644 --- a/website/src/components/Changelog/Gateway.tsx +++ b/website/src/components/Changelog/Gateway.tsx @@ -23,6 +23,10 @@ export default function Gateway() { return ( + + Fixes an issue where a Client could not establish a connection unless + their first attempt succeeded. + Fixes an issue where connections in low-latency networks (between Client and Gateway) would fail to establish reliably.