From b313f2a349730914d3e1db55e1d837c3cedbcd04 Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Mon, 13 Jan 2025 23:45:03 +0100 Subject: [PATCH] fix(connlib): don't spam if relay disconnects during ICE (#7750) When `snownet` is tasked to establish a new connection, it first randomly samples one of its relays that is used as an additional source of candidates in case a direct connection is not possible. We (try to) maintain an allocation on each relay throughout the lifetime of a `connlib` session. In case a relay doesn't respond to the initial binding message at all (even after several retries), we consider the relay offline and remove all state associated to it. It is possible that we sampled a relay for use in a connection and only then realise that it is offline. In that case, we print a message to the log: > Selected relay disconnected during ICE; connection may fail The condition for when we print this log is: "we are in `Connecting` and the sampled relay does no longer exist". This results in log spam in case that condition is actually hit because no state is being changed as part of this check and thus, on the next call to `handle_timeout`, this condition is still true! To fix this, we change the `rid` field of `Connecting` to an `Option`. In case we detect that a relay is no longer present, we print the log and then clear the option. As a result, the log is only printed once. --- rust/connlib/snownet/src/node.rs | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/rust/connlib/snownet/src/node.rs b/rust/connlib/snownet/src/node.rs index f4f6311b5..24f4ed606 100644 --- a/rust/connlib/snownet/src/node.rs +++ b/rust/connlib/snownet/src/node.rs @@ -369,8 +369,8 @@ where | CandidateKind::PeerReflexive => {} } - let Some(allocation) = self.allocations.get_mut(&relay) else { - tracing::debug!(rid = %relay, "Unknown relay"); + let Some(allocation) = relay.and_then(|r| self.allocations.get_mut(&r)) else { + tracing::debug!(rid = ?relay, "Unknown relay"); return; }; @@ -746,7 +746,7 @@ where signalling_completed_at: now, remote_pub_key: remote, state: ConnectionState::Connecting { - relay, + relay: Some(relay), buffered: AllocRingBuffer::new(128), }, possible_sockets: BTreeSet::default(), @@ -1215,12 +1215,18 @@ where let peer_socket = match &mut c.state { Connected { peer_socket, .. } | Idle { peer_socket } => peer_socket, Failed => continue, - Connecting { relay, .. } => { + Connecting { + relay: maybe_relay, .. + } => { + let Some(relay) = maybe_relay else { + continue; + }; if allocations.contains_key(relay) { continue; } tracing::debug!("Selected relay disconnected during ICE; connection may fail"); + *maybe_relay = None; continue; } }; @@ -1250,8 +1256,11 @@ where maybe_initial_connection.or(maybe_established_connection) } - fn connecting_agent_mut(&mut self, id: TId) -> Option<(&mut IceAgent, RId)> { - let maybe_initial_connection = self.initial.get_mut(&id).map(|i| (&mut i.agent, i.relay)); + fn connecting_agent_mut(&mut self, id: TId) -> Option<(&mut IceAgent, Option)> { + let maybe_initial_connection = self + .initial + .get_mut(&id) + .map(|i| (&mut i.agent, Some(i.relay))); let maybe_pending_connection = self.established.get_mut(&id).and_then(|c| match c.state { ConnectionState::Connecting { relay, .. } => Some((&mut c.agent, relay)), ConnectionState::Failed @@ -1273,9 +1282,9 @@ where use ConnectionState::*; match c.state { - Connecting { relay, .. } if relay == id => { - Some((*cid, &mut c.agent, c.span.enter())) - } + Connecting { + relay: Some(relay), .. + } if relay == id => Some((*cid, &mut c.agent, c.span.enter())), Failed | Idle { .. } | Connecting { .. } | Connected { .. } => None, } }); @@ -1594,7 +1603,7 @@ enum ConnectionState { /// We are still running ICE to figure out, which socket to use to send data. Connecting { /// The relay we have selected for this connection. - relay: RId, + relay: Option, /// Packets emitted by wireguard whilst are still running ICE. ///