Mirror of https://github.com/outbackdingo/firezone.git (synced 2026-01-27 10:18:54 +00:00)
fix(connlib): signal all local candidates on upsert (#10920)
Firezone's UDP connections are designed to be idempotent. If a Client discards its "half" of the connection but the Gateway still keeps the state around, a subsequent connection setup by the Client will reuse the connection state on the Gateway. To fully support this, `snownet` re-sends all of its local candidates to the remote peer whenever a connection is upserted.

The current `seed_agent_with_local_candidates` function attempts to do this job, but its design overlooked a crucial detail: re-adding a candidate that the `IceAgent` already knows about is considered redundant. As such, the candidate is not re-signalled to the remote!

The real-world consequences of this are subtle. `str0m`'s support for peer-reflexive candidates means that incoming STUN binding requests are still answered even if they come from an address that the agent doesn't know anything about, i.e. it has never been told about that candidate. Thus, what happens right now is that when a Client re-creates a connection that is still present on the Gateway, it will start receiving STUN binding requests for candidates it doesn't know about and will create peer-reflexive candidates for them.

Where this does show up is in our test-suite, which has fairly strict timing constraints. When we simulate the re-deploy of relays, we expect connections to be migrated to a new relay immediately. To support this, the current relay candidates are invalidated on both sides. That only works, however, if the current candidate is correctly recognised by the local ICE agent. Peer-reflexive candidates are created on demand and typically only serve a placeholder-like role until we learn about the real candidate that is being used. Due to the behaviour of `seed_agent_with_local_candidates` described above, that may never happen. As a result, attempting to invalidate a relay candidate fails because we only have a peer-reflexive candidate and therefore don't recognise the relay candidate.

Putting all of this together: whilst not re-sending all candidates doesn't cause immediate issues for a connection, it may cause problems later when we try to invalidate the currently active candidate to achieve a speedy failover to a new one.
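In other words, the fix separates "add the candidate to the agent" from "signal the candidate to the remote": on upsert, the candidates the agent already holds are collected first, re-seeding then yields only the genuinely new ones, and the union of both is signalled. The sketch below illustrates that pattern with simplified stand-in types; `Agent`, `Candidate`, `Event` and `on_upsert` are illustrative placeholders, not the actual `str0m`/`snownet` API.

```rust
// Minimal sketch of the re-signalling idea, using simplified stand-in types
// (`Agent`, `Candidate`, `Event`) instead of the real `str0m`/`snownet` ones.
use std::collections::{BTreeSet, VecDeque};

type Candidate = String;

#[derive(Debug)]
enum Event {
    NewIceCandidate { candidate: Candidate },
}

#[derive(Default)]
struct Agent {
    local_candidates: BTreeSet<Candidate>,
}

impl Agent {
    /// Mirrors the behaviour described above: adding an already-known
    /// candidate is redundant and yields nothing new.
    fn add_local_candidate(&mut self, c: Candidate) -> Option<Candidate> {
        self.local_candidates.insert(c.clone()).then_some(c)
    }
}

/// On upsert, signal *all* local candidates: the ones the agent already
/// knows about plus any that seeding just added.
fn on_upsert(agent: &mut Agent, seed: Vec<Candidate>, events: &mut VecDeque<Event>) {
    // Take all current candidates before seeding.
    let current: Vec<Candidate> = agent.local_candidates.iter().cloned().collect();

    // Re-seed; only genuinely new candidates come back from the agent.
    let new: Vec<Candidate> = seed
        .into_iter()
        .filter_map(|c| agent.add_local_candidate(c))
        .collect();

    // Tell the remote about all of them, not just the new ones.
    events.extend(
        current
            .into_iter()
            .chain(new)
            .map(|candidate| Event::NewIceCandidate { candidate }),
    );
}

fn main() {
    let mut agent = Agent::default();
    let mut events = VecDeque::new();

    let _ = agent.add_local_candidate("host 10.0.0.1:51820".to_owned());
    on_upsert(
        &mut agent,
        vec![
            "host 10.0.0.1:51820".to_owned(), // already known: previously not re-signalled
            "relay 203.0.113.7:3478".to_owned(),
        ],
        &mut events,
    );

    // Both candidates are signalled, even the one the agent already knew.
    for event in &events {
        println!("{event:?}");
    }
}
```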
@@ -278,12 +278,19 @@ where

         c.state.on_upsert(cid, &mut c.agent, now);

-        seed_agent_with_local_candidates(
-            cid,
-            c.relay.id,
-            &mut c.agent,
-            &self.allocations,
-            &mut self.pending_events,
-        );
+        // Take all current candidates.
+        let current_candidates = c.agent.local_candidates().to_vec();
+
+        // Re-seed connection with all candidates.
+        let new_candidates =
+            seed_agent_with_local_candidates(c.relay.id, &mut c.agent, &self.allocations);
+
+        // Tell the remote about all of them.
+        self.pending_events.extend(
+            std::iter::empty()
+                .chain(current_candidates)
+                .chain(new_candidates)
+                .map(|candidate| new_ice_candidate_event(cid, candidate)),
+        );

         // Initiate a new WG session.
@@ -317,12 +324,13 @@ where
         agent.set_local_credentials(local_creds);
         agent.set_remote_credentials(remote_creds);

-        seed_agent_with_local_candidates(
-            cid,
-            selected_relay,
-            &mut agent,
-            &self.allocations,
-            &mut self.pending_events,
-        );
+        self.pending_events.extend(
+            self.allocations
+                .candidates_for_relay(&selected_relay)
+                .filter_map(|candidate| {
+                    add_local_candidate(&mut agent, candidate)
+                        .map(|c| new_ice_candidate_event(cid, c))
+                }),
+        );

         let connection = self.init_connection(
@@ -1001,12 +1009,11 @@ where
                 for (cid, agent, maybe_state) in
                     self.connections.agents_and_state_by_relay_mut(rid)
                 {
-                    add_local_candidate(
-                        cid,
-                        agent,
-                        candidate.clone(),
-                        &mut self.pending_events,
-                    );
+                    if let Some(candidate) = add_local_candidate(agent, candidate.clone()) {
+                        self.pending_events
+                            .push_back(new_ice_candidate_event(cid, candidate));
+                    }

                     if let Some(state) = maybe_state {
                         state.on_candidate(cid, agent, now);
                     }
@@ -1110,13 +1117,12 @@ where

         let selected_relay = initial.relay;

-        seed_agent_with_local_candidates(
-            cid,
-            selected_relay,
-            &mut agent,
-            &self.allocations,
-            &mut self.pending_events,
-        );
+        for candidate in
+            seed_agent_with_local_candidates(selected_relay, &mut agent, &self.allocations)
+        {
+            self.pending_events
+                .push_back(new_ice_candidate_event(cid, candidate));
+        }

         let index = self.index.next();
         let connection = self.init_connection(
@@ -1182,13 +1188,13 @@ where
         };

         let selected_relay = self.sample_relay()?;
-        seed_agent_with_local_candidates(
-            cid,
-            selected_relay,
-            &mut agent,
-            &self.allocations,
-            &mut self.pending_events,
-        );
+
+        for candidate in
+            seed_agent_with_local_candidates(selected_relay, &mut agent, &self.allocations)
+        {
+            self.pending_events
+                .push_back(new_ice_candidate_event(cid, candidate));
+        }

         let index = self.index.next();
         let connection = self.init_connection(
@@ -1211,30 +1217,18 @@ where
     }
 }

-fn seed_agent_with_local_candidates<TId, RId>(
-    connection: TId,
+/// Seeds the agent with all local candidates, returning an iterator of all candidates that should be signalled to the remote.
+fn seed_agent_with_local_candidates<'a, RId>(
     selected_relay: RId,
-    agent: &mut IceAgent,
+    agent: &'a mut IceAgent,
     allocations: &Allocations<RId>,
-    pending_events: &mut VecDeque<Event<TId>>,
-) where
+) -> impl Iterator<Item = Candidate> + use<'a, RId>
+where
     RId: Ord + fmt::Display + Copy,
-    TId: fmt::Display + Copy,
 {
-    let shared_candidates = allocations.shared_candidates();
-    let relay_candidates = allocations
-        .get_by_id(&selected_relay)
-        .into_iter()
-        .flat_map(|allocation| allocation.current_relay_candidates());
-
-    // Candidates with a higher priority are better, therefore: Reverse the ordering by priority.
-    for candidate in shared_candidates
-        .chain(relay_candidates)
-        .sorted_by_key(|c| c.prio())
-        .rev()
-    {
-        add_local_candidate(connection, agent, candidate, pending_events);
-    }
+    allocations
+        .candidates_for_relay(&selected_relay)
+        .filter_map(move |c| add_local_candidate(agent, c))
 }

 /// Generate optimistic candidates based on the ones we have already received.
@@ -1299,38 +1293,25 @@ fn generate_optimistic_candidates(agent: &mut IceAgent) {
     }
 }

-fn add_local_candidate<TId>(
-    id: TId,
-    agent: &mut IceAgent,
-    candidate: Candidate,
-    pending_events: &mut VecDeque<Event<TId>>,
-) where
-    TId: fmt::Display,
-{
+/// Attempts to add the candidate to the agent, returning back the candidate if it should be signalled to the remote.
+fn add_local_candidate(agent: &mut IceAgent, candidate: Candidate) -> Option<Candidate> {
     // srflx candidates don't need to be added to the local agent because we always send from the `base` anyway.
     if candidate.kind() == CandidateKind::ServerReflexive {
-        signal_candidate_to_remote(id, &candidate, pending_events);
-        return;
+        return Some(candidate);
     }

-    let Some(candidate) = agent.add_local_candidate(candidate) else {
-        return;
-    };
+    let candidate = agent.add_local_candidate(candidate)?;

-    signal_candidate_to_remote(id, candidate, pending_events);
+    Some(candidate.clone())
 }

-fn signal_candidate_to_remote<TId>(
-    id: TId,
-    candidate: &Candidate,
-    pending_events: &mut VecDeque<Event<TId>>,
-) {
+fn new_ice_candidate_event<TId>(id: TId, candidate: Candidate) -> Event<TId> {
     tracing::debug!(?candidate, "Signalling candidate to remote");

-    pending_events.push_back(Event::NewIceCandidate {
+    Event::NewIceCandidate {
         connection: id,
-        candidate: candidate.clone(),
-    })
+        candidate,
+    }
 }

 fn invalidate_allocation_candidates<TId, RId>(
@@ -74,6 +74,23 @@ where
             .unwrap_or(MutAllocationRef::Unknown)
     }

+    pub(crate) fn candidates_for_relay(
+        &self,
+        id: &RId,
+    ) -> impl Iterator<Item = Candidate> + use<RId> {
+        let shared_candidates = self.shared_candidates();
+        let relay_candidates = self
+            .get_by_id(id)
+            .into_iter()
+            .flat_map(|allocation| allocation.current_relay_candidates());
+
+        // Candidates with a higher priority are better, therefore: Reverse the ordering by priority.
+        shared_candidates
+            .chain(relay_candidates)
+            .sorted_by_key(|c| c.prio())
+            .rev()
+    }
+
     pub(crate) fn iter_mut(&mut self) -> impl Iterator<Item = (&RId, &mut Allocation)> {
         self.inner.iter_mut()
     }
@@ -144,13 +161,6 @@ where
         Some((*id, a))
     }

-    pub(crate) fn shared_candidates(&self) -> impl Iterator<Item = Candidate> {
-        self.inner
-            .values()
-            .flat_map(|allocation| allocation.host_and_server_reflexive_candidates())
-            .unique()
-    }
-
     pub(crate) fn poll_timeout(&mut self) -> Option<(Instant, &'static str)> {
         self.inner
             .values_mut()
@@ -192,6 +202,13 @@ where
                 None => true,
             });
     }
+
+    fn shared_candidates(&self) -> impl Iterator<Item = Candidate> {
+        self.inner
+            .values()
+            .flat_map(|allocation| allocation.host_and_server_reflexive_candidates())
+            .unique()
+    }
 }

 pub(crate) enum MutAllocationRef<'a, RId> {
@@ -15,7 +15,7 @@ use crate::{
     ConnectionStats, Event,
     node::{
         Connection, ConnectionState, InitialConnection, add_local_candidate,
-        allocations::Allocations,
+        allocations::Allocations, new_ice_candidate_event,
     },
 };
@@ -138,10 +138,14 @@ where

         c.relay.id = rid;

-        for candidate in allocation.current_relay_candidates() {
-            add_local_candidate(cid, &mut c.agent, candidate, pending_events);
-            c.state.on_candidate(cid, &mut c.agent, now);
-        }
+        for candidate in allocation
+            .current_relay_candidates()
+            .filter_map(|candidate| add_local_candidate(&mut c.agent, candidate))
+        {
+            pending_events.push_back(new_ice_candidate_event(cid, candidate));
+        }
+
+        c.state.on_candidate(cid, &mut c.agent, now);
     }
 }
@@ -251,3 +251,5 @@ cc cd931decdb174867cc8067e66287a7fd6fe85f0490c25a408431f405987ec5e4
 cc 208cbcc2338fd8f325abc5be770c51b97296729bf09bd4472be5734789c1d37b
 cc 6c01d07f2820e879d267944e063a55d0f4f07777a925a532f9da312b9b398db1
 cc 9fac3117f02193b4b1dfbbfd02fa6062c8f62eb5c94d36d9bd9c694dcda5ddab
+cc 659d967c73dae7e617a63ff86978636ef8aac193b70236adb823354067523629
+cc 1c531f3260ae1f80cf5ab4fb8c0cef5dd1d468ed4da1ff7170a12d203b5546a5
@@ -643,17 +643,6 @@ impl RefClient {
         for status in self.site_status.values_mut() {
             *status = ResourceStatus::Unknown;
         }
-
-        // TCP connections will automatically re-create connections to Gateways.
-        for ((_, dst, _, _), r) in self
-            .expected_tcp_connections
-            .clone()
-            .into_iter()
-            .collect::<Vec<_>>()
-        {
-            self.connect_to_resource(r, dst);
-            self.set_resource_online(r);
-        }
     }

     pub(crate) fn add_internet_resource(&mut self, resource: InternetResource) {