mirror of
https://github.com/outbackdingo/firezone.git
synced 2026-01-27 18:18:55 +00:00
chore: don't wake Node on idle connections (#7149)
In order to make Firezone more mobile-friendly, waking up the CPU less often is key. In #6845, we introduced a low-power mode into `snownet` that sends STUN messages on a longer interval if the connection is idle. Whilst poking around `boringtun` as part of integrating our fork into the main codebase, I noticed that we are updating `boringtun`'s timers every second - even on idle connections. This PR applies the same idea of #6845 to the timers within `Node`: Idle connections get "woken" less and if all connections are idle, we avoid waking the `Node` altogether (unless we need to refresh allocations / channels). Calling `handle_timeout` less often revealed an issue in the tests where we didn't fully process the state changes after invalidating a candidate from the remote. To fix this, we now call `handle_timeout` directly after `{add,remove}_remote_candidate`. This isn't super clean because at first glance, it looks like `handle_timeout` should just be part of the add/remove candidate function. It is quite common for sans-IO designs to require calling `handle_timeout` after state has been changed. In `{Client,Server}Node`, we do it implicitly so that we don't have to do it in the tests and the event-loop. It would be great to test this in some automated fashion but I haven't figured out how yet. I did temporarily add an `info!` log to the event-loop of the client and with this patch applied, the entire event-loop goes to sleep on idle connections. It still does get woken every now and then but no longer every second!
This commit is contained in:
@@ -551,11 +551,16 @@ where
|
||||
connection.handle_timeout(id, now);
|
||||
}
|
||||
|
||||
let next_reset = *self.next_rate_limiter_reset.get_or_insert(now);
|
||||
if self.connections.all_idle() {
|
||||
// If all connections are idle, there is no point in resetting the rate limiter.
|
||||
self.next_rate_limiter_reset = None;
|
||||
} else {
|
||||
let next_reset = *self.next_rate_limiter_reset.get_or_insert(now);
|
||||
|
||||
if now >= next_reset {
|
||||
self.rate_limiter.reset_count();
|
||||
self.next_rate_limiter_reset = Some(now + Duration::from_secs(1));
|
||||
if now >= next_reset {
|
||||
self.rate_limiter.reset_count();
|
||||
self.next_rate_limiter_reset = Some(now + Duration::from_secs(1));
|
||||
}
|
||||
}
|
||||
|
||||
self.allocations
|
||||
@@ -716,7 +721,8 @@ where
|
||||
self.index.next(),
|
||||
Some(self.rate_limiter.clone()),
|
||||
),
|
||||
next_timer_update: now,
|
||||
wg_timer: DEFAULT_WG_TIMER,
|
||||
next_wg_timer_update: now,
|
||||
stats: Default::default(),
|
||||
buffer: vec![0; ip_packet::MAX_DATAGRAM_PAYLOAD],
|
||||
intent_sent_at,
|
||||
@@ -1327,6 +1333,10 @@ where
|
||||
fn iter_ids(&self) -> impl Iterator<Item = TId> + '_ {
|
||||
self.initial.keys().chain(self.established.keys()).copied()
|
||||
}
|
||||
|
||||
fn all_idle(&self) -> bool {
|
||||
self.established.values().all(|c| c.is_idle())
|
||||
}
|
||||
}
|
||||
|
||||
/// Wraps the message as a channel data message via the relay, iff:
|
||||
@@ -1624,7 +1634,10 @@ struct Connection<RId> {
|
||||
|
||||
tunnel: Tunn,
|
||||
remote_pub_key: PublicKey,
|
||||
next_timer_update: Instant,
|
||||
/// At which interval we update the [`Tunn`]s timers.
|
||||
wg_timer: Duration,
|
||||
/// When to next update the [`Tunn`]'s timers.
|
||||
next_wg_timer_update: Instant,
|
||||
|
||||
state: ConnectionState<RId>,
|
||||
|
||||
@@ -1690,7 +1703,7 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_timeout(&mut self, agent: &mut IceAgent, now: Instant) {
|
||||
fn handle_timeout(&mut self, agent: &mut IceAgent, wg_timer: &mut Duration, now: Instant) {
|
||||
let Self::Connected {
|
||||
last_outgoing,
|
||||
last_incoming,
|
||||
@@ -1706,10 +1719,10 @@ where
|
||||
|
||||
let peer_socket = *peer_socket;
|
||||
|
||||
self.transition_to_idle(peer_socket, agent);
|
||||
self.transition_to_idle(peer_socket, agent, wg_timer);
|
||||
}
|
||||
|
||||
fn on_outgoing(&mut self, agent: &mut IceAgent, now: Instant) {
|
||||
fn on_outgoing(&mut self, agent: &mut IceAgent, wg_timer: &mut Duration, now: Instant) {
|
||||
let peer_socket = match self {
|
||||
Self::Idle { peer_socket } => *peer_socket,
|
||||
Self::Connected { last_outgoing, .. } => {
|
||||
@@ -1719,10 +1732,10 @@ where
|
||||
Self::Failed | Self::Connecting { .. } => return,
|
||||
};
|
||||
|
||||
self.transition_to_connected(peer_socket, agent, now);
|
||||
self.transition_to_connected(peer_socket, agent, wg_timer, now);
|
||||
}
|
||||
|
||||
fn on_incoming(&mut self, agent: &mut IceAgent, now: Instant) {
|
||||
fn on_incoming(&mut self, agent: &mut IceAgent, wg_timer: &mut Duration, now: Instant) {
|
||||
let peer_socket = match self {
|
||||
Self::Idle { peer_socket } => *peer_socket,
|
||||
Self::Connected { last_incoming, .. } => {
|
||||
@@ -1732,19 +1745,26 @@ where
|
||||
Self::Failed | Self::Connecting { .. } => return,
|
||||
};
|
||||
|
||||
self.transition_to_connected(peer_socket, agent, now);
|
||||
self.transition_to_connected(peer_socket, agent, wg_timer, now);
|
||||
}
|
||||
|
||||
fn transition_to_idle(&mut self, peer_socket: PeerSocket<RId>, agent: &mut IceAgent) {
|
||||
fn transition_to_idle(
|
||||
&mut self,
|
||||
peer_socket: PeerSocket<RId>,
|
||||
agent: &mut IceAgent,
|
||||
wg_timer: &mut Duration,
|
||||
) {
|
||||
tracing::debug!("Connection is idle");
|
||||
*self = Self::Idle { peer_socket };
|
||||
apply_idle_stun_timings(agent);
|
||||
*wg_timer = IDLE_WG_TIMER;
|
||||
}
|
||||
|
||||
fn transition_to_connected(
|
||||
&mut self,
|
||||
peer_socket: PeerSocket<RId>,
|
||||
agent: &mut IceAgent,
|
||||
wg_timer: &mut Duration,
|
||||
now: Instant,
|
||||
) {
|
||||
tracing::debug!("Connection resumed");
|
||||
@@ -1754,6 +1774,7 @@ where
|
||||
last_incoming: now,
|
||||
};
|
||||
apply_default_stun_timings(agent);
|
||||
*wg_timer = DEFAULT_WG_TIMER;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1809,7 +1830,7 @@ where
|
||||
#[must_use]
|
||||
fn poll_timeout(&mut self) -> Option<Instant> {
|
||||
let agent_timeout = self.agent.poll_timeout();
|
||||
let next_wg_timer = Some(self.next_timer_update);
|
||||
let next_wg_timer = Some(self.next_wg_timer_update);
|
||||
let candidate_timeout = self.candidate_timeout();
|
||||
let idle_timeout = self.state.poll_timeout();
|
||||
|
||||
@@ -1839,7 +1860,8 @@ where
|
||||
RId: Copy + Ord + fmt::Display,
|
||||
{
|
||||
self.agent.handle_timeout(now);
|
||||
self.state.handle_timeout(&mut self.agent, now);
|
||||
self.state
|
||||
.handle_timeout(&mut self.agent, &mut self.wg_timer, now);
|
||||
|
||||
if self
|
||||
.candidate_timeout()
|
||||
@@ -1852,8 +1874,8 @@ where
|
||||
|
||||
// TODO: `boringtun` is impure because it calls `Instant::now`.
|
||||
|
||||
if now >= self.next_timer_update {
|
||||
self.next_timer_update = now + Duration::from_secs(1);
|
||||
if now >= self.next_wg_timer_update {
|
||||
self.next_wg_timer_update = now + self.wg_timer;
|
||||
|
||||
// Don't update wireguard timers until we are connected.
|
||||
let Some(peer_socket) = self.socket() else {
|
||||
@@ -2026,7 +2048,8 @@ where
|
||||
}
|
||||
};
|
||||
|
||||
self.state.on_outgoing(&mut self.agent, now);
|
||||
self.state
|
||||
.on_outgoing(&mut self.agent, &mut self.wg_timer, now);
|
||||
|
||||
Ok(Some(&buffer[..len]))
|
||||
}
|
||||
@@ -2114,7 +2137,8 @@ where
|
||||
};
|
||||
|
||||
if control_flow.is_continue() {
|
||||
self.state.on_incoming(&mut self.agent, now);
|
||||
self.state
|
||||
.on_incoming(&mut self.agent, &mut self.wg_timer, now);
|
||||
}
|
||||
|
||||
control_flow
|
||||
@@ -2159,6 +2183,10 @@ where
|
||||
fn is_failed(&self) -> bool {
|
||||
matches!(self.state, ConnectionState::Failed)
|
||||
}
|
||||
|
||||
fn is_idle(&self) -> bool {
|
||||
matches!(self.state, ConnectionState::Idle { .. })
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
@@ -2197,6 +2225,17 @@ fn new_agent() -> IceAgent {
|
||||
agent
|
||||
}
|
||||
|
||||
/// By default, we will update the internal timers of a WireGuard tunnel every second.
|
||||
///
|
||||
/// This allows [`boringtun`] to send keep-alive and re-key messages at the correct times.
|
||||
const DEFAULT_WG_TIMER: Duration = Duration::from_secs(1);
|
||||
|
||||
/// When a connection is idle, the only thing that [`boringtun`] needs to do is send keep-alive messages.
|
||||
///
|
||||
/// Those happen at a rate of 25s.
|
||||
/// By waking up less often, we can save some CPU power.
|
||||
const IDLE_WG_TIMER: Duration = Duration::from_secs(30);
|
||||
|
||||
fn apply_default_stun_timings(agent: &mut IceAgent) {
|
||||
agent.set_max_stun_retransmits(8);
|
||||
agent.set_max_stun_rto(Duration::from_millis(1500));
|
||||
|
||||
@@ -126,3 +126,5 @@ cc 8fcbd19c41f0483d9b81aac2ab7440bb23d7796ef9f6bf346f73f0d633f65baa
|
||||
cc 4494e475d22ff9a318d676f10c79f545982b7787d145925c3719fe47e9868acc
|
||||
cc bafb7db795d394d1771ef07f4dd36db8ac1333dd852653900480d7ed03307853
|
||||
cc 9226be75db567f1d205a36f95cf348eb4aacebbc87c2a4778c52a573b51f0ee2
|
||||
cc d87524fde248b29835f0799429bf2f850b80f9cb98dadecf7b764da716a2182b
|
||||
cc 13e9996493813f93b3a4806aa1ef9c7c6a1da9f88f53901de2cada0b9bccddb0
|
||||
|
||||
@@ -483,6 +483,8 @@ impl ClientState {
|
||||
|
||||
pub fn add_ice_candidate(&mut self, conn_id: GatewayId, ice_candidate: String, now: Instant) {
|
||||
self.node.add_remote_candidate(conn_id, ice_candidate, now);
|
||||
self.node.handle_timeout(now);
|
||||
self.drain_node_events();
|
||||
}
|
||||
|
||||
pub fn remove_ice_candidate(
|
||||
@@ -493,6 +495,8 @@ impl ClientState {
|
||||
) {
|
||||
self.node
|
||||
.remove_remote_candidate(conn_id, ice_candidate, now);
|
||||
self.node.handle_timeout(now);
|
||||
self.drain_node_events();
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, fields(%resource_id))]
|
||||
|
||||
@@ -167,11 +167,15 @@ impl GatewayState {
|
||||
|
||||
pub fn add_ice_candidate(&mut self, conn_id: ClientId, ice_candidate: String, now: Instant) {
|
||||
self.node.add_remote_candidate(conn_id, ice_candidate, now);
|
||||
self.node.handle_timeout(now);
|
||||
self.drain_node_events();
|
||||
}
|
||||
|
||||
pub fn remove_ice_candidate(&mut self, conn_id: ClientId, ice_candidate: String, now: Instant) {
|
||||
self.node
|
||||
.remove_remote_candidate(conn_id, ice_candidate, now);
|
||||
self.node.handle_timeout(now);
|
||||
self.drain_node_events();
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "debug", skip_all, fields(%resource, %client))]
|
||||
|
||||
Reference in New Issue
Block a user