mirror of
https://github.com/outbackdingo/firezone.git
synced 2026-01-27 18:18:55 +00:00
Connlib: reduce failover timeout (#2897)
This reduces the failover time by depending on webrtc's keepalive instead of wireguard's. We have much more control over that, since boringtun doesn't bubble up any of the keepalive timeouts (only a trace warning). In a later commit, when things are more stable, we should just get rid of wireguard's keepalive. When we remove webrtc we will build our own. Events based on `keepalive` timeouts are key to our failover system, so we **need** it. Draft because it's built on top of #2891 (which is completely separate code, but without it the failover just doesn't work correctly).
This commit is contained in:
@@ -12,7 +12,7 @@ use connlib_shared::{
|
||||
};
|
||||
use webrtc::ice_transport::{
|
||||
ice_candidate::RTCIceCandidate, ice_gatherer::RTCIceGatherOptions,
|
||||
ice_parameters::RTCIceParameters,
|
||||
ice_parameters::RTCIceParameters, ice_transport_state::RTCIceTransportState,
|
||||
};
|
||||
use webrtc::ice_transport::{ice_candidate_type::RTCIceCandidateType, RTCIceTransport};
|
||||
use webrtc::ice_transport::{ice_credential_type::RTCIceCredentialType, ice_server::RTCIceServer};
|
||||
@@ -151,7 +151,8 @@ fn insert_peers<TId: Copy, TTransform>(
|
||||
}
|
||||
}
|
||||
|
||||
fn start_handlers<TId, TTransform>(
|
||||
fn start_handlers<TId, TTransform, TRoleState>(
|
||||
tunnel: Arc<Tunnel<impl Callbacks + 'static, TRoleState>>,
|
||||
device: Arc<ArcSwapOption<Device>>,
|
||||
callbacks: impl Callbacks + 'static,
|
||||
peer: Arc<Peer<TId, TTransform>>,
|
||||
@@ -160,8 +161,17 @@ fn start_handlers<TId, TTransform>(
|
||||
) where
|
||||
TId: Copy + Send + Sync + fmt::Debug + 'static,
|
||||
TTransform: Send + Sync + PacketTransform + 'static,
|
||||
TRoleState: RoleState<Id = TId>,
|
||||
{
|
||||
ice.on_connection_state_change(Box::new(|_| Box::pin(async {})));
|
||||
let conn_id = peer.conn_id;
|
||||
ice.on_connection_state_change(Box::new(move |state| {
|
||||
let tunnel = tunnel.clone();
|
||||
Box::pin(async move {
|
||||
if state == RTCIceTransportState::Failed {
|
||||
tunnel.peers_to_stop.lock().push_back(conn_id);
|
||||
}
|
||||
})
|
||||
}));
|
||||
tokio::spawn({
|
||||
async move {
|
||||
// If this fails receiver will be dropped and the connection will expire at some point
|
||||
|
||||
@@ -165,6 +165,7 @@ where
|
||||
let (peer_sender, peer_receiver) = tokio::sync::mpsc::channel(PEER_QUEUE_SIZE);
|
||||
|
||||
start_handlers(
|
||||
Arc::clone(self),
|
||||
Arc::clone(&self.device),
|
||||
self.callbacks.clone(),
|
||||
peer.clone(),
|
||||
|
||||
@@ -205,7 +205,7 @@ where
|
||||
}
|
||||
|
||||
fn new_tunnel(
|
||||
&self,
|
||||
self: &Arc<Self>,
|
||||
peer_config: PeerConfig,
|
||||
client_id: ClientId,
|
||||
resource: ResourceDescription,
|
||||
@@ -239,6 +239,7 @@ where
|
||||
let (peer_sender, peer_receiver) = tokio::sync::mpsc::channel(PEER_QUEUE_SIZE);
|
||||
|
||||
start_handlers(
|
||||
Arc::clone(self),
|
||||
Arc::clone(&self.device),
|
||||
self.callbacks.clone(),
|
||||
peer.clone(),
|
||||
|
||||
@@ -213,6 +213,7 @@ where
|
||||
};
|
||||
|
||||
let mut answer_builder = msg_builder.start_answer(message, Rcode::NoError).ok()?;
|
||||
answer_builder.header_mut().set_ra(true);
|
||||
|
||||
// W/O object-safety there's no other way to access the inner type
|
||||
// we could as well implement the ComposeRecordData trait for RecordData
|
||||
@@ -227,6 +228,7 @@ where
|
||||
RecordData::Ptr(r) => answer_builder.push((qname, Class::In, DNS_TTL, r)),
|
||||
}
|
||||
.ok()?;
|
||||
|
||||
Some(answer_builder.finish())
|
||||
}
|
||||
|
||||
|
||||
@@ -87,6 +87,12 @@ const MAX_CONCURRENT_ICE_GATHERING: usize = 100;
|
||||
// Note: Taken from boringtun
|
||||
const HANDSHAKE_RATE_LIMIT: u64 = 100;
|
||||
|
||||
// These 2 are the default timeouts
|
||||
const ICE_DISCONNECTED_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
const ICE_KEEPALIVE: Duration = Duration::from_secs(2);
|
||||
// This is approximately how long failover will take :)
|
||||
const ICE_FAILED_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
pub(crate) fn get_v4(ip: IpAddr) -> Option<Ipv4Addr> {
|
||||
match ip {
|
||||
IpAddr::V4(v4) => Some(v4),
|
||||
@@ -471,6 +477,11 @@ where
|
||||
registry = register_default_interceptors(registry, &mut media_engine)?;
|
||||
let mut setting_engine = SettingEngine::default();
|
||||
setting_engine.set_interface_filter(Box::new(|name| !name.contains("tun")));
|
||||
setting_engine.set_ice_timeouts(
|
||||
Some(ICE_DISCONNECTED_TIMEOUT),
|
||||
Some(ICE_FAILED_TIMEOUT),
|
||||
Some(ICE_KEEPALIVE),
|
||||
);
|
||||
|
||||
let webrtc_api = APIBuilder::new()
|
||||
.with_media_engine(media_engine)
|
||||
|
||||
Reference in New Issue
Block a user