Fix gateway cleanup (#2704)

Yesterday, during some portion of the day connections between clients
and resources were impossible.

While I couldn't pinpoint the exact cause I found some issues with
cleanup. This PR fixes those.

Furthermore, I increased the default log level for tunnels in the
clients so that if this happens again we have better logs to triage.

~~Furthermore, I found out about #2705 so, I removed the limit of relays
from connlib since the portal already limits it to 2 (4 if you count
per-ip), that way we make sure that we always use both ipv4 and ipv6.
The connection start up time seems to slow down due to this but I think
this is better. We might want to go to only 2 urls again later on to
speed this up, if the portal can ensure it's a working relay
load-balanced relay there might not be a point in using more than a
single server~~. cc @AndrewDryga

Edit: we always get an ipv4 and ipv6 address for the same relay as the
first two relays in the relay list, save the case where only one of the
ip types is supported. We should be safe limiting it to 2.

---------

Signed-off-by: Gabi <gabrielalejandro7@gmail.com>
Co-authored-by: Jamil <jamilbk@users.noreply.github.com>
This commit is contained in:
Gabi
2023-11-29 01:49:30 -03:00
committed by GitHub
parent 8ad82b515e
commit a309f11011
7 changed files with 145 additions and 73 deletions

View File

@@ -105,7 +105,7 @@ android {
buildConfigField(
"String",
"LOG_FILTER",
"\"connlib_client_android=info,firezone_tunnel=info,connlib_shared=info,connlib_client_shared=info,warn\"",
"\"connlib_client_android=info,firezone_tunnel=trace,connlib_shared=info,connlib_client_shared=info,warn\"",
)
firebaseAppDistribution {
serviceCredentialsFile = System.getenv("FIREBASE_CREDENTIALS_PATH")

138
rust/Cargo.lock generated
View File

@@ -193,9 +193,9 @@ dependencies = [
[[package]]
name = "async-compression"
version = "0.4.4"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f658e2baef915ba0f26f1f7c42bfb8e12f532a01f449a090ded75ae7a07e9ba2"
checksum = "bc2d0cfb2a7388d34f590e76686704c494ed7aaceed62ee1ba35cbf363abc2a5"
dependencies = [
"flate2",
"futures-core",
@@ -851,9 +851,9 @@ dependencies = [
[[package]]
name = "crypto-bigint"
version = "0.5.4"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28f85c3514d2a6e64160359b45a3918c3b4178bcbf4ae5d03ab2d02e521c479a"
checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
dependencies = [
"generic-array",
"rand_core",
@@ -920,9 +920,9 @@ dependencies = [
[[package]]
name = "data-encoding"
version = "2.4.0"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308"
checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5"
[[package]]
name = "der"
@@ -1002,9 +1002,9 @@ dependencies = [
[[package]]
name = "domain"
version = "0.9.1"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54a83205ec042ce7339c73344d6666a39acb30021121dab68eb983740dcff86e"
checksum = "7af83e443e4bfe8602af356e5ca10b9676634e53d178875017f2ff729898a388"
dependencies = [
"octseq",
"rand",
@@ -1013,9 +1013,9 @@ dependencies = [
[[package]]
name = "ecdsa"
version = "0.16.8"
version = "0.16.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4b1e0c257a9e9f25f90ff76d7a68360ed497ee519c8e428d1825ef0000799d4"
checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca"
dependencies = [
"der",
"digest",
@@ -1033,9 +1033,9 @@ checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "elliptic-curve"
version = "0.13.6"
version = "0.13.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d97ca172ae9dc9f9b779a6e3a65d308f2af74e5b8c921299075bdb4a0370e914"
checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
dependencies = [
"base16ct",
"crypto-bigint",
@@ -1087,10 +1087,16 @@ dependencies = [
]
[[package]]
name = "errno"
version = "0.3.6"
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c18ee0ed65a5f1f81cac6b1d213b69c35fa47d4252ad41f1486dbd8226fe36e"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "errno"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f258a7194e7f7c2a7837a8913aeab7fd8c383457034fa20ce4dd3dcb813e8eb8"
dependencies = [
"libc",
"windows-sys 0.48.0",
@@ -1114,9 +1120,9 @@ dependencies = [
[[package]]
name = "fiat-crypto"
version = "0.2.4"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53a56f0780318174bad1c127063fd0c5fdfb35398e3cd79ffaab931a6c79df80"
checksum = "27573eac26f4dd11e2b1916c3fe1baa56407c83c71a773a8ba17ec0bca03b6b7"
[[package]]
name = "firezone-cli-utils"
@@ -1267,9 +1273,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "form_urlencoded"
version = "1.2.0"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652"
checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
dependencies = [
"percent-encoding",
]
@@ -1291,9 +1297,9 @@ dependencies = [
[[package]]
name = "futures-bounded"
version = "0.2.2"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43bcbe9c086773ba37692e50f578c4c459c0a5ffe07bd924d0ea485b2a46c9c7"
checksum = "e1e2774cc104e198ef3d3e1ff4ab40f86fa3245d6cb6a3a46174f21463cee173"
dependencies = [
"futures-timer",
"futures-util",
@@ -1445,9 +1451,9 @@ dependencies = [
[[package]]
name = "h2"
version = "0.3.21"
version = "0.3.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833"
checksum = "4d6250322ef6e60f93f9a2162799302cd6f68f79f6e5d85c8c16f14d1d958178"
dependencies = [
"bytes",
"fnv",
@@ -1455,7 +1461,7 @@ dependencies = [
"futures-sink",
"futures-util",
"http",
"indexmap",
"indexmap 2.1.0",
"slab",
"tokio",
"tokio-util",
@@ -1468,6 +1474,12 @@ version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "hashbrown"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156"
[[package]]
name = "heck"
version = "0.4.1"
@@ -1505,7 +1517,7 @@ dependencies = [
"futures-channel",
"futures-io",
"futures-util",
"idna",
"idna 0.4.0",
"ipnet",
"once_cell",
"rand",
@@ -1698,6 +1710,16 @@ dependencies = [
"unicode-normalization",
]
[[package]]
name = "idna"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
dependencies = [
"unicode-bidi",
"unicode-normalization",
]
[[package]]
name = "indexmap"
version = "1.9.3"
@@ -1705,7 +1727,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown",
"hashbrown 0.12.3",
]
[[package]]
name = "indexmap"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f"
dependencies = [
"equivalent",
"hashbrown 0.14.2",
]
[[package]]
@@ -2238,9 +2270,9 @@ dependencies = [
[[package]]
name = "octseq"
version = "0.3.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d233b7fea8c1c9f4ffda5dcc961c8edd2fc1587e4b355b2752fe7c27e0cd50ca"
checksum = "cd09a3d18c298e5d9b66cf65db82e2e1694b94fbc032f83e304a5cbc87bcc3bb"
[[package]]
name = "oid-registry"
@@ -2327,7 +2359,7 @@ checksum = "8a81f725323db1b1206ca3da8bb19874bbd3f57c3bcd59471bfb04525b265b9b"
dependencies = [
"futures-channel",
"futures-util",
"indexmap",
"indexmap 1.9.3",
"js-sys",
"once_cell",
"pin-project-lite",
@@ -2463,9 +2495,9 @@ dependencies = [
[[package]]
name = "percent-encoding"
version = "2.3.0"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "phoenix-channel"
@@ -2611,9 +2643,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "primeorder"
version = "0.13.3"
version = "0.13.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7dbe9ed3b56368bd99483eb32fe9c17fdd3730aebadc906918ce78d54c7eeb4"
checksum = "353e1ca18966c16d9deb1c69278edbc5f194139612772bd9537af60ac231e1e6"
dependencies = [
"elliptic-curve",
]
@@ -2972,9 +3004,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.38.23"
version = "0.38.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffb93593068e9babdad10e4fce47dc9b3ac25315a72a59766ffd9e9a71996a04"
checksum = "dc99bc2d4f1fed22595588a013687477aedf3cdcfb26558c559edb67b4d9b22e"
dependencies = [
"bitflags 2.4.1",
"errno",
@@ -2985,9 +3017,9 @@ dependencies = [
[[package]]
name = "rustls"
version = "0.21.8"
version = "0.21.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "446e14c5cda4f3f30fe71863c34ec70f5ac79d6087097ad0bb433e1be5edf04c"
checksum = "629648aced5775d558af50b2b4c7b02983a04b312126d45eeead26e7caa498b9"
dependencies = [
"log",
"ring 0.17.5",
@@ -3151,18 +3183,18 @@ checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090"
[[package]]
name = "serde"
version = "1.0.192"
version = "1.0.193"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bca2a08484b285dcb282d0f67b26cadc0df8b19f8c12502c13d966bf9482f001"
checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.192"
version = "1.0.193"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6c7207fbec9faa48073f3e3074cbe553af6ea512d7c21ba46e434e70ea9fbc1"
checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
dependencies = [
"proc-macro2",
"quote",
@@ -3240,9 +3272,9 @@ dependencies = [
[[package]]
name = "signature"
version = "2.1.0"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e1788eed21689f9cf370582dfc467ef36ed9c707f073528ddafa8d83e3b8500"
checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
dependencies = [
"digest",
"rand_core",
@@ -3748,7 +3780,7 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
dependencies = [
"futures-core",
"futures-util",
"indexmap",
"indexmap 1.9.3",
"pin-project",
"pin-project-lite",
"rand",
@@ -4086,12 +4118,12 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
[[package]]
name = "url"
version = "2.4.1"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5"
checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633"
dependencies = [
"form_urlencoded",
"idna",
"idna 0.5.0",
"percent-encoding",
"serde",
]
@@ -4116,9 +4148,9 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "uuid"
version = "1.5.0"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88ad59a7560b41a70d191093a945f0b87bc1deeda46fb237479708a1d6b6cdfc"
checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560"
dependencies = [
"getrandom",
"serde",
@@ -4270,9 +4302,9 @@ dependencies = [
[[package]]
name = "webpki-roots"
version = "0.25.2"
version = "0.25.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc"
checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10"
[[package]]
name = "webrtc"
@@ -4733,9 +4765,9 @@ dependencies = [
[[package]]
name = "zeroize"
version = "1.6.0"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9"
checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d"
dependencies = [
"zeroize_derive",
]

View File

@@ -131,7 +131,7 @@ where
pub struct ClientState {
active_candidate_receivers: StreamMap<GatewayId, RTCIceCandidate>,
/// We split the receivers of ICE candidates into two phases because we only want to start sending them once we've received an SDP from the gateway.
waiting_for_sdp_from_gatway: HashMap<GatewayId, Receiver<RTCIceCandidate>>,
waiting_for_sdp_from_gateway: HashMap<GatewayId, Receiver<RTCIceCandidate>>,
// TODO: Make private
pub awaiting_connection: HashMap<ResourceId, AwaitingConnectionDetails>,
@@ -343,7 +343,7 @@ impl ClientState {
id: GatewayId,
receiver: Receiver<RTCIceCandidate>,
) -> StaticSecret {
self.waiting_for_sdp_from_gatway.insert(id, receiver);
self.waiting_for_sdp_from_gateway.insert(id, receiver);
let preshared_key = StaticSecret::random_from_rng(OsRng);
self.gateway_preshared_keys
.insert(id, preshared_key.clone());
@@ -351,7 +351,7 @@ impl ClientState {
}
pub fn activate_ice_candidate_receiver(&mut self, id: GatewayId, key: PublicKey) {
let Some(receiver) = self.waiting_for_sdp_from_gatway.remove(&id) else {
let Some(receiver) = self.waiting_for_sdp_from_gateway.remove(&id) else {
return;
};
self.gateway_public_keys.insert(id, key);
@@ -411,7 +411,7 @@ impl Default for ClientState {
Duration::from_secs(ICE_GATHERING_TIMEOUT_SECONDS),
MAX_CONCURRENT_ICE_GATHERING,
),
waiting_for_sdp_from_gatway: Default::default(),
waiting_for_sdp_from_gateway: Default::default(),
awaiting_connection: Default::default(),
gateway_awaiting_connection: Default::default(),
awaiting_connection_timers: StreamMap::new(Duration::from_secs(60), 100),

View File

@@ -44,6 +44,7 @@ where
conn_id: TRoleState::Id,
ice_candidate: RTCIceCandidate,
) -> Result<()> {
tracing::info!(%ice_candidate, %conn_id, "adding new remote candidate");
let peer_connection = self
.peer_connections
.lock()
@@ -101,6 +102,8 @@ pub(crate) async fn new_ice_connection(
return Box::pin(async {});
};
tracing::info!(%candidate, "found new local candidate");
let mut ice_candidate_tx = ice_candidate_tx.clone();
Box::pin(async move {
if ice_candidate_tx.send(candidate).await.is_err() {

View File

@@ -11,11 +11,31 @@ use connlib_shared::{
};
use std::sync::Arc;
use webrtc::ice_transport::{
ice_parameters::RTCIceParameters, ice_role::RTCIceRole, RTCIceTransport,
ice_parameters::RTCIceParameters, ice_role::RTCIceRole,
ice_transport_state::RTCIceTransportState, RTCIceTransport,
};
use super::{new_ice_connection, IceConnection};
#[tracing::instrument(level = "trace", skip(ice))]
fn set_connection_state_update(ice: &Arc<RTCIceTransport>, client_id: ClientId) {
let ice = ice.clone();
ice.on_connection_state_change({
let ice = ice.clone();
Box::new(move |state| {
tracing::trace!(%state, "peer_state");
let ice = ice.clone();
Box::pin(async move {
if state == RTCIceTransportState::Failed {
if let Err(e) = ice.stop().await {
tracing::warn!(err = ?e, "Couldn't stop ice client: {e:#}");
}
}
})
})
});
}
impl<CB> Tunnel<CB, GatewayState>
where
CB: Callbacks + 'static,
@@ -51,23 +71,47 @@ where
.lock()
.add_new_ice_receiver(client_id, ice_candidate_rx);
self.peer_connections
set_connection_state_update(&ice, client_id);
let previous_ice = self
.peer_connections
.lock()
.insert(client_id, Arc::clone(&ice));
if let Some(ice) = previous_ice {
// If we had a previous on-going connection we stop it.
// Note that ice.stop also closes the gatherer.
// we only have to do this on the gateway because clients can query
// twice for initiating connections since they can close/reopen suddenly
// however, gateways never initiate connection requests
let _ = ice.stop().await;
}
let tunnel = self.clone();
tokio::spawn(async move {
if let Err(e) = ice
.start(&remote_params, Some(RTCIceRole::Controlled))
.await
.map_err(Into::into)
.and_then(|_| tunnel.new_tunnel(peer, client_id, resource, expires_at, ice))
.and_then(|_| tunnel.new_tunnel(peer, client_id, resource, expires_at, ice.clone()))
{
tracing::warn!(%client_id, err = ?e, "Can't start tunnel: {e:#}");
let peer_connection = tunnel.peer_connections.lock().remove(&client_id);
if let Some(peer_connection) = peer_connection {
let _ = peer_connection.stop().await;
{
let mut peer_connections = tunnel.peer_connections.lock();
if let Some(peer_connection) = peer_connections.get(&client_id).cloned() {
// We need to re-check this since it might have been replaced in between.
if matches!(
peer_connection.state(),
RTCIceTransportState::Failed
| RTCIceTransportState::Disconnected
| RTCIceTransportState::Closed
) {
peer_connections.remove(&client_id);
}
}
}
// We only need to stop here because in case tunnel.new_tunnel failed.
let _ = ice.stop().await;
}
});

View File

@@ -308,14 +308,7 @@ where
if let Some(conn_id) = self.peers_to_stop.lock().pop_front() {
let mut peers = self.peers_by_ip.write();
let Some(peer_to_remove) = peers
.iter()
.find_map(|(n, p)| (p.inner.conn_id == conn_id).then_some(n))
else {
continue;
};
peers.remove(peer_to_remove);
peers.retain(|_, p| p.inner.conn_id != conn_id);
if let Some(conn) = self.peer_connections.lock().remove(&conn_id) {
tokio::spawn({

View File

@@ -57,7 +57,7 @@ struct AdvancedSettings: Equatable {
authBaseURLString: "https://app.firezone.dev/",
apiURLString: "wss://api.firezone.dev/",
connlibLogFilterString:
"connlib_client_apple=info,firezone_tunnel=info,connlib_shared=info,connlib_client_shared=info,warn"
"connlib_client_apple=info,firezone_tunnel=trace,connlib_shared=info,connlib_client_shared=info,warn"
)
#endif
}()