From a309f11011bbb0cf05af2b714cf8b539c3d98e4d Mon Sep 17 00:00:00 2001 From: Gabi Date: Wed, 29 Nov 2023 01:49:30 -0300 Subject: [PATCH] Fix gateway cleanup (#2704) Yesterday, during some portion of the day connections between clients and resources were impossible. While I couldn't pinpoint the exact cause I found some issues with cleanup. This PR fixes those. Furthermore, I increased the default log level for tunnels in the clients so that if this happens again we have better logs to triage. ~~Furthermore, I found out about #2705 so, I removed the limit of relays from connlib since the portal already limits it to 2 (4 if you count per-ip), that way we make sure that we always use both ipv4 and ipv6. The connection start up time seems to slow down due to this but I think this is better. We might want to go to only 2 urls again later on to speed this up, if the portal can ensure it's a working relay load-balanced relay there might not be a point in using more than a single server~~. cc @AndrewDryga Edit: we always get an ipv4 and ipv6 address for the same relay as the first two relays in the relay list, save the case where only one of the ip types is supported. We should be safe limiting it to 2. --------- Signed-off-by: Gabi Co-authored-by: Jamil --- kotlin/android/app/build.gradle.kts | 2 +- rust/Cargo.lock | 138 +++++++++++------- rust/connlib/tunnel/src/client.rs | 8 +- rust/connlib/tunnel/src/control_protocol.rs | 3 + .../tunnel/src/control_protocol/gateway.rs | 56 ++++++- rust/connlib/tunnel/src/lib.rs | 9 +- .../Sources/FirezoneKit/Models/Settings.swift | 2 +- 7 files changed, 145 insertions(+), 73 deletions(-) diff --git a/kotlin/android/app/build.gradle.kts b/kotlin/android/app/build.gradle.kts index ebe28a1ba..6c1fbdd01 100644 --- a/kotlin/android/app/build.gradle.kts +++ b/kotlin/android/app/build.gradle.kts @@ -105,7 +105,7 @@ android { buildConfigField( "String", "LOG_FILTER", - "\"connlib_client_android=info,firezone_tunnel=info,connlib_shared=info,connlib_client_shared=info,warn\"", + "\"connlib_client_android=info,firezone_tunnel=trace,connlib_shared=info,connlib_client_shared=info,warn\"", ) firebaseAppDistribution { serviceCredentialsFile = System.getenv("FIREBASE_CREDENTIALS_PATH") diff --git a/rust/Cargo.lock b/rust/Cargo.lock index dfa5a3f32..f8bda5ba4 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -193,9 +193,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f658e2baef915ba0f26f1f7c42bfb8e12f532a01f449a090ded75ae7a07e9ba2" +checksum = "bc2d0cfb2a7388d34f590e76686704c494ed7aaceed62ee1ba35cbf363abc2a5" dependencies = [ "flate2", "futures-core", @@ -851,9 +851,9 @@ dependencies = [ [[package]] name = "crypto-bigint" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28f85c3514d2a6e64160359b45a3918c3b4178bcbf4ae5d03ab2d02e521c479a" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" dependencies = [ "generic-array", "rand_core", @@ -920,9 +920,9 @@ dependencies = [ [[package]] name = "data-encoding" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308" +checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5" [[package]] name = "der" @@ -1002,9 +1002,9 @@ dependencies = [ [[package]] name = "domain" -version = "0.9.1" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54a83205ec042ce7339c73344d6666a39acb30021121dab68eb983740dcff86e" +checksum = "7af83e443e4bfe8602af356e5ca10b9676634e53d178875017f2ff729898a388" dependencies = [ "octseq", "rand", @@ -1013,9 +1013,9 @@ dependencies = [ [[package]] name = "ecdsa" -version = "0.16.8" +version = "0.16.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4b1e0c257a9e9f25f90ff76d7a68360ed497ee519c8e428d1825ef0000799d4" +checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca" dependencies = [ "der", "digest", @@ -1033,9 +1033,9 @@ checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "elliptic-curve" -version = "0.13.6" +version = "0.13.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d97ca172ae9dc9f9b779a6e3a65d308f2af74e5b8c921299075bdb4a0370e914" +checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" dependencies = [ "base16ct", "crypto-bigint", @@ -1087,10 +1087,16 @@ dependencies = [ ] [[package]] -name = "errno" -version = "0.3.6" +name = "equivalent" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c18ee0ed65a5f1f81cac6b1d213b69c35fa47d4252ad41f1486dbd8226fe36e" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f258a7194e7f7c2a7837a8913aeab7fd8c383457034fa20ce4dd3dcb813e8eb8" dependencies = [ "libc", "windows-sys 0.48.0", @@ -1114,9 +1120,9 @@ dependencies = [ [[package]] name = "fiat-crypto" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53a56f0780318174bad1c127063fd0c5fdfb35398e3cd79ffaab931a6c79df80" +checksum = "27573eac26f4dd11e2b1916c3fe1baa56407c83c71a773a8ba17ec0bca03b6b7" [[package]] name = "firezone-cli-utils" @@ -1267,9 +1273,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "form_urlencoded" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" dependencies = [ "percent-encoding", ] @@ -1291,9 +1297,9 @@ dependencies = [ [[package]] name = "futures-bounded" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43bcbe9c086773ba37692e50f578c4c459c0a5ffe07bd924d0ea485b2a46c9c7" +checksum = "e1e2774cc104e198ef3d3e1ff4ab40f86fa3245d6cb6a3a46174f21463cee173" dependencies = [ "futures-timer", "futures-util", @@ -1445,9 +1451,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.21" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" +checksum = "4d6250322ef6e60f93f9a2162799302cd6f68f79f6e5d85c8c16f14d1d958178" dependencies = [ "bytes", "fnv", @@ -1455,7 +1461,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap", + "indexmap 2.1.0", "slab", "tokio", "tokio-util", @@ -1468,6 +1474,12 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" + [[package]] name = "heck" version = "0.4.1" @@ -1505,7 +1517,7 @@ dependencies = [ "futures-channel", "futures-io", "futures-util", - "idna", + "idna 0.4.0", "ipnet", "once_cell", "rand", @@ -1698,6 +1710,16 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -1705,7 +1727,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" +dependencies = [ + "equivalent", + "hashbrown 0.14.2", ] [[package]] @@ -2238,9 +2270,9 @@ dependencies = [ [[package]] name = "octseq" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d233b7fea8c1c9f4ffda5dcc961c8edd2fc1587e4b355b2752fe7c27e0cd50ca" +checksum = "cd09a3d18c298e5d9b66cf65db82e2e1694b94fbc032f83e304a5cbc87bcc3bb" [[package]] name = "oid-registry" @@ -2327,7 +2359,7 @@ checksum = "8a81f725323db1b1206ca3da8bb19874bbd3f57c3bcd59471bfb04525b265b9b" dependencies = [ "futures-channel", "futures-util", - "indexmap", + "indexmap 1.9.3", "js-sys", "once_cell", "pin-project-lite", @@ -2463,9 +2495,9 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.3.0" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "phoenix-channel" @@ -2611,9 +2643,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "primeorder" -version = "0.13.3" +version = "0.13.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7dbe9ed3b56368bd99483eb32fe9c17fdd3730aebadc906918ce78d54c7eeb4" +checksum = "353e1ca18966c16d9deb1c69278edbc5f194139612772bd9537af60ac231e1e6" dependencies = [ "elliptic-curve", ] @@ -2972,9 +3004,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.23" +version = "0.38.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb93593068e9babdad10e4fce47dc9b3ac25315a72a59766ffd9e9a71996a04" +checksum = "dc99bc2d4f1fed22595588a013687477aedf3cdcfb26558c559edb67b4d9b22e" dependencies = [ "bitflags 2.4.1", "errno", @@ -2985,9 +3017,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.8" +version = "0.21.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "446e14c5cda4f3f30fe71863c34ec70f5ac79d6087097ad0bb433e1be5edf04c" +checksum = "629648aced5775d558af50b2b4c7b02983a04b312126d45eeead26e7caa498b9" dependencies = [ "log", "ring 0.17.5", @@ -3151,18 +3183,18 @@ checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" [[package]] name = "serde" -version = "1.0.192" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bca2a08484b285dcb282d0f67b26cadc0df8b19f8c12502c13d966bf9482f001" +checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.192" +version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6c7207fbec9faa48073f3e3074cbe553af6ea512d7c21ba46e434e70ea9fbc1" +checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", @@ -3240,9 +3272,9 @@ dependencies = [ [[package]] name = "signature" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e1788eed21689f9cf370582dfc467ef36ed9c707f073528ddafa8d83e3b8500" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" dependencies = [ "digest", "rand_core", @@ -3748,7 +3780,7 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ "futures-core", "futures-util", - "indexmap", + "indexmap 1.9.3", "pin-project", "pin-project-lite", "rand", @@ -4086,12 +4118,12 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" dependencies = [ "form_urlencoded", - "idna", + "idna 0.5.0", "percent-encoding", "serde", ] @@ -4116,9 +4148,9 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.5.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ad59a7560b41a70d191093a945f0b87bc1deeda46fb237479708a1d6b6cdfc" +checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" dependencies = [ "getrandom", "serde", @@ -4270,9 +4302,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.25.2" +version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc" +checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10" [[package]] name = "webrtc" @@ -4733,9 +4765,9 @@ dependencies = [ [[package]] name = "zeroize" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9" +checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" dependencies = [ "zeroize_derive", ] diff --git a/rust/connlib/tunnel/src/client.rs b/rust/connlib/tunnel/src/client.rs index 140566048..c262ffe76 100644 --- a/rust/connlib/tunnel/src/client.rs +++ b/rust/connlib/tunnel/src/client.rs @@ -131,7 +131,7 @@ where pub struct ClientState { active_candidate_receivers: StreamMap, /// We split the receivers of ICE candidates into two phases because we only want to start sending them once we've received an SDP from the gateway. - waiting_for_sdp_from_gatway: HashMap>, + waiting_for_sdp_from_gateway: HashMap>, // TODO: Make private pub awaiting_connection: HashMap, @@ -343,7 +343,7 @@ impl ClientState { id: GatewayId, receiver: Receiver, ) -> StaticSecret { - self.waiting_for_sdp_from_gatway.insert(id, receiver); + self.waiting_for_sdp_from_gateway.insert(id, receiver); let preshared_key = StaticSecret::random_from_rng(OsRng); self.gateway_preshared_keys .insert(id, preshared_key.clone()); @@ -351,7 +351,7 @@ impl ClientState { } pub fn activate_ice_candidate_receiver(&mut self, id: GatewayId, key: PublicKey) { - let Some(receiver) = self.waiting_for_sdp_from_gatway.remove(&id) else { + let Some(receiver) = self.waiting_for_sdp_from_gateway.remove(&id) else { return; }; self.gateway_public_keys.insert(id, key); @@ -411,7 +411,7 @@ impl Default for ClientState { Duration::from_secs(ICE_GATHERING_TIMEOUT_SECONDS), MAX_CONCURRENT_ICE_GATHERING, ), - waiting_for_sdp_from_gatway: Default::default(), + waiting_for_sdp_from_gateway: Default::default(), awaiting_connection: Default::default(), gateway_awaiting_connection: Default::default(), awaiting_connection_timers: StreamMap::new(Duration::from_secs(60), 100), diff --git a/rust/connlib/tunnel/src/control_protocol.rs b/rust/connlib/tunnel/src/control_protocol.rs index 3c762699c..8dd871106 100644 --- a/rust/connlib/tunnel/src/control_protocol.rs +++ b/rust/connlib/tunnel/src/control_protocol.rs @@ -44,6 +44,7 @@ where conn_id: TRoleState::Id, ice_candidate: RTCIceCandidate, ) -> Result<()> { + tracing::info!(%ice_candidate, %conn_id, "adding new remote candidate"); let peer_connection = self .peer_connections .lock() @@ -101,6 +102,8 @@ pub(crate) async fn new_ice_connection( return Box::pin(async {}); }; + tracing::info!(%candidate, "found new local candidate"); + let mut ice_candidate_tx = ice_candidate_tx.clone(); Box::pin(async move { if ice_candidate_tx.send(candidate).await.is_err() { diff --git a/rust/connlib/tunnel/src/control_protocol/gateway.rs b/rust/connlib/tunnel/src/control_protocol/gateway.rs index 6968a2d67..2ebdf3fe9 100644 --- a/rust/connlib/tunnel/src/control_protocol/gateway.rs +++ b/rust/connlib/tunnel/src/control_protocol/gateway.rs @@ -11,11 +11,31 @@ use connlib_shared::{ }; use std::sync::Arc; use webrtc::ice_transport::{ - ice_parameters::RTCIceParameters, ice_role::RTCIceRole, RTCIceTransport, + ice_parameters::RTCIceParameters, ice_role::RTCIceRole, + ice_transport_state::RTCIceTransportState, RTCIceTransport, }; use super::{new_ice_connection, IceConnection}; +#[tracing::instrument(level = "trace", skip(ice))] +fn set_connection_state_update(ice: &Arc, client_id: ClientId) { + let ice = ice.clone(); + ice.on_connection_state_change({ + let ice = ice.clone(); + Box::new(move |state| { + tracing::trace!(%state, "peer_state"); + let ice = ice.clone(); + Box::pin(async move { + if state == RTCIceTransportState::Failed { + if let Err(e) = ice.stop().await { + tracing::warn!(err = ?e, "Couldn't stop ice client: {e:#}"); + } + } + }) + }) + }); +} + impl Tunnel where CB: Callbacks + 'static, @@ -51,23 +71,47 @@ where .lock() .add_new_ice_receiver(client_id, ice_candidate_rx); - self.peer_connections + set_connection_state_update(&ice, client_id); + + let previous_ice = self + .peer_connections .lock() .insert(client_id, Arc::clone(&ice)); + if let Some(ice) = previous_ice { + // If we had a previous on-going connection we stop it. + // Note that ice.stop also closes the gatherer. + // we only have to do this on the gateway because clients can query + // twice for initiating connections since they can close/reopen suddenly + // however, gateways never initiate connection requests + let _ = ice.stop().await; + } + let tunnel = self.clone(); tokio::spawn(async move { if let Err(e) = ice .start(&remote_params, Some(RTCIceRole::Controlled)) .await .map_err(Into::into) - .and_then(|_| tunnel.new_tunnel(peer, client_id, resource, expires_at, ice)) + .and_then(|_| tunnel.new_tunnel(peer, client_id, resource, expires_at, ice.clone())) { tracing::warn!(%client_id, err = ?e, "Can't start tunnel: {e:#}"); - let peer_connection = tunnel.peer_connections.lock().remove(&client_id); - if let Some(peer_connection) = peer_connection { - let _ = peer_connection.stop().await; + { + let mut peer_connections = tunnel.peer_connections.lock(); + if let Some(peer_connection) = peer_connections.get(&client_id).cloned() { + // We need to re-check this since it might have been replaced in between. + if matches!( + peer_connection.state(), + RTCIceTransportState::Failed + | RTCIceTransportState::Disconnected + | RTCIceTransportState::Closed + ) { + peer_connections.remove(&client_id); + } + } } + // We only need to stop here because in case tunnel.new_tunnel failed. + let _ = ice.stop().await; } }); diff --git a/rust/connlib/tunnel/src/lib.rs b/rust/connlib/tunnel/src/lib.rs index c1f65d86f..ed50b6a03 100644 --- a/rust/connlib/tunnel/src/lib.rs +++ b/rust/connlib/tunnel/src/lib.rs @@ -308,14 +308,7 @@ where if let Some(conn_id) = self.peers_to_stop.lock().pop_front() { let mut peers = self.peers_by_ip.write(); - let Some(peer_to_remove) = peers - .iter() - .find_map(|(n, p)| (p.inner.conn_id == conn_id).then_some(n)) - else { - continue; - }; - - peers.remove(peer_to_remove); + peers.retain(|_, p| p.inner.conn_id != conn_id); if let Some(conn) = self.peer_connections.lock().remove(&conn_id) { tokio::spawn({ diff --git a/swift/apple/FirezoneKit/Sources/FirezoneKit/Models/Settings.swift b/swift/apple/FirezoneKit/Sources/FirezoneKit/Models/Settings.swift index edc4ee27a..eafda603d 100644 --- a/swift/apple/FirezoneKit/Sources/FirezoneKit/Models/Settings.swift +++ b/swift/apple/FirezoneKit/Sources/FirezoneKit/Models/Settings.swift @@ -57,7 +57,7 @@ struct AdvancedSettings: Equatable { authBaseURLString: "https://app.firezone.dev/", apiURLString: "wss://api.firezone.dev/", connlibLogFilterString: - "connlib_client_apple=info,firezone_tunnel=info,connlib_shared=info,connlib_client_shared=info,warn" + "connlib_client_apple=info,firezone_tunnel=trace,connlib_shared=info,connlib_client_shared=info,warn" ) #endif }()