From cbe114bddcf2f2eb02d0f5ac8bc99b85f52dd8ff Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Fri, 25 Jul 2025 06:41:26 +1000 Subject: [PATCH] fix(connlib): clear join requests on reconnect (#9985) Room join requests on the portal are only valid whilst we have a WebSocket connection. To make sure the portal processes all our requests correctly, we need to hold all other messages back while we are waiting to join the room. If the connection flaps while we are waiting to join a room, we may have a lingering join request that never gets fulfilled and thus blocks the sending of messages forever. --------- Co-authored-by: Jamil Bou Kheir --- rust/connlib/phoenix-channel/src/lib.rs | 10 ++++++++++ website/src/components/Changelog/Android.tsx | 5 +++++ website/src/components/Changelog/Apple.tsx | 5 +++++ website/src/components/Changelog/GUI.tsx | 5 +++++ website/src/components/Changelog/Headless.tsx | 5 +++++ 5 files changed, 30 insertions(+) diff --git a/rust/connlib/phoenix-channel/src/lib.rs b/rust/connlib/phoenix-channel/src/lib.rs index bf70ea50f..dd6ebcc0e 100644 --- a/rust/connlib/phoenix-channel/src/lib.rs +++ b/rust/connlib/phoenix-channel/src/lib.rs @@ -398,6 +398,11 @@ where self.heartbeat.reset(); self.state = State::Connected(stream); + // Clear local state. + // Joins are only valid whilst we are connected, so we need to discard any previous ones on reconnect. 
+ self.pending_joins.clear(); + self.pending_join_requests.clear(); + let (host, _) = self.url_prototype.expose_secret().host_and_port(); tracing::info!(%host, "Connected to portal"); @@ -512,6 +517,11 @@ where continue; } + } else if !self.pending_messages.is_empty() { + tracing::trace!( + requests = ?self.pending_join_requests, + "Unable to send message because we are waiting for JOIN requests to complete" + ); } } Poll::Ready(Err(e)) => { diff --git a/website/src/components/Changelog/Android.tsx b/website/src/components/Changelog/Android.tsx index ae275c660..b16d1245e 100644 --- a/website/src/components/Changelog/Android.tsx +++ b/website/src/components/Changelog/Android.tsx @@ -21,6 +21,11 @@ export default function Android() { {/* When you cut a release, remove any solved issues from the "known issues" lists over in `client-apps`. This must not be done when the issue's PR merges. */} + + Fixes an issue where control plane messages could be stuck forever on + flaky connections, requiring signing out and signing back in to + recover. + Fixes an issue where Firezone failed to sign-in on systems with non-ASCII characters in their kernel build name. diff --git a/website/src/components/Changelog/Apple.tsx b/website/src/components/Changelog/Apple.tsx index 5bb56063d..0df66b37a 100644 --- a/website/src/components/Changelog/Apple.tsx +++ b/website/src/components/Changelog/Apple.tsx @@ -25,6 +25,11 @@ export default function Apple() { {/* When you cut a release, remove any solved issues from the "known issues" lists over in `client-apps`. This must not be done when the issue's PR merges. */} + + Fixes an issue where control plane messages could be stuck forever on + flaky connections, requiring signing out and signing back in to + recover. + Fixes an issue where connections would sometimes take up to 90s to establish. 
diff --git a/website/src/components/Changelog/GUI.tsx b/website/src/components/Changelog/GUI.tsx index bf2fe3050..29c899493 100644 --- a/website/src/components/Changelog/GUI.tsx +++ b/website/src/components/Changelog/GUI.tsx @@ -11,6 +11,11 @@ export default function GUI({ os }: { os: OS }) { {/* When you cut a release, remove any solved issues from the "known issues" lists over in `client-apps`. This must not be done when the issue's PR merges. */} + + Fixes an issue where control plane messages could be stuck forever on + flaky connections, requiring signing out and signing back in to + recover. + Fixes an issue where connections would sometimes take up to 90s to establish. diff --git a/website/src/components/Changelog/Headless.tsx b/website/src/components/Changelog/Headless.tsx index e062dbbc8..2ff40fd71 100644 --- a/website/src/components/Changelog/Headless.tsx +++ b/website/src/components/Changelog/Headless.tsx @@ -10,6 +10,11 @@ export default function Headless({ os }: { os: OS }) { {/* When you cut a release, remove any solved issues from the "known issues" lists over in `client-apps`. This must not be done when the issue's PR merges. */} + + Fixes an issue where control plane messages could be stuck forever on + flaky connections, requiring signing out and signing back in to + recover. + Fixes an issue where connections would sometimes take up to 90s to establish.