fix(gateway): don't remove peer state on disconnect (#10040)

When the connection to a Client disappears, the Gateway currently clears
all state related to this peer. Whilst eagerly cleaning up memory can be
good, in this case, it may lead to the Client thinking it has access to
a resource when in reality it doesn't.

Just because the connection to a Client failed doesn't mean their access
authorizations are invalid. In case the Client reconnects, it should be
able to just continue sending traffic.

At the moment, this only works if the connection also failed on the
Client and therefore, its view of the world in regards to "which
resources do I have access to" was also reset.

What we are seeing in Sentry reports though is that Clients are
attempting to access these resources, thinking they have access but the
Gateway denies it because it has lost the access authorization state.
This commit is contained in:
Thomas Eizinger
2025-08-02 08:27:49 +00:00
committed by GitHub
parent a4eb6509c6
commit cd177a6448
4 changed files with 16 additions and 4 deletions

View File

@@ -16,7 +16,7 @@ pub use allocation::RelaySocket;
pub use node::{Answer, Offer};
pub use node::{
Client, ClientNode, Credentials, Event, HANDSHAKE_TIMEOUT, NoTurnServers, Node, Server,
ServerNode, Transmit,
ServerNode, Transmit, UnknownConnection,
};
pub use stats::{ConnectionStats, NodeStats};

View File

@@ -426,7 +426,7 @@ where
let conn = self
.connections
.get_established_mut(&cid)
.with_context(|| format!("Unknown connection {cid}"))?;
.context(UnknownConnection(cid.to_string()))?;
if self.mode.is_server() && !conn.state.has_nominated_socket() {
tracing::debug!(
@@ -1440,6 +1440,10 @@ fn remove_local_candidate<TId>(
}
}
#[derive(thiserror::Error, Debug)]
#[error("Unknown connection: {0}")]
pub struct UnknownConnection(String);
#[deprecated]
pub struct Offer {
/// The Wireguard session key for a connection.

View File

@@ -400,8 +400,10 @@ impl GatewayState {
while let Some(event) = self.node.poll_event() {
match event {
snownet::Event::ConnectionFailed(id) | snownet::Event::ConnectionClosed(id) => {
self.peers.remove(&id);
snownet::Event::ConnectionFailed(_) | snownet::Event::ConnectionClosed(_) => {
// We purposely don't clear the peer-state here.
// The Client might re-establish the connection but if it hasn't cleared its local state too,
// it will consider all its access authorizations to be still valid.
}
snownet::Event::NewIceCandidate {
connection,

View File

@@ -115,6 +115,12 @@ impl Eventloop {
continue;
}
// Unknown connection just means packets are bouncing on the TUN device because the Client disconnected.
if e.root_cause().is::<snownet::UnknownConnection>() {
tracing::debug!("{e:#}");
continue;
}
if e.root_cause()
.downcast_ref::<io::Error>()
.is_some_and(|e| e.kind() == io::ErrorKind::PermissionDenied)