chore(snownet): improve logs on blocked STUN traffic (#5305)

Detecting blocked STUN traffic is somewhat tricky. What we can observe
is not receiving any responses from a relay (neither on IPv4 nor IPv6).
Once an `Allocation` gives up retrying requests with a relay (after
60s), we now de-allocate the `Allocation` and print the following
message:

> INFO snownet::node: Disconnecting from relay; no response received. Is
STUN blocked? id=613f68ac-483e-4e9d-bf87-457fd7223bf6

I chose to go with the wording of "disconnecting from relay" as
sysdamins likely don't have any clue of what an "Allocation" is. The
error message is specific to a relay though so it could also be emitted
if a relay is down for > 60s or not responding for whatever reason.

Resolves: #5281.
This commit is contained in:
Thomas Eizinger
2024-06-12 08:37:33 +10:00
committed by GitHub
parent abb28f16a3
commit 28cddc8304
2 changed files with 70 additions and 13 deletions

View File

@@ -174,6 +174,14 @@ impl Socket {
}
}
#[derive(Debug, thiserror::Error, PartialEq)]
pub enum FreeReason {
#[error("authentication error")]
AuthenticationError,
#[error("no response received. Is STUN blocked?")]
NoResponseReceived,
}
impl Allocation {
pub fn new(
server: RelaySocket,
@@ -715,9 +723,32 @@ impl Allocation {
/// Whether this [`Allocation`] can be freed.
///
/// This is tied to having our credentials cleared (i.e due to an authentication error) and having emitted all events.
pub fn can_be_freed(&self) -> bool {
self.credentials.is_none() && self.events.is_empty()
/// This is tied to having our credentials cleared (i.e due to an authentication error) and having emitted all events or not having received a single response.
pub fn can_be_freed(&self) -> Option<FreeReason> {
let pending_work = !self.events.is_empty()
|| !self.buffered_transmits.is_empty()
|| !self.sent_requests.is_empty();
let no_responses = !self.received_any_response();
let auth_failure = !self.has_credentials();
if !pending_work && no_responses {
return Some(FreeReason::NoResponseReceived);
}
if !pending_work && auth_failure {
return Some(FreeReason::AuthenticationError);
}
None
}
pub fn received_any_response(&self) -> bool {
self.active_socket.is_some()
}
pub fn has_credentials(&self) -> bool {
self.credentials.is_some()
}
fn log_update(&self) {
@@ -2155,7 +2186,10 @@ mod tests {
CandidateEvent::Invalid(Candidate::relayed(RELAY_ADDR_IP6, Protocol::Udp).unwrap()),
]
);
assert!(allocation.can_be_freed());
assert_eq!(
allocation.can_be_freed(),
Some(FreeReason::AuthenticationError)
);
}
#[test]
@@ -2183,7 +2217,7 @@ mod tests {
fn allocation_is_not_freed_on_startup() {
let allocation = Allocation::for_test_ip4(Instant::now());
assert!(!allocation.can_be_freed());
assert_eq!(allocation.can_be_freed(), None);
}
#[test]
@@ -2325,6 +2359,29 @@ mod tests {
assert_eq!(allocation.poll_transmit(), None);
}
#[test]
fn allocation_can_be_freed_after_all_requests_time_out() {
let mut allocation = Allocation::for_test_dual(Instant::now());
loop {
let Some(timeout) = allocation.poll_timeout() else {
break;
};
allocation.handle_timeout(timeout);
// We expect two transmits.
// The order is not deterministic because internally it is a `HashMap`.
let _ = allocation.poll_transmit().unwrap();
let _ = allocation.poll_transmit().unwrap();
}
assert_eq!(
allocation.can_be_freed(),
Some(FreeReason::NoResponseReceived)
);
}
fn ch(peer: SocketAddr, now: Instant) -> Channel {
Channel {
peer,

View File

@@ -451,15 +451,15 @@ where
self.next_rate_limiter_reset = Some(now + Duration::from_secs(1));
}
self.allocations.retain(|id, allocation| {
if allocation.can_be_freed() {
tracing::info!(%id, "Freeing memory of allocation");
self.allocations
.retain(|id, allocation| match allocation.can_be_freed() {
Some(e) => {
tracing::info!(%id, "Disconnecting from relay; {e}");
return false;
}
true
});
false
}
None => true,
});
self.connections.remove_failed(&mut self.pending_events);
}