fix(snownet): don't log unknown packet for disconnected relay (#9961)

Currently, packets for allocations, i.e. from relays are parsed inside
the `Allocation` struct. We have one of those structs for each relay
that `snownet` is talking to. When we disconnect from a relay because it
is e.g. not responding, then we deallocate this struct. As a result,
message that arrive from this relay can no longer be handled. This can
happen when the response time is longer than our timeout.

These packets then fall-through and end up being logged as "packet has
unknown format".

To prevent this, we make the signature on `Allocation` strongly-typed
and expect a fully parsed `Message` to be given to us. This allows us to
parse the message early and discard it with a DEBUG log in case we don't
have the necessary local state to handle it.

The functionality here is essentially the same, we just change at what
level this is being logged at from WARN to DEBUG.

We have to make one additional adjustment to make this work: Guard all
messages to be parsed by any `Allocation` to come from port 3478. This
is the assigned port that all relays are expected to listen on. If we
don't have any local state for a given address, we cannot decide whether
it is a STUN message for an agent or a STUN message for a relay that we
have disconnected from. Therefore, we need to de-multiplex based on the
source port.
This commit is contained in:
Thomas Eizinger
2025-07-25 10:32:43 +10:00
committed by GitHub
parent 27c7d537bb
commit 9133d46bbd
7 changed files with 104 additions and 76 deletions

2
rust/Cargo.lock generated
View File

@@ -951,7 +951,7 @@ dependencies = [
[[package]]
name = "boringtun"
version = "0.6.1"
source = "git+https://github.com/firezone/boringtun?branch=master#a99332c529ce7546e953de5b6618e4e8de7170f3"
source = "git+https://github.com/firezone/boringtun?branch=master#5cb8ec28749fe6a81a33471cb77565376bcf1d67"
dependencies = [
"aead",
"base64 0.22.1",

View File

@@ -292,7 +292,7 @@ impl Allocation {
&mut self,
from: SocketAddr,
local: SocketAddr,
packet: &[u8],
message: Message<Attribute>,
now: Instant,
) -> bool {
debug_assert_eq!(
@@ -305,10 +305,6 @@ impl Allocation {
return false;
}
let Ok(Ok(message)) = decode(packet) else {
return false;
};
let transaction_id = message.transaction_id();
Span::current().record(
@@ -1336,7 +1332,7 @@ fn relay_candidate(
}
}
fn decode(packet: &[u8]) -> bytecodec::Result<DecodedMessage<Attribute>> {
pub(crate) fn decode(packet: &[u8]) -> bytecodec::Result<DecodedMessage<Attribute>> {
MessageDecoder::<Attribute>::default().decode_from_bytes(packet)
}
@@ -1881,7 +1877,7 @@ mod tests {
);
allocation.handle_test_input_ip4(
&allocate_response(&allocate, &[RELAY_ADDR_IP4]),
allocate_response(&allocate, &[RELAY_ADDR_IP4]),
Instant::now(),
);
@@ -1897,10 +1893,7 @@ mod tests {
allocation.bind_channel(PEER2_IP4, Instant::now());
let channel_bind_msg = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&encode(channel_bind_success(&channel_bind_msg)),
Instant::now(),
);
allocation.handle_test_input_ip4(channel_bind_success(&channel_bind_msg), Instant::now());
let mut buffer = channel_data_packet_buffer(b"foobar");
let encode_ok = allocation
@@ -1933,10 +1926,8 @@ mod tests {
let channel_bind_msg = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&encode(channel_bind_bad_request(&channel_bind_msg)),
Instant::now(),
);
allocation
.handle_test_input_ip4(channel_bind_bad_request(&channel_bind_msg), Instant::now());
// TODO: Not the best assertion because we are reaching into private state but better than nothing for now.
let channel = allocation
@@ -1956,10 +1947,7 @@ mod tests {
allocation.bind_channel(PEER2_IP4, Instant::now());
let channel_bind_msg = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&encode(channel_bind_success(&channel_bind_msg)),
Instant::now(),
);
allocation.handle_test_input_ip4(channel_bind_success(&channel_bind_msg), Instant::now());
allocation.bind_channel(PEER2_IP4, Instant::now());
let next_msg = allocation.next_message();
@@ -2031,7 +2019,7 @@ mod tests {
// Allocation succeeds but only for IPv4
allocation.handle_test_input_ip4(
&allocate_response(&allocate, &[RELAY_ADDR_IP4]),
allocate_response(&allocate, &[RELAY_ADDR_IP4]),
Instant::now(),
);
@@ -2074,7 +2062,7 @@ mod tests {
let allocate = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&stale_nonce_response(&allocate, Nonce::new("nonce2".to_owned()).unwrap()),
stale_nonce_response(&allocate, Nonce::new("nonce2".to_owned()).unwrap()),
Instant::now(),
);
@@ -2096,7 +2084,7 @@ mod tests {
// Attempt to authenticate without a nonce
let allocate = allocation.next_message().unwrap();
allocation
.handle_test_input_ip4(&unauthorized_response(&allocate, "nonce1"), Instant::now());
.handle_test_input_ip4(unauthorized_response(&allocate, "nonce1"), Instant::now());
let allocate = allocation.next_message().unwrap();
assert_eq!(
@@ -2106,7 +2094,7 @@ mod tests {
);
allocation
.handle_test_input_ip4(&unauthorized_response(&allocate, "nonce2"), Instant::now());
.handle_test_input_ip4(unauthorized_response(&allocate, "nonce2"), Instant::now());
assert!(
allocation.next_message().is_none(),
@@ -2121,7 +2109,7 @@ mod tests {
let allocate = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&allocate_response(&allocate, &[RELAY_ADDR_IP4]),
allocate_response(&allocate, &[RELAY_ADDR_IP4]),
Instant::now(),
);
@@ -2150,7 +2138,7 @@ mod tests {
let allocate = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&allocate_response(&allocate, &[RELAY_ADDR_IP4]),
allocate_response(&allocate, &[RELAY_ADDR_IP4]),
Instant::now(),
);
@@ -2170,7 +2158,7 @@ mod tests {
let allocate = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
Instant::now(),
);
let _ = iter::from_fn(|| allocation.poll_event()).collect::<Vec<_>>(); // Drain events.
@@ -2178,7 +2166,7 @@ mod tests {
allocation.refresh_with_same_credentials();
let refresh = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(&allocation_mismatch(&refresh), Instant::now());
allocation.handle_test_input_ip4(allocation_mismatch(&refresh), Instant::now());
assert_eq!(
allocation.poll_event(),
@@ -2206,16 +2194,13 @@ mod tests {
let allocate = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
Instant::now(),
);
allocation.bind_channel(PEER2_IP4, Instant::now());
let channel_bind_msg = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&encode(channel_bind_success(&channel_bind_msg)),
Instant::now(),
);
allocation.handle_test_input_ip4(channel_bind_success(&channel_bind_msg), Instant::now());
let mut packet = channel_data_packet_buffer(b"foobar");
let msg = allocation.encode_channel_data_header(PEER2_IP4, &mut packet, Instant::now());
@@ -2224,7 +2209,7 @@ mod tests {
allocation.refresh_with_same_credentials();
let refresh = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(&allocation_mismatch(&refresh), Instant::now());
allocation.handle_test_input_ip4(allocation_mismatch(&refresh), Instant::now());
let mut packet = channel_data_packet_buffer(b"foobar");
let msg = allocation.encode_channel_data_header(PEER2_IP4, &mut packet, Instant::now());
@@ -2251,14 +2236,14 @@ mod tests {
let allocate = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
Instant::now(),
);
allocation.refresh_with_same_credentials();
let refresh = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(&allocation_mismatch(&refresh), Instant::now());
allocation.handle_test_input_ip4(allocation_mismatch(&refresh), Instant::now());
let allocate = allocation.next_message().unwrap();
assert_eq!(allocate.method(), ALLOCATE);
@@ -2272,7 +2257,7 @@ mod tests {
let allocate = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
now,
);
@@ -2290,7 +2275,7 @@ mod tests {
let allocate = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
Instant::now(),
);
@@ -2307,13 +2292,13 @@ mod tests {
Allocation::for_test_ip4(Instant::now()).with_binding_response(PEER1, Instant::now());
let allocate = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(&server_error(&allocate), Instant::now());
allocation.handle_test_input_ip4(server_error(&allocate), Instant::now());
allocation.refresh_with_same_credentials();
let binding = allocation.next_message().unwrap();
assert_eq!(binding.method(), BINDING);
allocation.handle_test_input_ip4(&binding_response(&binding, PEER1), Instant::now());
allocation.handle_test_input_ip4(binding_response(&binding, PEER1), Instant::now());
let next_msg = allocation.next_message().unwrap();
assert_eq!(next_msg.method(), ALLOCATE)
@@ -2327,17 +2312,17 @@ mod tests {
allocation.bind_channel(PEER1, Instant::now());
let allocate = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(&server_error(&allocate), Instant::now()); // This should clear the buffered channel bindings.
allocation.handle_test_input_ip4(server_error(&allocate), Instant::now()); // This should clear the buffered channel bindings.
allocation.refresh_with_same_credentials();
let binding = allocation.next_message().unwrap();
assert_eq!(binding.method(), BINDING);
allocation.handle_test_input_ip4(&binding_response(&binding, PEER1), Instant::now());
allocation.handle_test_input_ip4(binding_response(&binding, PEER1), Instant::now());
let allocate = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
Instant::now(),
);
@@ -2356,7 +2341,7 @@ mod tests {
allocation.bind_channel(PEER1, Instant::now());
let channel_bind = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(&allocation_mismatch(&channel_bind), Instant::now());
allocation.handle_test_input_ip4(allocation_mismatch(&channel_bind), Instant::now());
let allocate = allocation.next_message().unwrap();
assert_eq!(
@@ -2365,7 +2350,7 @@ mod tests {
"should allocate after failed channel binding"
);
allocation.handle_test_input_ip4(
&allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
Instant::now(),
);
@@ -2387,7 +2372,7 @@ mod tests {
let allocate = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&allocate_response(&allocate, &[RELAY_ADDR_IP4]),
allocate_response(&allocate, &[RELAY_ADDR_IP4]),
Instant::now(),
);
@@ -2408,7 +2393,7 @@ mod tests {
let allocate = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&allocate_response(&allocate, &[RELAY_ADDR_IP4]),
allocate_response(&allocate, &[RELAY_ADDR_IP4]),
Instant::now(),
);
@@ -2461,7 +2446,7 @@ mod tests {
let mut allocation = Allocation::for_test_ip4(Instant::now());
let allocate = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(&server_error(&allocate), Instant::now()); // This should clear the buffered channel bindings.
allocation.handle_test_input_ip4(server_error(&allocate), Instant::now()); // This should clear the buffered channel bindings.
assert!(allocation.is_suspended())
}
@@ -2477,7 +2462,7 @@ mod tests {
{
let allocate = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(
&allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
allocate_response(&allocate, &[RELAY_ADDR_IP4, RELAY_ADDR_IP6]),
now,
);
let _drained_events = iter::from_fn(|| allocation.poll_event()).collect::<Vec<_>>();
@@ -2542,7 +2527,7 @@ mod tests {
// If the relay is restarted, our current credentials will be invalid. Simulate with an "unauthorized" response".
let now = now + Duration::from_secs(1);
let refresh = allocation.next_message().unwrap();
allocation.handle_test_input_ip4(&unauthorized_response(&refresh, "nonce2"), now);
allocation.handle_test_input_ip4(unauthorized_response(&refresh, "nonce2"), now);
assert!(
allocation.next_message().is_none(),
@@ -2607,7 +2592,7 @@ mod tests {
allocation.handle_input(
RELAY_V6.into(),
PEER2_IP6,
&binding_response(&binding, PEER2_IP6),
binding_response(&binding, PEER2_IP6),
now,
);
@@ -2623,7 +2608,7 @@ mod tests {
let handled = allocation.handle_input(
RELAY_V4.into(),
PEER2_IP4,
&binding_response(&binding, PEER2_IP4),
binding_response(&binding, PEER2_IP4),
now,
);
assert!(handled);
@@ -2632,7 +2617,7 @@ mod tests {
let handled = allocation.handle_input(
RELAY_V6.into(),
PEER2_IP6,
&binding_response(&binding, PEER2_IP6),
binding_response(&binding, PEER2_IP6),
now,
);
assert!(handled);
@@ -2663,7 +2648,7 @@ mod tests {
allocation.handle_input(
RELAY_V4.into(),
PEER2_IP4,
&binding_response(&binding, PEER2_IP4),
binding_response(&binding, PEER2_IP4),
start,
);
@@ -2733,7 +2718,10 @@ mod tests {
}
}
fn allocate_response(request: &Message<Attribute>, relay_addrs: &[SocketAddr]) -> Vec<u8> {
fn allocate_response(
request: &Message<Attribute>,
relay_addrs: &[SocketAddr],
) -> Message<Attribute> {
let mut message = Message::new(
MessageClass::SuccessResponse,
ALLOCATE,
@@ -2748,10 +2736,13 @@ mod tests {
message.add_attribute(Lifetime::new(ALLOCATION_LIFETIME).unwrap());
encode(message)
message
}
fn binding_response(request: &Message<Attribute>, srflx_addr: SocketAddr) -> Vec<u8> {
fn binding_response(
request: &Message<Attribute>,
srflx_addr: SocketAddr,
) -> Message<Attribute> {
let mut message = Message::new(
MessageClass::SuccessResponse,
BINDING,
@@ -2759,10 +2750,10 @@ mod tests {
);
message.add_attribute(XorMappedAddress::new(srflx_addr));
encode(message)
message
}
fn unauthorized_response(request: &Message<Attribute>, nonce: &str) -> Vec<u8> {
fn unauthorized_response(request: &Message<Attribute>, nonce: &str) -> Message<Attribute> {
let mut message = Message::new(
MessageClass::ErrorResponse,
request.method(),
@@ -2772,10 +2763,10 @@ mod tests {
message.add_attribute(Realm::new("firezone".to_owned()).unwrap());
message.add_attribute(Nonce::new(nonce.to_owned()).unwrap());
encode(message)
message
}
fn server_error(request: &Message<Attribute>) -> Vec<u8> {
fn server_error(request: &Message<Attribute>) -> Message<Attribute> {
let mut message = Message::new(
MessageClass::ErrorResponse,
request.method(),
@@ -2783,10 +2774,10 @@ mod tests {
);
message.add_attribute(ErrorCode::from(ServerError));
encode(message)
message
}
fn stale_nonce_response(request: &Message<Attribute>, nonce: Nonce) -> Vec<u8> {
fn stale_nonce_response(request: &Message<Attribute>, nonce: Nonce) -> Message<Attribute> {
let mut message = Message::new(
MessageClass::ErrorResponse,
request.method(),
@@ -2796,10 +2787,10 @@ mod tests {
message.add_attribute(Realm::new("firezone".to_owned()).unwrap());
message.add_attribute(nonce);
encode(message)
message
}
fn allocation_mismatch(request: &Message<Attribute>) -> Vec<u8> {
fn allocation_mismatch(request: &Message<Attribute>) -> Message<Attribute> {
let mut message = Message::new(
MessageClass::ErrorResponse,
request.method(),
@@ -2807,7 +2798,7 @@ mod tests {
);
message.add_attribute(ErrorCode::from(AllocationMismatch));
encode(message)
message
}
fn channel_bind_bad_request(request: &Message<Attribute>) -> Message<Attribute> {
@@ -2863,14 +2854,14 @@ mod tests {
fn with_binding_response(mut self, srflx_addr: SocketAddr, now: Instant) -> Self {
let binding = self.next_message().unwrap();
self.handle_test_input_ip4(&binding_response(&binding, srflx_addr), now);
self.handle_test_input_ip4(binding_response(&binding, srflx_addr), now);
self
}
fn with_allocate_response(mut self, relay_addrs: &[SocketAddr], now: Instant) -> Self {
let allocate = self.next_message().unwrap();
self.handle_test_input_ip4(&allocate_response(&allocate, relay_addrs), now);
self.handle_test_input_ip4(allocate_response(&allocate, relay_addrs), now);
self
}
@@ -2882,7 +2873,7 @@ mod tests {
}
/// Wrapper around `handle_input` that always sets `RELAY` and `PEER1`.
fn handle_test_input_ip4(&mut self, packet: &[u8], now: Instant) -> bool {
fn handle_test_input_ip4(&mut self, packet: Message<Attribute>, now: Instant) -> bool {
self.handle_input(RELAY_V4.into(), PEER1, packet, now)
}

View File

@@ -805,19 +805,43 @@ where
packet: &'p [u8],
now: Instant,
) -> ControlFlow<(), (SocketAddr, &'p [u8], Option<Socket>)> {
if from.port() != 3478 {
// Relays always send & receive from port 3478.
//
// Some NATs may remap our p2p listening port (i.e. 52625 or another ephemeral one) to a port
// in the non-ephemeral range. If this happens, there is a chance that a peer is sending
// traffic originating from port 3478.
//
// The above check would wrongly classify a STUN request from such a peer as relay traffic and
// fail to process it because we don't have an `Allocation` for the peer's IP.
//
// Effectively this means that the connection will have to fallback to a relay-relay candidate pair.
return ControlFlow::Continue((from, packet, None));
}
match packet.first().copied() {
// STUN method range
Some(0..=3) => {
let Ok(Ok(message)) = allocation::decode(packet) else {
// False-positive, continue processing packet elsewhere
return ControlFlow::Continue((from, packet, None));
};
let Some(allocation) = self
.allocations
.values_mut()
.find(|a| a.server().matches(from))
else {
// False-positive, continue processing packet elsewhere
return ControlFlow::Continue((from, packet, None));
tracing::debug!(
%from,
packet = %hex::encode(packet),
"Packet was a STUN message but we are not connected to this relay"
);
return ControlFlow::Break(()); // Stop processing the packet.
};
if allocation.handle_input(from, local, packet, now) {
if allocation.handle_input(from, local, message, now) {
// Successfully handled the packet
return ControlFlow::Break(());
}

View File

@@ -189,3 +189,7 @@ cc a18f8702ecec7ce39aea445251697b7884cd2d38734a07d1e5400bb45f90f03b
cc 4a61f62e36ecaa6fdb937a7ba599c6f6a83c364db009b4ba5912bbab1f75f8ff
cc 1e1bbfd953f0662e602c94082c63ba50bec19aab95711a9de262ab3e4b146ef1
cc 000ae850850a75c30a36de2216c8f7ddec2c281f8beaf4f2797822b59895ecd1
cc 122b860cc7c9ab8e500c057d8b3183b1b0586d0a38d5e8e7c61a77458d06a716
cc 613cba6cf1b34d88c6733b1272aae83d4f71abfdbe66d2109f890ff7d3bf6692
cc d88ebc9e70446d9f8b602032d1d68e72175630d1073ec9410810bccac31d49cf
cc 0f7aea8d78b70ac939b68b97d93206c077744f677a473336d05b1217b6e042c4

View File

@@ -516,7 +516,7 @@ impl ReferenceState {
}
Transition::Idle => {}
Transition::PartitionRelaysFromPortal => {
if state.drop_direct_client_traffic {
if state.drop_direct_client_traffic || state.client.port == 3478 {
state.client.exec_mut(|client| client.reset_connections());
}
}

View File

@@ -25,6 +25,7 @@ use std::{
collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque},
mem,
net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr},
num::NonZeroU16,
time::Instant,
};
@@ -1141,7 +1142,7 @@ pub(crate) fn ref_client_host(
) -> impl Strategy<Value = Host<RefClient>> {
host(
any_ip_stack(),
Just(52625),
listening_port(),
ref_client(
tunnel_ip4s,
tunnel_ip6s,
@@ -1149,7 +1150,7 @@ pub(crate) fn ref_client_host(
upstream_dns,
search_domain,
),
latency(300), // TODO: Increase with #6062.
latency(250), // TODO: Increase with #6062.
)
}
@@ -1207,6 +1208,14 @@ fn ref_client(
)
}
fn listening_port() -> impl Strategy<Value = u16> {
prop_oneof![
Just(52625),
Just(3478), // Make sure connlib works even if a NAT is re-mapping our public port to a relay port.
any::<NonZeroU16>().prop_map(|p| p.get()),
]
}
fn default_routes_v4() -> Vec<Ipv4Network> {
vec![
Ipv4Network::new(Ipv4Addr::new(100, 64, 0, 0), 11).unwrap(),

View File

@@ -24,7 +24,7 @@ pub(crate) struct Host<T> {
pub(crate) ip4: Option<Ipv4Addr>,
pub(crate) ip6: Option<Ipv6Addr>,
port: u16,
pub(crate) port: u16,
#[debug(skip)]
allocated_ports: HashSet<(u16, AddressFamily)>,