feat(snownet,relay): include sticky session ID in STUN requests (#6278)

For most cases, TURN identifies clients by their 3-tuple. This can make
it hard to correlate logs in case the client roams or its NAT session
gets reset, both of which cause the port to change.

To make problem analysis easier, we include the RFC-recommended
`SOFTWARE` attribute in all STUN requests created by `snownet`.
Typically, this includes a textual description of who sent the request
and a version number. See [0] for details. We don't track the version of
`snownet` individually and passing the actual client-version across this
many layers is deemed too complicated for now.

What we can add though is a parameter that includes a sticky session ID.
This session ID is computed based on the `Node`'s public key, meaning it
doesn't change until the user logs-out and in again.

On the relay, we now look for a `SOFTWARE` attribute in all STUN
requests and optionally include it in all spans if it is present.

[0]: https://datatracker.ietf.org/doc/html/rfc5389#section-15.10
This commit is contained in:
Thomas Eizinger
2024-08-15 04:10:56 +01:00
committed by GitHub
parent 7c70850217
commit 272e4b2bcd
6 changed files with 152 additions and 32 deletions

1
rust/Cargo.lock generated
View File

@@ -5552,6 +5552,7 @@ dependencies = [
"once_cell",
"rand 0.8.5",
"secrecy",
"sha2",
"str0m",
"stun_codec",
"thiserror",

View File

@@ -14,6 +14,7 @@ ip-packet = { workspace = true }
once_cell = "1.17.1"
rand = "0.8"
secrecy = { workspace = true }
sha2 = "0.10.8"
str0m = { workspace = true }
stun_codec = "0.3.4"
thiserror = "1"

View File

@@ -1,6 +1,6 @@
use crate::{
backoff::{self, ExponentialBackoff},
node::{CandidateEvent, Transmit},
node::{CandidateEvent, SessionId, Transmit},
ringbuffer::RingBuffer,
utils::earliest,
};
@@ -17,7 +17,9 @@ use std::{
use str0m::{net::Protocol, Candidate};
use stun_codec::{
rfc5389::{
attributes::{ErrorCode, MessageIntegrity, Nonce, Realm, Username, XorMappedAddress},
attributes::{
ErrorCode, MessageIntegrity, Nonce, Realm, Software, Username, XorMappedAddress,
},
errors::{StaleNonce, Unauthorized},
methods::BINDING,
},
@@ -51,6 +53,8 @@ pub struct Allocation {
/// Whatever comes back first, wins.
active_socket: Option<SocketAddr>,
software: Software,
/// If present, the IPv4 address the relay observed for us.
ip4_srflx_candidate: Option<Candidate>,
/// If present, the IPv6 address the relay observed for us.
@@ -191,6 +195,7 @@ impl Allocation {
password: String,
realm: Realm,
now: Instant,
session_id: SessionId,
) -> Self {
let mut allocation = Self {
server,
@@ -212,6 +217,7 @@ impl Allocation {
channel_bindings: Default::default(),
last_now: now,
buffered_channel_bindings: RingBuffer::new(100),
software: Software::new(format!("snownet; session={session_id}")).unwrap(),
};
allocation.send_binding_requests();
@@ -349,21 +355,30 @@ impl Allocation {
ALLOCATE => {
// AllocationMismatch during allocate means we already have an allocation.
// Delete it.
self.authenticate_and_queue(make_delete_allocation_request(), None);
self.authenticate_and_queue(
make_delete_allocation_request(self.software.clone()),
None,
);
tracing::debug!("Deleting existing allocation to re-sync");
}
REFRESH => {
// AllocationMismatch for refresh means we don't have an allocation.
// Make one.
self.authenticate_and_queue(make_allocate_request(), None);
self.authenticate_and_queue(
make_allocate_request(self.software.clone()),
None,
);
tracing::debug!("Making new allocation to re-sync");
}
CHANNEL_BIND => {
// AllocationMismatch for channel-bind means we don't have an allocation.
// Make one.
self.authenticate_and_queue(make_allocate_request(), None);
self.authenticate_and_queue(
make_allocate_request(self.software.clone()),
None,
);
tracing::debug!("Making new allocation to re-sync");
@@ -454,9 +469,9 @@ impl Allocation {
tracing::debug!(active_socket = %original_dst, "Updating active socket");
if self.has_allocation() {
self.authenticate_and_queue(make_refresh_request(), None);
self.authenticate_and_queue(make_refresh_request(self.software.clone()), None);
} else {
self.authenticate_and_queue(make_allocate_request(), None);
self.authenticate_and_queue(make_allocate_request(self.software.clone()), None);
}
}
ALLOCATE => {
@@ -509,7 +524,7 @@ impl Allocation {
// If we refreshed with a lifetime of 0, we deleted our previous allocation.
// Make a new one.
if lifetime.lifetime().is_zero() {
self.authenticate_and_queue(make_allocate_request(), None);
self.authenticate_and_queue(make_allocate_request(self.software.clone()), None);
return true;
}
@@ -616,7 +631,7 @@ impl Allocation {
if let Some(refresh_at) = self.refresh_allocation_at() {
if (now >= refresh_at) && !self.refresh_in_flight() {
tracing::debug!("Allocation is due for a refresh");
self.authenticate_and_queue(make_refresh_request(), None);
self.authenticate_and_queue(make_refresh_request(self.software.clone()), None);
}
}
@@ -628,7 +643,7 @@ impl Allocation {
.inspect(|(number, peer)| {
tracing::debug!(%number, %peer, "Channel is due for a refresh");
})
.map(|(number, peer)| make_channel_bind_request(peer, number))
.map(|(number, peer)| make_channel_bind_request(peer, number, self.software.clone()))
.collect::<Vec<_>>(); // Need to allocate here to satisfy borrow-checker. Number of channel refresh messages should be small so this shouldn't be a big impact.
for message in channel_refresh_messages {
@@ -700,7 +715,10 @@ impl Allocation {
return;
};
self.authenticate_and_queue(make_channel_bind_request(peer, channel), None);
self.authenticate_and_queue(
make_channel_bind_request(peer, channel, self.software.clone()),
None,
);
}
/// Encodes the packet contained in the given buffer into a [`Transmit`].
@@ -902,10 +920,18 @@ impl Allocation {
fn send_binding_requests(&mut self) {
if let Some(v4) = self.server.as_v4() {
self.queue((*v4).into(), make_binding_request(), None);
self.queue(
(*v4).into(),
make_binding_request(self.software.clone()),
None,
);
}
if let Some(v6) = self.server.as_v6() {
self.queue((*v6).into(), make_binding_request(), None);
self.queue(
(*v6).into(),
make_binding_request(self.software.clone()),
None,
);
}
}
@@ -1030,11 +1056,14 @@ fn update_candidate(
}
}
fn make_binding_request() -> Message<Attribute> {
Message::new(MessageClass::Request, BINDING, TransactionId::new(random()))
fn make_binding_request(software: Software) -> Message<Attribute> {
let mut message = Message::new(MessageClass::Request, BINDING, TransactionId::new(random()));
message.add_attribute(software);
message
}
fn make_allocate_request() -> Message<Attribute> {
fn make_allocate_request(software: Software) -> Message<Attribute> {
let mut message = Message::new(
MessageClass::Request,
ALLOCATE,
@@ -1045,30 +1074,36 @@ fn make_allocate_request() -> Message<Attribute> {
message.add_attribute(AdditionalAddressFamily::new(
stun_codec::rfc8656::attributes::AddressFamily::V6,
));
message.add_attribute(software);
message
}
/// To delete an allocation, we need to refresh it with a lifetime of 0.
fn make_delete_allocation_request() -> Message<Attribute> {
let mut refresh = make_refresh_request();
fn make_delete_allocation_request(software: Software) -> Message<Attribute> {
let mut refresh = make_refresh_request(software);
refresh.add_attribute(Lifetime::from_u32(0));
refresh
}
fn make_refresh_request() -> Message<Attribute> {
fn make_refresh_request(software: Software) -> Message<Attribute> {
let mut message = Message::new(MessageClass::Request, REFRESH, TransactionId::new(random()));
message.add_attribute(RequestedTransport::new(17));
message.add_attribute(AdditionalAddressFamily::new(
stun_codec::rfc8656::attributes::AddressFamily::V6,
));
message.add_attribute(software);
message
}
fn make_channel_bind_request(target: SocketAddr, channel: u16) -> Message<Attribute> {
fn make_channel_bind_request(
target: SocketAddr,
channel: u16,
software: Software,
) -> Message<Attribute> {
let mut message = Message::new(
MessageClass::Request,
CHANNEL_BIND,
@@ -1077,6 +1112,7 @@ fn make_channel_bind_request(target: SocketAddr, channel: u16) -> Message<Attrib
message.add_attribute(XorPeerAddress::new(target));
message.add_attribute(ChannelNumber::new(channel).unwrap());
message.add_attribute(software);
message
}
@@ -1148,7 +1184,8 @@ stun_codec::define_attribute_enums!(
XorRelayAddress,
XorPeerAddress,
ChannelNumber,
Lifetime
Lifetime,
Software
]
);
@@ -2593,6 +2630,7 @@ mod tests {
"baz".to_owned(),
Realm::new("firezone".to_owned()).unwrap(),
start,
SessionId::default(),
)
}
@@ -2606,6 +2644,7 @@ mod tests {
"baz".to_owned(),
Realm::new("firezone".to_owned()).unwrap(),
start,
SessionId::default(),
)
}

View File

@@ -8,6 +8,7 @@ use boringtun::noise::{Tunn, TunnResult};
use boringtun::x25519::PublicKey;
use boringtun::{noise::rate_limiter::RateLimiter, x25519::StaticSecret};
use core::fmt;
use hex_display::HexDisplayExt;
use ip_packet::{
ConvertibleIpv4Packet, ConvertibleIpv6Packet, IpPacket, MutableIpPacket, Packet as _,
};
@@ -15,6 +16,7 @@ use rand::rngs::StdRng;
use rand::seq::IteratorRandom;
use rand::{random, SeedableRng};
use secrecy::{ExposeSecret, Secret};
use sha2::Digest;
use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::hash::Hash;
@@ -80,6 +82,7 @@ pub enum Client {}
pub struct Node<T, TId, RId> {
private_key: StaticSecret,
public_key: PublicKey,
session_id: SessionId,
index: IndexLfsr,
rate_limiter: Arc<RateLimiter>,
@@ -128,6 +131,7 @@ where
let public_key = &(&private_key).into();
Self {
rng: StdRng::from_seed(seed), // TODO: Use this seed for private key too. Requires refactoring of how we generate the login-url because that one needs to know the public key.
session_id: SessionId::new(*public_key),
private_key,
public_key: *public_key,
marker: Default::default(),
@@ -526,7 +530,14 @@ where
self.allocations.insert(
*rid,
Allocation::new(*server, username, password.clone(), realm, now),
Allocation::new(
*server,
username,
password.clone(),
realm,
now,
self.session_id.clone(),
),
);
tracing::info!(%rid, address = ?server, "Added new TURN server");
@@ -1822,3 +1833,25 @@ fn new_agent() -> IceAgent {
agent
}
/// A session ID is constant for as long as a [`Node`] is operational.
#[derive(Debug, Default, Clone)]
pub(crate) struct SessionId([u8; 32]);
impl SessionId {
/// Construct a new session ID by hashing the node's public key with a domain-separator.
fn new(key: PublicKey) -> Self {
Self(
sha2::Sha256::new_with_prefix(b"SESSION-ID")
.chain_update(key.as_bytes())
.finalize()
.into(),
)
}
}
impl fmt::Display for SessionId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:X}", &self.0.hex())
}
}

View File

@@ -24,7 +24,7 @@ use std::net::{IpAddr, SocketAddr};
use std::ops::RangeInclusive;
use std::time::{Duration, Instant, SystemTime};
use stun_codec::rfc5389::attributes::{
ErrorCode, MessageIntegrity, Nonce, Realm, Username, XorMappedAddress,
ErrorCode, MessageIntegrity, Nonce, Realm, Software, Username, XorMappedAddress,
};
use stun_codec::rfc5389::errors::{BadRequest, StaleNonce, Unauthorized};
use stun_codec::rfc5389::methods::BINDING;
@@ -416,7 +416,7 @@ where
}
}
#[tracing::instrument(level = "info", skip_all, fields(tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))]
#[tracing::instrument(level = "info", skip_all, fields(software = request.software().map(|s| field::display(s.description())), tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))]
fn handle_binding_request(&mut self, request: Binding, sender: ClientSocket) {
let mut message = Message::new(
MessageClass::SuccessResponse,
@@ -433,7 +433,7 @@ where
/// Handle a TURN allocate request.
///
/// See <https://www.rfc-editor.org/rfc/rfc8656#name-receiving-an-allocate-reque> for details.
#[tracing::instrument(level = "info", skip_all, fields(allocation, tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))]
#[tracing::instrument(level = "info", skip_all, fields(allocation, software = request.software().map(|s| field::display(s.description())), tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))]
fn handle_allocate_request(
&mut self,
request: Allocate,
@@ -552,7 +552,7 @@ where
/// Handle a TURN refresh request.
///
/// See <https://www.rfc-editor.org/rfc/rfc8656#name-receiving-a-refresh-request> for details.
#[tracing::instrument(level = "info", skip_all, fields(allocation, tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))]
#[tracing::instrument(level = "info", skip_all, fields(allocation, software = request.software().map(|s| field::display(s.description())), tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))]
fn handle_refresh_request(
&mut self,
request: Refresh,
@@ -601,7 +601,7 @@ where
/// Handle a TURN channel bind request.
///
/// See <https://www.rfc-editor.org/rfc/rfc8656#name-receiving-a-channelbind-req> for details.
#[tracing::instrument(level = "info", skip_all, fields(allocation, peer, channel, tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))]
#[tracing::instrument(level = "info", skip_all, fields(allocation, peer, channel, software = request.software().map(|s| field::display(s.description())), tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))]
fn handle_channel_bind_request(
&mut self,
request: ChannelBind,
@@ -705,7 +705,7 @@ where
///
/// This TURN server implementation does not support relaying data other than through channels.
/// Thus, creating a permission is a no-op that always succeeds.
#[tracing::instrument(level = "info", skip_all, fields(tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))]
#[tracing::instrument(level = "info", skip_all, fields(software = request.software().map(|s| field::display(s.description())), tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))]
fn handle_create_permission_request(
&mut self,
request: CreatePermission,
@@ -1270,7 +1270,8 @@ stun_codec::define_attribute_enums!(
Realm,
Username,
RequestedAddressFamily,
AdditionalAddressFamily
AdditionalAddressFamily,
Software
]
);

View File

@@ -6,7 +6,7 @@ use bytecodec::DecodeExt;
use secrecy::SecretString;
use std::io;
use std::time::Duration;
use stun_codec::rfc5389::attributes::{ErrorCode, MessageIntegrity, Nonce, Username};
use stun_codec::rfc5389::attributes::{ErrorCode, MessageIntegrity, Nonce, Software, Username};
use stun_codec::rfc5389::errors::BadRequest;
use stun_codec::rfc5389::methods::BINDING;
use stun_codec::rfc5766::attributes::{
@@ -113,22 +113,34 @@ impl ClientMessage<'_> {
#[derive(Debug)]
pub struct Binding {
transaction_id: TransactionId,
software: Option<Software>,
}
impl Binding {
pub fn new(transaction_id: TransactionId) -> Self {
Self { transaction_id }
Self {
transaction_id,
software: None,
}
}
pub fn parse(message: &Message<Attribute>) -> Self {
let transaction_id = message.transaction_id();
let software = message.get_attribute::<Software>().cloned();
Binding { transaction_id }
Binding {
transaction_id,
software,
}
}
pub fn transaction_id(&self) -> TransactionId {
self.transaction_id
}
pub fn software(&self) -> Option<&Software> {
self.software.as_ref()
}
}
pub struct Allocate {
@@ -140,6 +152,7 @@ pub struct Allocate {
nonce: Option<Nonce>,
requested_address_family: Option<RequestedAddressFamily>,
additional_address_family: Option<AdditionalAddressFamily>,
software: Option<Software>,
}
impl Allocate {
@@ -168,6 +181,7 @@ impl Allocate {
nonce: Some(nonce),
requested_address_family: None, // IPv4 is the default.
additional_address_family: None,
software: None,
}
}
@@ -198,6 +212,7 @@ impl Allocate {
nonce: Some(nonce),
requested_address_family: Some(requested_address_family),
additional_address_family: None,
software: None,
}
}
@@ -224,6 +239,7 @@ impl Allocate {
nonce: None,
requested_address_family: None,
additional_address_family: None,
software: None,
}
}
@@ -275,6 +291,7 @@ impl Allocate {
let username = message.get_attribute::<Username>().cloned();
let requested_address_family = message.get_attribute::<RequestedAddressFamily>().cloned();
let additional_address_family = message.get_attribute::<AdditionalAddressFamily>().cloned();
let software = message.get_attribute::<Software>().cloned();
Ok(Allocate {
transaction_id,
@@ -285,6 +302,7 @@ impl Allocate {
nonce,
requested_address_family,
additional_address_family,
software,
})
}
@@ -319,6 +337,10 @@ impl Allocate {
pub fn additional_address_family(&self) -> Option<&AdditionalAddressFamily> {
self.additional_address_family.as_ref()
}
pub fn software(&self) -> Option<&Software> {
self.software.as_ref()
}
}
pub struct Refresh {
@@ -327,6 +349,7 @@ pub struct Refresh {
lifetime: Option<Lifetime>,
username: Option<Username>,
nonce: Option<Nonce>,
software: Option<Software>,
}
impl Refresh {
@@ -362,6 +385,7 @@ impl Refresh {
lifetime,
username: Some(username),
nonce: Some(nonce),
software: None,
}
}
@@ -371,6 +395,7 @@ impl Refresh {
let nonce = message.get_attribute::<Nonce>().cloned();
let lifetime = message.get_attribute::<Lifetime>().cloned();
let username = message.get_attribute::<Username>().cloned();
let software = message.get_attribute::<Software>().cloned();
Refresh {
transaction_id,
@@ -378,6 +403,7 @@ impl Refresh {
lifetime,
username,
nonce,
software,
}
}
@@ -400,6 +426,10 @@ impl Refresh {
pub fn nonce(&self) -> Option<&Nonce> {
self.nonce.as_ref()
}
pub fn software(&self) -> Option<&Software> {
self.software.as_ref()
}
}
pub struct ChannelBind {
@@ -409,6 +439,7 @@ pub struct ChannelBind {
nonce: Option<Nonce>,
xor_peer_address: XorPeerAddress,
username: Option<Username>,
software: Option<Software>,
}
impl ChannelBind {
@@ -445,6 +476,7 @@ impl ChannelBind {
xor_peer_address,
username: Some(username),
nonce: Some(nonce),
software: None,
}
}
@@ -461,6 +493,7 @@ impl ChannelBind {
.get_attribute::<XorPeerAddress>()
.ok_or(bad_request(message))?
.clone();
let software = message.get_attribute::<Software>().cloned();
Ok(ChannelBind {
transaction_id,
@@ -469,6 +502,7 @@ impl ChannelBind {
nonce,
xor_peer_address,
username,
software,
})
}
@@ -495,6 +529,10 @@ impl ChannelBind {
pub fn nonce(&self) -> Option<&Nonce> {
self.nonce.as_ref()
}
pub fn software(&self) -> Option<&Software> {
self.software.as_ref()
}
}
pub struct CreatePermission {
@@ -502,6 +540,7 @@ pub struct CreatePermission {
message_integrity: Option<MessageIntegrity>,
username: Option<Username>,
nonce: Option<Nonce>,
software: Option<Software>,
}
impl CreatePermission {
@@ -510,12 +549,14 @@ impl CreatePermission {
let message_integrity = message.get_attribute::<MessageIntegrity>().cloned();
let username = message.get_attribute::<Username>().cloned();
let nonce = message.get_attribute::<Nonce>().cloned();
let software = message.get_attribute::<Software>().cloned();
CreatePermission {
transaction_id,
message_integrity,
username,
nonce,
software,
}
}
@@ -534,6 +575,10 @@ impl CreatePermission {
pub fn nonce(&self) -> Option<&Nonce> {
self.nonce.as_ref()
}
pub fn software(&self) -> Option<&Software> {
self.software.as_ref()
}
}
/// Computes the effective lifetime of an allocation.