From 272e4b2bcdf5a7721cb5458a553924a862b50a2e Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Thu, 15 Aug 2024 04:10:56 +0100 Subject: [PATCH] feat(snownet,relay): include sticky session ID in STUN requests (#6278) For most cases, TURN identifies clients by their 3-tuple. This can make it hard to correlate logs in case the client roams or its NAT session gets reset, both of which cause the port to change. To make problem analysis easier, we include the RFC-recommended `SOFTWARE` attribute in all STUN requests created by `snownet`. Typically, this includes a textual description of who sent the request and a version number. See [0] for details. We don't track the version of `snownet` individually and passing the actual client-version across this many layers is deemed too complicated for now. What we can add though is a parameter that includes a sticky session ID. This session ID is computed based on the `Node`'s public key, meaning it doesn't change until the user logs-out and in again. On the relay, we now look for a `SOFTWARE` attribute in all STUN requests and optionally include it in all spans if it is present. [0]: https://datatracker.ietf.org/doc/html/rfc5389#section-15.10 --- rust/Cargo.lock | 1 + rust/connlib/snownet/Cargo.toml | 1 + rust/connlib/snownet/src/allocation.rs | 81 ++++++++++++++++++------- rust/connlib/snownet/src/node.rs | 35 ++++++++++- rust/relay/src/server.rs | 15 ++--- rust/relay/src/server/client_message.rs | 51 +++++++++++++++- 6 files changed, 152 insertions(+), 32 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 7f545bdae..3fc4770a0 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -5552,6 +5552,7 @@ dependencies = [ "once_cell", "rand 0.8.5", "secrecy", + "sha2", "str0m", "stun_codec", "thiserror", diff --git a/rust/connlib/snownet/Cargo.toml b/rust/connlib/snownet/Cargo.toml index 01db821a7..c8181d5c0 100644 --- a/rust/connlib/snownet/Cargo.toml +++ b/rust/connlib/snownet/Cargo.toml @@ -14,6 +14,7 @@ ip-packet = { workspace = true } once_cell = "1.17.1" rand = "0.8" secrecy = { workspace = true } +sha2 = "0.10.8" str0m = { workspace = true } stun_codec = "0.3.4" thiserror = "1" diff --git a/rust/connlib/snownet/src/allocation.rs b/rust/connlib/snownet/src/allocation.rs index a26730a6f..bb02895f1 100644 --- a/rust/connlib/snownet/src/allocation.rs +++ b/rust/connlib/snownet/src/allocation.rs @@ -1,6 +1,6 @@ use crate::{ backoff::{self, ExponentialBackoff}, - node::{CandidateEvent, Transmit}, + node::{CandidateEvent, SessionId, Transmit}, ringbuffer::RingBuffer, utils::earliest, }; @@ -17,7 +17,9 @@ use std::{ use str0m::{net::Protocol, Candidate}; use stun_codec::{ rfc5389::{ - attributes::{ErrorCode, MessageIntegrity, Nonce, Realm, Username, XorMappedAddress}, + attributes::{ + ErrorCode, MessageIntegrity, Nonce, Realm, Software, Username, XorMappedAddress, + }, errors::{StaleNonce, Unauthorized}, methods::BINDING, }, @@ -51,6 +53,8 @@ pub struct Allocation { /// Whatever comes back first, wins. active_socket: Option, + software: Software, + /// If present, the IPv4 address the relay observed for us. ip4_srflx_candidate: Option, /// If present, the IPv6 address the relay observed for us. @@ -191,6 +195,7 @@ impl Allocation { password: String, realm: Realm, now: Instant, + session_id: SessionId, ) -> Self { let mut allocation = Self { server, @@ -212,6 +217,7 @@ impl Allocation { channel_bindings: Default::default(), last_now: now, buffered_channel_bindings: RingBuffer::new(100), + software: Software::new(format!("snownet; session={session_id}")).unwrap(), }; allocation.send_binding_requests(); @@ -349,21 +355,30 @@ impl Allocation { ALLOCATE => { // AllocationMismatch during allocate means we already have an allocation. // Delete it. - self.authenticate_and_queue(make_delete_allocation_request(), None); + self.authenticate_and_queue( + make_delete_allocation_request(self.software.clone()), + None, + ); tracing::debug!("Deleting existing allocation to re-sync"); } REFRESH => { // AllocationMismatch for refresh means we don't have an allocation. // Make one. - self.authenticate_and_queue(make_allocate_request(), None); + self.authenticate_and_queue( + make_allocate_request(self.software.clone()), + None, + ); tracing::debug!("Making new allocation to re-sync"); } CHANNEL_BIND => { // AllocationMismatch for channel-bind means we don't have an allocation. // Make one. - self.authenticate_and_queue(make_allocate_request(), None); + self.authenticate_and_queue( + make_allocate_request(self.software.clone()), + None, + ); tracing::debug!("Making new allocation to re-sync"); @@ -454,9 +469,9 @@ impl Allocation { tracing::debug!(active_socket = %original_dst, "Updating active socket"); if self.has_allocation() { - self.authenticate_and_queue(make_refresh_request(), None); + self.authenticate_and_queue(make_refresh_request(self.software.clone()), None); } else { - self.authenticate_and_queue(make_allocate_request(), None); + self.authenticate_and_queue(make_allocate_request(self.software.clone()), None); } } ALLOCATE => { @@ -509,7 +524,7 @@ impl Allocation { // If we refreshed with a lifetime of 0, we deleted our previous allocation. // Make a new one. if lifetime.lifetime().is_zero() { - self.authenticate_and_queue(make_allocate_request(), None); + self.authenticate_and_queue(make_allocate_request(self.software.clone()), None); return true; } @@ -616,7 +631,7 @@ impl Allocation { if let Some(refresh_at) = self.refresh_allocation_at() { if (now >= refresh_at) && !self.refresh_in_flight() { tracing::debug!("Allocation is due for a refresh"); - self.authenticate_and_queue(make_refresh_request(), None); + self.authenticate_and_queue(make_refresh_request(self.software.clone()), None); } } @@ -628,7 +643,7 @@ impl Allocation { .inspect(|(number, peer)| { tracing::debug!(%number, %peer, "Channel is due for a refresh"); }) - .map(|(number, peer)| make_channel_bind_request(peer, number)) + .map(|(number, peer)| make_channel_bind_request(peer, number, self.software.clone())) .collect::>(); // Need to allocate here to satisfy borrow-checker. Number of channel refresh messages should be small so this shouldn't be a big impact. for message in channel_refresh_messages { @@ -700,7 +715,10 @@ impl Allocation { return; }; - self.authenticate_and_queue(make_channel_bind_request(peer, channel), None); + self.authenticate_and_queue( + make_channel_bind_request(peer, channel, self.software.clone()), + None, + ); } /// Encodes the packet contained in the given buffer into a [`Transmit`]. @@ -902,10 +920,18 @@ impl Allocation { fn send_binding_requests(&mut self) { if let Some(v4) = self.server.as_v4() { - self.queue((*v4).into(), make_binding_request(), None); + self.queue( + (*v4).into(), + make_binding_request(self.software.clone()), + None, + ); } if let Some(v6) = self.server.as_v6() { - self.queue((*v6).into(), make_binding_request(), None); + self.queue( + (*v6).into(), + make_binding_request(self.software.clone()), + None, + ); } } @@ -1030,11 +1056,14 @@ fn update_candidate( } } -fn make_binding_request() -> Message { - Message::new(MessageClass::Request, BINDING, TransactionId::new(random())) +fn make_binding_request(software: Software) -> Message { + let mut message = Message::new(MessageClass::Request, BINDING, TransactionId::new(random())); + message.add_attribute(software); + + message } -fn make_allocate_request() -> Message { +fn make_allocate_request(software: Software) -> Message { let mut message = Message::new( MessageClass::Request, ALLOCATE, @@ -1045,30 +1074,36 @@ fn make_allocate_request() -> Message { message.add_attribute(AdditionalAddressFamily::new( stun_codec::rfc8656::attributes::AddressFamily::V6, )); + message.add_attribute(software); message } /// To delete an allocation, we need to refresh it with a lifetime of 0. -fn make_delete_allocation_request() -> Message { - let mut refresh = make_refresh_request(); +fn make_delete_allocation_request(software: Software) -> Message { + let mut refresh = make_refresh_request(software); refresh.add_attribute(Lifetime::from_u32(0)); refresh } -fn make_refresh_request() -> Message { +fn make_refresh_request(software: Software) -> Message { let mut message = Message::new(MessageClass::Request, REFRESH, TransactionId::new(random())); message.add_attribute(RequestedTransport::new(17)); message.add_attribute(AdditionalAddressFamily::new( stun_codec::rfc8656::attributes::AddressFamily::V6, )); + message.add_attribute(software); message } -fn make_channel_bind_request(target: SocketAddr, channel: u16) -> Message { +fn make_channel_bind_request( + target: SocketAddr, + channel: u16, + software: Software, +) -> Message { let mut message = Message::new( MessageClass::Request, CHANNEL_BIND, @@ -1077,6 +1112,7 @@ fn make_channel_bind_request(target: SocketAddr, channel: u16) -> Message { private_key: StaticSecret, public_key: PublicKey, + session_id: SessionId, index: IndexLfsr, rate_limiter: Arc, @@ -128,6 +131,7 @@ where let public_key = &(&private_key).into(); Self { rng: StdRng::from_seed(seed), // TODO: Use this seed for private key too. Requires refactoring of how we generate the login-url because that one needs to know the public key. + session_id: SessionId::new(*public_key), private_key, public_key: *public_key, marker: Default::default(), @@ -526,7 +530,14 @@ where self.allocations.insert( *rid, - Allocation::new(*server, username, password.clone(), realm, now), + Allocation::new( + *server, + username, + password.clone(), + realm, + now, + self.session_id.clone(), + ), ); tracing::info!(%rid, address = ?server, "Added new TURN server"); @@ -1822,3 +1833,25 @@ fn new_agent() -> IceAgent { agent } + +/// A session ID is constant for as long as a [`Node`] is operational. +#[derive(Debug, Default, Clone)] +pub(crate) struct SessionId([u8; 32]); + +impl SessionId { + /// Construct a new session ID by hashing the node's public key with a domain-separator. + fn new(key: PublicKey) -> Self { + Self( + sha2::Sha256::new_with_prefix(b"SESSION-ID") + .chain_update(key.as_bytes()) + .finalize() + .into(), + ) + } +} + +impl fmt::Display for SessionId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:X}", &self.0.hex()) + } +} diff --git a/rust/relay/src/server.rs b/rust/relay/src/server.rs index fba5389be..a18434ca2 100644 --- a/rust/relay/src/server.rs +++ b/rust/relay/src/server.rs @@ -24,7 +24,7 @@ use std::net::{IpAddr, SocketAddr}; use std::ops::RangeInclusive; use std::time::{Duration, Instant, SystemTime}; use stun_codec::rfc5389::attributes::{ - ErrorCode, MessageIntegrity, Nonce, Realm, Username, XorMappedAddress, + ErrorCode, MessageIntegrity, Nonce, Realm, Software, Username, XorMappedAddress, }; use stun_codec::rfc5389::errors::{BadRequest, StaleNonce, Unauthorized}; use stun_codec::rfc5389::methods::BINDING; @@ -416,7 +416,7 @@ where } } - #[tracing::instrument(level = "info", skip_all, fields(tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))] + #[tracing::instrument(level = "info", skip_all, fields(software = request.software().map(|s| field::display(s.description())), tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))] fn handle_binding_request(&mut self, request: Binding, sender: ClientSocket) { let mut message = Message::new( MessageClass::SuccessResponse, @@ -433,7 +433,7 @@ where /// Handle a TURN allocate request. /// /// See for details. - #[tracing::instrument(level = "info", skip_all, fields(allocation, tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))] + #[tracing::instrument(level = "info", skip_all, fields(allocation, software = request.software().map(|s| field::display(s.description())), tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))] fn handle_allocate_request( &mut self, request: Allocate, @@ -552,7 +552,7 @@ where /// Handle a TURN refresh request. /// /// See for details. - #[tracing::instrument(level = "info", skip_all, fields(allocation, tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))] + #[tracing::instrument(level = "info", skip_all, fields(allocation, software = request.software().map(|s| field::display(s.description())), tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))] fn handle_refresh_request( &mut self, request: Refresh, @@ -601,7 +601,7 @@ where /// Handle a TURN channel bind request. /// /// See for details. - #[tracing::instrument(level = "info", skip_all, fields(allocation, peer, channel, tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))] + #[tracing::instrument(level = "info", skip_all, fields(allocation, peer, channel, software = request.software().map(|s| field::display(s.description())), tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))] fn handle_channel_bind_request( &mut self, request: ChannelBind, @@ -705,7 +705,7 @@ where /// /// This TURN server implementation does not support relaying data other than through channels. /// Thus, creating a permission is a no-op that always succeeds. - #[tracing::instrument(level = "info", skip_all, fields(tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))] + #[tracing::instrument(level = "info", skip_all, fields(software = request.software().map(|s| field::display(s.description())), tid = %format_args!("{:X}", request.transaction_id().as_bytes().hex()), %sender))] fn handle_create_permission_request( &mut self, request: CreatePermission, @@ -1270,7 +1270,8 @@ stun_codec::define_attribute_enums!( Realm, Username, RequestedAddressFamily, - AdditionalAddressFamily + AdditionalAddressFamily, + Software ] ); diff --git a/rust/relay/src/server/client_message.rs b/rust/relay/src/server/client_message.rs index 21796f9ee..a8416591b 100644 --- a/rust/relay/src/server/client_message.rs +++ b/rust/relay/src/server/client_message.rs @@ -6,7 +6,7 @@ use bytecodec::DecodeExt; use secrecy::SecretString; use std::io; use std::time::Duration; -use stun_codec::rfc5389::attributes::{ErrorCode, MessageIntegrity, Nonce, Username}; +use stun_codec::rfc5389::attributes::{ErrorCode, MessageIntegrity, Nonce, Software, Username}; use stun_codec::rfc5389::errors::BadRequest; use stun_codec::rfc5389::methods::BINDING; use stun_codec::rfc5766::attributes::{ @@ -113,22 +113,34 @@ impl ClientMessage<'_> { #[derive(Debug)] pub struct Binding { transaction_id: TransactionId, + software: Option, } impl Binding { pub fn new(transaction_id: TransactionId) -> Self { - Self { transaction_id } + Self { + transaction_id, + software: None, + } } pub fn parse(message: &Message) -> Self { let transaction_id = message.transaction_id(); + let software = message.get_attribute::().cloned(); - Binding { transaction_id } + Binding { + transaction_id, + software, + } } pub fn transaction_id(&self) -> TransactionId { self.transaction_id } + + pub fn software(&self) -> Option<&Software> { + self.software.as_ref() + } } pub struct Allocate { @@ -140,6 +152,7 @@ pub struct Allocate { nonce: Option, requested_address_family: Option, additional_address_family: Option, + software: Option, } impl Allocate { @@ -168,6 +181,7 @@ impl Allocate { nonce: Some(nonce), requested_address_family: None, // IPv4 is the default. additional_address_family: None, + software: None, } } @@ -198,6 +212,7 @@ impl Allocate { nonce: Some(nonce), requested_address_family: Some(requested_address_family), additional_address_family: None, + software: None, } } @@ -224,6 +239,7 @@ impl Allocate { nonce: None, requested_address_family: None, additional_address_family: None, + software: None, } } @@ -275,6 +291,7 @@ impl Allocate { let username = message.get_attribute::().cloned(); let requested_address_family = message.get_attribute::().cloned(); let additional_address_family = message.get_attribute::().cloned(); + let software = message.get_attribute::().cloned(); Ok(Allocate { transaction_id, @@ -285,6 +302,7 @@ impl Allocate { nonce, requested_address_family, additional_address_family, + software, }) } @@ -319,6 +337,10 @@ impl Allocate { pub fn additional_address_family(&self) -> Option<&AdditionalAddressFamily> { self.additional_address_family.as_ref() } + + pub fn software(&self) -> Option<&Software> { + self.software.as_ref() + } } pub struct Refresh { @@ -327,6 +349,7 @@ pub struct Refresh { lifetime: Option, username: Option, nonce: Option, + software: Option, } impl Refresh { @@ -362,6 +385,7 @@ impl Refresh { lifetime, username: Some(username), nonce: Some(nonce), + software: None, } } @@ -371,6 +395,7 @@ impl Refresh { let nonce = message.get_attribute::().cloned(); let lifetime = message.get_attribute::().cloned(); let username = message.get_attribute::().cloned(); + let software = message.get_attribute::().cloned(); Refresh { transaction_id, @@ -378,6 +403,7 @@ impl Refresh { lifetime, username, nonce, + software, } } @@ -400,6 +426,10 @@ impl Refresh { pub fn nonce(&self) -> Option<&Nonce> { self.nonce.as_ref() } + + pub fn software(&self) -> Option<&Software> { + self.software.as_ref() + } } pub struct ChannelBind { @@ -409,6 +439,7 @@ pub struct ChannelBind { nonce: Option, xor_peer_address: XorPeerAddress, username: Option, + software: Option, } impl ChannelBind { @@ -445,6 +476,7 @@ impl ChannelBind { xor_peer_address, username: Some(username), nonce: Some(nonce), + software: None, } } @@ -461,6 +493,7 @@ impl ChannelBind { .get_attribute::() .ok_or(bad_request(message))? .clone(); + let software = message.get_attribute::().cloned(); Ok(ChannelBind { transaction_id, @@ -469,6 +502,7 @@ impl ChannelBind { nonce, xor_peer_address, username, + software, }) } @@ -495,6 +529,10 @@ impl ChannelBind { pub fn nonce(&self) -> Option<&Nonce> { self.nonce.as_ref() } + + pub fn software(&self) -> Option<&Software> { + self.software.as_ref() + } } pub struct CreatePermission { @@ -502,6 +540,7 @@ pub struct CreatePermission { message_integrity: Option, username: Option, nonce: Option, + software: Option, } impl CreatePermission { @@ -510,12 +549,14 @@ impl CreatePermission { let message_integrity = message.get_attribute::().cloned(); let username = message.get_attribute::().cloned(); let nonce = message.get_attribute::().cloned(); + let software = message.get_attribute::().cloned(); CreatePermission { transaction_id, message_integrity, username, nonce, + software, } } @@ -534,6 +575,10 @@ impl CreatePermission { pub fn nonce(&self) -> Option<&Nonce> { self.nonce.as_ref() } + + pub fn software(&self) -> Option<&Software> { + self.software.as_ref() + } } /// Computes the effective lifetime of an allocation.