From 81598dbaff9c43c3f7b0858a2714e6d78491e564 Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Thu, 30 Nov 2023 06:01:17 +1100 Subject: [PATCH] feat(relay): reduce packet drops (#2737) There is another channel which we didn't yet increase in size, the one between the allocation and the main task loop. Increasing to 1000 means each allocation can potentially buffer 65MB of data. With the biggest port range (16383 allocations), that would be a theoretical memory consumption of ~ 1TB. But, this would imply that we have 16383 connected clients that all send data at max speed, saturating our downlink, while our uplink is somehow ridiculously small. As long as uplink and downlink are roughly within the same ballpark figure, it should be impossible to actually fill up these buffers. I suspect that the current packet drops of the iperf test are happening because on localhost, sending 10 UDP packets is so quick that tokio is unable to wake up the task in time to empty the queue. In addition to the increased channel size, I've also added a check for the other channels to avoid writing to them in case they are not ready for some reason. --------- Co-authored-by: Jamil --- rust/relay/src/allocation.rs | 2 +- rust/relay/src/main.rs | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/rust/relay/src/allocation.rs b/rust/relay/src/allocation.rs index b51bf1753..cb6cf7a18 100644 --- a/rust/relay/src/allocation.rs +++ b/rust/relay/src/allocation.rs @@ -9,7 +9,7 @@ use std::net::SocketAddr; use tokio::task; /// The maximum amount of items that can be buffered in the channel to the allocation task. 
-const MAX_BUFFERED_ITEMS: usize = 10; +const MAX_BUFFERED_ITEMS: usize = 1000; pub struct Allocation { id: AllocationId, diff --git a/rust/relay/src/main.rs b/rust/relay/src/main.rs index 476906948..6cb22b009 100644 --- a/rust/relay/src/main.rs +++ b/rust/relay/src/main.rs @@ -17,7 +17,7 @@ use std::collections::HashMap; use std::convert::Infallible; use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr}; use std::pin::Pin; -use std::task::Poll; +use std::task::{ready, Poll}; use std::time::SystemTime; use tracing::{level_filters::LevelFilter, Instrument, Subscriber}; use tracing_core::Dispatch; @@ -367,6 +367,10 @@ where let _guard = span.enter(); loop { + // Don't fail these results. One of the senders might not be active because we might not be listening on IP4 / IP6. + let _ = ready!(self.outbound_ip4_data_sender.poll_ready_unpin(cx)); + let _ = ready!(self.outbound_ip6_data_sender.poll_ready_unpin(cx)); + let now = SystemTime::now(); // Priority 1: Execute the pending commands of the server. @@ -388,6 +392,7 @@ where ))); } + // Should never happen because we poll for readiness above. if e.is_full() { tracing::warn!(%recipient, "Dropping message because channel to primary UDP socket task is full"); }