From 76e55e61386d812d4b4f2d51137abbb107c8694b Mon Sep 17 00:00:00 2001 From: Reactor Scram Date: Thu, 27 Jun 2024 17:59:22 +0000 Subject: [PATCH] fix(client/windows): fix upload speed by letting Wintun queue packets again (#5598) Closes #5589. Refs #5571 Improves upload speeds on my Windows 11 VM from 2 Mbps to 10.5 Mbps. On the resource-constrained VM it improved from 3 to 7 Mbps. ```[tasklist] ### Tasks - [x] Open for review - [x] Manual test on resource-constrained VM - [x] Run 5x replication steps from #5571 and make sure it doesn't deadlock again - [x] Merge - [ ] https://github.com/firezone/firezone/issues/5601 ``` Sorted by decreasing speed, M = macOS host, W = Windows guest in Parallels, RC = Resource-constrained Windows guest in VirtualBox: - M, Internet - 16 Mbps - W, Internet - 13 Mbps - M, Firezone - 12 Mbps - RC, Internet - 12 Mbps - W, Firezone, after this PR - 10.5 Mbps - RC, Firezone, after this PR - 8.5 Mbps - RC, Firezone, before this PR - 4 Mbps - W, Firezone, before this PR - 2 Mbps So it's not perfect but the worst part is fixed. The slow upload speeds were probably a regression from #5571. The MPSC channel only has a few spots in it, so if connlib doesn't pick up every packet immediately (which would be impossible under load), we drop packets. I measured 25% packet drops in an earlier commit. I first tried increasing the channel size from 5 to 64, and that worked. But this solution is simpler. I switch back to `blocking_send` so if connlib isn't clearing the MPSC channel, Wintun will just queue up packets in its internal ring buffers, and we aren't responsible for buffering. Getting rid of `blocking_send` was a defense-in-depth thing to fix the deadlock yesterday, but we still close the MPSC channel inside `Tun::drop`, and I confirmed in a manual test that this will kick the worker thread out of `blocking_send`, so the deadlock won't come back. --- .../tunnel/src/device_channel/tun_windows.rs | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/rust/connlib/tunnel/src/device_channel/tun_windows.rs b/rust/connlib/tunnel/src/device_channel/tun_windows.rs index 8270aaf6e..56094544c 100644 --- a/rust/connlib/tunnel/src/device_channel/tun_windows.rs +++ b/rust/connlib/tunnel/src/device_channel/tun_windows.rs @@ -14,7 +14,7 @@ use std::{ sync::Arc, task::{ready, Context, Poll}, }; -use tokio::sync::mpsc::{self, error::TrySendError}; +use tokio::sync::mpsc; use windows::Win32::{ NetworkManagement::{ IpHelper::{ @@ -92,7 +92,9 @@ impl Tun { set_iface_config(adapter.get_luid(), MTU as u32)?; let session = Arc::new(adapter.start_session(wintun::MAX_RING_CAPACITY)?); - let (packet_tx, packet_rx) = mpsc::channel(5); + // 4 is a nice power of two. Wintun already queues packets for us, so we don't + // need much capacity here. + let (packet_tx, packet_rx) = mpsc::channel(4); let recv_thread = start_recv_thread(packet_tx, Arc::clone(&session))?; Ok(Self { @@ -128,6 +130,7 @@ impl Tun { Ok(()) } + // Moves packets from the user towards the Internet pub fn poll_read(&mut self, buf: &mut [u8], cx: &mut Context<'_>) -> Poll> { let pkt = ready!(self.packet_rx.poll_recv(cx)); @@ -163,6 +166,7 @@ impl Tun { self.write(bytes) } + // Moves packets from the Internet towards the user #[allow(clippy::unnecessary_wraps)] // Fn signature must align with other platform implementations. fn write(&self, bytes: &[u8]) -> io::Result { let len = bytes @@ -241,6 +245,7 @@ pub(crate) fn flush_dns() -> Result<()> { Ok(()) } +// Moves packets from the user towards the Internet fn start_recv_thread( packet_tx: mpsc::Sender, session: Arc, @@ -251,27 +256,29 @@ fn start_recv_thread( loop { match session.receive_blocking() { Ok(pkt) => { - match packet_tx.try_send(pkt) { + // Use `blocking_send` so that if connlib is behind by a few packets, + // Wintun will queue up new packets in its ring buffer while we + // wait for our MPSC channel to clear. + match packet_tx.blocking_send(pkt) { Ok(()) => {} - Err(TrySendError::Closed(_)) => { - // This is redundant since we aren't using - // `blocking_send` anymore but it's defense in depth. + Err(_) => { tracing::info!( - "Closing worker thread because packet channel closed" + "Stopping outbound worker thread because the packet channel closed" ); break; } - Err(TrySendError::Full(_)) => {} // Just drop the packet, it's IP } } - Err(wintun::Error::ShuttingDown) => break, + Err(wintun::Error::ShuttingDown) => { + tracing::info!("Stopping outbound worker thread because Wintun is shutting down"); + break; + } Err(e) => { tracing::error!("wintun::Session::receive_blocking: {e:#?}"); break; } } } - tracing::info!("recv_task exiting gracefully"); }) }