From 3e6094af8dcfbbdf2aa55690b852d41a12e169bf Mon Sep 17 00:00:00 2001
From: Thomas Eizinger
Date: Wed, 17 Sep 2025 23:05:01 +0000
Subject: [PATCH] feat(linux): try to set `rmem_max` and `wmem_max` on startup (#10349)

The default send and receive buffer sizes on Linux are too small (only
~200 KB). Checking `nstat` after an iperf run revealed that the number
of dropped packets in the first interval directly correlates with the
number of receive buffer errors it reports.

We already try to increase the send and receive buffer sizes for our
UDP sockets, but we cannot increase them beyond the limits imposed by
the system. To work around this, we try to set `rmem_max` and
`wmem_max` during startup of the Linux headless client and Gateway.
This behaviour can be disabled by setting `FIREZONE_NO_INC_BUF=true`.

Unfortunately, this doesn't work in Docker, so we set the values
manually in the CI perf tests and verify after the test that we didn't
encounter any send or receive buffer errors.

How we should deal with this problem for the GUI clients is yet to be
determined; #10350 tracks that.

Unfortunately, this doesn't fix all packet drops during the first iperf
interval: with this PR, we now see packet drops on the interface
itself.
---
 .github/workflows/_integration_tests.yml      |  1 +
 .github/workflows/ci.yml                      | 36 +++++++++----------
 rust/Cargo.lock                               |  1 +
 rust/connlib/socket-factory/src/lib.rs        |  4 +--
 rust/gateway/src/main.rs                      | 22 ++++++++++++
 rust/headless-client/Cargo.toml               |  1 +
 rust/headless-client/src/main.rs              | 22 ++++++++++++
 website/src/app/kb/deploy/gateways/readme.mdx | 25 ++++++++-----
 website/src/components/Changelog/Gateway.tsx  | 11 ++++++
 9 files changed, 95 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/_integration_tests.yml b/.github/workflows/_integration_tests.yml
index c48744cc1..fe46d8aab 100644
--- a/.github/workflows/_integration_tests.yml
+++ b/.github/workflows/_integration_tests.yml
@@ -96,6 +96,7 @@ jobs:
       ELIXIR_TAG: ${{ inputs.elixir_tag }}
       HTTP_TEST_SERVER_IMAGE: ${{ inputs.http_test_server_image }}
       HTTP_TEST_SERVER_TAG: ${{ inputs.http_test_server_tag }}
+      FIREZONE_INC_BUF: true
     strategy:
       fail-fast: false
       matrix:
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 481ab49a2..dbefaeace 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -295,6 +295,7 @@ jobs:
       CLIENT_TAG: ${{ github.sha }}
       RELAY_IMAGE: "ghcr.io/firezone/perf/relay"
       RELAY_TAG: ${{ github.sha }}
+      FIREZONE_INC_BUF: true
     strategy:
       fail-fast: false
       matrix:
@@ -315,6 +316,10 @@
       - uses: docker/setup-docker-action@b60f85385d03ac8acfca6d9996982511d8620a19 # v4.3.0
       - name: Seed database
        run: docker compose run elixir /bin/sh -c 'cd apps/domain && mix ecto.seed --migrations-path priv/repo/migrations --migrations-path priv/repo/manual_migrations'
+      - name: Increase max UDP buffer sizes
+        run: |
+          sudo sysctl -w net.core.wmem_max=16777216 # 16 MB
+          sudo sysctl -w net.core.rmem_max=134217728 # 128 MB
       - name: Start docker compose in the background
         run: |
          # We need to increase the log level to make sure that they don't hold off storm of packets
@@ -352,9 +357,6 @@
       - name: Show Client logs
         if: "!cancelled()"
         run: docker compose logs client
-      - name: Show Client UDP stats
-        if: "!cancelled()"
-        run: docker compose exec client cat /proc/net/udp
       - name: Show Relay-1 logs
         if: "!cancelled()"
         run: docker compose logs relay-1
@@ -364,9 +366,6 @@
       - name: Show Gateway logs
         if: "!cancelled()"
         run: docker compose logs gateway
-      - name: Show Gateway UDP stats
-        if: "!cancelled()"
-        run: docker compose exec gateway cat /proc/net/udp
       - name: Show API logs
         if: "!cancelled()"
         run: docker compose logs api
@@ -374,30 +373,31 @@
       - name: Show iperf3 logs
         if: "!cancelled()"
         run: docker compose logs iperf3

-      - name: Ensure Client emitted no warnings
+      - name: Ensure no warnings are logged
         if: "!cancelled()"
         run: |
           docker compose logs client | grep "WARN" && exit 1 || exit 0
-      - name: Ensure Relay-1 emitted no warnings
-        if: "!cancelled()"
-        run: |
+
+          docker compose logs gateway |
+            grep "WARN" && exit 1 || exit 0
+
           # BTF doesn't load for veth interfaces
           docker compose logs relay-1 | \
             grep --invert "Object BTF couldn't be loaded in the kernel: the BPF_BTF_LOAD syscall failed." | \
             grep "WARN" && exit 1 || exit 0
-      - name: Ensure Relay-2 emitted no warnings
-        if: "!cancelled()"
-        run: |
-          # BTF doesn't load for veth interfaces
           docker compose logs relay-2 | \
             grep --invert "Object BTF couldn't be loaded in the kernel: the BPF_BTF_LOAD syscall failed." | \
             grep "WARN" && exit 1 || exit 0
-      - name: Ensure Gateway emitted no warnings
-        if: "!cancelled()"
+
+      - name: Ensure no UDP socket errors
+        if: "!cancelled() && startsWith(matrix.test, 'tcp')"
         run: |
-          docker compose logs gateway |
-            grep "WARN" && exit 1 || exit 0
+          docker compose exec client /bin/sh -c 'nstat -s' |
+            grep -i "error" && exit 1 || exit 0
+
+          docker compose exec gateway /bin/sh -c 'nstat -s' |
+            grep -i "error" && exit 1 || exit 0

   upload-bencher:
     continue-on-error: true
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index da8fd7e01..d932bca64 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -2519,6 +2519,7 @@ dependencies = [
  "rustls",
  "sd-notify",
  "secrecy",
+ "socket-factory",
  "tokio",
  "tokio-stream",
  "tracing",
diff --git a/rust/connlib/socket-factory/src/lib.rs b/rust/connlib/socket-factory/src/lib.rs
index 74a4ba500..b558453f9 100644
--- a/rust/connlib/socket-factory/src/lib.rs
+++ b/rust/connlib/socket-factory/src/lib.rs
@@ -22,8 +22,8 @@ pub trait SocketFactory: Send + Sync + 'static {
     fn reset(&self);
 }

-pub const SEND_BUFFER_SIZE: usize = ONE_MB;
-pub const RECV_BUFFER_SIZE: usize = 10 * ONE_MB;
+pub const SEND_BUFFER_SIZE: usize = 16 * ONE_MB;
+pub const RECV_BUFFER_SIZE: usize = 128 * ONE_MB;
 const ONE_MB: usize = 1024 * 1024;

 impl SocketFactory for F
diff --git a/rust/gateway/src/main.rs b/rust/gateway/src/main.rs
index ad4d249d9..d61fed4b1 100644
--- a/rust/gateway/src/main.rs
+++ b/rust/gateway/src/main.rs
@@ -101,6 +101,20 @@ async fn try_main(cli: Cli, telemetry: &mut Telemetry) -> Result<()> {
     tracing::debug!(?cli);

+    if cfg!(target_os = "linux") && cli.is_inc_buf_allowed() {
+        let recv_buf_size = socket_factory::RECV_BUFFER_SIZE;
+        let send_buf_size = socket_factory::SEND_BUFFER_SIZE;
+
+        match tokio::fs::write("/proc/sys/net/core/rmem_max", recv_buf_size.to_string()).await {
+            Ok(()) => tracing::info!("Set `core.rmem_max` to {recv_buf_size}",),
+            Err(e) => tracing::info!("Failed to increase `core.rmem_max`: {e}"),
+        };
+        match tokio::fs::write("/proc/sys/net/core/wmem_max", send_buf_size.to_string()).await {
+            Ok(()) => tracing::info!("Set `core.wmem_max` to {send_buf_size}",),
+            Err(e) => tracing::info!("Failed to increase `core.wmem_max`: {e}"),
+        };
+    }
+
     let firezone_id = get_firezone_id(cli.firezone_id.clone()).await
         .context("Couldn't read FIREZONE_ID or write it to disk: Please provide it through the env variable or provide rw access to /var/lib/firezone/")?;
@@ -290,6 +304,10 @@ struct Cli {
         default_value_t = false
     )]
     validate_checksums: bool,
+
+    /// Do not try to increase the `core.rmem_max` and `core.wmem_max` kernel parameters.
+    #[arg(long, env = "FIREZONE_NO_INC_BUF", default_value_t = false)]
+    no_inc_buf: bool,
 }

 #[derive(Debug, Clone, Copy, clap::ValueEnum)]
@@ -302,6 +320,10 @@ impl Cli {
     fn is_telemetry_allowed(&self) -> bool {
         !self.no_telemetry
     }
+
+    fn is_inc_buf_allowed(&self) -> bool {
+        !self.no_inc_buf
+    }
 }

 /// An adapter struct around [`Tun`] that validates IPv4, UDP and TCP checksums.
diff --git a/rust/headless-client/Cargo.toml b/rust/headless-client/Cargo.toml
index aad8e5be8..72b6d9a71 100644
--- a/rust/headless-client/Cargo.toml
+++ b/rust/headless-client/Cargo.toml
@@ -27,6 +27,7 @@ opentelemetry_sdk = { workspace = true, features = ["rt-tokio"] }
 phoenix-channel = { workspace = true }
 rustls = { workspace = true }
 secrecy = { workspace = true }
+socket-factory = { workspace = true }
 # This actually relies on many other features in Tokio, so this will probably
 # fail to build outside the workspace.
 tokio = { workspace = true, features = ["macros", "signal", "process", "time", "fs", "rt"] }
diff --git a/rust/headless-client/src/main.rs b/rust/headless-client/src/main.rs
index 7daa8ce4b..f4fb68621 100644
--- a/rust/headless-client/src/main.rs
+++ b/rust/headless-client/src/main.rs
@@ -128,6 +128,10 @@ struct Cli {
     // on disk somewhere anyway.)
     #[arg(default_value = platform::default_token_path().display().to_string(), env = "FIREZONE_TOKEN_PATH", long)]
     token_path: PathBuf,
+
+    /// Increase the `core.rmem_max` and `core.wmem_max` kernel parameters.
+    #[arg(long, env = "FIREZONE_INC_BUF", hide = true, default_value_t = false)]
+    inc_buf: bool,
 }

 #[derive(Debug, Clone, Copy, clap::ValueEnum)]
@@ -140,6 +144,10 @@ impl Cli {
     fn is_telemetry_allowed(&self) -> bool {
         !self.no_telemetry
     }
+
+    fn is_inc_buf_allowed(&self) -> bool {
+        self.inc_buf
+    }
 }

 #[derive(clap::Subcommand, Clone, Copy)]
@@ -211,6 +219,20 @@ fn try_main() -> Result<()> {
         .enable_all()
         .build()?;

+    if cfg!(target_os = "linux") && cli.is_inc_buf_allowed() {
+        let recv_buf_size = socket_factory::RECV_BUFFER_SIZE;
+        let send_buf_size = socket_factory::SEND_BUFFER_SIZE;
+
+        match std::fs::write("/proc/sys/net/core/rmem_max", recv_buf_size.to_string()) {
+            Ok(()) => tracing::info!("Set `core.rmem_max` to {recv_buf_size}",),
+            Err(e) => tracing::info!("Failed to increase `core.rmem_max`: {e}"),
+        };
+        match std::fs::write("/proc/sys/net/core/wmem_max", send_buf_size.to_string()) {
+            Ok(()) => tracing::info!("Set `core.wmem_max` to {send_buf_size}",),
+            Err(e) => tracing::info!("Failed to increase `core.wmem_max`: {e}"),
+        };
+    }
+
     // AKA "Device ID", not the Firezone slug
     let firezone_id = match cli.firezone_id.clone() {
         Some(id) => id,
diff --git a/website/src/app/kb/deploy/gateways/readme.mdx b/website/src/app/kb/deploy/gateways/readme.mdx
index a1f6b873d..ad6694626 100644
--- a/website/src/app/kb/deploy/gateways/readme.mdx
+++ b/website/src/app/kb/deploy/gateways/readme.mdx
@@ -134,15 +134,24 @@
 to distribute Client connections across them.

 ### Performance tuning

-The default receive buffer size on Linux is quite small which can limit the
-maximum throughput that users perceive in "upload scenarios" (i.e. where the
-Gateway needs to receive large volumes of traffic).
+The default receive and send buffer sizes on Linux are quite small which can
+limit the maximum throughput that users can achieve.

-On startup, the Gateway attempts to increase the size of the UDP receive buffers
-to 10 MB. However, the actual size of the receive buffer is limited by the
-`net.core.rmem_max` kernel parameter. For the increased buffer size to take
-effect, you may need to increase the `net.core.rmem_max` parameter on the
-Gateway's host system.
+The Gateway attempts to increase the _system-wide_ UDP send and receive buffer
+max sizes to 16 and 128 MB respectively. It does that by setting
+`net.core.wmem_max` and `net.core.rmem_max`. This will, however, only work for
+Gateways deployed on bare metal and not via Docker, as Docker mounts `/proc/sys`
+as a read-only file system.
+
+To disable this behaviour, set `FIREZONE_NO_INC_BUF=true`.
+
+To ensure good throughput, it is advisable to make sure these parameters are
+set correctly:
+
+```sh
+sudo sysctl -w net.core.wmem_max=16777216 # 16 MB
+sudo sysctl -w net.core.rmem_max=134217728 # 128 MB
+```

 ## Deploy a single Gateway
diff --git a/website/src/components/Changelog/Gateway.tsx b/website/src/components/Changelog/Gateway.tsx
index 2b669b817..2a488f601 100644
--- a/website/src/components/Changelog/Gateway.tsx
+++ b/website/src/components/Changelog/Gateway.tsx
@@ -27,6 +27,17 @@ export default function Gateway() {
         Fixes a rare CPU-spike issue in case a Client connected with many
         possible IPv6 addresses.
+
+        Attempts to increase the system-wide parameters `core.rmem_max` to 128
+        MB and `core.wmem_max` to 16 MB for improved performance. See the{" "}
+
+          Performance tuning
+
+        section for details.
+
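
On bare-metal hosts, the limits touched by this patch can also be made persistent across reboots with a sysctl drop-in, so they hold even if the startup adjustment is disabled. The snippet below is only a sketch using the same values as the patch; the drop-in file name is an arbitrary choice and not something Firezone ships:

```sh
# Persist the UDP buffer limits used above (128 MB receive, 16 MB send).
# The file name under /etc/sysctl.d/ is arbitrary.
cat <<'EOF' | sudo tee /etc/sysctl.d/99-firezone-udp-buffers.conf
net.core.rmem_max = 134217728
net.core.wmem_max = 16777216
EOF

# Reload all sysctl configuration files so the values apply immediately.
sudo sysctl --system
```

Running `sysctl net.core.rmem_max net.core.wmem_max` afterwards should print the new values.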