mirror of
https://github.com/outbackdingo/firezone.git
synced 2026-01-28 02:18:50 +00:00
## Context

At present, we only have a single thread that reads and writes to the TUN device on all platforms. On Linux, it is possible to open the file descriptor of a TUN device multiple times by setting the `IFF_MULTI_QUEUE` option using `ioctl`. Using multi-queue, we can then spawn multiple threads that concurrently read and write to the TUN device. This is critical for achieving better throughput.

## Solution

`IFF_MULTI_QUEUE` is a Linux-only feature and therefore only applies to the headless-client, the GUI-client on Linux and the Gateway (it may also be possible on Android, I haven't tried). As such, we need to first change our internal abstractions a bit to move the creation of the TUN thread to the `Tun` abstraction itself. For this, we change the interface of `Tun` to the following:

- `poll_recv_many`: An API, inspired by tokio's `mpsc::Receiver`, where multiple items in a channel can be batch-received.
- `poll_send_ready`: Mimics the API of `Sink` to check whether more items can be written.
- `send`: Mimics the API of `Sink` to actually send an item.

With these APIs in place, we can implement various (performance) improvements for the different platforms.

- On Linux, this allows us to spawn multiple threads to read and write from the TUN device and send all packets into the same channel. The `Io` component of `connlib` then uses `poll_recv_many` to read batches of up to 100 packets at once. This ties in well with #7210 because we can then use GSO to send the encrypted packets in single syscalls to the OS.
- On Windows, we already have a dedicated recv thread because `WinTun`'s most-convenient API uses blocking IO. As such, we can now also tie into that by batch-receiving from this channel.
- In addition to using multiple threads, this API now also uses correct readiness checks on Linux, Darwin and Android to uphold backpressure in case we cannot write to the TUN device.
## Configuration

Local testing has shown that 2 threads give the best performance for a local `iperf3` run. I suspect this is because there is only so much traffic that a single application (i.e. `iperf3`) can generate. With more than 2 threads, the throughput actually drops drastically because `connlib`'s main thread is too busy with lock-contention and triggering `Waker`s for the TUN threads (which mostly idle around if there are 4+ of them).

I've made it configurable on the Gateway though so we can experiment with this during concurrent speedtests etc.

In addition, switching `connlib` to a single-threaded tokio runtime further increased the throughput. I suspect this is due to less task / context switching.

## Results

Local testing with `iperf3` shows some very promising results. We now achieve a throughput of 2+ Gbit/s.

```
Connecting to host 172.20.0.110, port 5201
Reverse mode, remote host 172.20.0.110 is sending
[ 5] local 100.80.159.34 port 57040 connected to 172.20.0.110 port 5201
[ ID] Interval Transfer Bitrate
[ 5] 0.00-1.00 sec 274 MBytes 2.30 Gbits/sec
[ 5] 1.00-2.00 sec 279 MBytes 2.34 Gbits/sec
[ 5] 2.00-3.00 sec 216 MBytes 1.82 Gbits/sec
[ 5] 3.00-4.00 sec 224 MBytes 1.88 Gbits/sec
[ 5] 4.00-5.00 sec 234 MBytes 1.96 Gbits/sec
[ 5] 5.00-6.00 sec 238 MBytes 2.00 Gbits/sec
[ 5] 6.00-7.00 sec 229 MBytes 1.92 Gbits/sec
[ 5] 7.00-8.00 sec 222 MBytes 1.86 Gbits/sec
[ 5] 8.00-9.00 sec 223 MBytes 1.87 Gbits/sec
[ 5] 9.00-10.00 sec 217 MBytes 1.82 Gbits/sec
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval Transfer Bitrate Retr
[ 5] 0.00-10.00 sec 2.30 GBytes 1.98 Gbits/sec 22247 sender
[ 5] 0.00-10.00 sec 2.30 GBytes 1.98 Gbits/sec receiver
iperf Done.
```

This is a pretty solid improvement over what is in `main`:

```
Connecting to host 172.20.0.110, port 5201
[ 5] local 100.65.159.3 port 56970 connected to 172.20.0.110 port 5201
[ ID] Interval Transfer Bitrate Retr Cwnd
[ 5] 0.00-1.00 sec 90.4 MBytes 758 Mbits/sec 1800 106 KBytes
[ 5] 1.00-2.00 sec 93.4 MBytes 783 Mbits/sec 1550 51.6 KBytes
[ 5] 2.00-3.00 sec 92.6 MBytes 777 Mbits/sec 1350 76.8 KBytes
[ 5] 3.00-4.00 sec 92.9 MBytes 779 Mbits/sec 1800 56.4 KBytes
[ 5] 4.00-5.00 sec 93.4 MBytes 783 Mbits/sec 1650 69.6 KBytes
[ 5] 5.00-6.00 sec 90.6 MBytes 760 Mbits/sec 1500 73.2 KBytes
[ 5] 6.00-7.00 sec 87.6 MBytes 735 Mbits/sec 1400 76.8 KBytes
[ 5] 7.00-8.00 sec 92.6 MBytes 777 Mbits/sec 1600 82.7 KBytes
[ 5] 8.00-9.00 sec 91.1 MBytes 764 Mbits/sec 1500 70.8 KBytes
[ 5] 9.00-10.00 sec 92.0 MBytes 771 Mbits/sec 1550 85.1 KBytes
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval Transfer Bitrate Retr
[ 5] 0.00-10.00 sec 917 MBytes 769 Mbits/sec 15700 sender
[ 5] 0.00-10.00 sec 916 MBytes 768 Mbits/sec receiver
iperf Done.
```
211 lines
6.8 KiB
TOML
211 lines
6.8 KiB
TOML
# Root manifest of the Cargo workspace.
[workspace]
# Member crates, as paths relative to this file. Kept in alphabetical order.
members = [
    "bin-shared",
    "connlib/clients/android",
    "connlib/clients/apple",
    "connlib/clients/shared",
    "connlib/model",
    "connlib/snownet",
    "connlib/tunnel",
    "dns-over-tcp",
    "gateway",
    "gui-client/src-common",
    "gui-client/src-tauri",
    "headless-client",
    "ip-packet",
    "logging",
    "phoenix-channel",
    "relay",
    "socket-factory",
    "telemetry",
    "tests/gui-smoke-test",
    "tests/http-test-server",
    "tun",
]

# Use version 2 of Cargo's feature resolver for the whole workspace.
resolver = "2"

# Package metadata that member crates can inherit (e.g. `license.workspace = true`).
[workspace.package]
license = "Apache-2.0"
edition = "2021"

# Dependency versions shared across the workspace. Member crates opt in with
# `<name> = { workspace = true }`.
[workspace.dependencies]
# Third-party crates, in alphabetical order.
anyhow = "1.0.93"
arboard = { version = "3.4.0", default-features = false }
async-trait = { version = "0.1", default-features = false }
atomicwrites = "0.4.4"
axum = { version = "0.7.7", default-features = false }
backoff = { version = "0.4", features = ["tokio"] }
base64 = { version = "0.22.1", default-features = false }
bimap = "0.6"
boringtun = { version = "0.6", default-features = false }
bytecodec = "0.4.15"
bytes = { version = "1.9.0", default-features = false }
chrono = { version = "0.4", default-features = false, features = ["std", "clock", "oldtime", "serde"] }
clap = "4.5.21"
derive_more = "1.0.0"
difference = "2.0.0"
dirs = "5.0.1"
divan = "0.1.14"
dns-lookup = "2.0"
domain = { version = "0.10", features = ["serde"] }
either = "1"
env_logger = "0.11.3"
etherparse = "0.16"
flume = { version = "0.11.1", features = ["async"] }
futures = { version = "0.3.31", default-features = false }
futures-bounded = "0.2.1"
futures-util = { version = "0.3", default-features = false }
glob = "0.3.1"
hex = "0.4.3"
hex-display = "0.3.0"
hex-literal = "0.4.1"
humantime = "2.1"
ip_network = { version = "0.4", default-features = false }
ip_network_table = { version = "0.2", default-features = false }
itertools = "0.13"
jni = "0.21.1"
keyring = "3.2.1"
known-folders = "1.2.0"
libc = "0.2.150"
log = "0.4"
lru = "0.12.5"
mio = "1.0.1"
native-dialog = "0.7.0"
nix = "0.29.0"
nu-ansi-term = "0.50"
once_cell = "1.17.1"
opentelemetry = "0.26.0"
opentelemetry-otlp = "0.26.0"
opentelemetry_sdk = "0.26.0"
os_info = { version = "3", default-features = false }
output_vt100 = "0.1"
png = "0.17.13"
proptest = "1"
proptest-state-machine = "0.3"
quinn-udp = { version = "0.5.7", features = ["fast-apple-datapath"] }
rand = "0.8.5"
rand_core = "0.6.4"
rangemap = "1.5.1"
reqwest = { version = "0.12.5", default-features = false }
rtnetlink = { version = "0.14.1", default-features = false, features = ["tokio_socket"] }
rustls = { version = "0.23.10", default-features = false, features = ["ring"] }
sadness-generator = "0.6.0"
secrecy = "0.8"
semver = "1.0.22"
sentry = { version = "0.35.0", default-features = false }
sentry-tracing = "0.35.0"
serde = "1.0.215"
serde_json = "1.0.133"
serde_variant = "0.1.3"
sha2 = "0.10.8"
smallvec = "1.13.2"
smbios-lib = "0.9.2"
smoltcp = { version = "0.11", default-features = false }
socket2 = "0.5"
static_assertions = "1.1.0"
str0m = { version = "0.6.3", default-features = false, features = ["sha1"] }
stun_codec = "0.3.4"
subprocess = "0.2.9"
subtle = "2.5.0"
swift-bridge = "0.1.57"
swift-bridge-build = "0.1.57"
tauri = "2.0.3"
tauri-build = "2.0.1"
tauri-plugin-dialog = "2.0.1"
tauri-plugin-notification = "2.0.1"
tauri-plugin-shell = "2.0.2"
tauri-runtime = "2.1.0"
tauri-utils = "2.0.1"
tempfile = "3.13.0"
test-case = "3.3.1"
test-strategy = "0.4.0"
thiserror = "1.0.68"
time = "0.3.36"
tokio = "1.41"
tokio-stream = "0.1.16"
tokio-tungstenite = "0.23.1"
tokio-util = "0.7.11"
tracing = "0.1.40"
tracing-appender = "0.2.3"
tracing-core = "0.1.31"
tracing-log = "0.2.0"
tracing-macros = { git = "https://github.com/tokio-rs/tracing", branch = "v0.1.x" } # Contains `dbg!` but for `tracing`.
tracing-opentelemetry = "0.27.0"
tracing-stackdriver = "0.11.0"
tracing-subscriber = { version = "0.3.17", features = ["parking_lot"] }
trackable = "1.3.0"
url = "2.5.2"
uuid = "1.10.0"
windows = "0.58.0"
winreg = "0.52.0"
zip = { version = "2", default-features = false }

# Crates that live in this workspace, referenced by path. Alphabetical order.
connlib-client-android = { path = "connlib/clients/android" }
connlib-client-apple = { path = "connlib/clients/apple" }
connlib-client-shared = { path = "connlib/clients/shared" }
connlib-model = { path = "connlib/model" }
dns-over-tcp = { path = "dns-over-tcp" }
firezone-bin-shared = { path = "bin-shared" }
firezone-gui-client-common = { path = "gui-client/src-common" }
firezone-headless-client = { path = "headless-client" }
firezone-logging = { path = "logging" }
firezone-relay = { path = "relay" }
firezone-telemetry = { path = "telemetry" }
firezone-tunnel = { path = "connlib/tunnel" }
ip-packet = { path = "ip-packet" }
phoenix-channel = { path = "phoenix-channel" }
snownet = { path = "connlib/snownet" }
socket-factory = { path = "socket-factory" }
tun = { path = "tun" }

# Clippy lint levels shared by crates that set `[lints] workspace = true`.
[workspace.lints.clippy]
dbg_macro = "warn"
print_stdout = "warn"
print_stderr = "warn"
unnecessary_wraps = "warn"
unused_async = "warn"
wildcard_enum_match_arm = "warn" # Ensures we match on all combinations of `Poll`, preventing erroneous suspensions.
redundant_else = "warn"
redundant_clone = "warn"
unwrap_in_result = "warn"
unwrap_used = "warn"

# Rustdoc lint levels shared by crates that set `[lints] workspace = true`.
[workspace.lints.rustdoc]
# We don't publish any of our docs but want to catch dead links.
private-intra-doc-links = "allow"

# Replace the crates.io releases of these crates with git versions for every
# crate in the workspace. Each entry tracks a branch rather than a fixed revision.
[patch.crates-io]
smoltcp = { git = "https://github.com/smoltcp-rs/smoltcp", branch = "main" }
quinn-udp = { git = "https://github.com/quinn-rs/quinn", branch = "main" }
boringtun = { git = "https://github.com/firezone/boringtun", branch = "master" }
str0m = { git = "https://github.com/algesten/str0m", branch = "main" }
ip_network = { git = "https://github.com/JakubOnderka/ip_network", branch = "master" } # Waiting for release.
ip_network_table = { git = "https://github.com/edmonds/ip_network_table", branch = "some-useful-traits" } # For `Debug` and `Clone`
proptest = { git = "https://github.com/proptest-rs/proptest", branch = "main" }
proptest-state-machine = { git = "https://github.com/proptest-rs/proptest", branch = "main" }
tracing-stackdriver = { git = "https://github.com/thomaseizinger/tracing-stackdriver", branch = "bump-otel-0.26" } # Waiting for release.
sentry = { git = "https://github.com/getsentry/sentry-rust", branch = "master" }
sentry-tracing = { git = "https://github.com/getsentry/sentry-rust", branch = "master" }

# `tracing-macros` is pulled from the `tracing` git repository. Patch that
# source so it depends on the released `tracing` version from crates.io
# instead of the copy in the git repository.
[patch.'https://github.com/tokio-rs/tracing']
tracing = "0.1.41"

[profile.release]
# Full link-time optimization. Reduces binaries by up to 3x on some platforms.
lto = "fat"

# Increases the compiler's ability to produce smaller, optimized code
# at the expense of compilation time.
codegen-units = 1

# Override build settings just for the GUI client, so we get a pdb/dwp.
# Cargo ignores profile settings if they're not in the workspace's Cargo.toml,
# which is why this lives here rather than in the GUI client's own manifest.
[profile.dev.package.firezone-gui-client]
debug = "full"
split-debuginfo = "packed"

# Release builds of the GUI client use the same debug-info settings as its
# dev profile override.
[profile.release.package.firezone-gui-client]
debug = "full"
split-debuginfo = "packed"