refactor(connlib): use a lock-free queue for the buffer pool (#9989)
We use several buffer pools across `connlib` that are all backed by the same buffer-pool library. Within that library, we currently use another object-pool library to provide the actual pooling functionality. Benchmarking has shown that we spend quite a bit of time (a few % of total CPU time) fighting for the lock to either add or remove a buffer from the pool.

This is unnecessary. By using a queue, we can remove buffers from the front and add buffers at the back, both of which can be implemented in a lock-free way so that they don't contend. The well-known `crossbeam-queue` library gives us such a queue out of the box.

I wasn't able to directly measure a performance gain in terms of throughput. What we can measure, though, is how much time we spend dealing with our buffer pool vs everything else. If we compare the `perf` outputs recorded during an `iperf` run each, we spend about 60% less time dealing with the buffer pool than we did before.

|Before|After|
|---|---|
|<img width="1982" height="553" alt="Screenshot From 2025-07-24 20-27-50" src="https://github.com/user-attachments/assets/1698f28b-5821-456f-95fa-d6f85d901920" />|<img width="1982" height="553" alt="Screenshot From 2025-07-24 20-27-53" src="https://github.com/user-attachments/assets/4f26a2d1-03e3-4c0d-84da-82c53b9761dd" />|

The number in the thousands on the left is how often the respective function was the currently executing function during the profiling run.

Resolves: #9972
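The gist of the change, as a minimal standalone sketch (this is not the actual `BufferPool` code; the buffer size and variable names are illustrative): buffers are popped from the front of a lock-free `crossbeam_queue::SegQueue` and pushed back onto it when we are done with them, so producers and consumers of buffers no longer serialize on a pool lock.

```rust
use crossbeam_queue::SegQueue;

fn main() {
    let pool: SegQueue<Vec<u8>> = SegQueue::new();

    // Take a buffer: pop one from the queue, or allocate if the pool is empty.
    let mut buf = pool.pop().unwrap_or_else(|| Vec::with_capacity(1500));

    buf.extend_from_slice(b"some packet payload");
    // ... use the buffer ...

    // Return the buffer: clear the contents but keep the allocation,
    // then push it onto the back of the queue for the next caller.
    buf.clear();
    pool.push(buf);
}
```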
rust/Cargo.lock (generated), 17 lines changed:
```diff
@@ -998,7 +998,7 @@ name = "bufferpool"
 version = "0.1.0"
 dependencies = [
  "bytes",
- "lockfree-object-pool",
+ "crossbeam-queue",
  "opentelemetry",
  "opentelemetry_sdk",
  "tokio",
@@ -1589,6 +1589,15 @@ dependencies = [
  "crossbeam-utils",
 ]
 
+[[package]]
+name = "crossbeam-queue"
+version = "0.3.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115"
+dependencies = [
+ "crossbeam-utils",
+]
+
 [[package]]
 name = "crossbeam-utils"
 version = "0.8.21"
@@ -4173,12 +4182,6 @@ dependencies = [
  "scopeguard",
 ]
 
-[[package]]
-name = "lockfree-object-pool"
-version = "0.1.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e"
-
 [[package]]
 name = "log"
 version = "0.4.27"
```
rust/Cargo.toml:

```diff
@@ -63,6 +63,7 @@ chrono = { version = "0.4", default-features = false, features = ["std", "clock"
 clap = "4.5.41"
 client-shared = { path = "client-shared" }
 connlib-model = { path = "connlib/model" }
+crossbeam-queue = "0.3.12"
 dashmap = "6.1.0"
 derive_more = "2.0.1"
 difference = "2.0.0"
@@ -101,7 +102,6 @@ l3-tcp = { path = "connlib/l3-tcp" }
 l4-tcp-dns-server = { path = "connlib/l4-tcp-dns-server" }
 l4-udp-dns-server = { path = "connlib/l4-udp-dns-server" }
 libc = "0.2.174"
-lockfree-object-pool = "0.1.6"
 log = "0.4"
 lru = "0.12.5"
 mio = "1.0.4"
```
The bufferpool crate's Cargo.toml:

```diff
@@ -9,7 +9,7 @@ path = "lib.rs"
 
 [dependencies]
 bytes = { workspace = true }
-lockfree-object-pool = { workspace = true }
+crossbeam-queue = { workspace = true }
 opentelemetry = { workspace = true, features = ["metrics"] }
 tracing = { workspace = true }
 
```
The bufferpool crate's lib.rs:

```diff
@@ -6,11 +6,26 @@ use std::{
 };
 
 use bytes::BytesMut;
+use crossbeam_queue::SegQueue;
 use opentelemetry::{KeyValue, metrics::UpDownCounter};
 
-#[derive(Clone)]
+/// A lock-free pool of buffers that are all equal in size.
+///
+/// The buffers are stored in a queue ([`SegQueue`]) and taken from the front and push to the back.
+/// This minimizes contention even under high load where buffers are constantly needed and returned.
 pub struct BufferPool<B> {
-    inner: Arc<lockfree_object_pool::SpinLockObjectPool<BufferStorage<B>>>,
+    inner: Arc<SegQueue<BufferStorage<B>>>,
+
+    new_buffer_fn: Arc<dyn Fn() -> BufferStorage<B> + Send + Sync>,
 }
 
+impl<B> Clone for BufferPool<B> {
+    fn clone(&self) -> Self {
+        Self {
+            inner: self.inner.clone(),
+            new_buffer_fn: self.new_buffer_fn.clone(),
+        }
+    }
+}
+
 impl<B> BufferPool<B>
```
```diff
@@ -25,26 +40,28 @@ where
             .build();
 
         Self {
-            inner: Arc::new(lockfree_object_pool::SpinLockObjectPool::new(
-                move || {
-                    BufferStorage::new(
-                        B::with_capacity(capacity),
-                        buffer_counter.clone(),
-                        [
-                            KeyValue::new("system.buffer.pool.name", tag),
-                            KeyValue::new("system.buffer.pool.buffer_size", capacity as i64),
-                        ],
-                    )
-                },
-                |_| {},
-            )),
+            inner: Arc::new(SegQueue::new()),
+
+            // TODO: It would be nice to eventually create a fixed amount of buffers upfront.
+            // This however means that getting a buffer can fail which would require us to implement back-pressure.
+            new_buffer_fn: Arc::new(move || {
+                BufferStorage::new(
+                    B::with_capacity(capacity),
+                    buffer_counter.clone(),
+                    [
+                        KeyValue::new("system.buffer.pool.name", tag),
+                        KeyValue::new("system.buffer.pool.buffer_size", capacity as i64),
+                    ],
+                )
+            }),
         }
     }
 
     pub fn pull(&self) -> Buffer<B> {
         Buffer {
-            inner: self.inner.pull_owned(),
+            inner: Some(self.inner.pop().unwrap_or_else(|| (self.new_buffer_fn)())),
             pool: self.inner.clone(),
+            new_buffer_fn: self.new_buffer_fn.clone(),
         }
     }
 }
```
```diff
@@ -65,8 +82,10 @@ where
 }
 
 pub struct Buffer<B> {
-    inner: lockfree_object_pool::SpinLockOwnedReusable<BufferStorage<B>>,
-    pool: Arc<lockfree_object_pool::SpinLockObjectPool<BufferStorage<B>>>,
+    inner: Option<BufferStorage<B>>,
+
+    pool: Arc<SegQueue<BufferStorage<B>>>,
+    new_buffer_fn: Arc<dyn Fn() -> BufferStorage<B> + Send + Sync>,
 }
 
 impl Buffer<Vec<u8>> {
```
```diff
@@ -74,7 +93,7 @@ impl Buffer<Vec<u8>> {
     pub fn shift_start_right(&mut self, num: usize) -> Vec<u8> {
         let num_to_end = self.split_off(num);
 
-        std::mem::replace(&mut self.inner.inner, num_to_end)
+        std::mem::replace(self.storage_mut(), num_to_end)
     }
 
     /// Shifts the start of the buffer to the left by N bytes, returning a slice to the added bytes at the front of the buffer.
```
```diff
@@ -88,18 +107,33 @@
     }
 }
 
+impl<B> Buffer<B> {
+    fn storage(&self) -> &BufferStorage<B> {
+        self.inner
+            .as_ref()
+            .expect("should always have buffer storage until dropped")
+    }
+
+    fn storage_mut(&mut self) -> &mut BufferStorage<B> {
+        self.inner
+            .as_mut()
+            .expect("should always have buffer storage until dropped")
+    }
+}
+
 impl<B> Clone for Buffer<B>
 where
     B: Buf,
 {
     fn clone(&self) -> Self {
-        let mut copy = self.pool.pull_owned();
+        let mut copy = self.pool.pop().unwrap_or_else(|| (self.new_buffer_fn)());
 
-        self.inner.inner.clone(&mut copy);
+        self.storage().inner.clone(&mut copy);
 
         Self {
-            inner: copy,
+            inner: Some(copy),
             pool: self.pool.clone(),
+            new_buffer_fn: self.new_buffer_fn.clone(),
         }
     }
 }
```
```diff
@@ -143,13 +177,21 @@ impl<B> Deref for Buffer<B> {
     type Target = B;
 
     fn deref(&self) -> &Self::Target {
-        self.inner.deref()
+        self.storage().deref()
     }
 }
 
 impl<B> DerefMut for Buffer<B> {
     fn deref_mut(&mut self) -> &mut Self::Target {
-        self.inner.deref_mut()
+        self.storage_mut().deref_mut()
     }
 }
 
+impl<B> Drop for Buffer<B> {
+    fn drop(&mut self) {
+        let buffer_storage = self.inner.take().expect("should have storage in `Drop`");
+
+        self.pool.push(buffer_storage);
+    }
+}
+
```
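One detail of the new `Buffer` type worth spelling out: the storage is now wrapped in an `Option` because `Drop::drop` only receives `&mut self`, and Rust does not allow moving a field out of a mutable reference. `Option::take` leaves `None` behind and yields the owned storage, which can then be pushed back onto the queue. A minimal sketch of that pattern (the types here are illustrative stand-ins, not connlib's):

```rust
use std::sync::Arc;

use crossbeam_queue::SegQueue;

// Illustrative stand-in for connlib's `Buffer<B>`: the storage sits in an
// `Option` so that `drop` can move it out and return it to the pool.
struct PooledBuffer {
    storage: Option<Vec<u8>>,
    pool: Arc<SegQueue<Vec<u8>>>,
}

impl Drop for PooledBuffer {
    fn drop(&mut self) {
        // `take()` swaps `None` into `self.storage` and gives us ownership,
        // which a plain move out of `&mut self` would not allow.
        if let Some(storage) = self.storage.take() {
            self.pool.push(storage);
        }
    }
}

fn main() {
    let pool = Arc::new(SegQueue::new());
    let buffer = PooledBuffer {
        storage: Some(Vec::with_capacity(1500)),
        pool: pool.clone(),
    };

    drop(buffer); // the allocation is now back in `pool`
    assert_eq!(pool.len(), 1);
}
```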