From 5bf3230c62ddaf33a81197b6cf360b833696da2d Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Tue, 10 Sep 2024 10:00:00 -0400 Subject: [PATCH] docs(connlib): add profiling instructions (#6643) Documents profiling instructions that I've figured out over the last couple of days. Since Rust 1.79, the standard library is compiled with frame pointers enabled [0]. Grabbing stack-trace information from the frame pointer makes profiling much easier because the data is just there in-line. Using debug information (via `dwarf`) is also possible but requires post-processing of the performance profile with `addr2line` (`perf script` does that automatically). This can take multiple minutes or longer, depending on the sampling frequency of the captured performance data. This makes benchmarking almost infeasible because the feedback loop is simply too long. Using frame pointers is a much nicer experience. The downside is that the applications themselves also need to be compiled with frame pointers. We achieve that by setting the appropriate compiler option in `.cargo/config.toml`. Ubuntu [1], Fedora [2] and Arch [3] also ship all of their code with frame pointers enabled. Also, tech giants such as Google & Meta have been running their systems with frame pointers on-by-default for years [4]. 
[0]: https://blog.rust-lang.org/2024/06/13/Rust-1.79.0.html#frame-pointers-enabled-in-standard-library-builds [1]: https://www.brendangregg.com/blog/2024-03-17/the-return-of-the-frame-pointers.html [2]: https://pagure.io/fesco/issue/2923 [3]: https://gitlab.archlinux.org/archlinux/rfcs/-/merge_requests/26 [4]: https://www.brendangregg.com/blog/2024-03-17/the-return-of-the-frame-pointers.html --- rust/.cargo/config.toml | 5 +++++ rust/Cargo.toml | 7 +++---- rust/README.md | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 rust/.cargo/config.toml diff --git a/rust/.cargo/config.toml b/rust/.cargo/config.toml new file mode 100644 index 000000000..c6e0da270 --- /dev/null +++ b/rust/.cargo/config.toml @@ -0,0 +1,5 @@ +[target.x86_64-unknown-linux-musl] +rustflags="-C force-frame-pointers=yes" + +[target.x86_64-unknown-linux-gnu] +rustflags="-C force-frame-pointers=yes" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 7e0ae2a09..102deeedc 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -92,6 +92,9 @@ lto = "fat" # at the expense of compilation time codegen-units = 1 +[profile.bench] +strip = false # Frame pointers are necessary for profiling; `strip=true` appears to remove them. + # Override build settings just for the GUI client, so we get a pdb/dwp # Cargo ignores profile settings if they're not in the workspace's Cargo.toml [profile.dev.package.firezone-gui-client] @@ -103,7 +106,3 @@ strip = "none" debug = "full" split-debuginfo = "packed" strip = "none" - -# Override build settings for the relay, so we can capture flamegraphs -[profile.release.package.firezone-relay] -debug = "full" diff --git a/rust/README.md b/rust/README.md index 44f1f038e..e82814fc6 100644 --- a/rust/README.md +++ b/rust/README.md @@ -27,3 +27,17 @@ Resulting in, e.g. 
2024-04-01T18:25:48.295243016Z INFO No token / actor_name on disk, starting in signed-out state 2024-04-01T18:25:48.295360641Z INFO null ``` + +## Benchmarking on Linux + +The recommended way for benchmarking any of the Rust components is Linux' `perf` utility. +For example, to attach to a running application, do: + +1. Ensure the binary you are profiling is compiled with the `bench` profile. +1. `sudo perf record -g --freq 10000 --pid $(pgrep <process-name>)`. +1. Run the speed test or whatever load-inducing task you want to measure. +1. `sudo perf script > profile.perf` +1. Open [profiler.firefox.com](https://profiler.firefox.com) and load `profile.perf` + +Instead of attaching to a process with `--pid`, you can also specify the path to the executable directly. +That is useful if you want to capture perf data for a test or a micro-benchmark.