ci: use bencher.dev for continuous benchmarking (#5915)

Currently, we have a homegrown benchmark suite that reports the results
of the iperf runs in CI by comparing a run on `main` with a run on the
current branch.

The resulting PR comments are noisy because they are posted on every
PR, regardless of the performance results. As a result, devs tend to
skim over them rather than act on them. To properly track performance,
we need to record benchmark results over time and use statistics to
detect regressions.

https://bencher.dev does exactly that. It supports various benchmark
harnesses for collecting results automatically. In our case, we simply
use the generic JSON adapter: we extract the relevant metrics from the
iperf results and report them to the Bencher backend.
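
For example, each iperf3 JSON report is reduced to a single throughput
metric in Bencher Metric Format (BMF). A minimal sketch of that
extraction, with the test name hard-coded and the output value made up
for illustration:

```bash
# Sketch of the per-test extraction step; assumes iperf3 wrote its JSON
# report to <test-name>.json. The field path comes from iperf3's TCP
# summary output.
TEST_NAME=direct-tcp-client2server

jq "{ \"$TEST_NAME\": { \"throughput\": { \"value\": .end.sum_received.bits_per_second } } }" \
  "$TEST_NAME.json" > "$TEST_NAME.bmf.json"

# $TEST_NAME.bmf.json then looks roughly like:
# {
#   "direct-tcp-client2server": {
#     "throughput": { "value": 9410000000.0 }
#   }
# }
```

Bencher's generic JSON adapter (`--adapter json`) accepts this shape
directly, so no dedicated benchmark-harness integration is needed.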

With these metrics in place, Bencher can plot the results over time and
alert us to regressions using thresholds based on statistical tests.
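
As an illustration of the idea behind such a threshold (this is not
Bencher's actual implementation, and the numbers are made up), a new
measurement could be flagged when it falls well outside the spread of
recent results:

```bash
# Illustration only: flag a regression if the latest throughput is more
# than two standard deviations below the mean of recent runs. Bencher
# performs the real statistical test server-side; all values are made up.
history="9.40e9 9.55e9 9.48e9 9.51e9 9.39e9"  # recent bits/s (fabricated)
latest="8.70e9"                               # this branch's run (fabricated)

echo "$history $latest" | awk '{
  n = NF - 1
  for (i = 1; i <= n; i++) { sum += $i; sumsq += $i * $i }
  mean = sum / n
  sd   = sqrt(sumsq / n - mean * mean)
  if ($NF < mean - 2 * sd) printf "regression: %.2e < %.2e\n", $NF, mean - 2 * sd
  else                     print "within threshold"
}'
```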

Resolves: #5818.

---------

Signed-off-by: Thomas Eizinger <thomas@eizinger.io>
Co-authored-by: Reactor Scram <ReactorScram@users.noreply.github.com>
Thomas Eizinger authored on 2024-07-24 11:22:17 +10:00, committed by GitHub
parent 50d6b865a1, commit a8aafc9e14
2 changed files with 28 additions and 220 deletions


@@ -82,27 +82,15 @@ jobs:
profile: ${{ inputs.profile || 'debug' }}
stage: ${{ inputs.stage || 'debug' }}
build-base-perf-artifacts:
build-perf-artifacts:
needs: update-release-draft
if: ${{ github.event_name == 'pull_request' }}
uses: ./.github/workflows/_build_artifacts.yml
secrets: inherit
with:
sha: ${{ github.event.pull_request.base.sha }}
image_prefix: 'perf'
profile: 'release'
stage: 'debug'
build-head-perf-artifacts:
needs: update-release-draft
if: ${{ github.event_name == 'pull_request' }}
uses: ./.github/workflows/_build_artifacts.yml
secrets: inherit
with:
sha: ${{ github.sha }}
image_prefix: 'perf'
profile: 'release'
stage: 'debug'
stage: 'debug' # Only the debug images have perf tooling
integration-tests:
uses: ./.github/workflows/_integration_tests.yml
@@ -172,12 +160,8 @@ jobs:
client_tag: ${{ matrix.client.tag }}
perf-tests:
# Only the debug images have perf tooling
if: ${{ github.event_name == 'pull_request' }}
name: perf-tests-${{ matrix.version.prefix }}-${{ matrix.test_name }}
needs:
- build-base-perf-artifacts
- build-head-perf-artifacts
name: perf-tests
needs: build-perf-artifacts
runs-on: ubuntu-22.04
permissions:
contents: read
@@ -185,25 +169,20 @@ jobs:
pull-requests: write
env:
API_IMAGE: 'us-east1-docker.pkg.dev/firezone-staging/firezone/api'
API_TAG: ${{ matrix.version.sha }}
API_TAG: ${{ github.sha }}
WEB_IMAGE: 'us-east1-docker.pkg.dev/firezone-staging/firezone/web'
WEB_TAG: ${{ matrix.version.sha }}
WEB_TAG: ${{ github.sha }}
ELIXIR_IMAGE: 'us-east1-docker.pkg.dev/firezone-staging/firezone/elixir'
ELIXIR_TAG: ${{ matrix.version.sha }}
ELIXIR_TAG: ${{ github.sha }}
GATEWAY_IMAGE: 'us-east1-docker.pkg.dev/firezone-staging/firezone/perf/gateway'
GATEWAY_TAG: ${{ matrix.version.sha }}
GATEWAY_TAG: ${{ github.sha }}
CLIENT_IMAGE: 'us-east1-docker.pkg.dev/firezone-staging/firezone/perf/client'
CLIENT_TAG: ${{ matrix.version.sha }}
CLIENT_TAG: ${{ github.sha }}
RELAY_IMAGE: 'us-east1-docker.pkg.dev/firezone-staging/firezone/perf/relay'
RELAY_TAG: ${{ matrix.version.sha }}
RELAY_TAG: ${{ github.sha }}
strategy:
fail-fast: false
matrix:
version:
- sha: ${{ github.sha }}
prefix: head
- sha: ${{ github.event.pull_request.base.sha }}
prefix: base
test_name:
- direct-tcp-client2server
- direct-tcp-server2client
@@ -215,8 +194,6 @@ jobs:
- relayed-udp-server2client
steps:
- uses: actions/checkout@v4
with:
ref: ${{ matrix.version.sha }}
- uses: ./.github/actions/gcp-docker-login
id: login
with:
@@ -241,13 +218,15 @@ jobs:
timeout-minutes: 5
env:
TEST_NAME: ${{ matrix.test_name }}
run: ./scripts/tests/perf/${{ matrix.test_name }}.sh
run: |
./scripts/tests/perf/${{ matrix.test_name }}.sh
jq '{ "${{ matrix.test_name }}": { "throughput": { "value": .end.sum_received.bits_per_second } } }' ./${{ matrix.test_name }}.json > ./${{ matrix.test_name }}.bmf.json
- name: 'Save performance test results: ${{ matrix.test_name }}'
uses: actions/upload-artifact@v4
with:
overwrite: true
name: ${{ matrix.test_name }}-${{ matrix.version.sha }}-iperf3results
path: ./${{ matrix.test_name }}.json
name: ${{ matrix.test_name }}-${{ github.sha }}-iperf3results
path: ./${{ matrix.test_name }}.bmf.json
- name: Show Client logs
if: "!cancelled()"
run: docker compose logs client
@@ -273,8 +252,7 @@ jobs:
if: "!cancelled()"
run: docker compose logs iperf3
compare-results:
if: github.event_name == 'pull_request'
upload-bencher:
needs: perf-tests
runs-on: ubuntu-22.04
permissions:
@@ -283,31 +261,20 @@ jobs:
pull-requests: write
steps:
- uses: actions/checkout@v4
- name: Download base ref performance test results
uses: actions/download-artifact@v4
with:
pattern: '*-${{ github.event.pull_request.base.sha }}-iperf3results'
merge-multiple: true
path: ./${{ github.event.pull_request.base.sha }}
- name: Download head ref performance test results
- uses: bencherdev/bencher@main
- name: Download performance test results
uses: actions/download-artifact@v4
with:
pattern: '*-${{ github.sha }}-iperf3results'
merge-multiple: true
path: ./${{ github.sha }}
- name: Update PR with results
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const { script } = require('./scripts/tests/perf/results.js');
script(github, context, '${{ github.event.pull_request.base.sha }}', '${{ github.sha }}', [
'direct-tcp-client2server',
'direct-tcp-server2client',
'direct-udp-client2server',
'direct-udp-server2client',
'relayed-tcp-client2server',
'relayed-tcp-server2client',
'relayed-udp-client2server',
'relayed-udp-server2client'
]);
- name: Merge benchmarks results into one report
run: jq -s 'reduce .[] as $item ({}; . * $item)' ./${{ github.sha }}/*.bmf.json > bmf.json
- name: Report results to bencher
run: bencher run --file bmf.json --adapter json --github-actions ${{ secrets.GITHUB_TOKEN }}
env:
BENCHER_PROJECT: firezone-1l75jv1z
BENCHER_API_TOKEN: ${{ secrets.BENCHER_API_TOKEN }}
BENCHER_BRANCH: ${{ github.head_ref || github.ref_name }}
BENCHER_BRANCH_START: ${{ github.base_ref }}
BENCHER_TESTBED: github-actions
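
(For reference: the per-test `*.bmf.json` artifacts are deep-merged into
a single report before being uploaded, roughly as follows; the paths and
values here are illustrative.)

```bash
# Deep-merge all per-test BMF files into one report, as the workflow
# step above does for ./<sha>/*.bmf.json.
jq -s 'reduce .[] as $item ({}; . * $item)' ./*.bmf.json > bmf.json

# bmf.json then contains one entry per test, e.g. (values made up):
# {
#   "direct-tcp-client2server":  { "throughput": { "value": 9.41e9 } },
#   "relayed-udp-server2client": { "throughput": { "value": 7.80e8 } }
# }
#
# `bencher run --file bmf.json --adapter json` ingests this as one run.
```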

scripts/tests/perf/results.js (deleted)

@@ -1,159 +0,0 @@
const fs = require("fs");
const path = require("path");
function getDiffPercents(base, head) {
let diff = -1 * (100 - head / (base / 100));
if (diff > 0) {
return "+" + diff.toFixed(0) + "%";
} else if (diff == 0) {
return "0%";
} else {
return diff.toFixed(0) + "%";
}
}
function humanFileSize(bytes, dp = 1) {
const thresh = 1000;
if (Math.abs(bytes) < thresh) {
return bytes + " B";
}
const units = ["KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"];
let u = -1;
const r = 10 ** dp;
do {
bytes /= thresh;
++u;
} while (
Math.round(Math.abs(bytes) * r) / r >= thresh &&
u < units.length - 1
);
return bytes.toFixed(dp) + " " + units[u];
}
exports.script = async function (
github,
context,
base_sha,
head_sha,
test_names
) {
let output = `### Performance Test Results
`;
let tcp_output = `#### TCP
| Test Name | Received/s | Sent/s | Retransmits |
|---|---|---|---|
`;
let udp_output = `#### UDP
| Test Name | Total/s | Jitter | Lost |
|---|---|---|---|
`;
for (const test_name of test_names) {
// 1. Read the head ref results
const results = JSON.parse(
fs.readFileSync(path.join(head_sha, test_name + ".json"))
).end;
// 2. Read the base ref results
const results_base = JSON.parse(
fs.readFileSync(path.join(base_sha, test_name + ".json"))
).end;
if (test_name.includes("tcp")) {
const tcp_sum_received_bits_per_second =
humanFileSize(results.sum_received.bits_per_second) +
" (" +
getDiffPercents(
results_base.sum_received.bits_per_second,
results.sum_received.bits_per_second
) +
")";
const tcp_sum_sent_bits_per_second =
humanFileSize(results.sum_sent.bits_per_second) +
" (" +
getDiffPercents(
results_base.sum_sent.bits_per_second,
results.sum_sent.bits_per_second
) +
")";
const tcp_sum_sent_retransmits =
results.sum_sent.retransmits +
" (" +
getDiffPercents(
results_base.sum_sent.retransmits,
results.sum_sent.retransmits
) +
")";
tcp_output += `| ${test_name} | ${tcp_sum_received_bits_per_second} | ${tcp_sum_sent_bits_per_second} | ${tcp_sum_sent_retransmits} |\n`;
} else if (test_name.includes("udp")) {
const udp_sum_bits_per_second =
humanFileSize(results.sum.bits_per_second) +
" (" +
getDiffPercents(
results_base.sum.bits_per_second,
results.sum.bits_per_second
) +
")";
const udp_sum_jitter_ms =
results.sum.jitter_ms.toFixed(2) +
"ms (" +
getDiffPercents(results_base.sum.jitter_ms, results.sum.jitter_ms) +
")";
const udp_sum_lost_percent =
results.sum.lost_percent.toFixed(2) +
"% (" +
getDiffPercents(
results_base.sum.lost_percent,
results.sum.lost_percent
) +
")";
udp_output += `| ${test_name} | ${udp_sum_bits_per_second} | ${udp_sum_jitter_ms} | ${udp_sum_lost_percent} |\n`;
} else {
throw new Error("Unknown test type");
}
}
output += tcp_output + "\n" + udp_output;
// Retrieve existing bot comments for the PR
const { data: comments } = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
});
const botComment = comments.find((comment) => {
return (
comment.user.type === "Bot" &&
comment.body.includes("Performance Test Results")
);
});
// 3. Update previous comment or create new one
if (botComment) {
github.rest.issues.updateComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: botComment.id,
body: output,
});
} else {
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: output,
});
}
};