Mirror of https://github.com/outbackdingo/firezone.git (synced 2026-01-27 10:18:54 +00:00)
ci: use bencher.dev for continuous benchmarking (#5915)
Currently, we have a homegrown benchmark suite that reports the results of the iperf runs within CI by comparing a run on `main` with the current branch. These comments are noisy because they happen on every PR, regardless of the performance results. As a result, they tend to be skimmed over by devs and not actually considered.

To properly track performance, we need to record benchmark results over time and use statistics to detect regressions. https://bencher.dev does exactly that. It supports various benchmark harnesses for collecting results automatically. For our case, we simply use the generic JSON adapter to extract the relevant metrics from the iperf results and report them to the bencher backend. With these metrics in place, bencher can plot the results over time and alert us to regressions using thresholds based on statistical tests.

Resolves: #5818.

---------

Signed-off-by: Thomas Eizinger <thomas@eizinger.io>
Co-authored-by: Reactor Scram <ReactorScram@users.noreply.github.com>
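For context, bencher's generic JSON adapter consumes the Bencher Metric Format (BMF): a JSON object mapping benchmark name to measure to value. A minimal sketch of one of our iperf metrics in that shape follows; the file name and the throughput number are invented for illustration, only the "throughput" measure and the benchmark name come from the workflow below.

# Minimal BMF example for one perf scenario; bencher's JSON adapter reads this shape directly.
cat > example.bmf.json <<'EOF'
{
  "direct-tcp-client2server": {
    "throughput": {
      "value": 1987654321.0
    }
  }
}
EOF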
.github/workflows/ci.yml (vendored): 89 changed lines
@@ -82,27 +82,15 @@ jobs:
       profile: ${{ inputs.profile || 'debug' }}
       stage: ${{ inputs.stage || 'debug' }}
 
-  build-base-perf-artifacts:
+  build-perf-artifacts:
     needs: update-release-draft
     if: ${{ github.event_name == 'pull_request' }}
     uses: ./.github/workflows/_build_artifacts.yml
     secrets: inherit
     with:
-      sha: ${{ github.event.pull_request.base.sha }}
       image_prefix: 'perf'
       profile: 'release'
-      stage: 'debug'
-
-  build-head-perf-artifacts:
-    needs: update-release-draft
-    if: ${{ github.event_name == 'pull_request' }}
-    uses: ./.github/workflows/_build_artifacts.yml
-    secrets: inherit
-    with:
-      sha: ${{ github.sha }}
-      image_prefix: 'perf'
-      profile: 'release'
-      stage: 'debug'
+      stage: 'debug' # Only the debug images have perf tooling
 
   integration-tests:
     uses: ./.github/workflows/_integration_tests.yml
@@ -172,12 +160,8 @@ jobs:
       client_tag: ${{ matrix.client.tag }}
 
   perf-tests:
-    # Only the debug images have perf tooling
-    if: ${{ github.event_name == 'pull_request' }}
-    name: perf-tests-${{ matrix.version.prefix }}-${{ matrix.test_name }}
-    needs:
-      - build-base-perf-artifacts
-      - build-head-perf-artifacts
+    name: perf-tests
+    needs: build-perf-artifacts
     runs-on: ubuntu-22.04
     permissions:
       contents: read
@@ -185,25 +169,20 @@ jobs:
       pull-requests: write
     env:
       API_IMAGE: 'us-east1-docker.pkg.dev/firezone-staging/firezone/api'
-      API_TAG: ${{ matrix.version.sha }}
+      API_TAG: ${{ github.sha }}
       WEB_IMAGE: 'us-east1-docker.pkg.dev/firezone-staging/firezone/web'
-      WEB_TAG: ${{ matrix.version.sha }}
+      WEB_TAG: ${{ github.sha }}
       ELIXIR_IMAGE: 'us-east1-docker.pkg.dev/firezone-staging/firezone/elixir'
-      ELIXIR_TAG: ${{ matrix.version.sha }}
+      ELIXIR_TAG: ${{ github.sha }}
       GATEWAY_IMAGE: 'us-east1-docker.pkg.dev/firezone-staging/firezone/perf/gateway'
-      GATEWAY_TAG: ${{ matrix.version.sha }}
+      GATEWAY_TAG: ${{ github.sha }}
       CLIENT_IMAGE: 'us-east1-docker.pkg.dev/firezone-staging/firezone/perf/client'
-      CLIENT_TAG: ${{ matrix.version.sha }}
+      CLIENT_TAG: ${{ github.sha }}
       RELAY_IMAGE: 'us-east1-docker.pkg.dev/firezone-staging/firezone/perf/relay'
-      RELAY_TAG: ${{ matrix.version.sha }}
+      RELAY_TAG: ${{ github.sha }}
     strategy:
       fail-fast: false
       matrix:
-        version:
-          - sha: ${{ github.sha }}
-            prefix: head
-          - sha: ${{ github.event.pull_request.base.sha }}
-            prefix: base
         test_name:
           - direct-tcp-client2server
           - direct-tcp-server2client
@@ -215,8 +194,6 @@ jobs:
           - relayed-udp-server2client
     steps:
       - uses: actions/checkout@v4
-        with:
-          ref: ${{ matrix.version.sha }}
       - uses: ./.github/actions/gcp-docker-login
         id: login
         with:
@@ -241,13 +218,15 @@ jobs:
         timeout-minutes: 5
         env:
           TEST_NAME: ${{ matrix.test_name }}
-        run: ./scripts/tests/perf/${{ matrix.test_name }}.sh
+        run: |
+          ./scripts/tests/perf/${{ matrix.test_name }}.sh
+          jq '{ "${{ matrix.test_name }}": { "throughput": { "value": .end.sum_received.bits_per_second } } }' ./${{ matrix.test_name }}.json > ./${{ matrix.test_name }}.bmf.json
       - name: 'Save performance test results: ${{ matrix.test_name }}'
         uses: actions/upload-artifact@v4
         with:
           overwrite: true
-          name: ${{ matrix.test_name }}-${{ matrix.version.sha }}-iperf3results
-          path: ./${{ matrix.test_name }}.json
+          name: ${{ matrix.test_name }}-${{ github.sha }}-iperf3results
+          path: ./${{ matrix.test_name }}.bmf.json
       - name: Show Client logs
         if: "!cancelled()"
         run: docker compose logs client
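The extraction in the run step above can also be checked by hand outside of CI. A rough sketch for a single scenario follows, assuming the docker compose perf environment used by this workflow is already running and that the perf script writes its iperf3 JSON output to the working directory as in the upload step:

# Run one scenario and reduce its iperf3 JSON output to a BMF document,
# mirroring the jq filter used in the workflow step above.
TEST_NAME=direct-tcp-client2server
./scripts/tests/perf/"$TEST_NAME".sh    # writes ./direct-tcp-client2server.json
jq "{ \"$TEST_NAME\": { \"throughput\": { \"value\": .end.sum_received.bits_per_second } } }" \
  "./$TEST_NAME.json" > "./$TEST_NAME.bmf.json"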
@@ -273,8 +252,7 @@ jobs:
         if: "!cancelled()"
         run: docker compose logs iperf3
 
-  compare-results:
-    if: github.event_name == 'pull_request'
+  upload-bencher:
     needs: perf-tests
     runs-on: ubuntu-22.04
     permissions:
@@ -283,31 +261,20 @@ jobs:
       pull-requests: write
     steps:
       - uses: actions/checkout@v4
-      - name: Download base ref performance test results
-        uses: actions/download-artifact@v4
-        with:
-          pattern: '*-${{ github.event.pull_request.base.sha }}-iperf3results'
-          merge-multiple: true
-          path: ./${{ github.event.pull_request.base.sha }}
-      - name: Download head ref performance test results
+      - uses: bencherdev/bencher@main
+      - name: Download performance test results
         uses: actions/download-artifact@v4
         with:
           pattern: '*-${{ github.sha }}-iperf3results'
           merge-multiple: true
           path: ./${{ github.sha }}
-      - name: Update PR with results
-        uses: actions/github-script@v7
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          script: |
-            const { script } = require('./scripts/tests/perf/results.js');
-            script(github, context, '${{ github.event.pull_request.base.sha }}', '${{ github.sha }}', [
-              'direct-tcp-client2server',
-              'direct-tcp-server2client',
-              'direct-udp-client2server',
-              'direct-udp-server2client',
-              'relayed-tcp-client2server',
-              'relayed-tcp-server2client',
-              'relayed-udp-client2server',
-              'relayed-udp-server2client'
-            ]);
+      - name: Merge benchmarks results into one report
+        run: jq -s 'reduce .[] as $item ({}; . * $item)' ./${{ github.sha }}/*.bmf.json > bmf.json
+      - name: Report results to bencher
+        run: bencher run --file bmf.json --adapter json --github-actions ${{ secrets.GITHUB_TOKEN }}
+        env:
+          BENCHER_PROJECT: firezone-1l75jv1z
+          BENCHER_API_TOKEN: ${{ secrets.BENCHER_API_TOKEN }}
+          BENCHER_BRANCH: ${{ github.head_ref || github.ref_name }}
+          BENCHER_BRANCH_START: ${{ github.base_ref }}
+          BENCHER_TESTBED: github-actions
 
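The merge and report steps above can be approximated locally to inspect the combined report before it reaches bencher. This is a sketch only: `$MY_BENCHER_TOKEN` is a placeholder for a real bencher API token, and the branch and testbed names here are illustrative rather than what CI uses.

# Merge all per-test BMF files into one report, the same jq reduce as the workflow step.
jq -s 'reduce .[] as $item ({}; . * $item)' ./*.bmf.json > bmf.json

# Push the report to bencher via the generic JSON adapter; flags mirror the workflow step above.
BENCHER_PROJECT=firezone-1l75jv1z \
BENCHER_API_TOKEN="$MY_BENCHER_TOKEN" \
BENCHER_BRANCH="$(git branch --show-current)" \
BENCHER_TESTBED=localhost \
  bencher run --file bmf.json --adapter json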
scripts/tests/perf/results.js (deleted)
@@ -1,159 +0,0 @@
const fs = require("fs");
const path = require("path");

function getDiffPercents(base, head) {
  let diff = -1 * (100 - head / (base / 100));

  if (diff > 0) {
    return "+" + diff.toFixed(0) + "%";
  } else if (diff == 0) {
    return "0%";
  } else {
    return diff.toFixed(0) + "%";
  }
}

function humanFileSize(bytes, dp = 1) {
  const thresh = 1000;

  if (Math.abs(bytes) < thresh) {
    return bytes + " B";
  }

  const units = ["KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"];
  let u = -1;
  const r = 10 ** dp;

  do {
    bytes /= thresh;
    ++u;
  } while (
    Math.round(Math.abs(bytes) * r) / r >= thresh &&
    u < units.length - 1
  );

  return bytes.toFixed(dp) + " " + units[u];
}

exports.script = async function (
  github,
  context,
  base_sha,
  head_sha,
  test_names
) {
  let output = `### Performance Test Results

`;
  let tcp_output = `#### TCP

| Test Name | Received/s | Sent/s | Retransmits |
|---|---|---|---|
`;

  let udp_output = `#### UDP

| Test Name | Total/s | Jitter | Lost |
|---|---|---|---|
`;

  for (const test_name of test_names) {
    // 1. Read the head ref results
    const results = JSON.parse(
      fs.readFileSync(path.join(head_sha, test_name + ".json"))
    ).end;

    // 2. Read the base ref results
    const results_base = JSON.parse(
      fs.readFileSync(path.join(base_sha, test_name + ".json"))
    ).end;

    if (test_name.includes("tcp")) {
      const tcp_sum_received_bits_per_second =
        humanFileSize(results.sum_received.bits_per_second) +
        " (" +
        getDiffPercents(
          results_base.sum_received.bits_per_second,
          results.sum_received.bits_per_second
        ) +
        ")";
      const tcp_sum_sent_bits_per_second =
        humanFileSize(results.sum_sent.bits_per_second) +
        " (" +
        getDiffPercents(
          results_base.sum_sent.bits_per_second,
          results.sum_sent.bits_per_second
        ) +
        ")";
      const tcp_sum_sent_retransmits =
        results.sum_sent.retransmits +
        " (" +
        getDiffPercents(
          results_base.sum_sent.retransmits,
          results.sum_sent.retransmits
        ) +
        ")";

      tcp_output += `| ${test_name} | ${tcp_sum_received_bits_per_second} | ${tcp_sum_sent_bits_per_second} | ${tcp_sum_sent_retransmits} |\n`;
    } else if (test_name.includes("udp")) {
      const udp_sum_bits_per_second =
        humanFileSize(results.sum.bits_per_second) +
        " (" +
        getDiffPercents(
          results_base.sum.bits_per_second,
          results.sum.bits_per_second
        ) +
        ")";
      const udp_sum_jitter_ms =
        results.sum.jitter_ms.toFixed(2) +
        "ms (" +
        getDiffPercents(results_base.sum.jitter_ms, results.sum.jitter_ms) +
        ")";
      const udp_sum_lost_percent =
        results.sum.lost_percent.toFixed(2) +
        "% (" +
        getDiffPercents(
          results_base.sum.lost_percent,
          results.sum.lost_percent
        ) +
        ")";

      udp_output += `| ${test_name} | ${udp_sum_bits_per_second} | ${udp_sum_jitter_ms} | ${udp_sum_lost_percent} |\n`;
    } else {
      throw new Error("Unknown test type");
    }
  }

  output += tcp_output + "\n" + udp_output;

  // Retrieve existing bot comments for the PR
  const { data: comments } = await github.rest.issues.listComments({
    owner: context.repo.owner,
    repo: context.repo.repo,
    issue_number: context.issue.number,
  });

  const botComment = comments.find((comment) => {
    return (
      comment.user.type === "Bot" &&
      comment.body.includes("Performance Test Results")
    );
  });

  // 3. Update previous comment or create new one
  if (botComment) {
    github.rest.issues.updateComment({
      owner: context.repo.owner,
      repo: context.repo.repo,
      comment_id: botComment.id,
      body: output,
    });
  } else {
    github.rest.issues.createComment({
      issue_number: context.issue.number,
      owner: context.repo.owner,
      repo: context.repo.repo,
      body: output,
    });
  }
};