From 7ff40b82edcdd11e454f461e28e44c7fed1b28e4 Mon Sep 17 00:00:00 2001 From: Jamil Date: Tue, 20 Feb 2024 14:44:20 -0800 Subject: [PATCH] fix(ci): Run each perf test in its own matrix job (#3695) The iperf3 server sometimes hangs, or takes a while to startup. Rather than trying to reset the iperf3 state between performance tests, this PR refactors them so they each run in their matrix job. This ensures each performance test will run on a separate VM, unaffected by previous test runs to eliminate the effect any residual network buffer state can have on a particular test. It also makes sure the server is listening with a `healthcheck`. --- .github/workflows/ci.yml | 53 +++-- docker-compose.yml | 3 +- .../tests/perf/direct-tcp-client2server.sh | 9 + .../tests/perf/direct-tcp-server2client.sh | 10 + .../tests/perf/direct-udp-client2server.sh | 10 + .../tests/perf/direct-udp-server2client.sh | 11 + .../tests/perf/relayed-tcp-client2server.sh | 12 + .../tests/perf/relayed-tcp-server2client.sh | 13 ++ .../tests/perf/relayed-udp-client2server.sh | 13 ++ .../tests/perf/relayed-udp-server2client.sh | 14 ++ scripts/tests/perf/results.js | 216 +++++++----------- scripts/tests/perf/test.sh | 13 -- 12 files changed, 210 insertions(+), 167 deletions(-) create mode 100755 scripts/tests/perf/direct-tcp-client2server.sh create mode 100755 scripts/tests/perf/direct-tcp-server2client.sh create mode 100755 scripts/tests/perf/direct-udp-client2server.sh create mode 100755 scripts/tests/perf/direct-udp-server2client.sh create mode 100755 scripts/tests/perf/relayed-tcp-client2server.sh create mode 100755 scripts/tests/perf/relayed-tcp-server2client.sh create mode 100755 scripts/tests/perf/relayed-udp-client2server.sh create mode 100755 scripts/tests/perf/relayed-udp-server2client.sh delete mode 100755 scripts/tests/perf/test.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index aace9a950..ab4e3c72f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -227,8 +227,8 @@ jobs: if: "!cancelled()" run: docker compose logs api - performance-tests: - name: performance-tests-${{ matrix.test_name }} + perf-tests: + name: perf-tests-${{ matrix.test_name }} needs: build-images runs-on: ubuntu-22.04 permissions: @@ -240,14 +240,15 @@ jobs: strategy: fail-fast: false matrix: - include: - - test_name: direct-perf - setup: echo 'Noop' - - test_name: relayed-perf - setup: | - # Disallow traffic between gateway and client container - sudo iptables -I FORWARD 1 -s 172.28.0.100 -d 172.28.0.105 -j DROP - sudo iptables -I FORWARD 1 -s 172.28.0.105 -d 172.28.0.100 -j DROP + test_name: + - direct-tcp-client2server + - direct-tcp-server2client + - direct-udp-client2server + - direct-udp-server2client + - relayed-tcp-client2server + - relayed-tcp-server2client + - relayed-udp-client2server + - relayed-udp-server2client steps: - uses: actions/checkout@v4 @@ -265,25 +266,23 @@ jobs: sed -i 's/^\(\s*\)RUST_LOG:.*$/\1RUST_LOG: wire=error,info/' docker-compose.yml cat docker-compose.yml | grep RUST_LOG - # TODO: Order matters here, but it shouldn't. There seems to be some race - # condition involved in letting Docker deterime the start order here. + # Start services in the same order each time for the tests docker compose up -d iperf3 docker compose up -d api web docker compose up -d relay docker compose up -d gateway docker compose up -d client - - name: 'Setup test: ${{ matrix.test_name }}' - run: ${{ matrix.setup }} - name: 'Performance test: ${{ matrix.test_name }}' - id: performance-test timeout-minutes: 5 - run: ./scripts/tests/perf/test.sh + env: + TEST_NAME: ${{ matrix.test_name }} + run: ./scripts/tests/perf/${{ matrix.test_name }}.sh - name: 'Save performance test results: ${{ matrix.test_name }}' uses: actions/upload-artifact@v4 with: name: '${{ matrix.test_name }}-iperf3results' - path: ./iperf3results + path: ./${{ matrix.test_name }}.json - name: 'Download main branch performance test results: ${{ matrix.test_name }}' id: download-artifact if: ${{ github.event_name == 'pull_request' }} @@ -295,7 +294,6 @@ jobs: REPO="${{ github.repository }}" WORKFLOW="cd.yml" ARTIFACT_NAME="${{ matrix.test_name }}-iperf3results" - DESTINATION="./iperf3results-main" ARTIFACTS_URL=$( gh api \ @@ -313,16 +311,23 @@ jobs: --jq '.artifacts[] | select(.name == "'${ARTIFACT_NAME}'") | .archive_download_url' ) - set +x - curl -H "Accept: application/vnd.github+json" -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -L -o "${DESTINATION}.zip" "$DOWNLOAD_URL" + if [ -z "$DOWNLOAD_URL" ]; then + echo "continue=false" >> $GITHUB_ENV + echo "No artifact found for ${ARTIFACT_NAME} in main branch" + else + echo "continue=true" >> $GITHUB_ENV + set +x + curl -H "Accept: application/vnd.github+json" -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -L -o "${ARTIFACT_NAME}.zip" "$DOWNLOAD_URL" - set -x - unzip "${DESTINATION}.zip" -d "${DESTINATION}" - rm "${DESTINATION}.zip" + set -x + unzip "${ARTIFACT_NAME}.zip" + rm "${ARTIFACT_NAME}.zip" + mv "${{ matrix.test_name }}.json" "${{ matrix.test_name }}-main.json" + fi - name: Update PR with results uses: actions/github-script@v7 id: perf-comment - if: ${{ github.event_name == 'pull_request' }} + if: ${{ github.event_name == 'pull_request' && steps.download-artifact.outputs.continue == 'true' }} with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | diff --git a/docker-compose.yml b/docker-compose.yml index 1043c6749..2a5b8692f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -182,7 +182,6 @@ services: resources: ipv4_address: 172.20.0.100 - dns.httpbin: image: kennethreitz/httpbin healthcheck: @@ -193,6 +192,8 @@ services: iperf3: image: mlabbe/iperf3 + healthcheck: + test: ["CMD-SHELL", "cat /proc/net/tcp | grep 5201"] command: -s -V networks: resources: diff --git a/scripts/tests/perf/direct-tcp-client2server.sh b/scripts/tests/perf/direct-tcp-client2server.sh new file mode 100755 index 000000000..ef295ce4c --- /dev/null +++ b/scripts/tests/perf/direct-tcp-client2server.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -euox pipefail + +docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ + --set-mss 1240 \ + --zerocopy \ + --client 172.20.0.110 \ + --json' >>"${TEST_NAME}.json" diff --git a/scripts/tests/perf/direct-tcp-server2client.sh b/scripts/tests/perf/direct-tcp-server2client.sh new file mode 100755 index 000000000..e3eed77b4 --- /dev/null +++ b/scripts/tests/perf/direct-tcp-server2client.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +set -euox pipefail + +docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ + --reverse \ + --set-mss 1240 \ + --zerocopy \ + --client 172.20.0.110 \ + --json' >>"${TEST_NAME}.json" diff --git a/scripts/tests/perf/direct-udp-client2server.sh b/scripts/tests/perf/direct-udp-client2server.sh new file mode 100755 index 000000000..a3c327275 --- /dev/null +++ b/scripts/tests/perf/direct-udp-client2server.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +set -euox pipefail + +docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ + --zerocopy \ + --udp \ + --bandwidth 1G \ + --client 172.20.0.110 \ + --json' >>"${TEST_NAME}.json" diff --git a/scripts/tests/perf/direct-udp-server2client.sh b/scripts/tests/perf/direct-udp-server2client.sh new file mode 100755 index 000000000..6f92b8d98 --- /dev/null +++ b/scripts/tests/perf/direct-udp-server2client.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -euox pipefail + +docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ + --reverse \ + --zerocopy \ + --udp \ + --bandwidth 1G \ + --client 172.20.0.110 \ + --json' >>"${TEST_NAME}.json" diff --git a/scripts/tests/perf/relayed-tcp-client2server.sh b/scripts/tests/perf/relayed-tcp-client2server.sh new file mode 100755 index 000000000..80a9e5961 --- /dev/null +++ b/scripts/tests/perf/relayed-tcp-client2server.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +set -euox pipefail + +source "./scripts/tests/lib.sh" +install_iptables_drop_rules + +docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ + --set-mss 1240 \ + --zerocopy \ + --client 172.20.0.110 \ + --json' >>"${TEST_NAME}.json" diff --git a/scripts/tests/perf/relayed-tcp-server2client.sh b/scripts/tests/perf/relayed-tcp-server2client.sh new file mode 100755 index 000000000..5c8889bca --- /dev/null +++ b/scripts/tests/perf/relayed-tcp-server2client.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +set -euox pipefail + +source "./scripts/tests/lib.sh" +install_iptables_drop_rules + +docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ + --reverse \ + --set-mss 1240 \ + --zerocopy \ + --client 172.20.0.110 \ + --json' >>"${TEST_NAME}.json" diff --git a/scripts/tests/perf/relayed-udp-client2server.sh b/scripts/tests/perf/relayed-udp-client2server.sh new file mode 100755 index 000000000..d6b962862 --- /dev/null +++ b/scripts/tests/perf/relayed-udp-client2server.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +set -euox pipefail + +source "./scripts/tests/lib.sh" +install_iptables_drop_rules + +docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ + --zerocopy \ + --udp \ + --bandwidth 1G \ + --client 172.20.0.110 \ + --json' >>"${TEST_NAME}.json" diff --git a/scripts/tests/perf/relayed-udp-server2client.sh b/scripts/tests/perf/relayed-udp-server2client.sh new file mode 100755 index 000000000..91d6bb667 --- /dev/null +++ b/scripts/tests/perf/relayed-udp-server2client.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +set -euox pipefail + +source "./scripts/tests/lib.sh" +install_iptables_drop_rules + +docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 \ + --reverse \ + --zerocopy \ + --udp \ + --bandwidth 1G \ + --client 172.20.0.110 \ + --json' >>"${TEST_NAME}.json" diff --git a/scripts/tests/perf/results.js b/scripts/tests/perf/results.js index c0965e266..7fbe1389d 100644 --- a/scripts/tests/perf/results.js +++ b/scripts/tests/perf/results.js @@ -36,33 +36,97 @@ function humanFileSize(bytes, dp = 1) { exports.script = async function (github, context, test_name) { // 1. Read the current results - const tcp_s2c = JSON.parse( - fs.readFileSync("iperf3results/tcp_server2client.json") - ).end; - const tcp_c2s = JSON.parse( - fs.readFileSync("iperf3results/tcp_client2server.json") - ).end; - const udp_s2c = JSON.parse( - fs.readFileSync("iperf3results/udp_server2client.json") - ).end; - const udp_c2s = JSON.parse( - fs.readFileSync("iperf3results/udp_client2server.json") - ).end; + const results = JSON.parse(fs.readFileSync(test_name + ".json")).end; // 2. Read the main results - const tcp_s2c_main = JSON.parse( - fs.readFileSync("iperf3results-main/tcp_server2client.json") - ).end; - const tcp_c2s_main = JSON.parse( - fs.readFileSync("iperf3results-main/tcp_client2server.json") - ).end; - const udp_s2c_main = JSON.parse( - fs.readFileSync("iperf3results-main/udp_server2client.json") - ).end; - const udp_c2s_main = JSON.parse( - fs.readFileSync("iperf3results-main/udp_client2server.json") + const results_main = JSON.parse( + fs.readFileSync(test_name + "-main.json") ).end; + let output = ""; + + if (test_name.includes("tcp")) { + const tcp_sum_received_bits_per_second = + humanFileSize(results.sum_received.bits_per_second) + + " (" + + getDiffPercents( + results_main.sum_received.bits_per_second, + results.sum_received.bits_per_second + ) + + ")"; + const tcp_sum_sent_bits_per_second = + humanFileSize(results.sum_sent.bits_per_second) + + " (" + + getDiffPercents( + results_main.sum_sent.bits_per_second, + results.sum_sent.bits_per_second + ) + + ")"; + const tcp_sum_sent_retransmits = + results.sum_sent.retransmits + + " (" + + getDiffPercents( + results_main.sum_sent.retransmits, + results.sum_sent.retransmits + ) + + ")"; + + output = `## Performance Test Results: ${test_name} + +| Received/s | Sent/s | Retransmits | +|---|---|---| +| ${tcp_sum_received_bits_per_second} | ${tcp_sum_sent_bits_per_second} | ${tcp_sum_sent_retransmits} | +`; + } else if (test_name.includes("udp")) { + const udp_sum_bits_per_second = + humanFileSize(results.sum.bits_per_second) + + " (" + + getDiffPercents( + results_main.sum.bits_per_second, + results.sum.bits_per_second + ) + + ")"; + const udp_sum_jitter_ms = + results.sum.jitter_ms.toFixed(2) + + "ms (" + + getDiffPercents(results_main.sum.jitter_ms, results.sum.jitter_ms) + + ")"; + const udp_sum_lost_percent = + results.sum.lost_percent.toFixed(2) + + "% (" + + getDiffPercents(results_main.sum.lost_percent, results.sum.lost_percent) + + ")"; + + const udp_sum_bits_per_second = + humanFileSize(results.sum.bits_per_second) + + " (" + + getDiffPercents( + results_main.sum.bits_per_second, + results.sum.bits_per_second + ) + + ")"; + const udp_sum_jitter_ms = + results.sum.jitter_ms.toFixed(2) + + "ms (" + + getDiffPercents(results_main.sum.jitter_ms, results.sum.jitter_ms) + + ")"; + const udp_sum_lost_percent = + results.sum.lost_percent.toFixed(2) + + "% (" + + getDiffPercents(results_main.sum.lost_percent, results.sum.lost_percent) + + ")"; + + output = `## Performance Test Results: ${test_name} + +| Total/s | Jitter | Lost | +|---|---|---| +| ${udp_sum_bits_per_second} | ${udp_sum_jitter_ms} | ${udp_sum_lost_percent} | + +`; + } else { + throw new Error("Unknown test type"); + } + // Retrieve existing bot comments for the PR const { data: comments } = await github.rest.issues.listComments({ owner: context.repo.owner, @@ -74,112 +138,6 @@ exports.script = async function (github, context, test_name) { return comment.user.type === "Bot" && comment.body.includes(test_name); }); - const tcp_server2client_sum_received_bits_per_second = - humanFileSize(tcp_s2c.sum_received.bits_per_second) + - " (" + - getDiffPercents( - tcp_s2c_main.sum_received.bits_per_second, - tcp_s2c.sum_received.bits_per_second - ) + - ")"; - const tcp_server2client_sum_sent_bits_per_second = - humanFileSize(tcp_s2c.sum_sent.bits_per_second) + - " (" + - getDiffPercents( - tcp_s2c_main.sum_sent.bits_per_second, - tcp_s2c.sum_sent.bits_per_second - ) + - ")"; - const tcp_server2client_sum_sent_retransmits = - tcp_s2c.sum_sent.retransmits + - " (" + - getDiffPercents( - tcp_s2c_main.sum_sent.retransmits, - tcp_s2c.sum_sent.retransmits - ) + - ")"; - - const tcp_client2server_sum_received_bits_per_second = - humanFileSize(tcp_c2s.sum_received.bits_per_second) + - " (" + - getDiffPercents( - tcp_c2s_main.sum_received.bits_per_second, - tcp_c2s.sum_received.bits_per_second - ) + - ")"; - const tcp_client2server_sum_sent_bits_per_second = - humanFileSize(tcp_c2s.sum_sent.bits_per_second) + - " (" + - getDiffPercents( - tcp_c2s_main.sum_sent.bits_per_second, - tcp_c2s.sum_sent.bits_per_second - ) + - ")"; - const tcp_client2server_sum_sent_retransmits = - tcp_c2s.sum_sent.retransmits + - " (" + - getDiffPercents( - tcp_c2s.sum_sent.retransmits, - tcp_c2s_main.sum_sent.retransmits - ) + - ")"; - - const udp_server2client_sum_bits_per_second = - humanFileSize(udp_s2c.sum.bits_per_second) + - " (" + - getDiffPercents( - udp_s2c_main.sum.bits_per_second, - udp_s2c.sum.bits_per_second - ) + - ")"; - const udp_server2client_sum_jitter_ms = - udp_s2c.sum.jitter_ms.toFixed(2) + - "ms (" + - getDiffPercents(udp_s2c_main.sum.jitter_ms, udp_s2c.sum.jitter_ms) + - ")"; - const udp_server2client_sum_lost_percent = - udp_s2c.sum.lost_percent.toFixed(2) + - "% (" + - getDiffPercents(udp_s2c_main.sum.lost_percent, udp_s2c.sum.lost_percent) + - ")"; - - const udp_client2server_sum_bits_per_second = - humanFileSize(udp_c2s.sum.bits_per_second) + - " (" + - getDiffPercents( - udp_c2s_main.sum.bits_per_second, - udp_c2s.sum.bits_per_second - ) + - ")"; - const udp_client2server_sum_jitter_ms = - udp_c2s.sum.jitter_ms.toFixed(2) + - "ms (" + - getDiffPercents(udp_c2s_main.sum.jitter_ms, udp_c2s.sum.jitter_ms) + - ")"; - const udp_client2server_sum_lost_percent = - udp_c2s.sum.lost_percent.toFixed(2) + - "% (" + - getDiffPercents(udp_c2s_main.sum.lost_percent, udp_c2s.sum.lost_percent) + - ")"; - - const output = `## Performance Test Results: ${test_name} - -### TCP - -| Direction | Received/s | Sent/s | Retransmits | -|------------------|--------------------------------------------------------|----------------------------------------------------|------------------------------------------------| -| Client to Server | ${tcp_client2server_sum_received_bits_per_second} | ${tcp_client2server_sum_sent_bits_per_second} | ${tcp_client2server_sum_sent_retransmits} | -| Server to Client | ${tcp_server2client_sum_received_bits_per_second} | ${tcp_server2client_sum_sent_bits_per_second} | ${tcp_server2client_sum_sent_retransmits} | - -### UDP - -| Direction | Total/s | Jitter | Lost | -|------------------|-----------------------------------------------|-----------------------------------------|--------------------------------------------| -| Client to Server | ${udp_client2server_sum_bits_per_second} | ${udp_client2server_sum_jitter_ms} | ${udp_server2client_sum_lost_percent} | -| Server to Client | ${udp_server2client_sum_bits_per_second} | ${udp_server2client_sum_jitter_ms} | ${udp_client2server_sum_lost_percent} | - -`; - // 3. Update previous comment or create new one if (botComment) { github.rest.issues.updateComment({ diff --git a/scripts/tests/perf/test.sh b/scripts/tests/perf/test.sh deleted file mode 100755 index 529b9991f..000000000 --- a/scripts/tests/perf/test.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash - -set -euox pipefail - -mkdir -p iperf3results - -docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 -t 30 -b 1G -R -c 172.20.0.110 --json' >>iperf3results/tcp_server2client.json - -docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 -t 30 -b 1G -c 172.20.0.110 --json' >>iperf3results/tcp_client2server.json - -docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 -t 30 -u -b 1G -R -c 172.20.0.110 --json' >>iperf3results/udp_server2client.json - -docker compose exec --env RUST_LOG=info -it client /bin/sh -c 'iperf3 -t 30 -u -b 1G -c 172.20.0.110 --json' >>iperf3results/udp_client2server.json