mirror of
https://github.com/outbackdingo/firezone.git
synced 2026-01-27 18:18:55 +00:00
In #6876, we added functionality that would only make use of new remote candidates whilst we haven't nominated a socket yet with the remote. The reason for that was that in the described edge-case where relays reboot or get replaced whilst the client is partitioned from the portal (or we experience a connection hiccup), only one of the two peers, i.e. Client or Gateway, would migrate to the new relay, leaving the other one in an inconsistent state. Looking at recent customer logs, I've been seeing a lot of these messages: "Unknown connection or socket has already been nominated". For this particular customer, these are then very quickly followed by ICE timeouts, leaving the connection unusable. Considering that, I no longer think that the above change was a good idea and we should instead always make use of all candidates that we are given. What we are seeing is that in deployment scenarios where the latency of the link between Client and Gateway is very low (5-10ms) yet the latency to the portal is higher (~30-50ms), we trigger a race condition where we are temporarily nominating a _peer-reflexive_ candidate pair instead of a regular one. This happens because with such a low-latency link, Client and Gateway are _faster_ in sending back and forth several STUN bindings than the control plane is in delivering all the candidates. Due to the functionality added in #6876, this then results in us not accepting the candidates. It further appears that a nominated peer-reflexive candidate does not provide a stable connection, which is why we then run into an ICE timeout, requiring Firezone to establish a new connection only to have the same thing happen again. This is very disruptive for the user experience as the connection only works for a few moments at a time. With #9793, we have actually added a feature that is also at play here. 
Now that we don't immediately act on an ICE timeout, it is actually possible for both Client and Gateway to migrate a connection to a different relay, should the one that they are using get disconnected. In #9793, we added a timeout of 2s for this. To make this fully work, we need to patch str0m to transition to `Checking` early. Presently, str0m would directly transition from `Disconnected` to `Connected` in this case which in some of the high-latency scenarios that we are testing in CI is not enough to recover the connection within 2s. By transitioning to `Checking` early, we abort this timer. Related: https://github.com/algesten/str0m/pull/676
149 lines
5.9 KiB
YAML
149 lines
5.9 KiB
YAML
---
# Workflow-level settings shared by every job below.
name: Rust

# The only trigger is `workflow_call`, i.e. this file is invoked from other workflows.
'on':
  workflow_call:

# Every `run` step starts in ./rust unless the step sets its own working directory.
defaults:
  run:
    working-directory: ./rust

permissions:
  contents: 'read'
  id-token: 'write'

# Never tolerate warnings. Duplicated in `_tauri.yml`
env:
  RUSTFLAGS: '-Dwarnings --cfg tokio_unstable'
  RUSTDOCFLAGS: '-D warnings'
|
|
|
|
jobs:
|
|
# Lints, docs, formatting, unused-dependency and dependency-policy checks,
# run once per runner listed in the matrix.
static-analysis:
  name: static-analysis-${{ matrix.runs-on }}
  runs-on: ${{ matrix.runs-on }}
  strategy:
    fail-fast: false
    matrix:
      # TODO: https://github.com/rust-lang/cargo/issues/5220
      runs-on:
        - ubuntu-22.04-xlarge
        - macos-14-xlarge
        - windows-2022-xlarge
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
    # The outputs of this step are referenced by the cargo commands further down.
    - id: setup-rust
      uses: ./.github/actions/setup-rust
    - timeout-minutes: 10
      uses: ./.github/actions/setup-tauri-v2
    - uses: taiki-e/install-action@c99cc51b309eee71a866715cfa08c922f11cf898  # v2.56.19
      with:
        tool: cargo-udeps,cargo-deny
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # bpf-linker is only installed on Linux runners.
    - if: ${{ runner.os == 'Linux' }}
      uses: taiki-e/install-action@c99cc51b309eee71a866715cfa08c922f11cf898  # v2.56.19
      with:
        tool: bpf-linker
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

    - name: cargo clippy
      shell: bash
      run: >-
        cargo clippy --all-targets --all-features
        ${{ steps.setup-rust.outputs.compile-packages }}
    - name: cargo doc
      shell: bash
      run: >-
        cargo doc --all-features --no-deps --document-private-items
        ${{ steps.setup-rust.outputs.compile-packages }}
    - run: cargo fmt -- --check
    # udeps is invoked through the nightly toolchain reported by the setup-rust step.
    - name: cargo udeps
      run: >-
        cargo +${{ steps.setup-rust.outputs.nightly_version }} udeps
        --all-targets --all-features
        ${{ steps.setup-rust.outputs.compile-packages }}
    - shell: bash
      run: cargo deny check --hide-inclusion-graph --deny unnecessary-skip
|
|
|
|
# Runs the Rust test suite on every runner in the matrix and then greps the
# generated test-case logs to confirm that specific code paths were exercised.
test:
  name: test-${{ matrix.runs-on }}
  strategy:
    fail-fast: false
    matrix:
      # TODO: https://github.com/rust-lang/cargo/issues/5220
      runs-on:
        - ubuntu-22.04-xlarge
        - ubuntu-24.04-xlarge
        - macos-13-xlarge
        - macos-14-xlarge
        - macos-15-xlarge
        - windows-2022-xlarge
        - windows-2025-xlarge
  runs-on: ${{ matrix.runs-on }}
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
    # The `test-packages` output of this step is consumed by `cargo test` below.
    - uses: ./.github/actions/setup-rust
      id: setup-rust
    # Capped at 10 minutes like the same step in the `static-analysis` and
    # `headless-client` jobs, so a hung setup fails fast instead of running
    # until the job-level timeout.
    - uses: ./.github/actions/setup-tauri-v2
      timeout-minutes: 10
    # ripgrep provides the `rg` command used by the log checks below.
    - uses: taiki-e/install-action@c99cc51b309eee71a866715cfa08c922f11cf898  # v2.56.19
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:
        tool: ripgrep
    # bpf-linker is only installed on Linux runners.
    - uses: taiki-e/install-action@c99cc51b309eee71a866715cfa08c922f11cf898  # v2.56.19
      if: ${{ runner.os == 'Linux' }}
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:
        tool: bpf-linker
    - name: "cargo test"
      shell: bash
      run: |
        set -x

        # First, run all tests.
        cargo test --all-features ${{ steps.setup-rust.outputs.test-packages }} -- --include-ignored --nocapture

        # Poor man's test coverage testing: Grep the generated logs for specific patterns / lines.
        rg --count --no-ignore SendIcmpPacket "$TESTCASES_DIR"
        rg --count --no-ignore SendUdpPacket "$TESTCASES_DIR"
        rg --count --no-ignore ConnectTcp "$TESTCASES_DIR"
        rg --count --no-ignore SendDnsQueries "$TESTCASES_DIR"
        rg --count --no-ignore "Packet for DNS resource" "$TESTCASES_DIR"
        rg --count --no-ignore "Packet for CIDR resource" "$TESTCASES_DIR"
        rg --count --no-ignore "Packet for Internet resource" "$TESTCASES_DIR"
        rg --count --no-ignore "Truncating DNS response" "$TESTCASES_DIR"
        rg --count --no-ignore "ICMP Error error=V4Unreachable" "$TESTCASES_DIR"
        rg --count --no-ignore "ICMP Error error=V6Unreachable" "$TESTCASES_DIR"
        rg --count --no-ignore "ICMP Error error=V4TimeExceeded" "$TESTCASES_DIR"
        rg --count --no-ignore "ICMP Error error=V6TimeExceeded" "$TESTCASES_DIR"
        rg --count --no-ignore "Forwarding query for DNS resource to corresponding site" "$TESTCASES_DIR"
        rg --count --no-ignore "Revoking resource authorization" "$TESTCASES_DIR"

        # Make sure we are recovering from ICE disconnect
        rg --count --no-ignore "State change \(got new possible\): Disconnected -> Checking" "$TESTCASES_DIR"
      env:
        # <https://github.com/rust-lang/cargo/issues/5999>
        # Needed to create tunnel interfaces in unit tests
        CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER: "sudo --preserve-env"
        PROPTEST_VERBOSE: 0  # Otherwise the output is very long.
        # Default is only 256. Windows is very slow in GitHub Actions, so only run the regression cases there.
        PROPTEST_CASES: ${{ runner.os == 'Windows' && '0' || '256' }}
        CARGO_PROFILE_TEST_OPT_LEVEL: 1  # Otherwise the tests take forever.
        # Directory searched by the `rg` checks above; relative to the step's working directory.
        TESTCASES_DIR: "connlib/tunnel/testcases"
|
|
|
|
# Runs one headless-client test script per matrix entry; each entry pairs a
# runner with the script to execute from `scripts/tests/`.
headless-client:
  name: headless-client-${{ matrix.test }}-${{ matrix.runs-on }}
  runs-on: ${{ matrix.runs-on }}
  strategy:
    fail-fast: false
    matrix:
      include:
        - runs-on: windows-2022-xlarge
          test: token-path-windows.ps1
        - runs-on: windows-2025-xlarge
          test: token-path-windows.ps1
        - runs-on: ubuntu-22.04-xlarge
          test: linux-group.sh
        - runs-on: ubuntu-24.04-xlarge
          test: linux-group.sh
        - runs-on: ubuntu-22.04-xlarge
          test: token-path-linux.sh
        - runs-on: ubuntu-24.04-xlarge
          test: token-path-linux.sh
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
    - uses: ./.github/actions/setup-rust
    - timeout-minutes: 10
      uses: ./.github/actions/setup-tauri-v2
    # Runs from the repository root rather than the workflow's default ./rust.
    - name: 'test script'
      working-directory: ./
      run: scripts/tests/${{ matrix.test }}
|