feat(gateway): add flow-logs MVP (#10576)

Network flow logs are a common feature of VPNs. Due to the nature of a
shared exit node, it is of great interest to a network analyst, which
TCP connections are getting routed through the tunnel, who is initiating
them, how long they last, and how much traffic is sent across them.

With this PR, the Firezone Gateway gains the ability to detect the
TCP and UDP flows that are being routed through it. The information we
want to attach to these flows is spread out over several layers of the
packet handling code. To simplify the implementation and not complicate
the APIs unnecessarily, we chose to rely on TLS (thread-local storage)
for gathering all the necessary data as a packet gets passed through the
various layers. When using a const initializer, the overhead of a TLS
variable over an actual local variable is basically zero. The entire
routing state of the Gateway is also never sent across any threads,
making TLS variables a particularly good choice for this problem.

In its MVP form, the detected flows are only emitted on stdout, and
only if `flow_logs=trace` is set using `RUST_LOG`. Early adopters
of this feature are encouraged to enable these logs as described and
then ingest the Gateway's logs into the SIEM of their choice for further
analysis.

Related: #8353
This commit is contained in:
Thomas Eizinger
2025-10-22 14:10:21 +11:00
committed by GitHub
parent 80331b4e93
commit 6a538368cb
12 changed files with 1124 additions and 29 deletions

View File

@@ -0,0 +1,37 @@
#!/usr/bin/env bash

# Starts three downloads in parallel through the tunnel and then checks that
# the gateway reported exactly three TCP flows, one per download.

source "./scripts/tests/lib.sh"

# Kick off all downloads first so that they run concurrently.
pids=()
for n in 1 2 3; do
    client sh -c "curl --fail --max-time 10 --output /tmp/download${n}.file http://download.httpbin/bytes?num=5000000" &
    pids+=("$!")
done

# Reap them in launch order; the first failing download aborts the test.
for n in 1 2 3; do
    if ! wait "${pids[n - 1]}"; then
        echo "Download ${n} failed"
        exit 1
    fi
done

# Pause before collecting the flow logs (presumably so the gateway has
# emitted the "flow completed" lines by then).
sleep 3

readarray -t flows < <(get_flow_logs "tcp")

assert_eq "${#flows[@]}" 3

# Each flow must point at the download server and carry the full payload.
for entry in "${flows[@]}"; do
    assert_eq "$(get_flow_field "$entry" "inner_dst_ip")" "172.21.0.101"
    assert_gteq "$(get_flow_field "$entry" "rx_bytes")" 5000000
done

View File

@@ -43,3 +43,24 @@ if [[ "$computed_checksum" != "$known_checksum" ]]; then
echo "Checksum of downloaded file does not match"
exit 1
fi
# Pause before collecting the flow logs (presumably so the gateway has
# emitted the "flow completed" lines by then).
sleep 3

readarray -t flows < <(get_flow_logs "tcp")

assert_gteq "${#flows[@]}" 2

# All flows should have same inner_dst_ip
for entry in "${flows[@]}"; do
    assert_eq "$(get_flow_field "$entry" "inner_dst_ip")" "172.21.0.101"
done

# Verify different outer_src_port after roaming (network change)
# The docker-compose setup uses routers and therefore the source IP is always the router.
# But conntrack on the router will allocate a new source port because the binding on the old one is still active after roaming.
first_src_port=$(get_flow_field "${flows[0]}" "outer_src_port")

# Compare every flow after the first one against the first flow's port.
for later in "${flows[@]:1}"; do
    assert_ne "$first_src_port" "$(get_flow_field "$later" "outer_src_port")"
done

24
scripts/tests/download-rst.sh Executable file
View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash

# Aborts a rate-limited download mid-transfer and checks that the gateway
# still reports the TCP flow(s) with a plausible amount of received bytes.

source "./scripts/tests/lib.sh"

# 2 seconds are not enough at the given speed to download the file, curl will therefore abort and RST the connection.
client sh -c "curl --max-time 2 --limit-rate 1000000 --no-keepalive --parallel-max 1 --output /dev/null http://download.httpbin/bytes?num=100000000" &
DOWNLOAD_PID=$!

wait "$DOWNLOAD_PID" || true # The download fails but we want to continue.

# Pause before collecting the flow logs (presumably so the gateway has
# emitted the "flow completed" lines by then).
sleep 3

readarray -t flows < <(get_flow_logs "tcp")

assert_gteq "${#flows[@]}" 1

# Total bytes received across all reported flows.
rx_bytes=0

# All flows should have same inner_dst_ip
for flow in "${flows[@]}"; do
    assert_eq "$(get_flow_field "$flow" "inner_dst_ip")" "172.21.0.101"

    flow_rx_bytes="$(get_flow_field "$flow" "rx_bytes")"

    # Use arithmetic expansion: `+=` on a plain (non-integer) variable appends
    # the digits as a string instead of adding the numbers.
    # A missing field (empty string) counts as 0 bytes.
    rx_bytes=$((rx_bytes + ${flow_rx_bytes:-0}))
done

assert_gteq "$rx_bytes" 2000000

View File

@@ -18,3 +18,12 @@ if [[ "$computed_checksum" != "$known_checksum" ]]; then
echo "Checksum of downloaded file does not match"
exit 1
fi
# Pause before collecting the flow logs (presumably so the gateway has
# emitted the "flow completed" lines by then).
sleep 3

readarray -t flows < <(get_flow_logs "tcp")

# Exactly one TCP flow is expected for the single download.
assert_eq "${#flows[@]}" 1

assert_eq "$(get_flow_field "${flows[0]}" "inner_dst_ip")" "172.21.0.101"
assert_gteq "$(get_flow_field "${flows[0]}" "rx_bytes")" 10000000

View File

@@ -50,7 +50,7 @@ Domain.PubSub.Account.broadcast(account_id, {{:reject_access, gateway_id}, clien
"
}
function assert_equals() {
function assert_eq() {
local actual="$1"
local expected="$2"
@@ -60,6 +60,26 @@ function assert_equals() {
fi
}
# Assert that two values differ.
# Usage: assert_ne <actual> <expected>
# Prints a message and exits with status 1 when both values are equal.
function assert_ne() {
    local lhs="$1"
    local rhs="$2"

    # Happy path first: the values differ, nothing to report.
    [[ "$rhs" != "$lhs" ]] && return 0

    echo "Expected values to differ but both are $lhs"
    exit 1
}
# Assert that an integer is greater than or equal to another integer.
# Usage: assert_gteq <actual> <expected>
# Prints a message and exits with status 1 when the assertion does not hold
# or when either argument is not an integer.
function assert_gteq() {
    local actual="$1"
    local expected="$2"
    local integer_re='^[-+]?[0-9]+$'

    # Without this check, an empty or non-numeric value makes `[ ... -lt ... ]`
    # error out with status 2, which the `if` below would treat as "not less
    # than" and the assertion would silently pass.
    if ! [[ "$actual" =~ $integer_re && "$expected" =~ $integer_re ]]; then
        echo "Expected integer values but got '$actual' and '$expected'"
        exit 1
    fi

    # `[` parses its operands as decimal, so values with leading zeros are
    # not misread as octal.
    if [ "$actual" -lt "$expected" ]; then
        echo "Expected $actual to be greater than or equal to $expected"
        exit 1
    fi
}
function process_state() {
local container="$1"
@@ -70,7 +90,7 @@ function assert_process_state {
local container="$1"
local expected_state="$2"
assert_equals "$(process_state "$container")" "$expected_state"
assert_eq "$(process_state "$container")" "$expected_state"
}
function create_token_file {
@@ -96,3 +116,23 @@ function expect_error() {
return 0
fi
}
# Print the gateway's "flow completed" log lines for one protocol.
# Only the last 30 seconds of `docker compose logs gateway` are searched.
# Prints nothing and still returns 0 when no line matches.
# Usage: readarray -t flows < <(get_flow_logs "tcp")
function get_flow_logs() {
    local protocol="$1"
    local pattern="flow_logs::${protocol}.*flow completed"

    # A failing pipeline (e.g. grep finding no match) must not fail the caller.
    if ! docker compose logs gateway --since 30s 2>/dev/null | grep "$pattern"; then
        return 0
    fi
}
# Print the value of a `name=value` field from a single flow log line.
# The value is everything after `<field_name>=` up to the next space.
# Prints an empty line when the field is not found.
# Usage: get_flow_field <flow_log_line> <field_name>
# Example: get_flow_field "$flow" "inner_dst_ip"
function get_flow_field() {
    local flow_log="$1"
    local field_name="$2"
    # `\K` drops the `<field_name>=` prefix from the reported match.
    local regex="${field_name}=\K[^ ]+"

    if ! echo "$flow_log" | grep -oP "$regex"; then
        echo ""
    fi
}