Files
vault/enos/modules/vault_setup_replication_secondary/scripts/wait-for-leader-ready.sh
Ryan Cragun c8c51b1b9d VAULT-30819: verify DR secondary leader before unsealing followers (#28459)
* VAULT-30819: verify DR secondary leader before unsealing followers

After we've enabled DR replication on the secondary leader, the existing
cluster followers will be resealed with the primary cluster's encryption
keys. We have to unseal the followers to make them available. To take
every precaution before attempting to unseal the followers, we now verify
that the secondary leader is the cluster leader, has a valid merkle tree,
and is streaming WALs from the primary cluster before we attempt to unseal
the secondary followers.

Signed-off-by: Ryan Cragun <me@ryan.ec>
2024-09-24 09:13:40 -06:00

66 lines
1.9 KiB
Bash

#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1
# Abort the script as soon as any unguarded command returns non-zero.
set -o errexit

# fail writes its first argument to stderr and returns 1.
# It does not call exit itself: at the top level the non-zero return is what
# trips errexit and terminates the script.
fail() {
  local message="$1"
  echo "$message" >&2
  return 1
}
# Every input to this script arrives through the environment. Check them in a
# fixed order and stop at the first one that is unset or empty.
for required_var in \
  REPLICATION_TYPE \
  RETRY_INTERVAL \
  TIMEOUT_SECONDS \
  VAULT_ADDR \
  VAULT_INSTALL_DIR \
  VAULT_TOKEN; do
  # ${!name} is indirect expansion: the value of the variable named by $name.
  if [[ -z "${!required_var}" ]]; then
    fail "${required_var} env variable has not been set"
  fi
done

# The vault binary is expected directly inside VAULT_INSTALL_DIR.
binpath="${VAULT_INSTALL_DIR}/vault"
if ! test -x "$binpath"; then
  fail "unable to locate vault binary at $binpath"
fi

# Exported for the vault CLI invoked below, whose output is piped into jq.
export VAULT_FORMAT=json
# replicationStatus reads sys/replication/${REPLICATION_TYPE}/status with the
# vault binary at $binpath and prints the `.data` member of the response to
# stdout.
#
# Globals: binpath (read), REPLICATION_TYPE (read)
# Returns: non-zero if the vault read fails or if jq cannot process its output.
replicationStatus() {
  local response

  # Capture the vault output first instead of piping it straight into jq.
  # Without pipefail a pipeline reports only the status of its last command,
  # so a failed read would be hidden behind jq succeeding on empty input.
  if ! response=$("$binpath" read "sys/replication/${REPLICATION_TYPE}/status"); then
    return 1
  fi

  jq .data <<< "$response"
}
# isReady checks a single replication status snapshot and reports whether
# followers may be unsealed.
#
# The snapshot must satisfy all of:
#   - .state is "stream-wals"
#   - .mode is "secondary"
#   - .corrupted_merkle_tree is false
#
# Globals: REPLICATION_TYPE (read, used in log messages)
# Outputs: one diagnostic line on stderr describing the first failed check, or
#          a success line when every check passes. Nothing on stdout.
# Returns: 0 when ready; 1 when the status could not be read or a check failed.
isReady() {
  local status
  if ! status=$(replicationStatus); then
    return 1
  fi

  # jq -e maps a false/null result to a non-zero exit status, so each filter
  # can be used directly as a shell condition.
  if ! jq -eMc '.state == "stream-wals"' &> /dev/null <<< "$status"; then
    echo "${REPLICATION_TYPE} replication is not yet running, got: $(jq '.state' <<< "$status")" 1>&2
    return 1
  fi

  if ! jq -eMc '.mode == "secondary"' &> /dev/null <<< "$status"; then
    echo "${REPLICATION_TYPE} replication mode is not yet secondary, got: $(jq '.mode' <<< "$status")" 1>&2
    return 1
  fi

  # A missing field compares as null, which is not equal to false, so an
  # absent corrupted_merkle_tree is also treated as not ready.
  if ! jq -eMc '.corrupted_merkle_tree == false' &> /dev/null <<< "$status"; then
    echo "${REPLICATION_TYPE} replication merkle is corrupted" 1>&2
    return 1
  fi

  echo "${REPLICATION_TYPE} primary is ready for followers to be unsealed!" 1>&2
  return 0
}
# Poll isReady until it succeeds or TIMEOUT_SECONDS have elapsed, pausing
# RETRY_INTERVAL between attempts. The deadline is only checked before each
# attempt, so an attempt that has already started is allowed to finish.
start_epoch=$(date +%s)
deadline=$((start_epoch + TIMEOUT_SECONDS))

while (($(date +%s) < deadline)); do
  isReady && exit 0
  sleep "$RETRY_INTERVAL"
done

# Out of time: include one last status snapshot in the error for debugging.
fail "Timed out waiting for ${REPLICATION_TYPE} primary to ready: $(replicationStatus)"