mirror of
https://github.com/optim-enterprises-bv/vault.git
synced 2025-10-30 18:17:55 +00:00
VAULT-30819: verify DR secondary leader before unsealing followers (#28459)
* VAULT-30819: verify DR secondary leader before unsealing followers. After we've enabled DR replication on the secondary leader, the existing cluster followers will be resealed with the primary cluster's encryption keys. We have to unseal the followers to make them available. To ensure that we absolutely take every precaution before attempting to unseal the followers, we now verify that the secondary leader is the cluster leader, has a valid Merkle tree, and is streaming WALs from the primary cluster before we attempt to unseal the secondary followers. Signed-off-by: Ryan Cragun <me@ryan.ec>
This commit is contained in:
@@ -815,7 +815,7 @@ scenario "dev_pr_replication" {
|
||||
Depending on how we're configured we'll pass the unseal keys according to this guide:
|
||||
https://developer.hashicorp.com/vault/docs/enterprise/replication#seals
|
||||
EOF
|
||||
module = module.vault_unseal_nodes
|
||||
module = module.vault_unseal_replication_followers
|
||||
depends_on = [
|
||||
step.create_primary_cluster,
|
||||
step.create_secondary_cluster,
|
||||
|
||||
@@ -256,8 +256,8 @@ module "vault_test_ui" {
|
||||
ui_run_tests = var.ui_run_tests
|
||||
}
|
||||
|
||||
module "vault_unseal_nodes" {
|
||||
source = "./modules/vault_unseal_nodes"
|
||||
module "vault_unseal_replication_followers" {
|
||||
source = "./modules/vault_unseal_replication_followers"
|
||||
|
||||
vault_install_dir = var.vault_install_dir
|
||||
}
|
||||
|
||||
@@ -814,7 +814,11 @@ scenario "dr_replication" {
|
||||
enos = local.enos_provider[matrix.distro]
|
||||
}
|
||||
|
||||
verifies = quality.vault_api_sys_replication_dr_secondary_enable_write
|
||||
verifies = [
|
||||
quality.vault_api_sys_leader_read,
|
||||
quality.vault_api_sys_replication_dr_secondary_enable_write,
|
||||
quality.vault_api_sys_replication_dr_status_read,
|
||||
]
|
||||
|
||||
variables {
|
||||
ip_version = matrix.ip_version
|
||||
@@ -834,7 +838,7 @@ scenario "dr_replication" {
|
||||
type combinations. See the guide for more information:
|
||||
https://developer.hashicorp.com/vault/docs/enterprise/replication#seals
|
||||
EOF
|
||||
module = module.vault_unseal_nodes
|
||||
module = module.vault_unseal_replication_followers
|
||||
depends_on = [
|
||||
step.configure_dr_replication_secondary
|
||||
]
|
||||
@@ -883,7 +887,11 @@ scenario "dr_replication" {
|
||||
and ensuring that all secondary nodes are unsealed.
|
||||
EOF
|
||||
module = module.vault_verify_dr_replication
|
||||
depends_on = [step.configure_dr_replication_secondary]
|
||||
depends_on = [
|
||||
step.configure_dr_replication_secondary,
|
||||
step.unseal_secondary_followers,
|
||||
step.verify_secondary_cluster_is_unsealed_after_enabling_replication,
|
||||
]
|
||||
|
||||
providers = {
|
||||
enos = local.enos_provider[matrix.distro]
|
||||
|
||||
@@ -820,7 +820,7 @@ scenario "pr_replication" {
|
||||
type combinations. See the guide for more information:
|
||||
https://developer.hashicorp.com/vault/docs/enterprise/replication#seals
|
||||
EOF
|
||||
module = module.vault_unseal_nodes
|
||||
module = module.vault_unseal_replication_followers
|
||||
depends_on = [
|
||||
step.create_primary_cluster,
|
||||
step.create_secondary_cluster,
|
||||
|
||||
@@ -42,14 +42,16 @@ variable "vault_root_token" {
|
||||
type = string
|
||||
description = "The vault root token"
|
||||
}
|
||||
resource "enos_remote_exec" "configure_dr_primary" {
|
||||
|
||||
// Enable DR replication on the primary. This will immediately clear all data in the secondary.
|
||||
resource "enos_remote_exec" "enable_dr_replication" {
|
||||
environment = {
|
||||
VAULT_ADDR = var.vault_addr
|
||||
VAULT_TOKEN = var.vault_root_token
|
||||
VAULT_INSTALL_DIR = var.vault_install_dir
|
||||
}
|
||||
|
||||
scripts = [abspath("${path.module}/scripts/configure-vault-dr-primary.sh")]
|
||||
scripts = [abspath("${path.module}/scripts/enable.sh")]
|
||||
|
||||
transport = {
|
||||
ssh = {
|
||||
|
||||
@@ -58,7 +58,7 @@ variable "wrapping_token" {
|
||||
description = "The wrapping token created on primary cluster"
|
||||
}
|
||||
|
||||
resource "enos_remote_exec" "configure_pr_secondary" {
|
||||
resource "enos_remote_exec" "enable_replication" {
|
||||
environment = {
|
||||
VAULT_ADDR = var.vault_addr
|
||||
VAULT_TOKEN = var.vault_root_token
|
||||
@@ -72,3 +72,43 @@ resource "enos_remote_exec" "configure_pr_secondary" {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wait until var.secondary_leader_host is the cluster "leader", meaning it is running and has
// completed all of its "setup" tasks. The follower nodes have to be unsealed once this has happened.
module "wait_for_leader" {
  source     = "../vault_wait_for_leader"
  depends_on = [enos_remote_exec.enable_replication]

  // Only the secondary leader is inspected, so the host map has a single entry.
  hosts             = { "0" = var.secondary_leader_host }
  ip_version        = var.ip_version
  vault_addr        = var.vault_addr
  vault_install_dir = var.vault_install_dir
  vault_root_token  = var.vault_root_token
}
|
||||
|
||||
// Make sure the leader is ready for us to unseal the follower nodes. The script runs over SSH on
// the secondary leader host and only starts after module.wait_for_leader has finished.
resource "enos_remote_exec" "wait_for_leader_ready" {
  depends_on = [module.wait_for_leader]

  scripts = [abspath("${path.module}/scripts/wait-for-leader-ready.sh")]

  environment = {
    VAULT_ADDR        = var.vault_addr
    VAULT_INSTALL_DIR = var.vault_install_dir
    VAULT_TOKEN       = var.vault_root_token
    REPLICATION_TYPE  = var.replication_type
    RETRY_INTERVAL    = 3  // seconds
    TIMEOUT_SECONDS   = 60 // seconds
  }

  transport = {
    ssh = {
      host = var.secondary_leader_host.public_ip
    }
  }
}
|
||||
|
||||
@@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env bash
|
||||
# Copyright (c) HashiCorp, Inc.
|
||||
# SPDX-License-Identifier: BUSL-1.1
|
||||
|
||||
# Exit as soon as an unguarded command fails. The fail helper below only returns 1, so it is this
# option that actually terminates the script when a required setting is missing.
set -e
|
||||
|
||||
#######################################
# Print an error message on stderr and report failure to the caller.
# Arguments: $1 - the message to print
# Outputs:   the message, followed by a newline, on stderr
# Returns:   always 1 (the script only stops here when errexit is in effect)
#######################################
fail() {
  # printf rather than echo so messages such as "-n" or "-e" are printed verbatim
  # instead of being interpreted as echo options.
  printf '%s\n' "$1" 1>&2
  return 1
}
|
||||
|
||||
# All configuration is read from the environment; report the first setting that is empty or unset.
for required_var in \
  REPLICATION_TYPE \
  RETRY_INTERVAL \
  TIMEOUT_SECONDS \
  VAULT_ADDR \
  VAULT_INSTALL_DIR \
  VAULT_TOKEN; do
  if [[ -z "${!required_var}" ]]; then
    fail "${required_var} env variable has not been set"
  fi
done

# The vault CLI must exist and be executable inside the install directory.
binpath="${VAULT_INSTALL_DIR}/vault"
if [[ ! -x "$binpath" ]]; then
  fail "unable to locate vault binary at $binpath"
fi

# Exported so the vault commands run below inherit it; their output is fed to jq.
export VAULT_FORMAT=json
|
||||
|
||||
#######################################
# Print the `.data` member of the replication status for the configured replication type.
# Globals:   binpath (read), REPLICATION_TYPE (read)
# Outputs:   the jq-filtered status document on stdout
# Returns:   non-zero when either the vault read or the jq filter fails
#######################################
replicationStatus() {
  local response

  # Run vault outside of a pipeline: without pipefail a `vault | jq` pipeline reports only
  # jq's exit status, which hides a failed read from callers that check our return code.
  if ! response=$("$binpath" read "sys/replication/${REPLICATION_TYPE}/status"); then
    return 1
  fi

  jq .data <<< "$response"
}
|
||||
|
||||
#######################################
# Decide from the replication status whether the followers can be unsealed.
# Globals:   REPLICATION_TYPE (read; used in messages)
# Outputs:   a diagnostic or success message on stderr
# Returns:   0 when the state is "stream-wals", the mode is "secondary" and
#            corrupted_merkle_tree is false; 1 otherwise or when the status
#            cannot be fetched
#######################################
isReady() {
  local status
  if ! status=$(replicationStatus); then
    return 1
  fi

  # jq -e turns a false/null result into a non-zero exit status, so each filter is a boolean test.
  if ! jq -eMc '.state == "stream-wals"' &> /dev/null <<< "$status"; then
    echo "${REPLICATION_TYPE} replication is not yet running, got: $(jq '.state' <<< "$status")" 1>&2
    return 1
  fi

  if ! jq -eMc '.mode == "secondary"' &> /dev/null <<< "$status"; then
    echo "${REPLICATION_TYPE} replication mode is not yet secondary, got: $(jq '.mode' <<< "$status")" 1>&2
    return 1
  fi

  # A missing corrupted_merkle_tree field compares unequal to false and is therefore rejected too.
  if ! jq -eMc '.corrupted_merkle_tree == false' &> /dev/null <<< "$status"; then
    echo "${REPLICATION_TYPE} replication merkle is corrupted" 1>&2
    return 1
  fi

  echo "${REPLICATION_TYPE} primary is ready for followers to be unsealed!" 1>&2
  return 0
}
|
||||
|
||||
# Poll isReady until it succeeds or TIMEOUT_SECONDS have elapsed since the loop started,
# pausing RETRY_INTERVAL seconds between attempts.
begin_time=$(date +%s)
end_time=$((begin_time + TIMEOUT_SECONDS))
while (($(date +%s) < end_time)); do
  # A failed probe is not fatal; only success leaves the loop early.
  isReady && exit 0

  sleep "$RETRY_INTERVAL"
done

# The deadline passed: include one last status snapshot in the error for debugging.
fail "Timed out waiting for ${REPLICATION_TYPE} primary to ready: $(replicationStatus)"
|
||||
Reference in New Issue
Block a user