mirror of
https://github.com/cozystack/cozystack.git
synced 2026-03-02 22:59:06 +00:00
fix(platform): fix upgrade issues in migrations, etcd timeout, and migration script (#2096)
## What this PR does
Fixes multiple upgrade issues discovered during v0.41.1 → v1.0 upgrade
testing.
**Migration 26 (monitoring → monitoring-system):**
- Use `cozystack.io/ui=true` label with
`--field-selector=metadata.name=monitoring` instead of
`apps.cozystack.io/application.kind=Monitoring` to find monitoring
HelmReleases — the old label is guaranteed to exist on v0.41.1 clusters,
while the new one depends on migration 22 having run
- Add `delete_helm_secrets` function with fallback deletion by secret
name pattern and post-deletion verification
**Migrations 28 and 29 (mysql→mariadb, virtual-machine split):**
- Wrap `grep` in pipes with `{ ... || true; }` to prevent `pipefail`
exit when grep filters out all lines
- Fix reconcile annotation in migration 29 to use RFC3339 timestamp
format instead of Unix epoch
- Remove protection-webhook handling from migration 29 — it is an
external component and should not be managed by cozystack migrations
**Migration 27 (piraeus CRD ownership):**
- Skip CRDs that don't exist instead of failing the entire migration
- Add name-pattern fallback for helm secret deletion
**etcd HelmRelease:**
- Increase timeout from 10m to 30m to accommodate TLS cert rotation hook
**migrate-to-version-1.0.sh:**
- Add missing ConfigMap → Package field mappings: `bundle-disable`,
`bundle-enable`, `expose-ingress`, `expose-services`
- Remove redundant bundle enabled flags — the variant already determines
them via its values file
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit
* **New Features**
* Package generation now supports disabled/enabled package lists,
ingress name, and exposed services for customized publishing.
* **Bug Fixes**
* More robust secret cleanup with fallback deletions and post-deletion
verification.
* Guarded pipelines to avoid failures when no resources match.
* Reconciliation timestamps now use RFC3339 UTC.
* Suspension failures are no longer silently suppressed.
* **Chores**
* Increased etcd upgrade timeout; improved namespace discovery,
relabeling behavior, and user-facing messaging.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -52,6 +52,10 @@ OIDC_ENABLED=$(echo "$COZYSTACK_CM" | jq -r '.data["oidc-enabled"] // "false"')
|
||||
KEYCLOAK_REDIRECTS=$(echo "$COZYSTACK_CM" | jq -r '.data["extra-keycloak-redirect-uri-for-dashboard"] // ""' )
|
||||
TELEMETRY_ENABLED=$(echo "$COZYSTACK_CM" | jq -r '.data["telemetry-enabled"] // "true"')
|
||||
BUNDLE_NAME=$(echo "$COZYSTACK_CM" | jq -r '.data["bundle-name"] // "paas-full"')
|
||||
BUNDLE_DISABLE=$(echo "$COZYSTACK_CM" | jq -r '.data["bundle-disable"] // ""')
|
||||
BUNDLE_ENABLE=$(echo "$COZYSTACK_CM" | jq -r '.data["bundle-enable"] // ""')
|
||||
EXPOSE_INGRESS=$(echo "$COZYSTACK_CM" | jq -r '.data["expose-ingress"] // "tenant-root"')
|
||||
EXPOSE_SERVICES=$(echo "$COZYSTACK_CM" | jq -r '.data["expose-services"] // ""')
|
||||
|
||||
# Certificate issuer configuration (old undocumented field: clusterissuer)
|
||||
OLD_CLUSTER_ISSUER=$(echo "$COZYSTACK_CM" | jq -r '.data["clusterissuer"] // ""')
|
||||
@@ -99,21 +103,24 @@ else
|
||||
EXTERNAL_IPS=$(echo "$EXTERNAL_IPS" | sed 's/,/\n/g' | awk 'BEGIN{print}{print " - "$0}')
|
||||
fi
|
||||
|
||||
# Determine bundle type
|
||||
case "$BUNDLE_NAME" in
|
||||
paas-full|distro-full)
|
||||
SYSTEM_ENABLED="true"
|
||||
SYSTEM_TYPE="full"
|
||||
;;
|
||||
paas-hosted|distro-hosted)
|
||||
SYSTEM_ENABLED="false"
|
||||
SYSTEM_TYPE="hosted"
|
||||
;;
|
||||
*)
|
||||
SYSTEM_ENABLED="false"
|
||||
SYSTEM_TYPE="hosted"
|
||||
;;
|
||||
esac
|
||||
# Convert comma-separated lists to YAML arrays
|
||||
if [ -z "$BUNDLE_DISABLE" ]; then
|
||||
DISABLED_PACKAGES="[]"
|
||||
else
|
||||
DISABLED_PACKAGES=$(echo "$BUNDLE_DISABLE" | sed 's/,/\n/g' | awk 'BEGIN{print}{print " - "$0}')
|
||||
fi
|
||||
|
||||
if [ -z "$BUNDLE_ENABLE" ]; then
|
||||
ENABLED_PACKAGES="[]"
|
||||
else
|
||||
ENABLED_PACKAGES=$(echo "$BUNDLE_ENABLE" | sed 's/,/\n/g' | awk 'BEGIN{print}{print " - "$0}')
|
||||
fi
|
||||
|
||||
if [ -z "$EXPOSE_SERVICES" ]; then
|
||||
EXPOSED_SERVICES_YAML="[]"
|
||||
else
|
||||
EXPOSED_SERVICES_YAML=$(echo "$EXPOSE_SERVICES" | sed 's/,/\n/g' | awk 'BEGIN{print}{print " - "$0}')
|
||||
fi
|
||||
|
||||
# Update bundle naming
|
||||
BUNDLE_NAME=$(echo "$BUNDLE_NAME" | sed 's/paas/isp/')
|
||||
@@ -141,8 +148,6 @@ echo " Root Host: $ROOT_HOST"
|
||||
echo " API Server Endpoint: $API_SERVER_ENDPOINT"
|
||||
echo " OIDC Enabled: $OIDC_ENABLED"
|
||||
echo " Bundle Name: $BUNDLE_NAME"
|
||||
echo " System Enabled: $SYSTEM_ENABLED"
|
||||
echo " System Type: $SYSTEM_TYPE"
|
||||
echo " Certificate Solver: ${SOLVER:-http01 (default)}"
|
||||
echo " Issuer Name: ${ISSUER_NAME:-letsencrypt-prod (default)}"
|
||||
echo ""
|
||||
@@ -160,15 +165,8 @@ spec:
|
||||
platform:
|
||||
values:
|
||||
bundles:
|
||||
system:
|
||||
enabled: $SYSTEM_ENABLED
|
||||
type: "$SYSTEM_TYPE"
|
||||
iaas:
|
||||
enabled: true
|
||||
paas:
|
||||
enabled: true
|
||||
naas:
|
||||
enabled: true
|
||||
disabledPackages: $DISABLED_PACKAGES
|
||||
enabledPackages: $ENABLED_PACKAGES
|
||||
networking:
|
||||
clusterDomain: "$CLUSTER_DOMAIN"
|
||||
podCIDR: "$POD_CIDR"
|
||||
@@ -177,6 +175,8 @@ spec:
|
||||
joinCIDR: "$JOIN_CIDR"
|
||||
publishing:
|
||||
host: "$ROOT_HOST"
|
||||
ingressName: "$EXPOSE_INGRESS"
|
||||
exposedServices: $EXPOSED_SERVICES_YAML
|
||||
apiServerEndpoint: "$API_SERVER_ENDPOINT"
|
||||
externalIPs: $EXTERNAL_IPS
|
||||
${CERTIFICATES_SECTION}
|
||||
|
||||
@@ -18,7 +18,7 @@ spec:
|
||||
name: cozystack-etcd-application-default-etcd
|
||||
namespace: cozy-system
|
||||
interval: 5m
|
||||
timeout: 10m
|
||||
timeout: 30m
|
||||
install:
|
||||
remediation:
|
||||
retries: -1
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
# Migration 26 --> 27
|
||||
# Migrate monitoring resources from extra/monitoring to system/monitoring
|
||||
# This migration re-labels resources so they become owned by monitoring-system HelmRelease
|
||||
# and deletes old helm release secrets so that helm does not diff old vs new chart manifests.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
@@ -35,10 +36,39 @@ relabel_resources() {
|
||||
done
|
||||
}
|
||||
|
||||
# Delete all helm release secrets for a given release name in a namespace.
# Uses both label selector and name-pattern matching to ensure complete cleanup.
#
# Arguments:
#   $1 - namespace containing the helm release secrets
#   $2 - helm release name
# Returns:
#   0 when no matching secrets remain; 1 when deletion could not be verified.
delete_helm_secrets() {
  local ns="$1"
  local release="$2"
  # Single source of truth for the helm storage-secret name pattern
  # (sh.helm.release.v1.<release>.vN); used for both the fallback lookup
  # and the post-deletion verification so the two cannot drift apart.
  local pattern="^secret/sh\.helm\.release\.v1\.${release}\."

  # Primary: delete by label selector (helm labels its storage secrets
  # with name=<release>,owner=helm).
  kubectl delete secrets -n "$ns" -l "name=${release},owner=helm" --ignore-not-found

  # Fallback: find and delete by name pattern (in case labels were modified).
  # grep exits 1 on no match, which would trip `set -euo pipefail`; the
  # `{ ... || true; }` guard treats "nothing found" as an empty result.
  local remaining
  remaining=$(kubectl get secrets -n "$ns" -o name | { grep "$pattern" || true; })
  if [ -n "$remaining" ]; then
    echo " Found secrets not matched by label selector, deleting by name..."
    echo "$remaining" | while IFS= read -r secret; do
      echo " Deleting $secret"
      kubectl delete -n "$ns" "$secret" --ignore-not-found
    done
  fi

  # Verify all secrets are gone; a leftover secret would make helm diff the
  # old release against the new chart, so fail loudly instead of proceeding.
  remaining=$(kubectl get secrets -n "$ns" -o name | { grep "$pattern" || true; })
  if [ -n "$remaining" ]; then
    echo " ERROR: Failed to delete helm release secrets:"
    echo "$remaining"
    return 1
  fi
}
|
||||
|
||||
# Find all tenant namespaces with monitoring HelmRelease
|
||||
echo "Finding tenant namespaces with monitoring HelmRelease..."
|
||||
NAMESPACES=$(kubectl get hr --all-namespaces -l apps.cozystack.io/application.kind=Monitoring \
|
||||
-o jsonpath='{range .items[*]}{.metadata.namespace}{"\n"}{end}' 2>/dev/null | sort -u || true)
|
||||
NAMESPACES=$(kubectl get hr --all-namespaces -l cozystack.io/ui=true --field-selector=metadata.name=monitoring \
|
||||
-o jsonpath='{range .items[*]}{.metadata.namespace}{"\n"}{end}' | sort -u)
|
||||
|
||||
if [ -z "$NAMESPACES" ]; then
|
||||
echo "No monitoring HelmReleases found in tenant namespaces, skipping migration"
|
||||
@@ -66,7 +96,7 @@ for ns in $NAMESPACES; do
|
||||
# Step 1: Suspend the HelmRelease
|
||||
echo ""
|
||||
echo "Step 1: Suspending HelmRelease monitoring..."
|
||||
kubectl patch hr -n "$ns" monitoring --type=merge -p '{"spec":{"suspend":true}}' 2>/dev/null || true
|
||||
kubectl patch hr -n "$ns" monitoring --type=merge -p '{"spec":{"suspend":true}}'
|
||||
|
||||
# Wait a moment for reconciliation to stop
|
||||
sleep 2
|
||||
@@ -74,7 +104,7 @@ for ns in $NAMESPACES; do
|
||||
# Step 2: Delete helm secrets for the monitoring release
|
||||
echo ""
|
||||
echo "Step 2: Deleting helm secrets for monitoring release..."
|
||||
kubectl delete secrets -n "$ns" -l name=monitoring,owner=helm --ignore-not-found
|
||||
delete_helm_secrets "$ns" "monitoring"
|
||||
|
||||
# Step 3: Relabel resources to be owned by monitoring-system
|
||||
echo ""
|
||||
@@ -121,7 +151,9 @@ for ns in $NAMESPACES; do
|
||||
echo "Processing Cozystack resources..."
|
||||
relabel_resources "$ns" "workloadmonitors.cozystack.io"
|
||||
|
||||
# Step 4: Delete the suspended HelmRelease (Flux won't delete resources when HR is suspended)
|
||||
# Step 4: Delete the suspended HelmRelease
|
||||
# Helm secrets are already gone, so flux finalizer will find no release to uninstall
|
||||
# and will simply remove the finalizer without deleting any resources.
|
||||
echo ""
|
||||
echo "Step 4: Deleting suspended HelmRelease monitoring..."
|
||||
kubectl delete hr -n "$ns" monitoring --ignore-not-found
|
||||
|
||||
@@ -5,10 +5,24 @@ set -euo pipefail
|
||||
|
||||
# Migrate Piraeus CRDs to piraeus-operator-crds Helm release
|
||||
for crd in linstorclusters.piraeus.io linstornodeconnections.piraeus.io linstorsatelliteconfigurations.piraeus.io linstorsatellites.piraeus.io; do
|
||||
kubectl annotate crd "$crd" meta.helm.sh/release-namespace=cozy-linstor meta.helm.sh/release-name=piraeus-operator-crds --overwrite
|
||||
kubectl label crd "$crd" app.kubernetes.io/managed-by=Helm helm.toolkit.fluxcd.io/namespace=cozy-linstor helm.toolkit.fluxcd.io/name=piraeus-operator-crds --overwrite
|
||||
if kubectl get crd "$crd" >/dev/null 2>&1; then
|
||||
echo " Relabeling CRD $crd"
|
||||
kubectl annotate crd "$crd" meta.helm.sh/release-namespace=cozy-linstor meta.helm.sh/release-name=piraeus-operator-crds --overwrite
|
||||
kubectl label crd "$crd" app.kubernetes.io/managed-by=Helm helm.toolkit.fluxcd.io/namespace=cozy-linstor helm.toolkit.fluxcd.io/name=piraeus-operator-crds --overwrite
|
||||
else
|
||||
echo " CRD $crd not found, skipping"
|
||||
fi
|
||||
done
|
||||
|
||||
# Delete old piraeus-operator helm secrets (by label and by name pattern)
|
||||
kubectl delete secret -n cozy-linstor -l name=piraeus-operator,owner=helm --ignore-not-found
|
||||
remaining=$(kubectl get secrets -n cozy-linstor -o name 2>/dev/null | { grep "^secret/sh\.helm\.release\.v1\.piraeus-operator\." || true; })
|
||||
if [ -n "$remaining" ]; then
|
||||
echo " Deleting remaining piraeus-operator helm secrets by name..."
|
||||
echo "$remaining" | while IFS= read -r secret; do
|
||||
kubectl delete -n cozy-linstor "$secret" --ignore-not-found
|
||||
done
|
||||
fi
|
||||
|
||||
# Stamp version
|
||||
kubectl create configmap -n cozy-system cozystack-version \
|
||||
|
||||
@@ -348,7 +348,7 @@ PVCEOF
|
||||
# --- 3g: Clone Secrets ---
|
||||
echo " --- Clone Secrets ---"
|
||||
for secret in $(kubectl -n "$NAMESPACE" get secret -o name 2>/dev/null \
|
||||
| grep "secret/${OLD_NAME}" | grep -v "sh.helm.release"); do
|
||||
| { grep "secret/${OLD_NAME}" || true; } | { grep -v "sh.helm.release" || true; }); do
|
||||
old_secret_name="${secret#secret/}"
|
||||
new_secret_name="${NEW_NAME}${old_secret_name#${OLD_NAME}}"
|
||||
clone_resource "$NAMESPACE" "secret" "$old_secret_name" "$new_secret_name" "$OLD_NAME" "$NEW_NAME"
|
||||
@@ -357,7 +357,7 @@ PVCEOF
|
||||
# --- 3h: Clone ConfigMaps ---
|
||||
echo " --- Clone ConfigMaps ---"
|
||||
for cm in $(kubectl -n "$NAMESPACE" get configmap -o name 2>/dev/null \
|
||||
| grep "configmap/${OLD_NAME}"); do
|
||||
| { grep "configmap/${OLD_NAME}" || true; }); do
|
||||
old_cm_name="${cm#configmap/}"
|
||||
new_cm_name="${NEW_NAME}${old_cm_name#${OLD_NAME}}"
|
||||
clone_resource "$NAMESPACE" "configmap" "$old_cm_name" "$new_cm_name" "$OLD_NAME" "$NEW_NAME"
|
||||
@@ -468,13 +468,13 @@ PVCEOF
|
||||
fi
|
||||
|
||||
for secret in $(kubectl -n "$NAMESPACE" get secret -o name 2>/dev/null \
|
||||
| grep "secret/${OLD_NAME}" | grep -v "sh.helm.release"); do
|
||||
| { grep "secret/${OLD_NAME}" || true; } | { grep -v "sh.helm.release" || true; }); do
|
||||
old_secret_name="${secret#secret/}"
|
||||
delete_resource "$NAMESPACE" "secret" "$old_secret_name"
|
||||
done
|
||||
|
||||
for cm in $(kubectl -n "$NAMESPACE" get configmap -o name 2>/dev/null \
|
||||
| grep "configmap/${OLD_NAME}"); do
|
||||
| { grep "configmap/${OLD_NAME}" || true; }); do
|
||||
old_cm_name="${cm#configmap/}"
|
||||
delete_resource "$NAMESPACE" "configmap" "$old_cm_name"
|
||||
done
|
||||
|
||||
@@ -9,8 +9,6 @@ set -euo pipefail
|
||||
OLD_PREFIX="virtual-machine"
|
||||
NEW_DISK_PREFIX="vm-disk"
|
||||
NEW_INSTANCE_PREFIX="vm-instance"
|
||||
PROTECTION_WEBHOOK_NAME="protection-webhook"
|
||||
PROTECTION_WEBHOOK_NS="protection-webhook"
|
||||
CDI_APISERVER_NS="cozy-kubevirt-cdi"
|
||||
CDI_APISERVER_DEPLOY="cdi-apiserver"
|
||||
CDI_VALIDATING_WEBHOOKS="cdi-api-datavolume-validate cdi-api-dataimportcron-validate cdi-api-populator-validate cdi-api-validate"
|
||||
@@ -88,7 +86,6 @@ echo " Total: ${#INSTANCES[@]} instance(s)"
|
||||
# STEP 2: Migrate each instance
|
||||
# ============================================================
|
||||
ALL_PV_NAMES=()
|
||||
ALL_PROTECTED_RESOURCES=()
|
||||
|
||||
for entry in "${INSTANCES[@]}"; do
|
||||
NAMESPACE="${entry%%/*}"
|
||||
@@ -315,7 +312,7 @@ PVCEOF
|
||||
# --- 2i: Clone Secrets ---
|
||||
echo " --- Clone Secrets ---"
|
||||
kubectl -n "$NAMESPACE" get secret -o name 2>/dev/null \
|
||||
| grep "secret/${OLD_NAME}" | grep -v "sh.helm.release" | grep -v "values" \
|
||||
| { grep "secret/${OLD_NAME}" || true; } | { grep -v "sh.helm.release" || true; } | { grep -v "values" || true; } \
|
||||
| while IFS= read -r secret; do
|
||||
old_secret_name="${secret#secret/}"
|
||||
suffix="${old_secret_name#${OLD_NAME}}"
|
||||
@@ -542,7 +539,7 @@ SVCEOF
|
||||
# --- 2q: Delete old resources ---
|
||||
echo " --- Delete old resources ---"
|
||||
kubectl -n "$NAMESPACE" get secret -o name 2>/dev/null \
|
||||
| grep "secret/${OLD_NAME}" | grep -v "sh.helm.release" | grep -v "values" \
|
||||
| { grep "secret/${OLD_NAME}" || true; } | { grep -v "sh.helm.release" || true; } | { grep -v "values" || true; } \
|
||||
| while IFS= read -r secret; do
|
||||
old_secret_name="${secret#secret/}"
|
||||
delete_resource "$NAMESPACE" "secret" "$old_secret_name"
|
||||
@@ -564,71 +561,17 @@ SVCEOF
|
||||
delete_resource "$NAMESPACE" "secret" "$VALUES_SECRET"
|
||||
fi
|
||||
|
||||
# Collect protected resources for batch deletion
|
||||
# Delete old service (if exists)
|
||||
if resource_exists "$NAMESPACE" "svc" "$OLD_NAME"; then
|
||||
ALL_PROTECTED_RESOURCES+=("${NAMESPACE}:svc/${OLD_NAME}")
|
||||
delete_resource "$NAMESPACE" "svc" "$OLD_NAME"
|
||||
fi
|
||||
done
|
||||
|
||||
# ============================================================
|
||||
# STEP 3: Delete protected resources (Services)
|
||||
# STEP 3: Restore PV reclaim policies
|
||||
# ============================================================
|
||||
echo ""
|
||||
echo "--- Step 3: Delete protected resources ---"
|
||||
|
||||
if [ ${#ALL_PROTECTED_RESOURCES[@]} -gt 0 ]; then
|
||||
WEBHOOK_EXISTS=false
|
||||
if kubectl -n "$PROTECTION_WEBHOOK_NS" get deploy "$PROTECTION_WEBHOOK_NAME" --no-headers 2>/dev/null | grep -q .; then
|
||||
WEBHOOK_EXISTS=true
|
||||
fi
|
||||
|
||||
if [ "$WEBHOOK_EXISTS" = "true" ]; then
|
||||
echo " --- Temporarily disabling protection-webhook ---"
|
||||
|
||||
WEBHOOK_REPLICAS=$(kubectl -n "$PROTECTION_WEBHOOK_NS" get deploy "$PROTECTION_WEBHOOK_NAME" \
|
||||
-o jsonpath='{.spec.replicas}' 2>/dev/null || echo "1")
|
||||
|
||||
echo " [SCALE] ${PROTECTION_WEBHOOK_NAME} -> 0 (was ${WEBHOOK_REPLICAS})"
|
||||
kubectl -n "$PROTECTION_WEBHOOK_NS" scale deploy "$PROTECTION_WEBHOOK_NAME" --replicas=0
|
||||
|
||||
echo " [PATCH] Set failurePolicy=Ignore on ValidatingWebhookConfiguration/${PROTECTION_WEBHOOK_NAME}"
|
||||
kubectl get validatingwebhookconfiguration "$PROTECTION_WEBHOOK_NAME" -o json | \
|
||||
jq '.webhooks[].failurePolicy = "Ignore"' | \
|
||||
kubectl apply -f - 2>/dev/null || true
|
||||
|
||||
echo " Waiting for webhook pods to terminate..."
|
||||
kubectl -n "$PROTECTION_WEBHOOK_NS" wait --for=delete pod \
|
||||
-l app.kubernetes.io/name=protection-webhook --timeout=60s 2>/dev/null || true
|
||||
sleep 3
|
||||
fi
|
||||
|
||||
for entry in "${ALL_PROTECTED_RESOURCES[@]}"; do
|
||||
ns="${entry%%:*}"
|
||||
res="${entry#*:}"
|
||||
echo " [DELETE] ${ns}/${res}"
|
||||
kubectl -n "$ns" delete "$res" --wait=false 2>/dev/null || true
|
||||
done
|
||||
|
||||
if [ "$WEBHOOK_EXISTS" = "true" ]; then
|
||||
echo " [PATCH] Set failurePolicy=Fail on ValidatingWebhookConfiguration/${PROTECTION_WEBHOOK_NAME}"
|
||||
kubectl get validatingwebhookconfiguration "$PROTECTION_WEBHOOK_NAME" -o json | \
|
||||
jq '.webhooks[].failurePolicy = "Fail"' | \
|
||||
kubectl apply -f - 2>/dev/null || true
|
||||
|
||||
echo " [SCALE] ${PROTECTION_WEBHOOK_NAME} -> ${WEBHOOK_REPLICAS}"
|
||||
kubectl -n "$PROTECTION_WEBHOOK_NS" scale deploy "$PROTECTION_WEBHOOK_NAME" \
|
||||
--replicas="$WEBHOOK_REPLICAS"
|
||||
echo " --- protection-webhook restored ---"
|
||||
fi
|
||||
else
|
||||
echo " [SKIP] No protected resources to delete"
|
||||
fi
|
||||
|
||||
# ============================================================
|
||||
# STEP 4: Restore PV reclaim policies
|
||||
# ============================================================
|
||||
echo ""
|
||||
echo "--- Step 4: Restore PV reclaim policies ---"
|
||||
echo "--- Step 3: Restore PV reclaim policies ---"
|
||||
for pv_name in "${ALL_PV_NAMES[@]}"; do
|
||||
if [ -n "$pv_name" ]; then
|
||||
current_policy=$(kubectl get pv "$pv_name" \
|
||||
@@ -643,7 +586,7 @@ for pv_name in "${ALL_PV_NAMES[@]}"; do
|
||||
done
|
||||
|
||||
# ============================================================
|
||||
# STEP 5: Temporarily disable CDI datavolume webhooks
|
||||
# STEP 4: Temporarily disable CDI datavolume webhooks
|
||||
# ============================================================
|
||||
# CDI's datavolume-validate webhook rejects DataVolume creation when a PVC
|
||||
# with the same name already exists. We must disable it so that vm-disk
|
||||
@@ -652,7 +595,7 @@ done
|
||||
# cdi-apiserver (which serves the webhooks), then delete webhook configs.
|
||||
# Both are restored after vm-disk HRs reconcile.
|
||||
echo ""
|
||||
echo "--- Step 5: Temporarily disable CDI webhooks ---"
|
||||
echo "--- Step 4: Temporarily disable CDI webhooks ---"
|
||||
|
||||
CDI_OPERATOR_REPLICAS=$(kubectl -n "$CDI_APISERVER_NS" get deploy cdi-operator \
|
||||
-o jsonpath='{.spec.replicas}' 2>/dev/null || echo "1")
|
||||
@@ -685,10 +628,10 @@ done
|
||||
sleep 2
|
||||
|
||||
# ============================================================
|
||||
# STEP 6: Unsuspend vm-disk HelmReleases first
|
||||
# STEP 5: Unsuspend vm-disk HelmReleases first
|
||||
# ============================================================
|
||||
echo ""
|
||||
echo "--- Step 6: Unsuspend vm-disk HelmReleases ---"
|
||||
echo "--- Step 5: Unsuspend vm-disk HelmReleases ---"
|
||||
for entry in "${INSTANCES[@]}"; do
|
||||
ns="${entry%%/*}"
|
||||
instance="${entry#*/}"
|
||||
@@ -705,7 +648,7 @@ for entry in "${INSTANCES[@]}"; do
|
||||
# Force immediate reconciliation
|
||||
echo " [TRIGGER] Reconcile ${ns}/hr/${disk_name}"
|
||||
kubectl -n "$ns" annotate hr "$disk_name" --overwrite \
|
||||
"reconcile.fluxcd.io/requestedAt=$(date +%s)" 2>/dev/null || true
|
||||
"reconcile.fluxcd.io/requestedAt=$(date -u +'%Y-%m-%dT%H:%M:%SZ')" 2>/dev/null || true
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -729,12 +672,12 @@ for entry in "${INSTANCES[@]}"; do
|
||||
done
|
||||
|
||||
# ============================================================
|
||||
# STEP 7: Restore CDI webhooks
|
||||
# STEP 6: Restore CDI webhooks
|
||||
# ============================================================
|
||||
# Scale cdi-operator and cdi-apiserver back up.
|
||||
# cdi-apiserver will recreate webhook configurations automatically on start.
|
||||
echo ""
|
||||
echo "--- Step 7: Restore CDI webhooks ---"
|
||||
echo "--- Step 6: Restore CDI webhooks ---"
|
||||
|
||||
echo " [SCALE] cdi-operator -> ${CDI_OPERATOR_REPLICAS}"
|
||||
kubectl -n "$CDI_APISERVER_NS" scale deploy cdi-operator \
|
||||
@@ -749,10 +692,10 @@ kubectl -n "$CDI_APISERVER_NS" rollout status deploy "$CDI_APISERVER_DEPLOY" --t
|
||||
echo " --- CDI webhooks restored ---"
|
||||
|
||||
# ============================================================
|
||||
# STEP 8: Unsuspend vm-instance HelmReleases
|
||||
# STEP 7: Unsuspend vm-instance HelmReleases
|
||||
# ============================================================
|
||||
echo ""
|
||||
echo "--- Step 8: Unsuspend vm-instance HelmReleases ---"
|
||||
echo "--- Step 7: Unsuspend vm-instance HelmReleases ---"
|
||||
for entry in "${INSTANCES[@]}"; do
|
||||
ns="${entry%%/*}"
|
||||
instance="${entry#*/}"
|
||||
|
||||
Reference in New Issue
Block a user