[kubernetes] Helm hooks for cleanup (#1606)

## What this PR does

When deleting a Kubernetes, some resources may linger post deletion
because of a race to remove HelmReleases deployed inside the tenant
cluster and the removal of the cluster and its controlplane itself. This
patch modifies the existing pre-delete hook to remove those helmreleases
instead of simply suspending them. Similarly, datavolumes may also
remain. These are now delete with a post-delete hook.

### Release note

```release-note
[kubernetes] Use Helm hooks to clean up HelmReleases deployed in tenant
clusters and DataVolumes backing the tenant clusters' PVCs when deleting
a tenant Kubernetes.
```

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **Chores**
* Added an automated post-delete cleanup job to remove persistent data
volumes scoped to the release namespace when a release is deleted.
* Updated Helm release teardown to actively delete lingering release
resources (rather than only suspending them) for cleaner uninstall
behavior.
* Broadened lifecycle hooks to run on successful completions and
expanded teardown permissions to list and delete related release
artifacts, including gateway CRDs.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Timofei Larkin
2025-11-07 13:50:55 +04:00
committed by GitHub
2 changed files with 98 additions and 21 deletions

View File

@@ -0,0 +1,76 @@
---
apiVersion: batch/v1
kind: Job
metadata:
annotations:
"helm.sh/hook": post-delete
"helm.sh/hook-weight": "10"
"helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation,hook-failed
name: {{ .Release.Name }}-datavolume-cleanup
spec:
template:
spec:
serviceAccountName: {{ .Release.Name }}-datavolume-cleanup
restartPolicy: Never
tolerations:
- key: CriticalAddonsOnly
operator: Exists
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: "NoSchedule"
containers:
- name: kubectl
image: docker.io/clastix/kubectl:v1.32
command:
- /bin/sh
- -c
- kubectl -n {{ .Release.Namespace }} delete datavolumes
-l "cluster.x-k8s.io/cluster-name={{ .Release.Name }}"
--ignore-not-found=true
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ .Release.Name }}-datavolume-cleanup
annotations:
helm.sh/hook: post-delete
helm.sh/hook-delete-policy: before-hook-creation,hook-failed,hook-succeeded
helm.sh/hook-weight: "0"
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
annotations:
"helm.sh/hook": post-delete
"helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation,hook-failed
"helm.sh/hook-weight": "5"
name: {{ .Release.Name }}-datavolume-cleanup
rules:
- apiGroups:
- "cdi.kubevirt.io"
resources:
- datavolumes
verbs:
- get
- list
- delete
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
annotations:
"helm.sh/hook": post-delete
"helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation,hook-failed
"helm.sh/hook-weight": "5"
name: {{ .Release.Name }}-datavolume-cleanup
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: {{ .Release.Name }}-datavolume-cleanup
subjects:
- kind: ServiceAccount
name: {{ .Release.Name }}-datavolume-cleanup
namespace: {{ .Release.Namespace }}

View File

@@ -24,26 +24,26 @@ spec:
command:
- /bin/sh
- -c
- |
kubectl
--namespace={{ .Release.Namespace }}
patch
helmrelease
{{ .Release.Name }}-cilium
{{ .Release.Name }}-gateway-api-crds
{{ .Release.Name }}-csi
{{ .Release.Name }}-cert-manager
{{ .Release.Name }}-cert-manager-crds
{{ .Release.Name }}-vertical-pod-autoscaler
{{ .Release.Name }}-vertical-pod-autoscaler-crds
{{ .Release.Name }}-ingress-nginx
{{ .Release.Name }}-fluxcd-operator
{{ .Release.Name }}-fluxcd
{{ .Release.Name }}-gpu-operator
{{ .Release.Name }}-velero
{{ .Release.Name }}-coredns
-p '{"spec": {"suspend": true}}'
--type=merge --field-manager=flux-client-side-apply || true
- >-
kubectl
--namespace={{ .Release.Namespace }}
patch
helmrelease
{{ .Release.Name }}-cilium
{{ .Release.Name }}-gateway-api-crds
{{ .Release.Name }}-csi
{{ .Release.Name }}-cert-manager
{{ .Release.Name }}-cert-manager-crds
{{ .Release.Name }}-vertical-pod-autoscaler
{{ .Release.Name }}-vertical-pod-autoscaler-crds
{{ .Release.Name }}-ingress-nginx
{{ .Release.Name }}-fluxcd-operator
{{ .Release.Name }}-fluxcd
{{ .Release.Name }}-gpu-operator
{{ .Release.Name }}-velero
{{ .Release.Name }}-coredns
-p '{"spec": {"suspend": true}}'
--type=merge --field-manager=flux-client-side-apply || true
---
apiVersion: v1
kind: ServiceAccount
@@ -51,7 +51,7 @@ metadata:
name: {{ .Release.Name }}-flux-teardown
annotations:
helm.sh/hook: pre-delete
helm.sh/hook-delete-policy: before-hook-creation,hook-failed
helm.sh/hook-delete-policy: before-hook-creation,hook-failed,hook-succeeded
helm.sh/hook-weight: "0"
---
apiVersion: rbac.authorization.k8s.io/v1
@@ -75,6 +75,7 @@ rules:
- {{ .Release.Name }}-csi
- {{ .Release.Name }}-cert-manager
- {{ .Release.Name }}-cert-manager-crds
- {{ .Release.Name }}-gateway-api-crds
- {{ .Release.Name }}-vertical-pod-autoscaler
- {{ .Release.Name }}-vertical-pod-autoscaler-crds
- {{ .Release.Name }}-ingress-nginx