feat(taskfiles): add rook tasks

JJGadgets committed 2023-06-30 22:47:34 +08:00
parent 7f707f4faa
commit 32a11260f6
10 changed files with 266 additions and 13 deletions

View File

@@ -6,13 +6,22 @@ vars:
  APPLY: '{{.APPLY | default "0"}}'
tasks:
  cluster-init-flux:
    aliases: [initflux]
  cluster-switch:
    aliases: [ctx]
    vars:
      C: '{{ or .C (fail "Missing C environment variable for cluster!") }}'
    cmds:
      - |
        sed -i 's/current-context: admin@.*/current-context: admin@{{.C}}/g' ~/.kube/config
        sed -i 's/current-context: admin@.*/current-context: admin@{{.C}}/g' ~/.kube/config || true
  cluster-init-flux:
    aliases: [initflux]
    vars:
      C: '{{ or .C (fail "Missing `C` environment variable for cluster!") }}'
    cmds:
      - task: cluster-switch
        vars:
          C: '{{.C}}'
      - kubectl apply --server-side --kustomize ./kube/bootstrap/flux/
  cluster-init:
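
A minimal usage sketch of the tasks touched above, run from a shell and assuming this taskfile is included in the root Taskfile under a `cluster` namespace (the include name is not shown in this diff):

    # switch the current kubectl context to the "nuclear" cluster (alias: ctx)
    task cluster:cluster-switch C=nuclear
    # switch context, then bootstrap Flux from ./kube/bootstrap/flux/ (alias: initflux)
    task cluster:cluster-init-flux C=nuclear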

View File

@@ -5,6 +5,16 @@ vars:
  C: '{{ .C | default "biohazard" }}'
tasks:
  wait-job-pending:
    aliases: [waitjob]
    internal: true
    desc: Wait for a job's pod to change its status to pending
    vars:
      NAMESPACE: '{{ or .NAMESPACE (fail "Missing `NAMESPACE` environment variable!") }}'
      JOB_NAME: '{{ or .JOB_NAME (fail "Missing `JOB_NAME` environment variable!") }}'
    cmds:
      - until [[ $(kubectl -n "${NAMESPACE}" get pod -l job-name="${JOB_NAME}" -o jsonpath='{.items[*].status.phase}') == "Pending" ]]; do sleep 1; done
  get-public-ingresses:
    aliases: [gpi]
    desc: |
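
The polling loop above can also be expressed with kubectl's built-in jsonpath wait; a hedged alternative sketch, assuming a kubectl recent enough (roughly v1.23+) to support --for=jsonpath:

    kubectl -n "${NAMESPACE}" wait pod -l job-name="${JOB_NAME}" \
      --for=jsonpath='{.status.phase}'=Pending --timeout=60s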

View File

@@ -0,0 +1,31 @@
---
apiVersion: batch/v1
kind: Job
metadata:
  name: "root"
  namespace: "kube-system"
  labels:
    jobs.home.arpa/rook: "wipe-disk"
spec:
  ttlSecondsAfterFinished: 3600
  template:
    spec:
      automountServiceAccountToken: false
      restartPolicy: Never
      nodeName: blackfish
      containers:
        - name: test
          image: ghcr.io/onedr0p/alpine:3.17.3@sha256:999384960b6114496a5e4036e945141c205d064ce23b87326bd3f8d878c5a9d4
          securityContext:
            privileged: true
          resources: {}
          command: ["/bin/sh", "-c"]
          args: ["sleep 999999999"]
          volumeMounts:
            - mountPath: /mnt/disk
              name: host-disk
      volumes:
        - name: host-disk
          hostPath:
            path: "/dev/disk/by-id/ata-INTEL_SSDSC2BB016T4_BTWD709202JK1P6HGN"

View File

@@ -0,0 +1,126 @@
---
version: "3"
x-task-vars: &task-vars
  NODE: "{{.NODE}}"
  CEPH_DISK: "{{.CEPH_DISK}}"
  TIME: "{{.TIME}}"
  JOB_NAME: "{{.JOB_NAME}}"
vars:
  TIME: '{{now | date "150405"}}'
includes:
  k8s:
    internal: true
    taskfile: ../k8s
  cluster:
    internal: true
    taskfile: ../cluster
tasks:
  zap-disk:
    desc: Prepare a disk to be used as a Ceph OSD on the specified node by zapping all data and partition metadata.
    internal: true
    dir: '/{{.ROOT_DIR}}/.taskfiles/rook'
    cmds:
      # TODO: mimic deviceFilter
      - envsubst < <(cat {{.JOB_TEMPLATE}}) | kubectl apply -f -
      # - task: k8s:wait-job-pending
      #   vars:
      #     JOB_NAME: '{{.wipeCephDiskJobName}}'
      #     NAMESPACE: kube-system
      - |-
        until kubectl -n kube-system wait job/{{.JOB_NAME}} --for condition=complete --timeout=1m; do
          echo "Job {{.JOB_NAME}} is still running, logs:" &&
          kubectl -n kube-system logs job/{{.JOB_NAME}} -f;
        done
      # - kubectl -n kube-system logs job/{{.JOB_NAME}}
      - defer: kubectl -n kube-system delete job {{.JOB_NAME}}
    vars:
      NODE: '{{ or .NODE (fail "`NODE` is required") }}'
      CEPH_DISK: '{{ or .CEPH_DISK (fail "`CEPH_DISK` is required") }}'
      JOB_NAME: 'zap-disk-{{- .NODE -}}-{{- .TIME -}}'
      JOB_TEMPLATE: "zap-disk-job.tmpl.yaml"
    env: *task-vars
    preconditions:
      - sh: test -f {{.JOB_TEMPLATE}}
  wipe-state:
    desc: Wipe all Ceph state on the specified node.
    internal: true
    dir: '/{{.ROOT_DIR}}/.taskfiles/rook'
    cmds:
      - envsubst < <(cat {{.JOB_TEMPLATE}}) | kubectl apply -f -
      # - task: k8s:wait-job-pending
      #   vars:
      #     JOB_NAME: '{{.wipeCephDiskJobName}}'
      #     NAMESPACE: kube-system
      - until kubectl -n kube-system wait job/{{.JOB_NAME}} --for condition=complete --timeout=1m; do echo "Job {{.JOB_NAME}} is still running, logs:" && kubectl -n kube-system logs job/{{.JOB_NAME}} -f; done
      # - kubectl -n kube-system logs job/{{.JOB_NAME}}
      - defer: kubectl -n kube-system delete job {{.JOB_NAME}}
    vars:
      NODE: '{{ or .NODE (fail "`NODE` is required") }}'
      JOB_NAME: "wipe-rook-state-{{- .NODE -}}-{{- .TIME -}}"
      JOB_TEMPLATE: "wipe-rook-state-job.tmpl.yaml"
    env: *task-vars
    preconditions:
      - sh: test -f {{.JOB_TEMPLATE}}
  wipe-node:
    aliases: ["wn"]
    desc: Trigger a wipe of all Rook-Ceph data on the specified node.
    dir: '/{{.ROOT_DIR}}/.taskfiles/rook'
    vars:
      NODE: '{{ or .NODE (fail "Missing `NODE` environment variable!") }}'
      CEPH_DISK: '{{ or .CEPH_DISK (fail "Missing `CEPH_DISK` environment variable!") }}'
    cmds:
      - task: zap-disk
        vars:
          NODE: '{{.NODE}}'
          CEPH_DISK: '{{ or .CEPH_DISK (fail "Missing `CEPH_DISK` environment variable!") }}'
      - task: wipe-state
        vars:
          NODE: '{{.NODE}}'
  wipe-nodes-nuclear:
    desc: Wipe all nodes in cluster "nuclear"
    dir: '/{{.ROOT_DIR}}/.taskfiles/rook'
    cmds:
      - task: wipe-node
        vars:
          NODE: "blackfish"
          CEPH_DISK: "/dev/disk/by-id/ata-INTEL_SSDSC2BB016T4_BTWD709202JK1P6HGN"
  reinstall:
    desc: |-
      For when Rook refuses to create any OSDs at all.
      Assuming Flux and this repo's resource names: suspends the master ks.yaml (Flux Kustomization), suspends the ks.yaml for Rook-Ceph and its cluster, suspends the HelmReleases for Rook-Ceph and its cluster, deletes the cluster HelmRelease, patches the finalizers on the Ceph CRs and the mon ConfigMap/Secret, and removes the Rook-Ceph HelmRelease and namespace.
      Then reconciles the master, Rook-Ceph and cluster ks.yaml.
    dir: '/{{.ROOT_DIR}}/.taskfiles/rook'
    vars:
      C: '{{ or .C (fail "Missing `C` environment variable for cluster!") }}'
    cmds:
      - task: cluster:cluster-switch
        vars:
          C: '{{.C}}'
      - flux suspend ks 0-{{.C}}-config
      - flux suspend ks 1-core-storage-rook-ceph-app
      - flux suspend ks 1-core-storage-rook-ceph-cluster
      - helm uninstall -n rook-ceph rook-ceph-cluster && true || true
      - flux delete hr -n rook-ceph rook-ceph-cluster --silent && true || true
      - |-
        for CRD in $(kubectl get crd -n rook-ceph | awk '/ceph.rook.io/ {print $1}'); do
          kubectl get -n rook-ceph "$CRD" -o name | \
            xargs -I {} kubectl patch -n rook-ceph {} --type merge -p '{"metadata":{"finalizers": []}}' && true || true
        done
      - |-
        kubectl -n rook-ceph patch configmap rook-ceph-mon-endpoints --type merge -p '{"metadata":{"finalizers": []}}' && true || true
        kubectl -n rook-ceph patch secrets rook-ceph-mon --type merge -p '{"metadata":{"finalizers": []}}' && true || true
      - helm uninstall -n rook-ceph rook-ceph && true || true
      - flux delete hr -n rook-ceph rook-ceph --silent && true || true
      - kubectl get namespaces rook-ceph && until kubectl delete namespaces rook-ceph; do kubectl get namespaces rook-ceph -o jsonpath="{.status}"; done || true
      - task: wipe-nodes-{{.C}}
      - flux suspend ks 0-{{.C}}-config && flux resume ks 0-{{.C}}-config
      - flux suspend ks 1-core-storage-rook-ceph-app && flux resume ks 1-core-storage-rook-ceph-app
      - flux suspend ks 1-core-storage-rook-ceph-cluster && flux resume ks 1-core-storage-rook-ceph-cluster
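
Typical invocations from the repo root, using the include namespace and alias added to the root Taskfile later in this commit:

    # zap the disk and wipe on-host Rook state for one node
    task rook:wipe-node NODE=blackfish CEPH_DISK=/dev/disk/by-id/ata-INTEL_SSDSC2BB016T4_BTWD709202JK1P6HGN
    # same thing via the short aliases
    task r:wn NODE=blackfish CEPH_DISK=/dev/disk/by-id/ata-INTEL_SSDSC2BB016T4_BTWD709202JK1P6HGN
    # full teardown and redeploy of Rook-Ceph on the "nuclear" cluster
    task rook:reinstall C=nuclear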

View File

@@ -0,0 +1,31 @@
---
apiVersion: batch/v1
kind: Job
metadata:
  name: "${JOB_NAME}"
  namespace: "kube-system"
  labels:
    jobs.home.arpa/rook: "wipe-rook-state"
spec:
  ttlSecondsAfterFinished: 3600
  template:
    spec:
      automountServiceAccountToken: false
      restartPolicy: Never
      nodeName: ${NODE}
      containers:
        - name: disk-wipe
          image: ghcr.io/onedr0p/alpine:3.17.3@sha256:999384960b6114496a5e4036e945141c205d064ce23b87326bd3f8d878c5a9d4
          securityContext:
            privileged: true
          resources: {}
          command: ["/bin/sh", "-c"]
          args:
            - echo "Running `rm -rf`:"; rm -rf /mnt/host_var/lib/rook && echo "Success!" || echo "Failed!"
          volumeMounts:
            - mountPath: /mnt/host_var
              name: host-var
      volumes:
        - name: host-var
          hostPath:
            path: /var

View File

@@ -0,0 +1,37 @@
---
apiVersion: batch/v1
kind: Job
metadata:
  name: "${JOB_NAME}"
  namespace: "kube-system"
  labels:
    jobs.home.arpa/rook: "wipe-disk"
spec:
  ttlSecondsAfterFinished: 3600
  template:
    spec:
      automountServiceAccountToken: false
      restartPolicy: Never
      nodeName: ${NODE}
      containers:
        - name: disk-wipe
          image: ghcr.io/onedr0p/alpine:3.17.3@sha256:999384960b6114496a5e4036e945141c205d064ce23b87326bd3f8d878c5a9d4
          securityContext:
            privileged: true
          resources: {}
          command: ["/bin/sh", "-c"]
          args:
            - apk add --no-cache sgdisk util-linux parted;
              fdisk -l /mnt/disk;
              echo "Running sgdisk..." && sgdisk --zap-all /mnt/disk && echo "Success!" || break;
              echo "Running wipefs..." && wipefs --all --backup /mnt/disk && echo "Success!" || break;
              echo "Running blkdiscard..." && blkdiscard /mnt/disk && echo "Success!" || break;
              echo "Running partprobe..." && partprobe /mnt/disk && echo "Success!" || echo "Failed!"
              #dd if=/dev/zero bs=1M count=10000 oflag=direct of=${CEPH_DISK};
          volumeMounts:
            - mountPath: /mnt/disk
              name: host-disk
      volumes:
        - name: host-disk
          hostPath:
            path: "${CEPH_DISK}"

View File

@@ -2,8 +2,12 @@
version: "3"
includes:
k8s: ../k8s
cluster: ../cluster
k8s:
internal: true
taskfile: ../k8s
cluster:
internal: true
taskfile: ../cluster
vars:
C: '{{.C | default "biohazard"}}'

View File

@@ -23,6 +23,9 @@ includes:
  cnpg:
    aliases: [pg]
    taskfile: .taskfiles/cnpg/Taskfile.dist.yaml
  rook:
    aliases: [r]
    taskfile: .taskfiles/rook
tasks:
  default:

View File

@@ -57,7 +57,7 @@ APP_IP_K8S_GATEWAY=ENC[AES256_GCM,data:OwsOAO2jzTqrjULN,iv:EFsZhmlSDA8iHkyJgpE7B
APP_IP_HAPROXY_PROXY_X=ENC[AES256_GCM,data:X4OtM9iKr9RKllTb,iv:tl5zMdaYKu9iZF23860bLHxyxwgFMl0Y7cCMD1mI8wY=,tag:tCkyCrYQjOYJHuUdlDnyZw==,type:str]
APP_UID_HAPROXY_PROXY_X=ENC[AES256_GCM,data:B3G6nA==,iv:e5UIYZa52kQ8GFBD30d4/U8WMito6albh4CMgYhHOpg=,tag:8TBer2t4zGGYIC3bmO5FLA==,type:str]
APP_DNS_FLUX_WEBHOOK=ENC[AES256_GCM,data:/vKJ0ACjgrnQdz93O0IXCy3q1Ttv8yFsJA==,iv:mbL57+d+xn9XuggscB9KiaP9t2mTZi57j+89Whhw7Ro=,tag:q37t5QXvLjTqZXhQWoT7gw==,type:str]
APP_DNS_CEPH=ENC[AES256_GCM,data:npgEErUvNqTnZ8I=,iv:rt3ditftNw2PJJIodAUJLBQv4EIqTgmZSk8Gkg2t/lY=,tag:8adKoJ/shIVoRlsSwttwUQ==,type:str]
APP_DNS_CEPH=ENC[AES256_GCM,data:d3Rr47Rs6vLEAGOb,iv:oRwmj7gvc9nVHg7vvPNN/EdqXKwtm+BjeEvEZx5fUvQ=,tag:EjR15rnCAc+K/cwYgwgJAA==,type:str]
APP_DNS_RADOSGW=ENC[AES256_GCM,data:H5CKxKXf3imtuA==,iv:PKnd+pSP7eEHHUA5YLsHOEEBxApAakvJq3WzfVQGymY=,tag:2uZdzZ8ZDCkFdCQ/n9KSGQ==,type:str]
APP_IP_RADOSGW=ENC[AES256_GCM,data:OYs5/gWPQkPbYc8=,iv:HxElYcvv/+hQCfOa+TD04Vp/tM7jzlGApK3Z/kp/q0s=,tag:gWUtQiWgIIkfG9Yd4WC0fg==,type:str]
APP_DNS_RGW_S3=ENC[AES256_GCM,data:MD1ldDBMlPsyGNY=,iv:4lkGMXZSU6Ygsy5EkHFvg8dSDw49NfEJ0/1u48gXXmo=,tag:ICeMEHR6zPA3sbxUloXYoQ==,type:str]
@@ -123,12 +123,12 @@ CONFIG_ZEROTIER_ENDPOINT=ENC[AES256_GCM,data:tOyIlrzdn8sck7um7OSicq5T0XWAmymaRLn
CONFIG_AUTHENTIK_REMOTE_HOST=ENC[AES256_GCM,data:K0SOweZKXdAhlKQrZQ42UQ3Eg7u3KTF5nCemjkz2XA==,iv:65Qret7KcDIZRoLzRwoY6sXGNUGfrhsMugJ+jeO5O6Y=,tag:qkv/mksEiPuMtylKvIs/Yw==,type:str]
CONFIG_HEADSCALE_IPV4=ENC[AES256_GCM,data:EZ7GMHA6u1wWPS5g6Pg=,iv:W1hcseQ4Q6CisTXnDLI7hWTy18fIVKtZ46tudCyhfa4=,tag:2WnnNjuZhwUPG07OKTQt2g==,type:str]
CONFIG_OVENMEDIAENGINE_NAME=ENC[AES256_GCM,data:58CuH8bcUHWXBZA=,iv:BN7x6aAJPbzIn25sNoycsHRE5pugkubLS2VrM77+g/E=,tag:6JAsRjU0L6wbZtns3rk6KQ==,type:str]
sops_lastmodified=2023-06-29T21:15:10Z
sops_mac=ENC[AES256_GCM,data:pCl0VZSogPmc8Mcp2zOz7nk5qvUW5UvV7R/nvaGuxTKUe/YkNgELKV5wSfw6Xlx0NJf28TKaJirYRsIHQnWCiZPp7oN7N1XObh37Ud9azZK2xxLRiz3+DGtG05i9UdhsZ0AXhzcuoo4E/u1o00UzEDCkqKyk0ZVjUQGFlZRXCyU=,iv:jusmwDbhxrSHPm0wv+aMh79g1y3RjGjYSHkOXKM+ocw=,tag:cPieIAP6hXmJIUfPTpAUHw==,type:str]
sops_unencrypted_suffix=_unencrypted
sops_age__list_0__map_recipient=age1u57l4s400gqstc0p485j4646cemntufr0pcyp32yudklsp90xpmszxvnkj
sops_version=3.7.3
sops_age__list_0__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBFSXFvLzFQaFJ0OVJKUFV5\nTWh2OUltUlpJWFlVVytFYU9VajBHSnQ4SGdjCnRVbEVXdDVyUHJrR05Ba0xvUm1l\nTkt2YmNUZy90ZFA2b3QrODFKZ01EVG8KLS0tIEw2dkd1cnFCbnI5eWxKL2o1aDVB\nN0hveXZ2dWdxQ2k2L0pGR0ROMStVTmsK4dV/hNyDjsYnVUiFQ7kqdmcVHfYyVckz\nh/rwLjcZgsup72WDVP3v6Eul8B3LKFrSb8CDFA54tyQmSdFDCQC+Zg==\n-----END AGE ENCRYPTED FILE-----\n
sops_mac=ENC[AES256_GCM,data:POdxJILRjhilf8dMLmtK+D6uuHeLZG7HmtaU7rd9msYh0a+tEIXF+VD/zi+CZEyofsezvKJIJETj6sP0UchUJoHpf2nRN+4jiDKuDnmi64jAl8tQNjduZYvbbsCwiBcA69ThQ9i3HdOtJzmDlSHorBkyUokU1UlV9Lzqq3VfvPY=,iv:hY1teRiKLpIbfoyouNOmdZ2d2OcTmP15+npejRkbOcI=,tag:z9sZf/1LhYaAMM8p+SI2NQ==,type:str]
sops_pgp__list_0__map_fp=31E70E5BC80C58AFF5DD649921AC5A1AC6E5B7F2
sops_pgp__list_0__map_created_at=2023-06-01T18:01:04Z
sops_pgp__list_0__map_enc=-----BEGIN PGP MESSAGE-----\n\nhF4DAAAAAAAAAAASAQdAbA35718t0WVKrjQFYUPviCb0lVuh8NpfSdJCHjHcWWww\n8ak4q4VL69tZLSjQHx+VsMmKooknxWz6pw0lGxyDYlZMQ81bodInjaZGFZSz8Uuh\n0l4BhDCNDBBALTrnTliz6/DAHvmavI4UxMHost5alFio9JPkTDNmXZyvcy1/R6aw\n/uhQXLUBRvm0TSOhBZb7d0SLkLfe02Um40w1TibpKXsZz1GOMbPRNBMHHra0QIuQ\n=0jA+\n-----END PGP MESSAGE-----\n
sops_pgp__list_0__map_fp=31E70E5BC80C58AFF5DD649921AC5A1AC6E5B7F2
sops_version=3.7.3
sops_lastmodified=2023-06-29T21:53:05Z

View File

@@ -32,16 +32,18 @@ spec:
        compression:
          enabled: true
      crashCollector:
        disable: false
        disable: true
      dashboard:
        enabled: true
        urlPrefix: "/"
        ssl: false
      mon:
        count: 3
        # count: 3
        count: 1
        allowMultiplePerNode: false
      mgr:
        count: 2
        # count: 2
        count: 1
        allowMultiplePerNode: false
        modules:
          - name: "pg_autoscaler"