Add backups for ClickHouse

Signed-off-by: Andrei Kvapil <kvapss@gmail.com>
This commit is contained in:
Andrei Kvapil
2024-09-12 22:50:06 +02:00
parent b3ee563e23
commit 4b84798f00
12 changed files with 260 additions and 8 deletions

View File

@@ -2,6 +2,7 @@
build:
make -C packages/apps/http-cache image
make -C packages/apps/clickhouse image
make -C packages/apps/kubernetes image
make -C packages/system/cilium image
make -C packages/system/kubeovn image

View File

@@ -16,7 +16,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.4.0
version: 0.4.1
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to

View File

@@ -1,4 +1,20 @@
CLICKHOUSE_BACKUP_TAG = $(shell awk '$$1 == "version:" {print $$2}' Chart.yaml)
include ../../../scripts/common-envs.mk
include ../../../scripts/package.mk
generate:
readme-generator -v values.yaml -s values.schema.json -r README.md
image:
docker buildx build --platform linux/amd64 --build-arg ARCH=amd64 images/clickhouse-backup \
--provenance false \
--tag $(REGISTRY)/clickhouse-backup:$(call settag,$(CLICKHOUSE_BACKUP_TAG)) \
--cache-from type=registry,ref=$(REGISTRY)/clickhouse-backup:latest \
--cache-to type=inline \
--metadata-file images/clickhouse-backup.json \
--push=$(PUSH) \
--load=$(LOAD)
echo "$(REGISTRY)/clickhouse-backup:$(call settag,$(CLICKHOUSE_BACKUP_TAG))@$$(yq e '."containerimage.digest"' images/clickhouse-backup.json -o json -r)" \
> images/clickhouse-backup.tag
rm -f images/clickhouse-backup.json

View File

@@ -13,6 +13,14 @@
### Configuration parameters
| Name | Description | Value |
| ------- | ------------------- | ----- |
| `users` | Users configuration | `{}` |
| Name | Description | Value |
| ------------------------ | ---------------------------------------------- | ------------------------------------------------------ |
| `users` | Users configuration | `{}` |
| `backup.enabled` | Enable pereiodic backups | `false` |
| `backup.s3Region` | The AWS S3 region where backups are stored | `us-east-1` |
| `backup.s3Bucket` | The S3 bucket used for storing backups | `s3.example.org/clickhouse-backups` |
| `backup.schedule` | Cron schedule for automated backups | `0 2 * * *` |
| `backup.cleanupStrategy` | The strategy for cleaning up old backups | `--keep-last=3 --keep-daily=3 --keep-within-weekly=1m` |
| `backup.s3AccessKey` | The access key for S3, used for authentication | `oobaiRus9pah8PhohL1ThaeTa4UVa7gu` |
| `backup.s3SecretKey` | The secret key for S3, used for authentication | `ju3eum4dekeich9ahM1te8waeGai0oog` |
| `backup.resticPassword` | The password for Restic backup encryption | `ChaXoveekoh6eigh4siesheeda2quai0` |

View File

@@ -0,0 +1 @@
ghcr.io/aenix-io/cozystack/clickhouse-backup:latest@sha256:dda84420cb8648721299221268a00d72a05c7af5b7fb452619bac727068b9e61

View File

@@ -0,0 +1,2 @@
FROM clickhouse/clickhouse-server:24.8.4-alpine
RUN apk add --no-cache restic uuidgen

View File

@@ -0,0 +1,95 @@
{{- if .Values.backup.enabled }}
{{ $image := .Files.Get "images/backup.json" | fromJson }}
apiVersion: batch/v1
kind: CronJob
metadata:
name: {{ .Release.Name }}-backup
spec:
schedule: "{{ .Values.backup.schedule }}"
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 3
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 2
template:
spec:
restartPolicy: OnFailure
template:
metadata:
annotations:
checksum/config: {{ include (print $.Template.BasePath "/backup-script.yaml") . | sha256sum }}
checksum/secret: {{ include (print $.Template.BasePath "/backup-secret.yaml") . | sha256sum }}
spec:
imagePullSecrets:
- name: {{ .Release.Name }}-regsecret
restartPolicy: Never
containers:
- name: clickhouse-backup
image: "{{ $.Files.Get "images/clickhouse-backup.tag" | trim }}"
command:
- /bin/sh
- -x
- /scripts/backup.sh
env:
- name: REPO_PREFIX
value: {{ required "s3Bucket is not specified!" .Values.backup.s3Bucket | quote }}
- name: CLEANUP_STRATEGY
value: {{ required "cleanupPolicy is not specified!" .Values.backup.cleanupStrategy | quote }}
- name: CLICKHOUSE_USER
value: backup
- name: CLICKHOUSE_HOST
value: chi-{{ .Release.Name }}-clickhouse-0-0
- name: CLICKHOUSE_PASSWORD
valueFrom:
secretKeyRef:
name: {{ .Release.Name }}-credentials
key: backup
- name: AWS_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: {{ .Release.Name }}-backup
key: s3AccessKey
- name: AWS_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: {{ .Release.Name }}-backup
key: s3SecretKey
- name: AWS_DEFAULT_REGION
value: {{ .Values.backup.s3Region }}
- name: RESTIC_PASSWORD
valueFrom:
secretKeyRef:
name: {{ .Release.Name }}-backup
key: resticPassword
volumeMounts:
- mountPath: /scripts
name: scripts
- mountPath: /tmp
name: tmp
- mountPath: /.cache
name: cache
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
volumes:
- name: scripts
secret:
secretName: {{ .Release.Name }}-backup-script
- name: tmp
emptyDir: {}
- name: cache
emptyDir: {}
securityContext:
runAsNonRoot: true
runAsUser: 9000
runAsGroup: 9000
seccompProfile:
type: RuntimeDefault
{{- end }}

View File

@@ -0,0 +1,55 @@
{{- if .Values.backup.enabled }}
---
apiVersion: v1
kind: Secret
metadata:
name: {{ .Release.Name }}-backup-script
stringData:
backup.sh: |
#!/bin/sh
set -e
set -o pipefail
JOB_ID="job-$(uuidgen|cut -f1 -d-)"
TABLE_LIST=$(clickhouse-client --host "$CLICKHOUSE_HOST" -q 'SHOW TABLES;' | grep -v '^.inner.' || true)
echo DB_LIST=$(echo "$TABLE_LIST" | shuf) # shuffle list
echo "Job ID: $JOB_ID"
echo "Target repo: $REPO_PREFIX"
echo "Cleanup strategy: $CLEANUP_STRATEGY"
echo "Start backup for:"
echo "$TABLE_LIST"
echo
echo "Backup started at `date +%Y-%m-%d\ %H:%M:%S`"
for table in $TABLE_LIST; do
(
set -x
restic -r "s3:${REPO_PREFIX}/$table" cat config >/dev/null 2>&1 || \
restic -r "s3:${REPO_PREFIX}/$table" init --repository-version 2
restic -r "s3:${REPO_PREFIX}/$table" unlock --remove-all >/dev/null 2>&1 || true # no locks, k8s takes care of it
clickhouse-client --host "$CLICKHOUSE_HOST" -q "SHOW CREATE TABLE ${table}" | awk '{gsub(/\\n/, "\n")} {gsub(/\\'\''/, "'\''")} 1' | \
restic -r "s3:${REPO_PREFIX}/$table" backup --tag "$JOB_ID" --tag index --stdin --stdin-filename index.sql
clickhouse-client --host "$CLICKHOUSE_HOST" -q "SELECT * FROM ${table} FORMAT TabSeparated" | \
restic -r "s3:${REPO_PREFIX}/$table" backup --tag "$JOB_ID" --tag data --stdin --stdin-filename data.tsv
restic -r "s3:${REPO_PREFIX}/$table" tag --tag "$JOB_ID" --set "completed"
)
done
echo "Backup finished at `date +%Y-%m-%d\ %H:%M:%S`"
echo
echo "Run cleanup:"
echo
echo "Cleanup started at `date +%Y-%m-%d\ %H:%M:%S`"
for db in $DB_LIST; do
(
set -x
# keep completed snapshots only
restic forget -r "s3:${REPO_PREFIX}/$db" --group-by=tags --keep-tag "completed" --tag index
restic forget -r "s3:${REPO_PREFIX}/$db" --group-by=tags --keep-tag "completed" --tag data
restic forget -r "s3:${REPO_PREFIX}/$db" --group-by=tags $CLEANUP_STRATEGY --tag index
restic forget -r "s3:${REPO_PREFIX}/$db" --group-by=tags $CLEANUP_STRATEGY --tag data
restic prune -r "s3:${REPO_PREFIX}/$db"
)
done
echo "Cleanup finished at `date +%Y-%m-%d\ %H:%M:%S`"
{{- end }}

View File

@@ -0,0 +1,11 @@
{{- if .Values.backup.enabled }}
---
apiVersion: v1
kind: Secret
metadata:
name: {{ .Release.Name }}-backup
stringData:
s3AccessKey: {{ required "s3AccessKey is not specified!" .Values.backup.s3AccessKey }}
s3SecretKey: {{ required "s3SecretKey is not specified!" .Values.backup.s3SecretKey }}
resticPassword: {{ required "resticPassword is not specified!" .Values.backup.resticPassword }}
{{- end }}

View File

@@ -1,5 +1,7 @@
{{- $existingSecret := lookup "v1" "Secret" .Release.Namespace (printf "%s-credentials" .Release.Name) }}
{{- $passwords := dict }}
{{- $users := .Values.users }}
{{- $_ := set $users "backup" dict }}
{{- with (index $existingSecret "data") }}
{{- range $k, $v := . }}
@@ -7,7 +9,7 @@
{{- end }}
{{- end }}
{{- range $user, $u := .Values.users }}
{{- range $user, $u := $users }}
{{- if $u.password }}
{{- $_ := set $passwords $user $u.password }}
{{- else if not (index $passwords $user) }}
@@ -15,7 +17,6 @@
{{- end }}
{{- end }}
{{- if .Values.users }}
apiVersion: v1
kind: Secret
metadata:
@@ -24,7 +25,6 @@ stringData:
{{- range $user, $u := .Values.users }}
{{ quote $user }}: {{ quote (index $passwords $user) }}
{{- end }}
{{- end }}
---
apiVersion: "clickhouse.altinity.com/v1"
@@ -38,7 +38,7 @@ spec:
dataVolumeClaimTemplate: data-volume-template
{{- end }}
configuration:
{{- with .Values.users }}
{{- with $users }}
users:
{{- range $name, $u := . }}
{{ $name }}/password_sha256_hex: {{ sha256sum (index $passwords $name) }}

View File

@@ -21,6 +21,51 @@
"type": "string",
"description": "StorageClass used to store the data",
"default": ""
},
"backup": {
"type": "object",
"properties": {
"enabled": {
"type": "boolean",
"description": "Enable pereiodic backups",
"default": false
},
"s3Region": {
"type": "string",
"description": "The AWS S3 region where backups are stored",
"default": "us-east-1"
},
"s3Bucket": {
"type": "string",
"description": "The S3 bucket used for storing backups",
"default": "s3.example.org/clickhouse-backups"
},
"schedule": {
"type": "string",
"description": "Cron schedule for automated backups",
"default": "0 2 * * *"
},
"cleanupStrategy": {
"type": "string",
"description": "The strategy for cleaning up old backups",
"default": "--keep-last=3 --keep-daily=3 --keep-within-weekly=1m"
},
"s3AccessKey": {
"type": "string",
"description": "The access key for S3, used for authentication",
"default": "oobaiRus9pah8PhohL1ThaeTa4UVa7gu"
},
"s3SecretKey": {
"type": "string",
"description": "The secret key for S3, used for authentication",
"default": "ju3eum4dekeich9ahM1te8waeGai0oog"
},
"resticPassword": {
"type": "string",
"description": "The password for Restic backup encryption",
"default": "ChaXoveekoh6eigh4siesheeda2quai0"
}
}
}
}
}

View File

@@ -22,3 +22,21 @@ storageClass: ""
## password: hackme
##
users: {}
## @param backup.enabled Enable pereiodic backups
## @param backup.s3Region The AWS S3 region where backups are stored
## @param backup.s3Bucket The S3 bucket used for storing backups
## @param backup.schedule Cron schedule for automated backups
## @param backup.cleanupStrategy The strategy for cleaning up old backups
## @param backup.s3AccessKey The access key for S3, used for authentication
## @param backup.s3SecretKey The secret key for S3, used for authentication
## @param backup.resticPassword The password for Restic backup encryption
backup:
enabled: false
s3Region: us-east-1
s3Bucket: s3.example.org/clickhouse-backups
schedule: "0 2 * * *"
cleanupStrategy: "--keep-last=3 --keep-daily=3 --keep-within-weekly=1m"
s3AccessKey: oobaiRus9pah8PhohL1ThaeTa4UVa7gu
s3SecretKey: ju3eum4dekeich9ahM1te8waeGai0oog
resticPassword: ChaXoveekoh6eigh4siesheeda2quai0