Mirror of https://github.com/outbackdingo/cozystack.git (synced 2026-03-31 19:04:33 +00:00)
Compare commits: v0.16.0...kubermatic (1 commit)
| Author | SHA1 | Date | |
|---|---|---|---|
| | 1b0a7eacca | | |
@@ -114,7 +114,7 @@ machine:
- name: zfs
- name: spl
install:
image: ghcr.io/aenix-io/cozystack/talos:v1.8.0
image: ghcr.io/aenix-io/cozystack/talos:v1.7.1
files:
- content: |
[plugins]
@@ -68,7 +68,7 @@ spec:
serviceAccountName: cozystack
containers:
- name: cozystack
image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.0"
image: "ghcr.io/aenix-io/cozystack/cozystack:v0.15.0"
env:
- name: KUBERNETES_SERVICE_HOST
value: localhost
@@ -87,7 +87,7 @@ spec:
fieldRef:
fieldPath: metadata.name
- name: darkhttpd
image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.0"
image: "ghcr.io/aenix-io/cozystack/cozystack:v0.15.0"
command:
- /usr/bin/darkhttpd
- /cozystack/assets
@@ -1 +1 @@
ghcr.io/aenix-io/cozystack/postgres-backup:0.7.0@sha256:d2015c6dba92293bda652d055e97d1be80e8414c2dc78037c12812d1a2e2cba1
ghcr.io/aenix-io/cozystack/postgres-backup:0.6.2@sha256:d2015c6dba92293bda652d055e97d1be80e8414c2dc78037c12812d1a2e2cba1
@@ -1 +1 @@
ghcr.io/aenix-io/cozystack/nginx-cache:0.3.1@sha256:cd744b2d1d50191f4908f2db83079b32973d1c009fe9468627be72efbfa0a107
ghcr.io/aenix-io/cozystack/nginx-cache:0.3.1@sha256:556bc8d29ee9e90b3d64d0481dcfc66483d055803315bba3d9ece17c0d97f32b
@@ -1 +1 @@
ghcr.io/aenix-io/cozystack/cluster-autoscaler:0.11.0@sha256:7f617de5a24de790a15d9e97c6287ff2b390922e6e74c7a665cbf498f634514d
ghcr.io/aenix-io/cozystack/cluster-autoscaler:latest@sha256:7f617de5a24de790a15d9e97c6287ff2b390922e6e74c7a665cbf498f634514d
@@ -1 +1 @@
ghcr.io/aenix-io/cozystack/kubevirt-cloud-provider:0.11.0@sha256:ba567212f9fe5e7c32af9857edd79eb012f3eb39c2eae0fc831b14d5b7879427
ghcr.io/aenix-io/cozystack/kubevirt-cloud-provider:latest@sha256:735aa8092501fc0f2904b685b15bc0137ea294cb08301ca1185d3dec5f467f0f
@@ -1 +1 @@
ghcr.io/aenix-io/cozystack/kubevirt-csi-driver:0.11.0@sha256:1a9e6592fc035dbaae27f308b934206858c2e0025d4c99cd906b51615cc9766c
ghcr.io/aenix-io/cozystack/kubevirt-csi-driver:latest@sha256:e56b46591cdf9140e97c3220a0c2681aadd4a4b3f7ea8473fb2504dc96e8b53a
@@ -1 +1 @@
ghcr.io/aenix-io/cozystack/ubuntu-container-disk:v1.30.1@sha256:1f249fbe52821a62f706c6038b13401234e1b758ac498e53395b8f9a642b015f
ghcr.io/aenix-io/cozystack/ubuntu-container-disk:v1.30.1@sha256:5ce80a453073c4f44347409133fc7b15f1d2f37a564d189871a4082fc552ff0f
@@ -1 +1 @@
ghcr.io/aenix-io/cozystack/mariadb-backup:0.5.1@sha256:793edb25a29cbc00781e40af883815ca36937e736e2b0d202ea9c9619fb6ca11
ghcr.io/aenix-io/cozystack/mariadb-backup:0.5.1@sha256:fa2b3195521cffa55eb6d71a50b875d3c234a45e5dff71b2b9002674175bea93
@@ -1 +1 @@
ghcr.io/aenix-io/cozystack/postgres-backup:0.7.0@sha256:d2015c6dba92293bda652d055e97d1be80e8414c2dc78037c12812d1a2e2cba1
ghcr.io/aenix-io/cozystack/postgres-backup:0.6.2@sha256:d2015c6dba92293bda652d055e97d1be80e8414c2dc78037c12812d1a2e2cba1
@@ -10,9 +10,7 @@ spec:
postgresql:
parameters:
max_wal_senders: "30"
{{- with .Values.postgresql.parameters.max_connections }}
max_connections: "{{ . }}"
{{- end }}
max_connections: "{{ .Values.postgresql.parameters.max_connections }}"

minSyncReplicas: {{ .Values.quorum.minSyncReplicas }}
maxSyncReplicas: {{ .Values.quorum.maxSyncReplicas }}
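The `{{- with }}` block above emits `max_connections` only when a value is supplied, and always quotes it in the rendered manifest. A minimal sketch of the two rendering paths, assuming a hypothetical user values file:

```yaml
# user-supplied values (illustrative)
postgresql:
  parameters:
    max_connections: 200

# rendered with the value set
postgresql:
  parameters:
    max_wal_senders: "30"
    max_connections: "200"

# rendered with the value omitted: the key is skipped entirely
postgresql:
  parameters:
    max_wal_senders: "30"
```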
@@ -29,9 +29,9 @@
"type": "object",
"properties": {
"max_connections": {
"type": "number",
"type": "string",
"description": "Determines the maximum number of concurrent connections to the database server. The default is typically 100 connections",
"default": 100
"default": "100"
}
}
}
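Whichever representation is chosen, the `type` and `default` in values.schema.json have to agree with the value shipped in values.yaml, because Helm validates the merged values against this schema before rendering. A rough illustration, assuming the numeric variant:

```yaml
# values.schema.json (fragment):  "max_connections": { "type": "number", "default": 100 }
# values.yaml and any --set override must then supply a number:
postgresql:
  parameters:
    max_connections: 100     # accepted by a "number" schema
    # max_connections: "100" # a quoted string would fail schema validation
```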
@@ -103,4 +103,4 @@
}
}
}
}
}
@@ -14,7 +14,7 @@ storageClass: ""
## @param postgresql.parameters.max_connections Determines the maximum number of concurrent connections to the database server. The default is typically 100 connections
postgresql:
parameters:
max_connections: 100
max_connections: "100"

## Configuration for the quorum-based synchronous replication
## @param quorum.minSyncReplicas Minimum number of synchronous replicas that must acknowledge a transaction before it is considered committed.
@@ -16,7 +16,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.4.2
version: 0.4.1

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
@@ -47,7 +47,7 @@ metadata:
config: '{{ printf "%s %s" $user $password | sha256sum }}'
spec:
importCredentialsSecret:
name: {{ $.Release.Name }}-{{ kebabcase $user }}-credentials
name: {{ $.Release.Name }}-{{ $user }}-credentials
rabbitmqClusterReference:
name: {{ $.Release.Name }}
---
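Passing the username through `kebabcase` keeps the generated Secret name a valid DNS-1123 subdomain even when the RabbitMQ user contains upper-case letters. A small sketch of the difference, with a made-up release name and users:

```yaml
# name: {{ $.Release.Name }}-{{ kebabcase $user }}-credentials
# release "rmq", $user = "adminUser"  ->  rmq-admin-user-credentials
# release "rmq", $user = "backup"     ->  rmq-backup-credentials
# the previous form ({{ $user }}) would have produced rmq-adminUser-credentials,
# which is not a valid Kubernetes object name
```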
@@ -31,8 +31,7 @@ kubernetes 0.8.0 ac11056e
kubernetes 0.8.1 e54608d8
kubernetes 0.8.2 5ca8823
kubernetes 0.9.0 9b6dd19
kubernetes 0.10.0 ac5c38b
kubernetes 0.11.0 HEAD
kubernetes 0.10.0 HEAD
mysql 0.1.0 f642698
mysql 0.2.0 8b975ff0
mysql 0.3.0 5ca8823
@@ -49,14 +48,12 @@ postgres 0.4.0 ec283c33
postgres 0.4.1 5ca8823
postgres 0.5.0 c07c4bbd
postgres 0.6.0 2a4768a
postgres 0.6.2 54fd61c
postgres 0.7.0 HEAD
postgres 0.6.2 HEAD
rabbitmq 0.1.0 f642698
rabbitmq 0.2.0 5ca8823
rabbitmq 0.3.0 9e33dc0
rabbitmq 0.4.0 36d8855
rabbitmq 0.4.1 35536bb
rabbitmq 0.4.2 HEAD
rabbitmq 0.4.1 HEAD
redis 0.1.1 f642698
redis 0.2.0 5ca8823
redis 0.3.0 HEAD
@@ -3,24 +3,24 @@
arch: amd64
platform: metal
secureboot: false
version: v1.8.0
version: v1.7.6
input:
kernel:
path: /usr/install/amd64/vmlinuz
initramfs:
path: /usr/install/amd64/initramfs.xz
baseInstaller:
imageRef: ghcr.io/siderolabs/installer:v1.8.0
imageRef: ghcr.io/siderolabs/installer:v1.7.6
systemExtensions:
- imageRef: ghcr.io/siderolabs/amd-ucode:20240909
- imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240909
- imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240909
- imageRef: ghcr.io/siderolabs/i915-ucode:20240909
- imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240909
- imageRef: ghcr.io/siderolabs/intel-ucode:20240910
- imageRef: ghcr.io/siderolabs/qlogic-firmware:20240909
- imageRef: ghcr.io/siderolabs/drbd:9.2.11-v1.8.0
- imageRef: ghcr.io/siderolabs/zfs:2.2.6-v1.8.0
- imageRef: ghcr.io/siderolabs/amd-ucode:20240811
- imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240811
- imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240811
- imageRef: ghcr.io/siderolabs/i915-ucode:20240811
- imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240811
- imageRef: ghcr.io/siderolabs/intel-ucode:20240813
- imageRef: ghcr.io/siderolabs/qlogic-firmware:20240811
- imageRef: ghcr.io/siderolabs/drbd:9.2.8-v1.7.6
- imageRef: ghcr.io/siderolabs/zfs:2.2.4-v1.7.6
output:
kind: initramfs
imageOptions: {}
@@ -3,24 +3,24 @@
arch: amd64
platform: metal
secureboot: false
version: v1.8.0
version: v1.7.6
input:
kernel:
path: /usr/install/amd64/vmlinuz
initramfs:
path: /usr/install/amd64/initramfs.xz
baseInstaller:
imageRef: ghcr.io/siderolabs/installer:v1.8.0
imageRef: ghcr.io/siderolabs/installer:v1.7.6
systemExtensions:
- imageRef: ghcr.io/siderolabs/amd-ucode:20240909
- imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240909
- imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240909
- imageRef: ghcr.io/siderolabs/i915-ucode:20240909
- imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240909
- imageRef: ghcr.io/siderolabs/intel-ucode:20240910
- imageRef: ghcr.io/siderolabs/qlogic-firmware:20240909
- imageRef: ghcr.io/siderolabs/drbd:9.2.11-v1.8.0
- imageRef: ghcr.io/siderolabs/zfs:2.2.6-v1.8.0
- imageRef: ghcr.io/siderolabs/amd-ucode:20240811
- imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240811
- imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240811
- imageRef: ghcr.io/siderolabs/i915-ucode:20240811
- imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240811
- imageRef: ghcr.io/siderolabs/intel-ucode:20240813
- imageRef: ghcr.io/siderolabs/qlogic-firmware:20240811
- imageRef: ghcr.io/siderolabs/drbd:9.2.8-v1.7.6
- imageRef: ghcr.io/siderolabs/zfs:2.2.4-v1.7.6
output:
kind: installer
imageOptions: {}
@@ -3,24 +3,24 @@
arch: amd64
platform: metal
secureboot: false
version: v1.8.0
version: v1.7.6
input:
kernel:
path: /usr/install/amd64/vmlinuz
initramfs:
path: /usr/install/amd64/initramfs.xz
baseInstaller:
imageRef: ghcr.io/siderolabs/installer:v1.8.0
imageRef: ghcr.io/siderolabs/installer:v1.7.6
systemExtensions:
- imageRef: ghcr.io/siderolabs/amd-ucode:20240909
- imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240909
- imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240909
- imageRef: ghcr.io/siderolabs/i915-ucode:20240909
- imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240909
- imageRef: ghcr.io/siderolabs/intel-ucode:20240910
- imageRef: ghcr.io/siderolabs/qlogic-firmware:20240909
- imageRef: ghcr.io/siderolabs/drbd:9.2.11-v1.8.0
- imageRef: ghcr.io/siderolabs/zfs:2.2.6-v1.8.0
- imageRef: ghcr.io/siderolabs/amd-ucode:20240811
- imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240811
- imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240811
- imageRef: ghcr.io/siderolabs/i915-ucode:20240811
- imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240811
- imageRef: ghcr.io/siderolabs/intel-ucode:20240813
- imageRef: ghcr.io/siderolabs/qlogic-firmware:20240811
- imageRef: ghcr.io/siderolabs/drbd:9.2.8-v1.7.6
- imageRef: ghcr.io/siderolabs/zfs:2.2.4-v1.7.6
output:
kind: iso
imageOptions: {}
@@ -3,24 +3,24 @@
arch: amd64
platform: metal
secureboot: false
version: v1.8.0
version: v1.7.6
input:
kernel:
path: /usr/install/amd64/vmlinuz
initramfs:
path: /usr/install/amd64/initramfs.xz
baseInstaller:
imageRef: ghcr.io/siderolabs/installer:v1.8.0
imageRef: ghcr.io/siderolabs/installer:v1.7.6
systemExtensions:
- imageRef: ghcr.io/siderolabs/amd-ucode:20240909
- imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240909
- imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240909
- imageRef: ghcr.io/siderolabs/i915-ucode:20240909
- imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240909
- imageRef: ghcr.io/siderolabs/intel-ucode:20240910
- imageRef: ghcr.io/siderolabs/qlogic-firmware:20240909
- imageRef: ghcr.io/siderolabs/drbd:9.2.11-v1.8.0
- imageRef: ghcr.io/siderolabs/zfs:2.2.6-v1.8.0
- imageRef: ghcr.io/siderolabs/amd-ucode:20240811
- imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240811
- imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240811
- imageRef: ghcr.io/siderolabs/i915-ucode:20240811
- imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240811
- imageRef: ghcr.io/siderolabs/intel-ucode:20240813
- imageRef: ghcr.io/siderolabs/qlogic-firmware:20240811
- imageRef: ghcr.io/siderolabs/drbd:9.2.8-v1.7.6
- imageRef: ghcr.io/siderolabs/zfs:2.2.4-v1.7.6
output:
kind: kernel
imageOptions: {}
@@ -3,24 +3,24 @@
arch: amd64
platform: metal
secureboot: false
version: v1.8.0
version: v1.7.6
input:
kernel:
path: /usr/install/amd64/vmlinuz
initramfs:
path: /usr/install/amd64/initramfs.xz
baseInstaller:
imageRef: ghcr.io/siderolabs/installer:v1.8.0
imageRef: ghcr.io/siderolabs/installer:v1.7.6
systemExtensions:
- imageRef: ghcr.io/siderolabs/amd-ucode:20240909
- imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240909
- imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240909
- imageRef: ghcr.io/siderolabs/i915-ucode:20240909
- imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240909
- imageRef: ghcr.io/siderolabs/intel-ucode:20240910
- imageRef: ghcr.io/siderolabs/qlogic-firmware:20240909
- imageRef: ghcr.io/siderolabs/drbd:9.2.11-v1.8.0
- imageRef: ghcr.io/siderolabs/zfs:2.2.6-v1.8.0
- imageRef: ghcr.io/siderolabs/amd-ucode:20240811
- imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240811
- imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240811
- imageRef: ghcr.io/siderolabs/i915-ucode:20240811
- imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240811
- imageRef: ghcr.io/siderolabs/intel-ucode:20240813
- imageRef: ghcr.io/siderolabs/qlogic-firmware:20240811
- imageRef: ghcr.io/siderolabs/drbd:9.2.8-v1.7.6
- imageRef: ghcr.io/siderolabs/zfs:2.2.4-v1.7.6
output:
kind: image
imageOptions: { diskSize: 1306525696, diskFormat: raw }
@@ -3,24 +3,24 @@
arch: amd64
platform: nocloud
secureboot: false
version: v1.8.0
version: v1.7.6
input:
kernel:
path: /usr/install/amd64/vmlinuz
initramfs:
path: /usr/install/amd64/initramfs.xz
baseInstaller:
imageRef: ghcr.io/siderolabs/installer:v1.8.0
imageRef: ghcr.io/siderolabs/installer:v1.7.6
systemExtensions:
- imageRef: ghcr.io/siderolabs/amd-ucode:20240909
- imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240909
- imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240909
- imageRef: ghcr.io/siderolabs/i915-ucode:20240909
- imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240909
- imageRef: ghcr.io/siderolabs/intel-ucode:20240910
- imageRef: ghcr.io/siderolabs/qlogic-firmware:20240909
- imageRef: ghcr.io/siderolabs/drbd:9.2.11-v1.8.0
- imageRef: ghcr.io/siderolabs/zfs:2.2.6-v1.8.0
- imageRef: ghcr.io/siderolabs/amd-ucode:20240811
- imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240811
- imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240811
- imageRef: ghcr.io/siderolabs/i915-ucode:20240811
- imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240811
- imageRef: ghcr.io/siderolabs/intel-ucode:20240813
- imageRef: ghcr.io/siderolabs/qlogic-firmware:20240811
- imageRef: ghcr.io/siderolabs/drbd:9.2.8-v1.7.6
- imageRef: ghcr.io/siderolabs/zfs:2.2.4-v1.7.6
output:
kind: image
imageOptions: { diskSize: 1306525696, diskFormat: raw }
@@ -1,2 +1,2 @@
cozystack:
image: ghcr.io/aenix-io/cozystack/cozystack:v0.16.0@sha256:1119f30a50b3fea1ac7d8068009ca233df6214d709c7861f7ce8fbf0402cdc72
image: ghcr.io/aenix-io/cozystack/cozystack:v0.15.0@sha256:aeff26a80f84b4323578e613b3bf03caa842d617ec8d9ca98706867c1e70609f
@@ -1,2 +1,2 @@
e2e:
image: ghcr.io/aenix-io/cozystack/e2e-sandbox:v0.16.0@sha256:25b298d621ec79431d106184d59849bbae634588742583d111628126ad8615c5
image: ghcr.io/aenix-io/cozystack/e2e-sandbox:v0.15.0@sha256:20cc84e4a11db31434881355c070113a7823501a28a6114ca02830b18607ad21
@@ -3,4 +3,4 @@ name: monitoring
description: Monitoring and observability stack
icon: /logos/monitoring.svg
type: application
version: 1.5.0
version: 1.4.0
@@ -12,7 +12,6 @@ monitoring 1.1.0 15478a88
monitoring 1.2.0 c9e0d63b
monitoring 1.2.1 4471b4ba
monitoring 1.3.0 6c5cf5b
monitoring 1.4.0 adaf603b
monitoring 1.5.0 HEAD
monitoring 1.4.0 HEAD
seaweedfs 0.1.0 5ca8823
seaweedfs 0.2.0 HEAD
@@ -79,7 +79,7 @@ annotations:
Pod IP Pool\n description: |\n CiliumPodIPPool defines an IP pool that can
be used for pooled IPAM (i.e. the multi-pool IPAM mode).\n"
apiVersion: v2
appVersion: 1.16.2
appVersion: 1.16.1
description: eBPF-based Networking, Security, and Observability
home: https://cilium.io/
icon: https://cdn.jsdelivr.net/gh/cilium/cilium@main/Documentation/images/logo-solo.svg
@@ -95,4 +95,4 @@ kubeVersion: '>= 1.21.0-0'
name: cilium
sources:
- https://github.com/cilium/cilium
version: 1.16.2
version: 1.16.1
@@ -1,6 +1,6 @@
# cilium




Cilium is open source software for providing and transparently securing
network connectivity and loadbalancing between application workloads such as
@@ -83,7 +83,7 @@ contributors across the globe, there is almost always someone available to help.
| authentication.mutual.spire.install.agent.tolerations | list | `[{"effect":"NoSchedule","key":"node.kubernetes.io/not-ready"},{"effect":"NoSchedule","key":"node-role.kubernetes.io/master"},{"effect":"NoSchedule","key":"node-role.kubernetes.io/control-plane"},{"effect":"NoSchedule","key":"node.cloudprovider.kubernetes.io/uninitialized","value":"true"},{"key":"CriticalAddonsOnly","operator":"Exists"}]` | SPIRE agent tolerations configuration By default it follows the same tolerations as the agent itself to allow the Cilium agent on this node to connect to SPIRE. ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ |
| authentication.mutual.spire.install.enabled | bool | `true` | Enable SPIRE installation. This will only take effect only if authentication.mutual.spire.enabled is true |
| authentication.mutual.spire.install.existingNamespace | bool | `false` | SPIRE namespace already exists. Set to true if Helm should not create, manage, and import the SPIRE namespace. |
| authentication.mutual.spire.install.initImage | object | `{"digest":"sha256:c230832bd3b0be59a6c47ed64294f9ce71e91b327957920b6929a0caa8353140","override":null,"pullPolicy":"IfNotPresent","repository":"docker.io/library/busybox","tag":"1.36.1","useDigest":true}` | init container image of SPIRE agent and server |
| authentication.mutual.spire.install.initImage | object | `{"digest":"sha256:9ae97d36d26566ff84e8893c64a6dc4fe8ca6d1144bf5b87b2b85a32def253c7","override":null,"pullPolicy":"IfNotPresent","repository":"docker.io/library/busybox","tag":"1.36.1","useDigest":true}` | init container image of SPIRE agent and server |
| authentication.mutual.spire.install.namespace | string | `"cilium-spire"` | SPIRE namespace to install into |
| authentication.mutual.spire.install.server.affinity | object | `{}` | SPIRE server affinity configuration |
| authentication.mutual.spire.install.server.annotations | object | `{}` | SPIRE server annotations |
@@ -182,7 +182,7 @@ contributors across the globe, there is almost always someone available to help.
| clustermesh.apiserver.extraVolumeMounts | list | `[]` | Additional clustermesh-apiserver volumeMounts. |
| clustermesh.apiserver.extraVolumes | list | `[]` | Additional clustermesh-apiserver volumes. |
| clustermesh.apiserver.healthPort | int | `9880` | TCP port for the clustermesh-apiserver health API. |
| clustermesh.apiserver.image | object | `{"digest":"sha256:cc84190fed92e03a2b3a33bc670b2447b521ee258ad9b076baaad13be312ea73","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/clustermesh-apiserver","tag":"v1.16.2","useDigest":true}` | Clustermesh API server image. |
| clustermesh.apiserver.image | object | `{"digest":"sha256:e9c77417cd474cc943b2303a76c5cf584ac7024dd513ebb8d608cb62fe28896f","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/clustermesh-apiserver","tag":"v1.16.1","useDigest":true}` | Clustermesh API server image. |
| clustermesh.apiserver.kvstoremesh.enabled | bool | `true` | Enable KVStoreMesh. KVStoreMesh caches the information retrieved from the remote clusters in the local etcd instance. |
| clustermesh.apiserver.kvstoremesh.extraArgs | list | `[]` | Additional KVStoreMesh arguments. |
| clustermesh.apiserver.kvstoremesh.extraEnv | list | `[]` | Additional KVStoreMesh environment variables. |
@@ -353,7 +353,7 @@ contributors across the globe, there is almost always someone available to help.
| envoy.extraVolumes | list | `[]` | Additional envoy volumes. |
| envoy.healthPort | int | `9878` | TCP port for the health API. |
| envoy.idleTimeoutDurationSeconds | int | `60` | Set Envoy upstream HTTP idle connection timeout seconds. Does not apply to connections with pending requests. Default 60s |
| envoy.image | object | `{"digest":"sha256:9762041c3760de226a8b00cc12f27dacc28b7691ea926748f9b5c18862db503f","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/cilium-envoy","tag":"v1.29.9-1726784081-a90146d13b4cd7d168d573396ccf2b3db5a3b047","useDigest":true}` | Envoy container image. |
| envoy.image | object | `{"digest":"sha256:bd5ff8c66716080028f414ec1cb4f7dc66f40d2fb5a009fff187f4a9b90b566b","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/cilium-envoy","tag":"v1.29.7-39a2a56bbd5b3a591f69dbca51d3e30ef97e0e51","useDigest":true}` | Envoy container image. |
| envoy.livenessProbe.failureThreshold | int | `10` | failure threshold of liveness probe |
| envoy.livenessProbe.periodSeconds | int | `30` | interval between checks of the liveness probe |
| envoy.log.format | string | `"[%Y-%m-%d %T.%e][%t][%l][%n] [%g:%#] %v"` | The format string to use for laying out the log message metadata of Envoy. |
@@ -484,7 +484,7 @@ contributors across the globe, there is almost always someone available to help.
| hubble.relay.extraVolumes | list | `[]` | Additional hubble-relay volumes. |
| hubble.relay.gops.enabled | bool | `true` | Enable gops for hubble-relay |
| hubble.relay.gops.port | int | `9893` | Configure gops listen port for hubble-relay |
| hubble.relay.image | object | `{"digest":"sha256:4b559907b378ac18af82541dafab430a857d94f1057f2598645624e6e7ea286c","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/hubble-relay","tag":"v1.16.2","useDigest":true}` | Hubble-relay container image. |
| hubble.relay.image | object | `{"digest":"sha256:2e1b4c739a676ae187d4c2bfc45c3e865bda2567cc0320a90cb666657fcfcc35","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/hubble-relay","tag":"v1.16.1","useDigest":true}` | Hubble-relay container image. |
| hubble.relay.listenHost | string | `""` | Host to listen to. Specify an empty string to bind to all the interfaces. |
| hubble.relay.listenPort | string | `"4245"` | Port to listen to. |
| hubble.relay.nodeSelector | object | `{"kubernetes.io/os":"linux"}` | Node labels for pod assignment ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector |
@@ -590,7 +590,7 @@ contributors across the globe, there is almost always someone available to help.
| hubble.ui.updateStrategy | object | `{"rollingUpdate":{"maxUnavailable":1},"type":"RollingUpdate"}` | hubble-ui update strategy. |
| identityAllocationMode | string | `"crd"` | Method to use for identity allocation (`crd` or `kvstore`). |
| identityChangeGracePeriod | string | `"5s"` | Time to wait before using new identity on endpoint identity change. |
| image | object | `{"digest":"sha256:4386a8580d8d86934908eea022b0523f812e6a542f30a86a47edd8bed90d51ea","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/cilium","tag":"v1.16.2","useDigest":true}` | Agent container image. |
| image | object | `{"digest":"sha256:0b4a3ab41a4760d86b7fc945b8783747ba27f29dac30dd434d94f2c9e3679f39","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/cilium","tag":"v1.16.1","useDigest":true}` | Agent container image. |
| imagePullSecrets | list | `[]` | Configure image pull secrets for pulling container images |
| ingressController.default | bool | `false` | Set cilium ingress controller to be the default ingress controller This will let cilium ingress controller route entries without ingress class set |
| ingressController.defaultSecretName | string | `nil` | Default secret name for ingresses without .spec.tls[].secretName set. |
@@ -717,7 +717,7 @@ contributors across the globe, there is almost always someone available to help.
| operator.hostNetwork | bool | `true` | HostNetwork setting |
| operator.identityGCInterval | string | `"15m0s"` | Interval for identity garbage collection. |
| operator.identityHeartbeatTimeout | string | `"30m0s"` | Timeout for identity heartbeats. |
| operator.image | object | `{"alibabacloudDigest":"sha256:16e33abb6b8381e2f66388b6d7141399f06c9b51b9ffa08fd159b8d321929716","awsDigest":"sha256:b6a73ec94407a56cccc8a395225e2aecc3ca3611e7acfeec86201c19fc0727dd","azureDigest":"sha256:fde7cf8bb887e106cd388bb5c3327e92682b2ec3ab4f03bb57b87f495b99f727","genericDigest":"sha256:cccfd3b886d52cb132c06acca8ca559f0fce91a6bd99016219b1a81fdbc4813a","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/operator","suffix":"","tag":"v1.16.2","useDigest":true}` | cilium-operator image. |
| operator.image | object | `{"alibabacloudDigest":"sha256:4381adf48d76ec482551183947e537d44bcac9b6c31a635a9ac63f696d978804","awsDigest":"sha256:e3876fcaf2d6ccc8d5b4aaaded7b1efa971f3f4175eaa2c8a499878d58c39df4","azureDigest":"sha256:e55c222654a44ceb52db7ade3a7b9e8ef05681ff84c14ad1d46fea34869a7a22","genericDigest":"sha256:3bc7e7a43bc4a4d8989cb7936c5d96675dd2d02c306adf925ce0a7c35aa27dc4","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/operator","suffix":"","tag":"v1.16.1","useDigest":true}` | cilium-operator image. |
| operator.nodeGCInterval | string | `"5m0s"` | Interval for cilium node garbage collection. |
| operator.nodeSelector | object | `{"kubernetes.io/os":"linux"}` | Node labels for cilium-operator pod assignment ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector |
| operator.podAnnotations | object | `{}` | Annotations to be added to cilium-operator pods |
@@ -767,7 +767,7 @@ contributors across the globe, there is almost always someone available to help.
| preflight.extraEnv | list | `[]` | Additional preflight environment variables. |
| preflight.extraVolumeMounts | list | `[]` | Additional preflight volumeMounts. |
| preflight.extraVolumes | list | `[]` | Additional preflight volumes. |
| preflight.image | object | `{"digest":"sha256:4386a8580d8d86934908eea022b0523f812e6a542f30a86a47edd8bed90d51ea","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/cilium","tag":"v1.16.2","useDigest":true}` | Cilium pre-flight image. |
| preflight.image | object | `{"digest":"sha256:0b4a3ab41a4760d86b7fc945b8783747ba27f29dac30dd434d94f2c9e3679f39","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/cilium","tag":"v1.16.1","useDigest":true}` | Cilium pre-flight image. |
| preflight.nodeSelector | object | `{"kubernetes.io/os":"linux"}` | Node labels for preflight pod assignment ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector |
| preflight.podAnnotations | object | `{}` | Annotations to be added to preflight pods |
| preflight.podDisruptionBudget.enabled | bool | `false` | enable PodDisruptionBudget ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/ |
@@ -26,6 +26,10 @@ spec:
template:
metadata:
annotations:
{{- if and .Values.envoy.prometheus.enabled (not .Values.envoy.prometheus.serviceMonitor.enabled) }}
prometheus.io/port: "{{ .Values.envoy.prometheus.port }}"
prometheus.io/scrape: "true"
{{- end }}
{{- if .Values.envoy.rollOutPods }}
# ensure pods roll when configmap updates
cilium.io/cilium-envoy-configmap-checksum: {{ include (print $.Template.BasePath "/cilium-envoy/configmap.yaml") . | sha256sum | quote }}
@@ -1,33 +0,0 @@
{{- $envoyDS := eq (include "envoyDaemonSetEnabled" .) "true" -}}
{{- if and $envoyDS (not .Values.preflight.enabled) .Values.envoy.prometheus.enabled }}
apiVersion: v1
kind: Service
metadata:
name: cilium-envoy
namespace: {{ .Release.Namespace }}
{{- if or (not .Values.envoy.prometheus.serviceMonitor.enabled) .Values.envoy.annotations }}
annotations:
{{- if not .Values.envoy.prometheus.serviceMonitor.enabled }}
prometheus.io/scrape: "true"
prometheus.io/port: {{ .Values.envoy.prometheus.port | quote }}
{{- end }}
{{- with .Values.envoy.annotations }}
{{- toYaml . | nindent 4 }}
{{- end }}
{{- end }}
labels:
k8s-app: cilium-envoy
app.kubernetes.io/name: cilium-envoy
app.kubernetes.io/part-of: cilium
io.cilium/app: proxy
spec:
clusterIP: None
type: ClusterIP
selector:
k8s-app: cilium-envoy
ports:
- name: envoy-metrics
port: {{ .Values.envoy.prometheus.port }}
protocol: TCP
targetPort: envoy-metrics
{{- end }}
@@ -362,7 +362,7 @@ spec:
name: cilium-clustermesh
optional: true
# note: items are not explicitly listed here, since the entries of this secret
# depend on the peers configured, and that would cause a restart of all operators
# depend on the peers configured, and that would cause a restart of all agents
# at every addition/removal. Leaving the field empty makes each secret entry
# to be automatically projected into the volume as a file whose name is the key.
- secret:
@@ -384,28 +384,5 @@ spec:
- key: {{ .Values.tls.caBundle.key }}
path: common-etcd-client-ca.crt
{{- end }}
# note: we configure the volume for the kvstoremesh-specific certificate
# regardless of whether KVStoreMesh is enabled or not, so that it can be
# automatically mounted in case KVStoreMesh gets subsequently enabled,
# without requiring an operator restart.
- secret:
name: clustermesh-apiserver-local-cert
optional: true
items:
- key: tls.key
path: local-etcd-client.key
- key: tls.crt
path: local-etcd-client.crt
{{- if not .Values.tls.caBundle.enabled }}
- key: ca.crt
path: local-etcd-client-ca.crt
{{- else }}
- {{ .Values.tls.caBundle.useSecret | ternary "secret" "configMap" }}:
name: {{ .Values.tls.caBundle.name }}
optional: true
items:
- key: {{ .Values.tls.caBundle.key }}
path: local-etcd-client-ca.crt
{{- end }}
{{- end }}
{{- end }}
@@ -1,47 +1,3 @@
{{/* validate deprecated options are not being used */}}

{{/* Options deprecated in v1.15 and removed in v1.16 */}}
{{- if or
(dig "encryption" "keyFile" "" .Values.AsMap)
(dig "encryption" "mountPath" "" .Values.AsMap)
(dig "encryption" "secretName" "" .Values.AsMap)
(dig "encryption" "interface" "" .Values.AsMap)
}}
{{ fail "encryption.{keyFile,mountPath,secretName,interface} were deprecated in v1.14 and has been removed in v1.16. For details please refer to https://docs.cilium.io/en/v1.16/operations/upgrade/#helm-options" }}
{{- end }}
{{- if or
((dig "proxy" "prometheus" "enabled" "" .Values.AsMap) | toString)
(dig "proxy" "prometheus" "port" "" .Values.AsMap)
}}
{{ fail "proxy.prometheus.enabled and proxy.prometheus.port were deprecated in v1.14 and has been removed in v1.16. For details please refer to https://docs.cilium.io/en/v1.16/operations/upgrade/#helm-options" }}
{{- end }}
{{- if (dig "endpointStatus" "" .Values.AsMap) }}
{{ fail "endpointStatus has been removed in v1.16. For details please refer to https://docs.cilium.io/en/v1.16/operations/upgrade/#helm-options" }}
{{- end }}
{{- if (dig "remoteNodeIdentity" "" .Values.AsMap) }}
{{ fail "remoteNodeIdentity was deprecated in v1.15 and has been removed in v1.16. For details please refer to https://docs.cilium.io/en/v1.16/operations/upgrade/#helm-options" }}
{{- end }}
{{- if (dig "containerRuntime" "integration" "" .Values.AsMap) }}
{{ fail "containerRuntime.integration was deprecated in v1.14 and has been removed in v1.16. For details please refer to https://docs.cilium.io/en/v1.16/operations/upgrade/#helm-options" }}
{{- end }}
{{- if (dig "etcd" "managed" "" .Values.AsMap) }}
{{ fail "etcd.managed was deprecated in v1.10 has been removed in v1.16. For details please refer to https://docs.cilium.io/en/v1.16/operations/upgrade/#helm-options" }}
{{- end }}

{{/* Options deprecated in v1.14 and removed in v1.15 */}}
{{- if .Values.tunnel }}
{{ fail "tunnel was deprecated in v1.14 and has been removed in v1.15. For details please refer to https://docs.cilium.io/en/v1.15/operations/upgrade/#helm-options" }}
{{- end }}
{{- if or (dig "clustermesh" "apiserver" "tls" "ca" "cert" "" .Values.AsMap) (dig "clustermesh" "apiserver" "tls" "ca" "key" "" .Values.AsMap) }}
{{ fail "clustermesh.apiserver.tls.ca.cert and clustermesh.apiserver.tls.ca.key were deprecated in v1.14 and has been removed in v1.15. For details please refer to https://docs.cilium.io/en/v1.15/operations/upgrade/#helm-options" }}
{{- end }}
{{- if .Values.enableK8sEventHandover }}
{{ fail "enableK8sEventHandover was deprecated in v1.14 and has been removed in v1.15. For details please refer to https://docs.cilium.io/en/v1.15/operations/upgrade/#helm-options" }}
{{- end }}
{{- if .Values.enableCnpStatusUpdates }}
{{ fail "enableCnpStatusUpdates was deprecated in v1.14 and has been removed in v1.15. For details please refer to https://docs.cilium.io/en/v1.15/operations/upgrade/#helm-options" }}
{{- end }}

{{/* validate hubble config */}}
{{- if and .Values.hubble.ui.enabled (not .Values.hubble.ui.standalone.enabled) }}
{{- if not .Values.hubble.relay.enabled }}
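The removed checks rely on Sprig's `dig`, which walks a nested map and returns the given default when any key along the path is missing, so a `fail` only fires when a deprecated option is actually set. A minimal sketch of that behaviour (the values shown are made up for illustration):

```yaml
# dig <keys...> <default> <dict> returns the nested value, or the default if absent
{{- $keyFile := dig "encryption" "keyFile" "" .Values.AsMap }}
# .Values.encryption.keyFile = "/keys/cilium"  ->  $keyFile = "/keys/cilium" (truthy, fail fires)
# .Values.encryption not set at all            ->  $keyFile = ""             (falsy, no failure)
{{- if $keyFile }}
{{ fail "encryption.keyFile was removed; see the upgrade guide" }}
{{- end }}
```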
@@ -153,10 +153,10 @@ image:
# @schema
override: ~
repository: "quay.io/cilium/cilium"
tag: "v1.16.2"
tag: "v1.16.1"
pullPolicy: "IfNotPresent"
# cilium-digest
digest: "sha256:4386a8580d8d86934908eea022b0523f812e6a542f30a86a47edd8bed90d51ea"
digest: "sha256:0b4a3ab41a4760d86b7fc945b8783747ba27f29dac30dd434d94f2c9e3679f39"
useDigest: true
# -- Affinity for cilium-agent.
affinity:
@@ -1309,9 +1309,9 @@ hubble:
# @schema
override: ~
repository: "quay.io/cilium/hubble-relay"
tag: "v1.16.2"
tag: "v1.16.1"
# hubble-relay-digest
digest: "sha256:4b559907b378ac18af82541dafab430a857d94f1057f2598645624e6e7ea286c"
digest: "sha256:2e1b4c739a676ae187d4c2bfc45c3e865bda2567cc0320a90cb666657fcfcc35"
useDigest: true
pullPolicy: "IfNotPresent"
# -- Specifies the resources for the hubble-relay pods
@@ -2158,9 +2158,9 @@ envoy:
# @schema
override: ~
repository: "quay.io/cilium/cilium-envoy"
tag: "v1.29.9-1726784081-a90146d13b4cd7d168d573396ccf2b3db5a3b047"
tag: "v1.29.7-39a2a56bbd5b3a591f69dbca51d3e30ef97e0e51"
pullPolicy: "IfNotPresent"
digest: "sha256:9762041c3760de226a8b00cc12f27dacc28b7691ea926748f9b5c18862db503f"
digest: "sha256:bd5ff8c66716080028f414ec1cb4f7dc66f40d2fb5a009fff187f4a9b90b566b"
useDigest: true
# -- Additional containers added to the cilium Envoy DaemonSet.
extraContainers: []
@@ -2474,15 +2474,15 @@ operator:
# @schema
override: ~
repository: "quay.io/cilium/operator"
tag: "v1.16.2"
tag: "v1.16.1"
# operator-generic-digest
genericDigest: "sha256:cccfd3b886d52cb132c06acca8ca559f0fce91a6bd99016219b1a81fdbc4813a"
genericDigest: "sha256:3bc7e7a43bc4a4d8989cb7936c5d96675dd2d02c306adf925ce0a7c35aa27dc4"
# operator-azure-digest
azureDigest: "sha256:fde7cf8bb887e106cd388bb5c3327e92682b2ec3ab4f03bb57b87f495b99f727"
azureDigest: "sha256:e55c222654a44ceb52db7ade3a7b9e8ef05681ff84c14ad1d46fea34869a7a22"
# operator-aws-digest
awsDigest: "sha256:b6a73ec94407a56cccc8a395225e2aecc3ca3611e7acfeec86201c19fc0727dd"
awsDigest: "sha256:e3876fcaf2d6ccc8d5b4aaaded7b1efa971f3f4175eaa2c8a499878d58c39df4"
# operator-alibabacloud-digest
alibabacloudDigest: "sha256:16e33abb6b8381e2f66388b6d7141399f06c9b51b9ffa08fd159b8d321929716"
alibabacloudDigest: "sha256:4381adf48d76ec482551183947e537d44bcac9b6c31a635a9ac63f696d978804"
useDigest: true
pullPolicy: "IfNotPresent"
suffix: ""
@@ -2756,9 +2756,9 @@ preflight:
# @schema
override: ~
repository: "quay.io/cilium/cilium"
tag: "v1.16.2"
tag: "v1.16.1"
# cilium-digest
digest: "sha256:4386a8580d8d86934908eea022b0523f812e6a542f30a86a47edd8bed90d51ea"
digest: "sha256:0b4a3ab41a4760d86b7fc945b8783747ba27f29dac30dd434d94f2c9e3679f39"
useDigest: true
pullPolicy: "IfNotPresent"
# -- The priority class to use for the preflight pod.
@@ -2905,9 +2905,9 @@ clustermesh:
# @schema
override: ~
repository: "quay.io/cilium/clustermesh-apiserver"
tag: "v1.16.2"
tag: "v1.16.1"
# clustermesh-apiserver-digest
digest: "sha256:cc84190fed92e03a2b3a33bc670b2447b521ee258ad9b076baaad13be312ea73"
digest: "sha256:e9c77417cd474cc943b2303a76c5cf584ac7024dd513ebb8d608cb62fe28896f"
useDigest: true
pullPolicy: "IfNotPresent"
# -- TCP port for the clustermesh-apiserver health API.
@@ -3406,7 +3406,7 @@ authentication:
override: ~
repository: "docker.io/library/busybox"
tag: "1.36.1"
digest: "sha256:c230832bd3b0be59a6c47ed64294f9ce71e91b327957920b6929a0caa8353140"
digest: "sha256:9ae97d36d26566ff84e8893c64a6dc4fe8ca6d1144bf5b87b2b85a32def253c7"
useDigest: true
pullPolicy: "IfNotPresent"
# SPIRE agent configuration
@@ -1,2 +1,2 @@
ARG VERSION=v1.16.2
ARG VERSION=v1.16.1
FROM quay.io/cilium/cilium:${VERSION}
@@ -15,4 +15,4 @@ cilium:
enableIdentityMark: false
enableRuntimeDeviceDetection: true
forceDeviceDetection: true
devices: "ovn0 genev_sys_6081"
devices: ovn0
@@ -12,7 +12,7 @@ cilium:
mode: "kubernetes"
image:
repository: ghcr.io/aenix-io/cozystack/cilium
tag: 1.16.2
digest: "sha256:534c5b04fef356a6be59234243c23c0c09702fe1e2c8872012afb391ce2965c4"
tag: 1.16.1
digest: "sha256:9593dbc3bd25487b52d8f43330d4a308e450605479a8384a32117e9613289892"
envoy:
enabled: false
@@ -33,11 +33,11 @@ kubeapps:
image:
registry: ghcr.io/aenix-io/cozystack
repository: dashboard
tag: v0.16.0
tag: v0.15.0
digest: "sha256:4818712e9fc9c57cc321512760c3226af564a04e69d4b3ec9229ab91fd39abeb"
kubeappsapis:
image:
registry: ghcr.io/aenix-io/cozystack
repository: kubeapps-apis
tag: v0.16.0
digest: "sha256:55bc8e2495933112c7cb4bb9e3b1fcb8df46aa14e27fa007f78388a9757e3238"
tag: v0.15.0
digest: "sha256:70c095c8f7e3ecfa11433a3a2c8f57f6ff5a0053f006939a2c171c180cc50baf"
@@ -1,11 +1,7 @@
NAME=fluxcd
NAMESPACE=cozy-$(NAME)

include ../../../scripts/package.mk

apply-locally:
helm upgrade -i -n $(NAMESPACE) $(NAME) .

update:
rm -rf charts
helm pull oci://ghcr.io/controlplaneio-fluxcd/charts/flux-instance --untar --untardir charts
include ../../../scripts/package.mk
@@ -1,24 +0,0 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
helmdocs.gotmpl
@@ -1,28 +0,0 @@
annotations:
artifacthub.io/license: AGPL-3.0
artifacthub.io/links: |
- name: Documentation
url: https://fluxcd.control-plane.io/operator
- name: Chart Source
url: https://github.com/controlplaneio-fluxcd/charts
- name: Upstream Project
url: https://github.com/controlplaneio-fluxcd/flux-operator
apiVersion: v2
appVersion: v0.9.0
description: 'A Helm chart for deploying a Flux instance managed by Flux Operator. '
home: https://github.com/controlplaneio-fluxcd
icon: https://raw.githubusercontent.com/cncf/artwork/main/projects/flux/icon/color/flux-icon-color.png
keywords:
- flux
- fluxcd
- gitops
kubeVersion: '>=1.22.0-0'
maintainers:
- email: flux-enterprise@control-plane.io
name: ControlPlane Flux Team
name: flux-instance
sources:
- https://github.com/controlplaneio-fluxcd/flux-operator
- https://github.com/controlplaneio-fluxcd/charts
type: application
version: 0.9.0
@@ -1,52 +0,0 @@
# flux-instance

  

This chart is a thin wrapper around the `FluxInstance` custom resource, which is
used by the [Flux Operator](https://github.com/controlplaneio-fluxcd/flux-operator)
to install, configure and automatically upgrade Flux.

## Prerequisites

- Kubernetes 1.22+
- Helm 3.8+

## Installing the Chart

To deploy Flux in the `flux-system` namespace:

```console
helm -n flux-system install flux oci://ghcr.io/controlplaneio-fluxcd/charts/flux-instance
```

For more information on the available configuration options,
see the [Flux Instance documentation](https://fluxcd.control-plane.io/operator/fluxinstance/).

## Uninstalling the Chart

To uninstall Flux without affecting the resources it manages:

```console
helm -n flux-system uninstall flux
```

## Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| commonAnnotations | object | `{}` | Common annotations to add to all deployed objects including pods. |
| commonLabels | object | `{}` | Common labels to add to all deployed objects including pods. |
| fullnameOverride | string | `"flux"` | |
| instance.cluster | object | `{"domain":"cluster.local","multitenant":false,"networkPolicy":true,"tenantDefaultServiceAccount":"default","type":"kubernetes"}` | Cluster https://fluxcd.control-plane.io/operator/fluxinstance/#cluster-configuration |
| instance.components | list | `["source-controller","kustomize-controller","helm-controller","notification-controller"]` | Components https://fluxcd.control-plane.io/operator/fluxinstance/#components-configuration |
| instance.distribution | object | `{"artifact":"oci://ghcr.io/controlplaneio-fluxcd/flux-operator-manifests:latest","imagePullSecret":"","registry":"ghcr.io/fluxcd","version":"2.x"}` | Distribution https://fluxcd.control-plane.io/operator/fluxinstance/#distribution-configuration |
| instance.kustomize.patches | list | `[]` | Kustomize patches https://fluxcd.control-plane.io/operator/fluxinstance/#kustomize-patches |
| instance.sharding | object | `{"key":"sharding.fluxcd.io/key","shards":[]}` | Sharding https://fluxcd.control-plane.io/operator/fluxinstance/#sharding-configuration |
| instance.storage | object | `{"class":"","size":""}` | Storage https://fluxcd.control-plane.io/operator/fluxinstance/#storage-configuration |
| instance.sync | object | `{"kind":"GitRepository","path":"","pullSecret":"","ref":"","url":""}` | Sync https://fluxcd.control-plane.io/operator/fluxinstance/#sync-configuration |
| nameOverride | string | `""` | |

## Source Code

* <https://github.com/controlplaneio-fluxcd/flux-operator>
* <https://github.com/controlplaneio-fluxcd/charts>
@@ -1 +0,0 @@
Documentation at https://fluxcd.control-plane.io/operator/
@@ -1,51 +0,0 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "flux-instance.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "flux-instance.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "flux-instance.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "flux-instance.labels" -}}
helm.sh/chart: {{ include "flux-instance.chart" . }}
{{ include "flux-instance.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "flux-instance.selectorLabels" -}}
app.kubernetes.io/name: {{ include "flux-instance.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
@@ -1,43 +0,0 @@
apiVersion: fluxcd.controlplane.io/v1
kind: FluxInstance
metadata:
name: {{ include "flux-instance.fullname" . }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "flux-instance.labels" . | nindent 4 }}
{{- with .Values.commonLabels }}
{{- toYaml . | nindent 4 }}
{{- end }}
{{- with .Values.commonAnnotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
distribution:
version: {{ .Values.instance.distribution.version }}
registry: {{ .Values.instance.distribution.registry }}
artifact: {{ .Values.instance.distribution.artifact }}
{{- if .Values.instance.distribution.imagePullSecret }}
imagePullSecret: {{ .Values.instance.distribution.imagePullSecret }}
{{- end }}
components: {{ .Values.instance.components | toYaml | nindent 4 }}
cluster: {{ .Values.instance.cluster | toYaml | nindent 4 }}
kustomize: {{ .Values.instance.kustomize | toYaml | nindent 4 }}
{{- if .Values.instance.sync.url }}
sync:
kind: {{ .Values.instance.sync.kind }}
url: {{ .Values.instance.sync.url }}
ref: {{ .Values.instance.sync.ref }}
path: {{ .Values.instance.sync.path }}
{{- if .Values.instance.sync.pullSecret }}
pullSecret: {{ .Values.instance.sync.pullSecret }}
{{- end }}
{{- end }}
{{- if .Values.instance.storage.size }}
storage: {{ .Values.instance.storage | toYaml | nindent 4 }}
{{- end }}
{{- if .Values.instance.sharding.shards }}
sharding:
key: {{ .Values.instance.sharding.key }}
shards: {{ .Values.instance.sharding.shards | toYaml | nindent 4 }}
{{- end }}
@@ -1,153 +0,0 @@
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"properties": {
"commonAnnotations": {
"properties": {},
"type": "object"
},
"commonLabels": {
"properties": {},
"type": "object"
},
"fullnameOverride": {
"type": "string"
},
"instance": {
"properties": {
"cluster": {
"properties": {
"domain": {
"type": "string"
},
"multitenant": {
"type": "boolean"
},
"networkPolicy": {
"type": "boolean"
},
"tenantDefaultServiceAccount": {
"type": "string"
},
"type": {
"enum": [
"kubernetes",
"openshift",
"aws",
"azure",
"gcp"
],
"type": "string"
}
},
"type": "object"
},
"components": {
"items": {
"enum": [
"source-controller",
"kustomize-controller",
"helm-controller",
"notification-controller",
"image-reflector-controller",
"image-automation-controller"
],
"type": "string"
},
"type": "array",
"uniqueItems": true
},
"distribution": {
"properties": {
"artifact": {
"type": "string"
},
"imagePullSecret": {
"type": "string"
},
"registry": {
"type": "string"
},
"version": {
"type": "string"
}
},
"required": [
"version",
"registry"
],
"type": "object"
},
"kustomize": {
"properties": {
"patches": {
"items": {
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"sharding": {
"properties": {
"key": {
"type": "string"
},
"shards": {
"items": {
"type": "string"
},
"type": "array"
}
},
"type": "object"
},
"storage": {
"properties": {
"class": {
"type": "string"
},
"size": {
"type": "string"
}
},
"type": "object"
},
"sync": {
"properties": {
"kind": {
"enum": [
"GitRepository",
"OCIRepository",
"Bucket"
],
"type": "string"
},
"path": {
"type": "string"
},
"pullSecret": {
"type": "string"
},
"ref": {
"type": "string"
},
"url": {
"type": "string"
}
},
"type": "object"
}
},
"required": [
"distribution",
"cluster"
],
"type": "object"
},
"nameOverride": {
"type": "string"
}
},
"type": "object"
}
@@ -1,49 +0,0 @@
# Default values for flux-instance.

nameOverride: ""
fullnameOverride: "flux"

instance:
# -- Distribution https://fluxcd.control-plane.io/operator/fluxinstance/#distribution-configuration
distribution: # @schema required: true
version: "2.x" # @schema required: true
registry: "ghcr.io/fluxcd" # @schema required: true
artifact: "oci://ghcr.io/controlplaneio-fluxcd/flux-operator-manifests:latest"
imagePullSecret: ""
# -- Components https://fluxcd.control-plane.io/operator/fluxinstance/#components-configuration
components: # @schema item: string; uniqueItems: true; itemEnum: [source-controller,kustomize-controller,helm-controller,notification-controller,image-reflector-controller,image-automation-controller]
- source-controller
- kustomize-controller
- helm-controller
- notification-controller
# -- Cluster https://fluxcd.control-plane.io/operator/fluxinstance/#cluster-configuration
cluster: # @schema required: true
type: kubernetes # @schema enum:[kubernetes,openshift,aws,azure,gcp]
domain: "cluster.local"
networkPolicy: true
multitenant: false
tenantDefaultServiceAccount: "default"
# -- Storage https://fluxcd.control-plane.io/operator/fluxinstance/#storage-configuration
storage: # @schema required: false
class: ""
size: ""
# -- Sharding https://fluxcd.control-plane.io/operator/fluxinstance/#sharding-configuration
sharding: # @schema required: false
key: "sharding.fluxcd.io/key"
shards: [] # @schema item: string
# -- Sync https://fluxcd.control-plane.io/operator/fluxinstance/#sync-configuration
sync: # @schema required: false
kind: "GitRepository" # @schema enum:[GitRepository,OCIRepository,Bucket]
url: ""
ref: ""
path: ""
pullSecret: ""
kustomize: # @schema required: false
# -- Kustomize patches https://fluxcd.control-plane.io/operator/fluxinstance/#kustomize-patches
patches: [] # @schema item: object

# -- Common annotations to add to all deployed objects including pods.
commonAnnotations: { }

# -- Common labels to add to all deployed objects including pods.
commonLabels: { }
packages/system/fluxcd/templates/flux-instance.yaml (new file, 25 lines)
@@ -0,0 +1,25 @@
apiVersion: fluxcd.controlplane.io/v1
kind: FluxInstance
metadata:
name: flux
spec:
{{- with .Values.cluster }}
cluster:
{{- with .networkPolicy }}
networkPolicy: {{ . }}
{{- end }}
{{- with .domain }}
domain: {{ . }}
{{- end }}
{{- end }}
distribution:
version: {{ .Values.distribution.version }}
registry: {{ .Values.distribution.registry }}
components:
{{- if .Values.components }}
{{- toYaml .Values.components | nindent 4 }}
{{- end }}
kustomize:
{{- if .Values.kustomize }}
{{- toYaml .Values.kustomize | nindent 4 }}
{{- end }}
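The new in-tree template above reads top-level values rather than the upstream chart's `instance.*` keys. A rough sketch of the inputs it expects and the object it should render, using values similar to the package defaults shown in the next hunk (indentation here is illustrative):

```yaml
# values (illustrative)
cluster:
  networkPolicy: true
  domain: cozy.local
distribution:
  version: 2.3.x
  registry: ghcr.io/fluxcd
components:
  - source-controller
  - kustomize-controller

# rendered FluxInstance (sketch)
apiVersion: fluxcd.controlplane.io/v1
kind: FluxInstance
metadata:
  name: flux
spec:
  cluster:
    networkPolicy: true
    domain: cozy.local
  distribution:
    version: 2.3.x
    registry: ghcr.io/fluxcd
  components:
    - source-controller
    - kustomize-controller
  kustomize:   # left empty when no patches are supplied
```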
@@ -1,49 +1,47 @@
flux-instance:
instance:
cluster:
networkPolicy: true
domain: cozy.local # -- default value is overriden in patches
distribution:
version: 2.3.x
registry: ghcr.io/fluxcd
components:
- source-controller
- kustomize-controller
- helm-controller
- notification-controller
- image-reflector-controller
- image-automation-controller
kustomize:
patches:
- target:
kind: Deployment
name: "(kustomize-controller|helm-controller|source-controller)"
patch: |
- op: add
path: /spec/template/spec/containers/0/args/-
value: --concurrent=20
- op: add
path: /spec/template/spec/containers/0/args/-
value: --requeue-dependency=5s
- op: replace
path: /spec/template/spec/containers/0/resources/limits
value:
cpu: 2000m
memory: 2048Mi
- target:
kind: Deployment
name: source-controller
patch: |
- op: add
path: /spec/template/spec/containers/0/args/-
value: --storage-adv-addr=source-controller.cozy-fluxcd.svc
- op: add
path: /spec/template/spec/containers/0/args/-
value: --events-addr=http://notification-controller.cozy-fluxcd.svc/
- target:
kind: Deployment
name: (kustomize-controller|helm-controller|image-reflector-controller|image-automation-controller)
patch: |
- op: add
path: /spec/template/spec/containers/0/args/-
value: --events-addr=http://notification-controller.cozy-fluxcd.svc/
cluster:
networkPolicy: true
# domain: cozy.local
distribution:
version: 2.3.x
registry: ghcr.io/fluxcd
components:
- source-controller
- kustomize-controller
- helm-controller
- notification-controller
- image-reflector-controller
- image-automation-controller
kustomize:
patches:
- target:
kind: Deployment
name: "(kustomize-controller|helm-controller|source-controller)"
patch: |
- op: add
path: /spec/template/spec/containers/0/args/-
value: --concurrent=20
- op: add
path: /spec/template/spec/containers/0/args/-
value: --requeue-dependency=5s
- op: replace
path: /spec/template/spec/containers/0/resources/limits
value:
cpu: 2000m
memory: 2048Mi
- target:
kind: Deployment
name: source-controller
patch: |
- op: add
path: /spec/template/spec/containers/0/args/-
value: --storage-adv-addr=source-controller.cozy-fluxcd.svc
- op: add
path: /spec/template/spec/containers/0/args/-
value: --events-addr=http://notification-controller.cozy-fluxcd.svc/
- target:
kind: Deployment
name: (kustomize-controller|helm-controller|image-reflector-controller|image-automation-controller)
patch: |
- op: add
path: /spec/template/spec/containers/0/args/-
value: --events-addr=http://notification-controller.cozy-fluxcd.svc/
@@ -6,7 +6,7 @@ ingress-nginx:
      registry: ghcr.io
      image: kvaps/ingress-nginx-with-protobuf-exporter/controller
      tag: v1.11.2
      digest: sha256:e80856ece4e30e9646d65c8d92c25a3446a0bba1c2468cd026f17df9e60d2c0f
      digest: sha256:f4194edb06a43c82405167427ebd552b90af9698bd295845418680aebc13f600
    allowSnippetAnnotations: true
    replicaCount: 2
    admissionWebhooks:

@@ -1,25 +1 @@
# Build the manager binary
FROM golang:1.22 as builder

ARG VERSION=edge-24.9.2
ARG TARGETOS TARGETARCH

WORKDIR /workspace

RUN curl -sSL https://github.com/clastix/kamaji/archive/refs/tags/${VERSION}.tar.gz | tar -xzvf- --strip=1

COPY patches /patches
RUN git apply /patches/disable-datastore-check.diff

RUN CGO_ENABLED=0 GOOS=linux GOARCH=$TARGETARCH go build \
    -ldflags "-X github.com/clastix/kamaji/internal.GitRepo=$GIT_REPO -X github.com/clastix/kamaji/internal.GitTag=$GIT_LAST_TAG -X github.com/clastix/kamaji/internal.GitCommit=$GIT_HEAD_COMMIT -X github.com/clastix/kamaji/internal.GitDirty=$GIT_MODIFIED -X github.com/clastix/kamaji/internal.BuildTime=$BUILD_DATE" \
    -a -o kamaji main.go

# Use distroless as minimal base image to package the manager binary
# Refer to https://github.com/GoogleContainerTools/distroless for more details
FROM gcr.io/distroless/static:nonroot
WORKDIR /
COPY --from=builder /workspace/kamaji .
USER 65532:65532

ENTRYPOINT ["/kamaji"]
FROM clastix/kamaji:edge-24.9.2

@@ -1,23 +0,0 @@
diff --git a/cmd/manager/cmd.go b/cmd/manager/cmd.go
index 9a24d4e..a03a4e0 100644
--- a/cmd/manager/cmd.go
+++ b/cmd/manager/cmd.go
@@ -31,7 +31,6 @@ import (
 	"github.com/clastix/kamaji/controllers/soot"
 	"github.com/clastix/kamaji/internal"
 	"github.com/clastix/kamaji/internal/builders/controlplane"
-	datastoreutils "github.com/clastix/kamaji/internal/datastore/utils"
 	"github.com/clastix/kamaji/internal/webhook"
 	"github.com/clastix/kamaji/internal/webhook/handlers"
 	"github.com/clastix/kamaji/internal/webhook/routes"
@@ -80,10 +79,6 @@ func NewCmd(scheme *runtime.Scheme) *cobra.Command {
 		return fmt.Errorf("unable to read webhook CA: %w", err)
 	}

-	if err = datastoreutils.CheckExists(ctx, scheme, datastore); err != nil {
-		return err
-	}
-
 	if controllerReconcileTimeout.Seconds() == 0 {
 		return fmt.Errorf("the controller reconcile timeout must be greater than zero")
 	}
@@ -3,7 +3,7 @@ kamaji:
  deploy: false
  image:
    pullPolicy: IfNotPresent
    tag: v0.16.0@sha256:241e6cdf60905e53f0cb47aadcab69dd8ffa97d316faac6bdeb704c1b13c24db
    tag: latest@sha256:bb45d953a8ba46a19c8941ccc9fc8498d91435c77db439d8b1d6bde9fea8802a
    repository: ghcr.io/aenix-io/cozystack/kamaji
  resources:
    limits:

@@ -22,4 +22,4 @@ global:
  images:
    kubeovn:
      repository: kubeovn
      tag: v1.13.0@sha256:6676da12917c3645a08fb2adb7def5fcb68995338126b257e0a0a9570516cd5c
      tag: v1.13.0@sha256:11c4ef0f71c73df4703743c0f63b7ff0ec67af6342caf1e7db8ebd5546071855

packages/system/monitoring/charts/alerts/Chart.yaml (new file)
@@ -0,0 +1,3 @@
apiVersion: v1
name: alerts
version: v0.0.0
@@ -0,0 +1,48 @@
groups:
- name: blackbox-exporter
  rules:
  - alert: HttpProbeFailed
    annotations:
      message: Probing the blackbox-exporter target {{ $labels.instance }} failed.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-httpprobefailed
    expr: probe_success != 1
    for: 5m
    labels:
      severity: warning
      resource: '{{ $labels.instance }}'
      service: blackbox-exporter

  - alert: HttpProbeSlow
    annotations:
      message: '{{ $labels.instance }} takes {{ $value }} seconds to respond.'
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-httpprobeslow
    expr: sum by (instance) (probe_http_duration_seconds) > 3
    for: 15m
    labels:
      severity: warning
      resource: '{{ $labels.instance }}'
      service: blackbox-exporter
    runbook:
      steps:
      - Check the target system's resource usage for anomalies.
      - Check if the target application has been recently rescheduled and is still settling.

  - alert: HttpCertExpiresSoon
    annotations:
      message: The certificate for {{ $labels.instance }} expires in less than 3 days.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-httpcertexpiressoon
    expr: probe_ssl_earliest_cert_expiry - time() < 3*24*3600
    labels:
      severity: warning
      resource: '{{ $labels.instance }}'
      service: blackbox-exporter

  - alert: HttpCertExpiresVerySoon
    annotations:
      message: The certificate for {{ $labels.instance }} expires in less than 24 hours.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-httpcertexpiresverysoon
    expr: probe_ssl_earliest_cert_expiry - time() < 24*3600
    labels:
      severity: critical
      resource: '{{ $labels.instance }}'
      service: blackbox-exporter
@@ -0,0 +1,44 @@
groups:
- name: cadvisor
  rules:
  - alert: CadvisorDown
    annotations:
      message: Cadvisor has disappeared from Prometheus target discovery.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-cadvisordown
    expr: absent(up{job="cadvisor"} == 1)
    for: 15m
    labels:
      severity: critical
      resource: cadvisor
      service: cadvisor

  - record: namespace:container_memory_usage_bytes:sum
    expr: |
      sum by (namespace) (
        container_memory_usage_bytes{job="cadvisor", image!="", container!=""}
      )

  - record: namespace:container_cpu_usage_seconds_total:sum_rate
    expr: |
      sum(rate(container_cpu_usage_seconds_total{job="cadvisor", image!="", container!=""}[5m])) by (namespace)

  - record: namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
    expr: |
      sum by (namespace, pod, container) (
        rate(container_cpu_usage_seconds_total{job="cadvisor", image!="", container!=""}[5m])
      )

  # triggered by kernel bug, see issue kubermatic#2367

  # - alert: CPUThrottlingHigh
  #   annotations:
  #     message: '{{ printf "%0.0f" $value }}% throttling of CPU in namespace {{ $labels.namespace }} for {{ $labels.container }}.'
  #     runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-cputhrottlinghigh
  #   expr: |
  #     100 * sum(increase(container_cpu_cfs_throttled_periods_total[5m])) by (container, pod, namespace)
  #     /
  #     sum(increase(container_cpu_cfs_periods_total[5m])) by (container, pod, namespace)
  #     > 25
  #   for: 15m
  #   labels:
  #     severity: warning
@@ -0,0 +1,22 @@
groups:
- name: cert-manager
  rules:
  - alert: CertManagerCertExpiresSoon
    annotations:
      message: The certificate {{ $labels.name }} expires in less than 3 days.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-certmanagercertexpiressoon
    expr: certmanager_certificate_expiration_timestamp_seconds - time() < 3*24*3600
    labels:
      severity: warning
      resource: '{{ $labels.name }}'
      service: cert-manager

  - alert: CertManagerCertExpiresVerySoon
    annotations:
      message: The certificate {{ $labels.name }} expires in less than 24 hours.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-certmanagercertexpiresverysoon
    expr: certmanager_certificate_expiration_timestamp_seconds - time() < 24*3600
    labels:
      severity: critical
      resource: '{{ $labels.name }}'
      service: cert-manager
@@ -0,0 +1,20 @@
groups:
- name: helm-exporter
  rules:
  - alert: HelmReleaseNotDeployed
    annotations:
      message:
        The Helm release `{{ $labels.release }}` (`{{ $labels.chart }}` chart in namespace `{{ $labels.exported_namespace }}`)
        in version {{ $labels.version }} has not been ready for more than 15 minutes.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-helmreleasenotdeployed
    expr: helm_chart_info != 1
    for: 15m
    labels:
      severity: warning
      resource: '{{ $labels.release }}'
      service: helm-exporter
    runbook:
      steps:
      - Check the installed Helm releases via `helm --namespace monitoring ls --all`.
      - If Helm cannot repair the chart automatically, delete/purge the chart (`helm delete --purge [RELEASE]`) and
        re-install the chart again.
@@ -0,0 +1,128 @@
|
||||
groups:
|
||||
- name: kube-apiserver
|
||||
rules:
|
||||
- record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
|
||||
expr: |
|
||||
histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod))
|
||||
labels:
|
||||
quantile: "0.99"
|
||||
|
||||
- record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
|
||||
expr: |
|
||||
histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod))
|
||||
labels:
|
||||
quantile: "0.9"
|
||||
|
||||
- record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
|
||||
expr: |
|
||||
histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod))
|
||||
labels:
|
||||
quantile: "0.5"
|
||||
|
||||
############################################################
|
||||
# alerts
|
||||
############################################################
|
||||
|
||||
- alert: KubernetesApiserverDown
|
||||
annotations:
|
||||
message: KubernetesApiserver has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubernetesapiserverdown
|
||||
expr: absent(up{job="apiserver"} == 1)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
resource: apiserver
|
||||
service: kubernetes
|
||||
|
||||
- alert: KubeAPIErrorsHigh
|
||||
annotations:
|
||||
message: API server is returning errors for {{ $value }}% of requests.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubeapierrorshigh
|
||||
expr: |
|
||||
sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver"}[5m])) without(instance, pod) * 100 > 10
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
resource: apiserver
|
||||
service: kubernetes
|
||||
|
||||
- alert: KubeAPITerminatedRequests
|
||||
annotations:
|
||||
message: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubeapiterminatedrequests
|
||||
expr: |
|
||||
sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m]))
|
||||
/
|
||||
(sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: apiserver
|
||||
service: kubernetes
|
||||
|
||||
- alert: KubeAPITerminatedRequests
|
||||
annotations:
|
||||
message: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubeapiterminatedrequests
|
||||
expr: |
|
||||
sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m]))
|
||||
/
|
||||
(sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
resource: apiserver
|
||||
service: kubernetes
|
||||
|
||||
- alert: KubeAPIErrorsHigh
|
||||
annotations:
|
||||
message: API server is returning errors for {{ $value }}% of requests.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubeapierrorshigh
|
||||
expr: |
|
||||
sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver"}[5m])) without(instance, pod) * 100 > 5
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: apiserver
|
||||
service: kubernetes
|
||||
|
||||
- alert: KubeClientCertificateExpiration
|
||||
annotations:
|
||||
message: A client certificate used to authenticate to the apiserver is expiring in less than 7 days.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubeclientcertificateexpiration
|
||||
expr: |
|
||||
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0
|
||||
and
|
||||
histogram_quantile(0.01, sum by (job, instance, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
|
||||
labels:
|
||||
severity: warning
|
||||
resource: apiserver
|
||||
service: kubernetes
|
||||
runbook:
|
||||
steps:
|
||||
- Check the [Kubernetes documentation](https://kubernetes.io/docs/tasks/administer-cluster/kubeadm/kubeadm-certs/) on how to renew certificates.
|
||||
- If your certificate has already expired, the steps in the documentation might not work. Check [GitHub](https://github.com/kubernetes/kubeadm/issues/581#issuecomment-421477139)
|
||||
for hints about fixing your cluster.
|
||||
|
||||
- alert: KubeClientCertificateExpiration
|
||||
annotations:
|
||||
message: A client certificate used to authenticate to the apiserver is expiring in less than 24 hours.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubeclientcertificateexpiration
|
||||
expr: |
|
||||
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0
|
||||
and
|
||||
histogram_quantile(0.01, sum by (job, instance, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
|
||||
labels:
|
||||
severity: critical
|
||||
resource: apiserver
|
||||
service: kubernetes
|
||||
runbook:
|
||||
steps:
|
||||
- Urgently renew your certificates. Expired certificates can make fixing the cluster difficult to begin with.
|
||||
- Check the [Kubernetes documentation](https://kubernetes.io/docs/tasks/administer-cluster/kubeadm/kubeadm-certs/) on how to renew certificates.
|
||||
- If your certificate has already expired, the steps in the documentation might not work. Check [GitHub](https://github.com/kubernetes/kubeadm/issues/581#issuecomment-421477139)
|
||||
for hints about fixing your cluster.
|
||||
@@ -0,0 +1,143 @@
|
||||
groups:
|
||||
- name: kube-kubelet
|
||||
rules:
|
||||
- alert: KubeletDown
|
||||
annotations:
|
||||
message: Kubelet has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubeletdown
|
||||
expr: absent(up{job="kubelet"} == 1)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
resource: '{{ $labels.instance }}'
|
||||
service: kubelet
|
||||
|
||||
- alert: KubePersistentVolumeUsageCritical
|
||||
annotations:
|
||||
message:
|
||||
The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in namespace
|
||||
{{ $labels.namespace }} is only {{ printf "%0.0f" $value }}% free.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubepersistentvolumeusagecritical
|
||||
expr: |
|
||||
100 * kubelet_volume_stats_available_bytes{job="kubelet"}
|
||||
/
|
||||
kubelet_volume_stats_capacity_bytes{job="kubelet"}
|
||||
< 3
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
service: kubelet
|
||||
resource: '{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }}'
|
||||
|
||||
- alert: KubePersistentVolumeFullInFourDays
|
||||
annotations:
|
||||
message:
|
||||
Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }}
|
||||
in namespace {{ $labels.namespace }} is expected to fill up within four days.
|
||||
Currently {{ $value }} bytes are available.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubepersistentvolumefullinfourdays
|
||||
expr: |
|
||||
(
|
||||
kubelet_volume_stats_used_bytes{job="kubelet"}
|
||||
/
|
||||
kubelet_volume_stats_capacity_bytes{job="kubelet"}
|
||||
) > 0.85
|
||||
and
|
||||
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[6h], 4 * 24 * 3600) < 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
service: kubelet
|
||||
resource: '{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }}'
|
||||
|
||||
- alert: KubeletTooManyPods
|
||||
annotations:
|
||||
message: Kubelet {{ $labels.instance }} is running {{ $value }} pods, close to the limit of 110.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubelettoomanypods
|
||||
expr: kubelet_running_pod_count{job="kubelet"} > 110 * 0.9
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: '{{ $labels.instance }}'
|
||||
service: kubelet
|
||||
|
||||
- alert: KubeClientErrors
|
||||
annotations:
|
||||
message:
|
||||
The kubelet on {{ $labels.instance }} is experiencing {{ printf "%0.0f" $value }}% errors.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubeclienterrors
|
||||
expr: |
|
||||
(sum(rate(rest_client_requests_total{code=~"(5..|<error>)",job="kubelet"}[5m])) by (instance)
|
||||
/
|
||||
sum(rate(rest_client_requests_total{job="kubelet"}[5m])) by (instance))
|
||||
* 100 > 1
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: '{{ $labels.instance }}'
|
||||
service: kubelet
|
||||
|
||||
# a dedicated rule for pods to include more helpful labels in the message like the instance and job name
|
||||
- alert: KubeClientErrors
|
||||
annotations:
|
||||
message:
|
||||
The pod {{ $labels.namespace }}/{{ $labels.pod }} is experiencing {{ printf "%0.0f" $value }}% errors.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubeclienterrors
|
||||
expr: |
|
||||
(sum(rate(rest_client_requests_total{code=~"(5..|<error>)",job="pods"}[5m])) by (namespace, pod)
|
||||
/
|
||||
sum(rate(rest_client_requests_total{job="pods"}[5m])) by (namespace, pod))
|
||||
* 100 > 1
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: '{{ $labels.instance }}'
|
||||
service: kubelet
|
||||
|
||||
- alert: KubeletRuntimeErrors
|
||||
annotations:
|
||||
message:
|
||||
The kubelet on {{ $labels.instance }} is experiencing an elevated error rate for container runtime operations.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubeletruntimeerrors
|
||||
expr: |
|
||||
sum(rate(kubelet_runtime_operations_errors_total{job="kubelet"}[5m])) by (instance) > 0.1
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: '{{ $labels.instance }}'
|
||||
service: kubelet
|
||||
|
||||
- alert: KubeletCGroupManagerDurationHigh
|
||||
annotations:
|
||||
message:
|
||||
The kubelet's cgroup manager duration on {{ $labels.instance }} has been elevated ({{ printf "%0.2f" $value }}ms) for more than 15 minutes.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubeletcgroupmanagerlatencyhigh
|
||||
expr: |
|
||||
sum(rate(kubelet_cgroup_manager_duration_seconds{quantile="0.9"}[5m])) by (instance) * 1000 > 1
|
||||
for: 15m
|
||||
labels:
|
||||
resource: '{{ $labels.instance }}'
|
||||
service: kubelet
|
||||
severity: warning
|
||||
|
||||
- alert: KubeletPodWorkerDurationHigh
|
||||
annotations:
|
||||
message:
|
||||
The kubelet's pod worker duration for {{ $labels.operation_type }} operations on {{ $labels.instance }} has been elevated ({{ printf "%0.2f" $value }}ms) for more than 15 minutes.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubeletpodworkerdurationhigh
|
||||
expr: |
|
||||
sum(rate(kubelet_pod_worker_duration_seconds{quantile="0.9"}[5m])) by (instance, operation_type) * 1000 > 250
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: '{{ $labels.instance }}/{{ $labels.operation_type }}'
|
||||
service: kubelet
|
||||
|
||||
- alert: KubeVersionMismatch
|
||||
annotations:
|
||||
message: There are {{ $value }} different versions of Kubernetes components running.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubeversionmismatch
|
||||
expr: count(count(kubernetes_build_info{job!="dns"}) by (gitVersion)) > 1
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -0,0 +1,339 @@
|
||||
groups:
|
||||
- name: kube-state-metrics
|
||||
rules:
|
||||
- record: ':kube_pod_info_node_count:'
|
||||
expr: |
|
||||
sum(min(kube_pod_info) by (node))
|
||||
|
||||
- record: 'node_namespace_pod:kube_pod_info:'
|
||||
expr: |
|
||||
max(kube_pod_info{job="kube-state-metrics"}) by (node, namespace, pod)
|
||||
|
||||
- record: namespace_name:container_cpu_usage_seconds_total:sum_rate
|
||||
expr: |
|
||||
sum by (namespace, label_name) (
|
||||
sum(rate(container_cpu_usage_seconds_total{job="cadvisor", image!="", container!=""}[5m])) by (namespace, pod)
|
||||
* on (namespace, pod) group_left (label_name)
|
||||
kube_pod_labels{job="kube-state-metrics"}
|
||||
)
|
||||
|
||||
- record: namespace_name:container_memory_usage_bytes:sum
|
||||
expr: |
|
||||
sum by (namespace, label_name) (
|
||||
sum(container_memory_usage_bytes{job="cadvisor",image!="", container!=""}) by (pod, namespace)
|
||||
* on (namespace, pod) group_left (label_name)
|
||||
kube_pod_labels{job="kube-state-metrics"}
|
||||
)
|
||||
|
||||
- record: namespace_name:kube_pod_container_resource_requests_memory_bytes:sum
|
||||
expr: |
|
||||
sum by (namespace, label_name) (
|
||||
sum(kube_pod_container_resource_requests{job="kube-state-metrics",resource="memory"}) by (namespace, pod)
|
||||
* on (namespace, pod) group_left (label_name)
|
||||
kube_pod_labels{job="kube-state-metrics"}
|
||||
)
|
||||
|
||||
- record: namespace_name:kube_pod_container_resource_requests_cpu_cores:sum
|
||||
expr: |
|
||||
sum by (namespace, label_name) (
|
||||
sum(kube_pod_container_resource_requests{job="kube-state-metrics",resource="cpu"} and on(pod) kube_pod_status_scheduled{condition="true"}) by (namespace, pod)
|
||||
* on (namespace, pod) group_left (label_name)
|
||||
kube_pod_labels{job="kube-state-metrics"}
|
||||
)
|
||||
|
||||
############################################################
|
||||
# alerts
|
||||
############################################################
|
||||
|
||||
- alert: KubeStateMetricsDown
|
||||
annotations:
|
||||
message: KubeStateMetrics has disappeared from Prometheus target discovery.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubestatemetricsdown
|
||||
expr: absent(up{job="kube-state-metrics"} == 1)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
resource: '{{ $labels.namespace }}/{{ $labels.pod }}'
|
||||
service: kube-state-metrics
|
||||
|
||||
|
||||
- alert: KubePodCrashLooping
|
||||
annotations:
|
||||
message:
|
||||
Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting
|
||||
{{ printf "%.2f" $value }} times / 5 minutes.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubepodcrashlooping
|
||||
expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
resource: "{{ $labels.namespace }}/{{ $labels.pod }}"
|
||||
runbook:
|
||||
steps:
|
||||
- Check the pod's logs.
|
||||
|
||||
- alert: KubePodNotReady
|
||||
annotations:
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than an hour.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubepodnotready
|
||||
expr: sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}) > 0
|
||||
for: 30m
|
||||
labels:
|
||||
severity: critical
|
||||
resource: "{{ $labels.namespace }}/{{ $labels.pod }}"
|
||||
runbook:
|
||||
steps:
|
||||
- Check the pod via `kubectl describe pod [POD]` to find out about scheduling issues.
|
||||
|
||||
- alert: KubeDeploymentGenerationMismatch
|
||||
annotations:
|
||||
message:
|
||||
Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match,
|
||||
this indicates that the Deployment has failed but has not been rolled back.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubedeploymentgenerationmismatch
|
||||
expr: |
|
||||
kube_deployment_status_observed_generation{job="kube-state-metrics"}
|
||||
!=
|
||||
kube_deployment_metadata_generation{job="kube-state-metrics"}
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
resource: "{{ $labels.namespace }}/{{ $labels.deployment }}"
|
||||
|
||||
- alert: KubeDeploymentReplicasMismatch
|
||||
annotations:
|
||||
message:
|
||||
Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected
|
||||
number of replicas for longer than an hour.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubedeploymentreplicasmismatch
|
||||
expr: |
|
||||
kube_deployment_spec_replicas{job="kube-state-metrics"}
|
||||
!=
|
||||
kube_deployment_status_replicas_available{job="kube-state-metrics"}
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
resource: "{{ $labels.namespace }}/{{ $labels.deployment }}"
|
||||
|
||||
- alert: KubeStatefulSetReplicasMismatch
|
||||
annotations:
|
||||
message:
|
||||
StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected
|
||||
number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubestatefulsetreplicasmismatch
|
||||
expr: |
|
||||
kube_statefulset_status_replicas_ready{job="kube-state-metrics"}
|
||||
!=
|
||||
kube_statefulset_status_replicas{job="kube-state-metrics"}
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
resource: "{{ $labels.namespace }}/{{ $labels.statefulset }}"
|
||||
|
||||
- alert: KubeStatefulSetGenerationMismatch
|
||||
annotations:
|
||||
message:
|
||||
StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match,
|
||||
this indicates that the StatefulSet has failed but has not been rolled back.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubestatefulsetgenerationmismatch
|
||||
expr: |
|
||||
kube_statefulset_status_observed_generation{job="kube-state-metrics"}
|
||||
!=
|
||||
kube_statefulset_metadata_generation{job="kube-state-metrics"}
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
resource: "{{ $labels.namespace }}/{{ $labels.statefulset }}"
|
||||
|
||||
- alert: KubeStatefulSetUpdateNotRolledOut
|
||||
annotations:
|
||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubestatefulsetupdatenotrolledout
|
||||
expr: |
|
||||
max without (revision) (
|
||||
kube_statefulset_status_current_revision{job="kube-state-metrics"}
|
||||
unless
|
||||
kube_statefulset_status_update_revision{job="kube-state-metrics"}
|
||||
)
|
||||
*
|
||||
(
|
||||
kube_statefulset_replicas{job="kube-state-metrics"}
|
||||
!=
|
||||
kube_statefulset_status_replicas_updated{job="kube-state-metrics"}
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
resource: "{{ $labels.namespace }}/{{ $labels.statefulset }}"
|
||||
|
||||
- alert: KubeDaemonSetRolloutStuck
|
||||
annotations:
|
||||
message:
|
||||
Only {{ $value }}% of the desired Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }}
|
||||
are scheduled and ready.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubedaemonsetrolloutstuck
|
||||
expr: |
|
||||
kube_daemonset_status_number_ready{job="kube-state-metrics"}
|
||||
/
|
||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} * 100 < 100
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
resource: "{{ $labels.namespace }}/{{ $labels.daemonset }}"
|
||||
|
||||
- alert: KubeDaemonSetNotScheduled
|
||||
annotations:
|
||||
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.'
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubedaemonsetnotscheduled
|
||||
expr: |
|
||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
|
||||
-
|
||||
kube_daemonset_status_current_number_scheduled{job="kube-state-metrics"} > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: "{{ $labels.namespace }}/{{ $labels.daemonset }}"
|
||||
|
||||
- alert: KubeDaemonSetMisScheduled
|
||||
annotations:
|
||||
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.'
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubedaemonsetmisscheduled
|
||||
expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: "{{ $labels.namespace }}/{{ $labels.daemonset }}"
|
||||
|
||||
- alert: KubeCronJobRunning
|
||||
annotations:
|
||||
message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more than 1h to complete.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubecronjobrunning
|
||||
expr: time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
resource: "{{ $labels.namespace }}/{{ $labels.cronjob }}"
|
||||
|
||||
- alert: KubeJobCompletion
|
||||
annotations:
|
||||
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than one hour to complete.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubejobcompletion
|
||||
expr: kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
resource: "{{ $labels.namespace }}/{{ $labels.job_name }}"
|
||||
|
||||
- alert: KubeJobFailed
|
||||
annotations:
|
||||
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubejobfailed
|
||||
expr: kube_job_status_failed{job="kube-state-metrics"} > 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
resource: "{{ $labels.namespace }}/{{ $labels.job_name }}"
|
||||
|
||||
- alert: KubeCPUOvercommit
|
||||
annotations:
|
||||
message: Cluster has overcommitted CPU resource requests for namespaces.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubecpuovercommit
|
||||
expr: |
|
||||
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.cpu"})
|
||||
/
|
||||
sum(node:node_num_cpu:sum)
|
||||
> 1.5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: cluster
|
||||
service: kube-state-metrics
|
||||
|
||||
- alert: KubeCPUOvercommit
|
||||
annotations:
|
||||
message: Cluster has overcommitted CPU resource requests for pods and cannot tolerate node failure.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubecpuovercommit
|
||||
expr: |
|
||||
sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum)
|
||||
/
|
||||
sum(node:node_num_cpu:sum)
|
||||
>
|
||||
(count(node:node_num_cpu:sum)-1) / count(node:node_num_cpu:sum)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: cluster
|
||||
service: kube-state-metrics
|
||||
|
||||
- alert: KubeMemOvercommit
|
||||
annotations:
|
||||
message: Cluster has overcommitted memory resource requests for namespaces.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubememovercommit
|
||||
expr: |
|
||||
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.memory"})
|
||||
/
|
||||
sum(node_memory_MemTotal_bytes{app="node-exporter"})
|
||||
> 1.5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: cluster
|
||||
service: kube-state-metrics
|
||||
|
||||
- alert: KubeMemOvercommit
|
||||
annotations:
|
||||
message: Cluster has overcommitted memory resource requests for pods and cannot tolerate node failure.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubememovercommit
|
||||
expr: |
|
||||
sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum)
|
||||
/
|
||||
sum(node_memory_MemTotal_bytes)
|
||||
>
|
||||
(count(node:node_num_cpu:sum)-1)
|
||||
/
|
||||
count(node:node_num_cpu:sum)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: cluster
|
||||
service: kube-state-metrics
|
||||
|
||||
- alert: KubeQuotaExceeded
|
||||
annotations:
|
||||
message: Namespace {{ $labels.namespace }} is using {{ printf "%0.0f" $value }}% of its {{ $labels.resource }} quota.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubequotaexceeded
|
||||
expr: |
|
||||
100 * kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||
/ ignoring(instance, job, type)
|
||||
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
|
||||
> 90
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: cluster
|
||||
service: kube-state-metrics
|
||||
|
||||
- alert: KubePodOOMKilled
|
||||
annotations:
|
||||
message:
|
||||
Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }}
|
||||
has been OOMKilled {{ $value }} times in the last 30 minutes.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubepodoomkilled
|
||||
expr: |
|
||||
(kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 30m >= 2)
|
||||
and
|
||||
ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[30m]) == 1
|
||||
for: 0m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: '{{ $labels.namespace }}/{{ $labels.pod }}/{{ $labels.container }}'
|
||||
|
||||
- alert: KubeNodeNotReady
|
||||
annotations:
|
||||
message: '{{ $labels.node }} has been unready for more than an hour.'
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-kubenodenotready
|
||||
expr: kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
resource: '{{ $labels.node }}'
|
||||
@@ -0,0 +1,342 @@
|
||||
groups:
|
||||
- name: node-exporter
|
||||
rules:
|
||||
- record: node:node_num_cpu:sum
|
||||
expr: |
|
||||
count by (node) (sum by (node, cpu) (
|
||||
node_cpu_seconds_total{app="node-exporter"}
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:
|
||||
))
|
||||
|
||||
- record: :node_cpu_utilisation:avg1m
|
||||
expr: |
|
||||
1 - avg(rate(node_cpu_seconds_total{app="node-exporter",mode="idle"}[1m]))
|
||||
|
||||
- record: node:node_cpu_utilisation:avg1m
|
||||
expr: |
|
||||
1 - avg by (node) (
|
||||
rate(node_cpu_seconds_total{app="node-exporter",mode="idle"}[1m])
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:)
|
||||
|
||||
- record: ':node_cpu_saturation_load1:'
|
||||
expr: |
|
||||
sum(node_load1{app="node-exporter"})
|
||||
/
|
||||
sum(node:node_num_cpu:sum)
|
||||
|
||||
- record: 'node:node_cpu_saturation_load1:'
|
||||
expr: |
|
||||
sum by (node) (
|
||||
node_load1{app="node-exporter"}
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:
|
||||
)
|
||||
/
|
||||
node:node_num_cpu:sum
|
||||
|
||||
- record: node:cluster_cpu_utilisation:ratio
|
||||
expr: |
|
||||
node:node_cpu_utilisation:avg1m
|
||||
*
|
||||
node:node_num_cpu:sum
|
||||
/
|
||||
scalar(sum(node:node_num_cpu:sum))
|
||||
|
||||
- record: ':node_memory_utilisation:'
|
||||
expr: |
|
||||
1 -
|
||||
sum(node_memory_MemFree_bytes{app="node-exporter"} + node_memory_Cached_bytes{app="node-exporter"} + node_memory_Buffers_bytes{app="node-exporter"})
|
||||
/
|
||||
sum(node_memory_MemTotal_bytes{app="node-exporter"})
|
||||
|
||||
- record: node:node_memory_bytes_available:sum
|
||||
expr: |
|
||||
sum by (node) (
|
||||
(node_memory_MemFree_bytes{app="node-exporter"} + node_memory_Cached_bytes{app="node-exporter"} + node_memory_Buffers_bytes{app="node-exporter"})
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:
|
||||
)
|
||||
|
||||
- record: node:node_memory_bytes_total:sum
|
||||
expr: |
|
||||
sum by (node) (
|
||||
node_memory_MemTotal_bytes{app="node-exporter"}
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:
|
||||
)
|
||||
|
||||
- record: node:node_memory_utilisation:ratio
|
||||
expr: |
|
||||
(node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
|
||||
/
|
||||
scalar(sum(node:node_memory_bytes_total:sum))
|
||||
|
||||
- record: :node_memory_swap_io_bytes:sum_rate
|
||||
expr: |
|
||||
1e3 * sum(
|
||||
(rate(node_vmstat_pgpgin{app="node-exporter"}[1m])
|
||||
+ rate(node_vmstat_pgpgout{app="node-exporter"}[1m]))
|
||||
)
|
||||
|
||||
- record: 'node:node_memory_utilisation:'
|
||||
expr: |
|
||||
1 -
|
||||
sum by (node) (
|
||||
(node_memory_MemFree_bytes{app="node-exporter"} + node_memory_Cached_bytes{app="node-exporter"} + node_memory_Buffers_bytes{app="node-exporter"})
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:
|
||||
)
|
||||
/
|
||||
sum by (node) (
|
||||
node_memory_MemTotal_bytes{app="node-exporter"}
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:
|
||||
)
|
||||
|
||||
- record: 'node:node_memory_utilisation_2:'
|
||||
expr: |
|
||||
1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum)
|
||||
|
||||
- record: node:node_memory_swap_io_bytes:sum_rate
|
||||
expr: |
|
||||
1e3 * sum by (node) (
|
||||
(rate(node_vmstat_pgpgin{app="node-exporter"}[1m])
|
||||
+ rate(node_vmstat_pgpgout{app="node-exporter"}[1m]))
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:
|
||||
)
|
||||
|
||||
- record: node:cluster_memory_utilisation:ratio
|
||||
expr: |
|
||||
(node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum)
|
||||
/
|
||||
scalar(sum(node:node_memory_bytes_total:sum))
|
||||
|
||||
- record: :node_disk_utilisation:avg_irate
|
||||
expr: |
|
||||
avg(irate(node_disk_io_time_seconds_total{app="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]))
|
||||
|
||||
- record: node:node_disk_utilisation:avg_irate
|
||||
expr: |
|
||||
avg by (node) (
|
||||
irate(node_disk_io_time_seconds_total{app="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m])
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:
|
||||
)
|
||||
|
||||
- record: :node_disk_saturation:avg_irate
|
||||
expr: |
|
||||
avg(irate(node_disk_io_time_weighted_seconds_total{app="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]) / 1e3)
|
||||
|
||||
- record: node:node_disk_saturation:avg_irate
|
||||
expr: |
|
||||
avg by (node) (
|
||||
irate(node_disk_io_time_weighted_seconds_total{app="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]) / 1e3
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:
|
||||
)
|
||||
|
||||
- record: 'node:node_filesystem_usage:'
|
||||
expr: |
|
||||
max by (namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}
|
||||
- node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
|
||||
/ node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
|
||||
|
||||
- record: 'node:node_filesystem_avail:'
|
||||
expr: |
|
||||
max by (namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
|
||||
|
||||
- record: :node_net_utilisation:sum_irate
|
||||
expr: |
|
||||
sum(irate(node_network_receive_bytes_total{app="node-exporter",device!~"veth.+"}[1m])) +
|
||||
sum(irate(node_network_transmit_bytes_total{app="node-exporter",device!~"veth.+"}[1m]))
|
||||
|
||||
- record: node:node_net_utilisation:sum_irate
|
||||
expr: |
|
||||
sum by (node) (
|
||||
(irate(node_network_receive_bytes_total{app="node-exporter",device!~"veth.+"}[1m]) +
|
||||
irate(node_network_transmit_bytes_total{app="node-exporter",device!~"veth.+"}[1m]))
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:
|
||||
)
|
||||
|
||||
- record: :node_net_saturation:sum_irate
|
||||
expr: |
|
||||
sum(irate(node_network_receive_drop_total{app="node-exporter",device!~"veth.+"}[1m])) +
|
||||
sum(irate(node_network_transmit_drop_total{app="node-exporter",device!~"veth.+"}[1m]))
|
||||
|
||||
- record: node:node_net_saturation:sum_irate
|
||||
expr: |
|
||||
sum by (node) (
|
||||
(irate(node_network_receive_drop_total{app="node-exporter",device!~"veth.+"}[1m]) +
|
||||
irate(node_network_transmit_drop_total{app="node-exporter",device!~"veth.+"}[1m]))
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:
|
||||
)
|
||||
|
||||
############################################################
|
||||
# alerts
|
||||
############################################################
|
||||
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
message:
|
||||
Filesystem on {{ $labels.device }} at {{ $labels.instance }} is predicted
|
||||
to run out of space within the next 24 hours.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-nodefilesystemspacefillingup
|
||||
expr: |
|
||||
predict_linear(node_filesystem_avail_bytes{app="node-exporter",fstype=~"ext.|xfs"}[6h], 24*60*60) < 0
|
||||
and
|
||||
node_filesystem_avail_bytes{app="node-exporter",fstype=~"ext.|xfs"} / node_filesystem_size_bytes{app="node-exporter",fstype=~"ext.|xfs"} < 0.4
|
||||
and
|
||||
node_filesystem_readonly{app="node-exporter",fstype=~"ext.|xfs"} == 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
resource: '{{ $labels.instance }} {{ $labels.device }}'
|
||||
service: 'node-exporter'
|
||||
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
message:
|
||||
Filesystem on {{ $labels.device }} at {{ $labels.instance }} is predicted
|
||||
to run out of space within the next 4 hours.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-nodefilesystemspacefillingup
|
||||
expr: |
|
||||
predict_linear(node_filesystem_avail_bytes{app="node-exporter",fstype=~"ext.|xfs"}[6h], 4*60*60) < 0
|
||||
and
|
||||
node_filesystem_avail_bytes{app="node-exporter",fstype=~"ext.|xfs"} / node_filesystem_size_bytes{app="node-exporter",fstype=~"ext.|xfs"} < 0.2
|
||||
and
|
||||
node_filesystem_readonly{app="node-exporter",fstype=~"ext.|xfs"} == 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
resource: '{{ $labels.instance }} {{ $labels.device }}'
|
||||
service: 'node-exporter'
|
||||
|
||||
- alert: NodeFilesystemOutOfSpace
|
||||
annotations:
|
||||
message:
|
||||
Filesystem on node {{ $labels.node_name }} having IP {{ $labels.instance }} has only
|
||||
{{ $value }}% available space left on drive {{ $labels.device }}.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-nodefilesystemoutofspace
|
||||
expr: |
|
||||
node_filesystem_avail_bytes{app="node-exporter",fstype=~"ext.|xfs"} / node_filesystem_size_bytes{app="node-exporter",fstype=~"ext.|xfs"} * 100 < 10
|
||||
and
|
||||
node_filesystem_readonly{app="node-exporter",fstype=~"ext.|xfs"} == 0
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
resource: '{{ $labels.instance }} {{ $labels.device }}'
|
||||
service: 'node-exporter'
|
||||
|
||||
- alert: NodeFilesystemOutOfSpace
|
||||
annotations:
|
||||
message:
|
||||
Filesystem on node {{ $labels.node_name }} having IP {{ $labels.instance }} has only
|
||||
{{ $value }}% available space left on drive {{ $labels.device }}.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-nodefilesystemoutofspace
|
||||
expr: |
|
||||
node_filesystem_avail_bytes{app="node-exporter",fstype=~"ext.|xfs"} / node_filesystem_size_bytes{app="node-exporter",fstype=~"ext.|xfs"} * 100 < 5
|
||||
and
|
||||
node_filesystem_readonly{app="node-exporter",fstype=~"ext.|xfs"} == 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
resource: '{{ $labels.instance }} {{ $labels.device }}'
|
||||
service: 'node-exporter'
|
||||
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
message:
|
||||
Filesystem on {{ $labels.device }} at {{ $labels.instance }} is predicted
|
||||
to run out of files within the next 24 hours.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-nodefilesystemfilesfillingup
|
||||
expr: |
|
||||
predict_linear(node_filesystem_files_free{app="node-exporter",fstype=~"ext.|xfs"}[6h], 24*60*60) < 0
|
||||
and
|
||||
node_filesystem_files_free{app="node-exporter",fstype=~"ext.|xfs"} / node_filesystem_files{app="node-exporter",fstype=~"ext.|xfs"} < 0.4
|
||||
and
|
||||
node_filesystem_readonly{app="node-exporter",fstype=~"ext.|xfs"} == 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
resource: '{{ $labels.instance }} {{ $labels.device }}'
|
||||
service: 'node-exporter'
|
||||
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
message:
|
||||
Filesystem on {{ $labels.device }} at {{ $labels.instance }} is predicted
|
||||
to run out of files within the next 4 hours.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-nodefilesystemfilesfillingup
|
||||
expr: |
|
||||
predict_linear(node_filesystem_files_free{app="node-exporter",fstype=~"ext.|xfs"}[6h], 4*60*60) < 0
|
||||
and
|
||||
node_filesystem_files_free{app="node-exporter",fstype=~"ext.|xfs"} / node_filesystem_files{app="node-exporter",fstype=~"ext.|xfs"} < 0.2
|
||||
and
|
||||
node_filesystem_readonly{app="node-exporter",fstype=~"ext.|xfs"} == 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
resource: '{{ $labels.instance }} {{ $labels.device }}'
|
||||
service: 'node-exporter'
|
||||
|
||||
- alert: NodeFilesystemOutOfFiles
|
||||
annotations:
|
||||
message:
|
||||
Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only
|
||||
{{ $value }}% available inodes left.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-nodefilesystemoutoffiles
|
||||
expr: |
|
||||
node_filesystem_files_free{app="node-exporter",fstype=~"ext.|xfs"} / node_filesystem_files{app="node-exporter",fstype=~"ext.|xfs"} * 100 < 5
|
||||
and
|
||||
node_filesystem_readonly{app="node-exporter",fstype=~"ext.|xfs"} == 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
resource: '{{ $labels.instance }} {{ $labels.device }}'
|
||||
service: 'node-exporter'
|
||||
|
||||
- alert: NodeFilesystemOutOfSpace
|
||||
annotations:
|
||||
message:
|
||||
Filesystem on node {{ $labels.node_name }} having IP {{ $labels.instance }} has only {{ $value }}% inodes available on drive {{ $labels.device }}.
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-nodefilesystemoutofspace
|
||||
expr: |
|
||||
node_filesystem_files_free{app="node-exporter",fstype=~"ext.|xfs"} / node_filesystem_files{app="node-exporter",fstype=~"ext.|xfs"} * 100 < 10
|
||||
and
|
||||
node_filesystem_readonly{app="node-exporter",fstype=~"ext.|xfs"} == 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
resource: '{{ $labels.instance }} {{ $labels.device }}'
|
||||
service: 'node-exporter'
|
||||
|
||||
- alert: NodeNetworkReceiveErrs
|
||||
annotations:
|
||||
message:
|
||||
'{{ $labels.instance }} interface {{ $labels.device }} shows errors
|
||||
while receiving packets ({{ $value }} errors in two minutes).'
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-nodenetworkreceiveerrs
|
||||
expr: increase(node_network_receive_errs_total[2m]) > 10
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
resource: '{{ $labels.instance }} {{ $labels.device }}'
|
||||
service: 'node-exporter'
|
||||
|
||||
- alert: NodeNetworkTransmitErrs
|
||||
annotations:
|
||||
message:
|
||||
'{{ $labels.instance }} interface {{ $labels.device }} shows errors
|
||||
while transmitting packets ({{ $value }} errors in two minutes).'
|
||||
runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-nodenetworktransmiterrs
|
||||
expr: increase(node_network_transmit_errs_total[2m]) > 10
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
resource: '{{ $labels.instance }} {{ $labels.device }}'
|
||||
service: 'node-exporter'
|
||||
@@ -0,0 +1,95 @@
groups:
- name: prometheus
  rules:
  - alert: PromScrapeFailed
    annotations:
      message: Prometheus failed to scrape a target {{ $labels.job }} / {{ $labels.instance }}.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-promscrapefailed
    expr: up != 1
    for: 15m
    labels:
      severity: warning
      resource: '{{ $labels.job }}/{{ $labels.instance }}'
      service: prometheus
    runbook:
      steps:
      - Check the Prometheus Service Discovery page to find out why the target is unreachable.

  - alert: PromBadConfig
    annotations:
      message: Prometheus failed to reload its config.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-prombadconfig
    expr: prometheus_config_last_reload_successful{job="prometheus"} == 0
    for: 15m
    labels:
      severity: critical
      resource: '{{ $labels.job }}/{{ $labels.instance }}'
      service: prometheus
    runbook:
      steps:
      - Check the Prometheus pod's logs via `kubectl -n monitoring logs prometheus-0` and `-1`.
      - Check the `prometheus-rules` configmap via `kubectl -n monitoring get configmap prometheus-rules -o yaml`.

  - alert: PromAlertmanagerBadConfig
    annotations:
      message: Alertmanager failed to reload its config.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-promalertmanagerbadconfig
    expr: alertmanager_config_last_reload_successful{job="alertmanager"} == 0
    for: 10m
    labels:
      severity: critical
      resource: '{{ $labels.job }}/{{ $labels.instance }}'
      service: prometheus
    runbook:
      steps:
      - Check the Alertmanager pod's logs via `kubectl -n monitoring logs alertmanager-0`, `-1` and `-2`.
      - Check the `alertmanager` secret via `kubectl -n monitoring get secret alertmanager -o yaml`.

  - alert: PromAlertsFailed
    annotations:
      message: Alertmanager failed to send an alert.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-promalertsfailed
    expr: sum(increase(alertmanager_notifications_failed_total{job="alertmanager"}[5m])) by (namespace) > 0
    for: 5m
    labels:
      severity: critical
      resource: '{{ $labels.job }}/{{ $labels.instance }}'
      service: prometheus
    runbook:
      steps:
      - Check the Prometheus pod's logs via `kubectl -n monitoring logs prometheus-0` and `-1`.
      - 'Make sure the Alertmanager StatefulSet is running: `kubectl -n monitoring get pods`.'

  - alert: PromRemoteStorageFailures
    annotations:
      message: Prometheus failed to send {{ printf "%.1f" $value }}% of samples to remote storage.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-promremotestoragefailures
    expr: |
      (rate(prometheus_remote_storage_failed_samples_total{job="prometheus"}[1m]) * 100)
      /
      (rate(prometheus_remote_storage_failed_samples_total{job="prometheus"}[1m]) + rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus"}[1m]))
      > 1
    for: 15m
    labels:
      severity: critical
      resource: '{{ $labels.job }}/{{ $labels.instance }}'
      service: prometheus
    runbook:
      steps:
      - Ensure that the Prometheus volume has not reached capacity.
      - Check the Prometheus pod's logs via `kubectl -n monitoring logs prometheus-0` and `-1`.

  - alert: PromRuleFailures
    annotations:
      message: Prometheus failed to evaluate {{ printf "%.1f" $value }} rules/sec.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-promrulefailures
    expr: rate(prometheus_rule_evaluation_failures_total{job="prometheus"}[1m]) > 0
    for: 15m
    labels:
      severity: critical
      resource: '{{ $labels.job }}/{{ $labels.instance }}'
      service: prometheus
    runbook:
      steps:
      - Check the Prometheus pod's logs via `kubectl -n monitoring logs prometheus-0` and `-1`.
      - Check CPU/memory pressure on the node.
@@ -0,0 +1,85 @@
groups:
- name: thanos
  rules:
  - alert: ThanosSidecarDown
    annotations:
      message: The Thanos sidecar in `{{ $labels.namespace }}/{{ $labels.pod }}` is down.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-thanossidecardown
    expr: thanos_sidecar_prometheus_up != 1
    for: 5m
    labels:
      severity: warning
      resource: '{{ $labels.namespace }}/{{ $labels.pod }}'
      service: thanos

  - alert: ThanosSidecarNoHeartbeat
    annotations:
      message: The Thanos sidecar in `{{ $labels.namespace }}/{{ $labels.pod }}` didn't send a heartbeat in {{ $value }} seconds.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-thanossidecardown
    expr: time() - thanos_sidecar_last_heartbeat_success_time_seconds > 60
    for: 3m
    labels:
      severity: warning
      resource: '{{ $labels.namespace }}/{{ $labels.pod }}'
      service: thanos

  - alert: ThanosCompactorManyRetries
    annotations:
      message: The Thanos compactor in `{{ $labels.namespace }}` is experiencing a high retry rate.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-thanoscompactormanyretries
    expr: sum(rate(thanos_compact_retries_total[5m])) > 0.01
    for: 10m
    labels:
      severity: warning
      resource: '{{ $labels.namespace }}/{{ $labels.pod }}'
      service: thanos
    runbook:
      steps:
      - Check the `thanos-compact` pod's logs.

  - alert: ThanosShipperManyDirSyncFailures
    annotations:
      message: The Thanos shipper in `{{ $labels.namespace }}/{{ $labels.pod }}` is experiencing a high dir-sync failure rate.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-thanosshippermanydirsyncfailures
    expr: sum(rate(thanos_shipper_dir_sync_failures_total[5m])) > 0.01
    for: 10m
    labels:
      severity: warning
      resource: '{{ $labels.namespace }}/{{ $labels.pod }}'
      service: thanos
    runbook:
      steps:
      - Check the `thanos` container's logs inside the Prometheus pod.

  - alert: ThanosManyPanicRecoveries
    annotations:
      message: The Thanos component in `{{ $labels.namespace }}/{{ $labels.pod }}` is experiencing a high panic recovery rate.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-thanosmanypanicrecoveries
    expr: sum(rate(thanos_grpc_req_panics_recovered_total[5m])) > 0.01
    for: 10m
    labels:
      severity: warning
      resource: '{{ $labels.namespace }}/{{ $labels.pod }}'
      service: thanos

  - alert: ThanosManyBlockLoadFailures
    annotations:
      message: The Thanos store in `{{ $labels.namespace }}/{{ $labels.pod }}` is experiencing many failed block loads.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-thanosmanyblockloadfailures
    expr: sum(rate(thanos_bucket_store_block_load_failures_total[5m])) > 0.01
    for: 10m
    labels:
      severity: warning
      resource: '{{ $labels.namespace }}/{{ $labels.pod }}'
      service: thanos

  - alert: ThanosManyBlockDropFailures
    annotations:
      message: The Thanos store in `{{ $labels.namespace }}/{{ $labels.pod }}` is experiencing many failed block drops.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-thanosmanyblockdropfailures
    expr: sum(rate(thanos_bucket_store_block_drop_failures_total[5m])) > 0.01
    for: 10m
    labels:
      severity: warning
      resource: '{{ $labels.namespace }}/{{ $labels.pod }}'
      service: thanos
@@ -0,0 +1,34 @@
groups:
- name: velero
  rules:
  - alert: VeleroBackupTakesTooLong
    annotations:
      message: Last backup with schedule {{ $labels.schedule }} has not finished successfully within 60 minutes.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-velerobackuptakestoolong
    expr: time() - velero_backup_last_successful_timestamp{schedule!=""} > 3600
    for: 5m
    labels:
      severity: warning
      resource: '{{ $labels.schedule }}'
      service: 'velero'
    runbook:
      steps:
      - Check whether a backup is really stuck in the "InProgress" state via `velero -n velero backup get`.
      - Check the backup logs via `velero -n velero backup logs [BACKUP_NAME]`.
      - Depending on the backup, find the pod and check the processes inside that pod or any sidecar containers.

  - alert: VeleroNoRecentBackup
    annotations:
      message: There has not been a successful backup for schedule {{ $labels.schedule }} in the last 24 hours.
      runbook_url: https://docs.kubermatic.com/kubermatic/master/cheat_sheets/alerting_runbook/#alert-veleronorecentbackup
    expr: time() - velero_backup_last_successful_timestamp{schedule!=""} > 3600*25
    labels:
      severity: critical
      resource: '{{ $labels.schedule }}'
      service: 'velero'
    runbook:
      steps:
      - Check whether backups really stopped via `velero -n velero backup get`.
      - If a backup failed, check its logs via `velero -n velero backup logs [BACKUP_NAME]`.
      - If a backup was not even triggered, check the Velero server's logs via `kubectl -n velero logs -l 'name=velero-server'`.
      - Make sure the Velero server pod has not been rescheduled and consider pinning it to a stable node with a node affinity (see the sketch below).
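The last runbook step above suggests pinning the Velero server to a stable node. A minimal sketch of what that could look like in the Velero server Deployment's pod template, assuming an illustrative node label `cozystack.io/stable: "true"` (the label name is an example, not something defined in this repository):

# Hypothetical node-affinity snippet; the node label is an example only.
spec:
  template:
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: cozystack.io/stable
                operator: In
                values: ["true"]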
@@ -0,0 +1,23 @@
groups:
- name: vertical-pod-autoscaler
  rules:

  # These rules provide metrics to be consumed by Kubernetes' VPA. The VPA only needs a tiny fraction
  # of the labels available on the container_* metrics, so we reduce them with the inner query to
  # only contain the pod name, namespace and container name.
  # Because the VPA does not allow changing the metric name it queries, only the job selector,
  # we "cheat" by re-using the same metric name and injecting a custom job ("cadvisor-vpa") label.

  - record: container_cpu_usage_seconds_total
    expr: |
      label_replace(
        sum(container_cpu_usage_seconds_total{job="cadvisor", pod=~".+", name!="POD", name!=""}) by (pod, namespace, name),
        "job", "cadvisor-vpa", "", ""
      )

  - record: container_memory_usage_bytes
    expr: |
      label_replace(
        sum(container_memory_usage_bytes{job="cadvisor", pod=~".+", name!="POD", name!=""}) by (pod, namespace, name),
        "job", "cadvisor-vpa", "", ""
      )
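A hedged sketch of how a VPA recommender could consume these recorded series through its Prometheus history provider. Only the `cadvisor-vpa` job name comes from the rules above; the endpoint, image tag and label-mapping flags are assumptions and should be checked against the VPA release actually deployed:

# Hypothetical recommender container spec; verify flag names against your VPA version.
containers:
- name: recommender
  image: registry.k8s.io/autoscaling/vpa-recommender:1.0.0   # example tag
  args:
  - --storage=prometheus
  - --prometheus-address=http://prometheus.monitoring.svc:9090   # assumed endpoint
  - --prometheus-cadvisor-job-name=cadvisor-vpa                  # matches the injected job label
  - --container-namespace-label=namespace
  - --container-pod-name-label=pod
  - --container-name-label=name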
@@ -0,0 +1,9 @@
{{- range $path, $file := .Files.Glob "files/rules/**/*.yaml" }}
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMRule
metadata:
  name: {{ toString $path | trimPrefix "files/rules/" | replace "/" "-" | replace ".yaml" "" }}
spec:
  {{- toString $file | nindent 2 }}
---
{{- end }}
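For illustration, a rules file at an assumed path `files/rules/kubermatic/velero.yaml` would render roughly as follows; only the name mangling and the inlined `spec` come from the template above, the path itself is an example:

# Hypothetical rendering for files/rules/kubermatic/velero.yaml
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMRule
metadata:
  name: kubermatic-velero   # "files/rules/" trimmed, "/" replaced by "-", ".yaml" dropped
spec:
  groups:
  - name: velero
    rules:
    - alert: VeleroNoRecentBackup
      expr: time() - velero_backup_last_successful_timestamp{schedule!=""} > 3600*25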
@@ -1,7 +1,3 @@
NAME=seaweedfs-system

include ../../../scripts/package.mk

update:
	rm -rf charts
	mkdir -p charts
@@ -877,7 +877,7 @@ s3:
# For more information, visit: https://container-object-storage-interface.github.io/docs/deployment-guide
cosi:
  enabled: false
  image: "ghcr.io/seaweedfs/seaweedfs-cosi-driver:v0.1.2"
  image: "ghcr.io/seaweedfs/seaweedfs-cosi-driver:v0.1.1"
  driverName: "seaweedfs.objectstorage.k8s.io"
  bucketClassName: "seaweedfs"
  endpoint: ""
@@ -114,6 +114,7 @@ seaweedfs:
policy.cozystack.io/allow-to-apiserver: "true"
driverName: "seaweedfs.objectstorage.k8s.io"
bucketClassName: "seaweedfs"
image: "ghcr.io/seaweedfs/seaweedfs-cosi-driver:v0.1.1"
region: ""

sidecar: