From b82ba4ac3c4063b824fda784718a8c54f4e4615d Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Thu, 26 Sep 2024 10:48:44 +0200 Subject: [PATCH 01/41] Update ingress-nginx v1.11.2 (#358) Signed-off-by: Andrei Kvapil Signed-off-by: Andrei Kvapil --- packages/system/ingress-nginx/values.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/system/ingress-nginx/values.yaml b/packages/system/ingress-nginx/values.yaml index f355dd48..6066cd29 100644 --- a/packages/system/ingress-nginx/values.yaml +++ b/packages/system/ingress-nginx/values.yaml @@ -5,8 +5,8 @@ ingress-nginx: image: registry: ghcr.io image: kvaps/ingress-nginx-with-protobuf-exporter/controller - tag: v1.11.1 - digest: sha256:76ca6d7898445140785091ff4a2b21df8c2b50fd1922fff6bd5118af75d33ab2 + tag: v1.11.2 + digest: sha256:f4194edb06a43c82405167427ebd552b90af9698bd295845418680aebc13f600 allowSnippetAnnotations: true replicaCount: 2 admissionWebhooks: @@ -16,7 +16,7 @@ ingress-nginx: enabled: true extraContainers: - name: protobuf-exporter - image: ghcr.io/kvaps/ingress-nginx-with-protobuf-exporter/protobuf-exporter:v1.11.1@sha256:82abdc9ab80b406dbeb8cd43fd8759b25c5ea77eb95f924bedc61453b9a3f693 + image: ghcr.io/kvaps/ingress-nginx-with-protobuf-exporter/protobuf-exporter:v1.11.2@sha256:25ed6a5f508bbc59134ad786f1e765d1c2187742075a4e828d68ef3f07a78e52 args: - --server.telemetry-address=0.0.0.0:9090 - --server.exporter-address=0.0.0.0:9091 From ec27a19afb3dfa094c656efee107d178934e651c Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Thu, 26 Sep 2024 10:56:53 +0200 Subject: [PATCH 02/41] Add basic alerting system (#355) ![alerta](https://github.com/user-attachments/assets/87f792c1-0e1f-4070-84b1-7335cc0e7810) - Remove grafana-oncall - Add Alerta - Configure basic alerts - Update grafana 10 --> 11 ## Summary by CodeRabbit - **New Features** - Added new configuration options for the Alerta service, enhancing user customization. 
- Introduced a new Helm chart for the VictoriaMetrics Kubernetes stack, enabling comprehensive monitoring solutions. - Added VMAuth feature for enhanced authentication in the Kubernetes stack. - **Bug Fixes** - Fixed issues with the ETCD dashboard and improved ingress path prefix handling. - **Documentation** - Updated README and release guide for the VictoriaMetrics stack with installation and configuration instructions. - Introduced a changelog for organized tracking of changes. --------- Signed-off-by: Andrei Kvapil --- packages/extra/monitoring/README.md | 15 +- .../templates/alerta/alerta-db.yaml | 16 + .../monitoring/templates/alerta/alerta.yaml | 214 ++ .../templates/dashboard-resourcemap.yaml | 2 + .../monitoring/templates/grafana/grafana.yaml | 4 +- .../templates/oncall/oncall-db.yaml | 15 - .../templates/oncall/oncall-redis.yaml | 66 - .../templates/oncall/oncall-release.yaml | 40 - .../monitoring/templates/vm/vmalert.yaml | 2 +- .../templates/vm/vmalertmanager.yaml | 32 - packages/extra/monitoring/values.schema.json | 35 +- packages/extra/monitoring/values.yaml | 23 +- packages/system/grafana-oncall/Chart.yaml | 3 - packages/system/grafana-oncall/Makefile | 10 - .../grafana-oncall/charts/oncall/Chart.lock | 27 - .../grafana-oncall/charts/oncall/Chart.yaml | 39 - .../grafana-oncall/charts/oncall/README.md | 431 --- .../charts/oncall/templates/NOTES.txt | 41 - .../charts/oncall/templates/_env.tpl | 656 ----- .../charts/oncall/templates/_helpers.tpl | 121 - .../oncall/templates/celery/_helpers.tpl | 26 - .../oncall/templates/celery/deployment.yaml | 89 - .../charts/oncall/templates/cert-issuer.yaml | 22 - .../templates/engine/_helpers-engine.tpl | 26 - .../oncall/templates/engine/deployment.yaml | 98 - .../oncall/templates/engine/job-migrate.yaml | 102 - .../templates/engine/service-external.yaml | 24 - .../templates/engine/service-internal.yaml | 15 - .../oncall/templates/ingress-regular.yaml | 65 - .../templates/integrations/_helpers.tpl | 26 - 
.../templates/integrations/deployment.yaml | 99 - .../integrations/service-external.yaml | 24 - .../integrations/service-internal.yaml | 17 - .../charts/oncall/templates/secrets.yaml | 98 - .../oncall/templates/serviceaccount.yaml | 18 - .../templates/telegram-polling/_helpers.tpl | 22 - .../telegram-polling/deployment.yaml | 53 - .../charts/oncall/templates/ui/_helpers.tpl | 8 - .../oncall/templates/ui/deployment.yaml | 31 - .../grafana-oncall/charts/oncall/values.yaml | 719 ----- packages/system/grafana-oncall/values.yaml | 19 - packages/system/monitoring/Makefile | 23 + .../system/monitoring/alerts/general.yaml | 57 - .../system/monitoring/alerts/kube-dns.yaml | 25 - .../monitoring/alerts/kube-state-metrics.yaml | 50 - .../system/monitoring/alerts/kubelet.yaml | 63 - .../monitoring/alerts/node-disk-usage.yaml | 357 --- packages/system/monitoring/alerts/node.yaml | 36 - .../system/monitoring/alerts/pod-status.yaml | 34 - .../victoria-metrics-k8s-stack}/.helmignore | 4 +- .../victoria-metrics-k8s-stack/CHANGELOG.md | 688 +++++ .../victoria-metrics-k8s-stack/Chart.lock | 24 + .../victoria-metrics-k8s-stack/Chart.yaml | 66 + .../victoria-metrics-k8s-stack/README.md | 2576 +++++++++++++++++ .../README.md.gotmpl | 300 ++ .../RELEASE_GUIDE.md | 40 + .../RELEASE_NOTES.md | 12 + .../victoria-metrics-k8s-stack/_changelog.md | 13 + .../victoria-metrics-k8s-stack/_index.md | 13 + .../files/rules/generated/etcd.yaml | 165 ++ .../files/rules/generated/general.rules.yaml | 53 + ...les.container_cpu_usage_seconds_total.yaml | 11 + .../k8s.rules.container_memory_cache.yaml | 10 + .../k8s.rules.container_memory_rss.yaml | 10 + .../k8s.rules.container_memory_swap.yaml | 10 + ...es.container_memory_working_set_bytes.yaml | 10 + .../k8s.rules.container_resource.yaml | 79 + .../rules/generated/k8s.rules.pod_owner.yaml | 54 + .../kube-apiserver-availability.rules.yaml | 128 + .../kube-apiserver-burnrate.rules.yaml | 318 ++ .../kube-apiserver-histogram.rules.yaml | 15 + 
.../rules/generated/kube-apiserver-slos.yaml | 63 + .../kube-prometheus-general.rules.yaml | 9 + .../kube-prometheus-node-recording.rules.yaml | 21 + .../rules/generated/kube-scheduler.rules.yaml | 48 + .../rules/generated/kube-state-metrics.yaml | 55 + .../files/rules/generated/kubelet.rules.yaml | 18 + .../rules/generated/kubernetes-apps.yaml | 257 ++ .../rules/generated/kubernetes-resources.yaml | 113 + .../rules/generated/kubernetes-storage.yaml | 101 + .../kubernetes-system-apiserver.yaml | 62 + .../kubernetes-system-controller-manager.yaml | 13 + .../generated/kubernetes-system-kubelet.yaml | 136 + .../kubernetes-system-scheduler.yaml | 13 + .../rules/generated/kubernetes-system.yaml | 27 + .../rules/generated/node-exporter.rules.yaml | 76 + .../files/rules/generated/node-exporter.yaml | 336 +++ .../files/rules/generated/node-network.yaml | 13 + .../files/rules/generated/node.rules.yaml | 44 + .../templates/_helpers.tpl | 458 +++ .../templates/extra-objects.yaml | 4 + .../templates/rules/rule.yaml | 121 + .../charts/victoria-metrics-k8s-stack/todo.md | 26 + .../values.minikube.yaml | 38 + .../victoria-metrics-k8s-stack/values.yaml | 1233 ++++++++ .../monitoring/templates/cadvisor-scrape.yaml | 3 + .../monitoring/templates/kubelet-scrape.yaml | 4 + .../system/monitoring/templates/vmagent.yaml | 1 + packages/system/monitoring/values.yaml | 20 + 99 files changed, 8226 insertions(+), 3724 deletions(-) create mode 100644 packages/extra/monitoring/templates/alerta/alerta-db.yaml create mode 100644 packages/extra/monitoring/templates/alerta/alerta.yaml delete mode 100644 packages/extra/monitoring/templates/oncall/oncall-db.yaml delete mode 100644 packages/extra/monitoring/templates/oncall/oncall-redis.yaml delete mode 100644 packages/extra/monitoring/templates/oncall/oncall-release.yaml delete mode 100644 packages/extra/monitoring/templates/vm/vmalertmanager.yaml delete mode 100644 packages/system/grafana-oncall/Chart.yaml delete mode 100644 
packages/system/grafana-oncall/Makefile delete mode 100644 packages/system/grafana-oncall/charts/oncall/Chart.lock delete mode 100644 packages/system/grafana-oncall/charts/oncall/Chart.yaml delete mode 100644 packages/system/grafana-oncall/charts/oncall/README.md delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/NOTES.txt delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/_env.tpl delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/_helpers.tpl delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/celery/_helpers.tpl delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/celery/deployment.yaml delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/cert-issuer.yaml delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/engine/_helpers-engine.tpl delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/engine/deployment.yaml delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/engine/job-migrate.yaml delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/engine/service-external.yaml delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/engine/service-internal.yaml delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/ingress-regular.yaml delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/integrations/_helpers.tpl delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/integrations/deployment.yaml delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/integrations/service-external.yaml delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/integrations/service-internal.yaml delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/secrets.yaml delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/serviceaccount.yaml 
delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/telegram-polling/_helpers.tpl delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/telegram-polling/deployment.yaml delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/ui/_helpers.tpl delete mode 100644 packages/system/grafana-oncall/charts/oncall/templates/ui/deployment.yaml delete mode 100644 packages/system/grafana-oncall/charts/oncall/values.yaml delete mode 100644 packages/system/grafana-oncall/values.yaml delete mode 100644 packages/system/monitoring/alerts/general.yaml delete mode 100644 packages/system/monitoring/alerts/kube-dns.yaml delete mode 100644 packages/system/monitoring/alerts/kube-state-metrics.yaml delete mode 100644 packages/system/monitoring/alerts/kubelet.yaml delete mode 100644 packages/system/monitoring/alerts/node-disk-usage.yaml delete mode 100644 packages/system/monitoring/alerts/node.yaml delete mode 100644 packages/system/monitoring/alerts/pod-status.yaml rename packages/system/{grafana-oncall/charts/oncall => monitoring/charts/victoria-metrics-k8s-stack}/.helmignore (87%) create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/CHANGELOG.md create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.lock create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md.gotmpl create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_GUIDE.md create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_NOTES.md create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/_changelog.md create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/_index.md create mode 100644 
packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/etcd.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_usage_seconds_total.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_cache.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_rss.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_swap.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_working_set_bytes.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_resource.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.pod_owner.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-availability.rules.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-burnrate.rules.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-histogram.rules.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-slos.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-general.rules.yaml create mode 100644 
packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-node-recording.rules.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-scheduler.rules.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-state-metrics.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubelet.rules.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-apps.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-resources.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-storage.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-apiserver.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-controller-manager.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-kubelet.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-scheduler.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.rules.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-network.yaml create mode 100644 
packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node.rules.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/_helpers.tpl create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/extra-objects.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/rules/rule.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/todo.md create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.minikube.yaml create mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.yaml diff --git a/packages/extra/monitoring/README.md b/packages/extra/monitoring/README.md index a5f85901..0aa2ed1a 100644 --- a/packages/extra/monitoring/README.md +++ b/packages/extra/monitoring/README.md @@ -4,9 +4,12 @@ ### Common parameters -| Name | Description | Value | -| ----------------- | --------------------------------------------------------------------------------------------------------- | ------- | -| `host` | The hostname used to access the grafana externally (defaults to 'grafana' subdomain for the tenant host). | `""` | -| `metricsStorages` | Configuration of metrics storage instances | `[]` | -| `logsStorages` | Configuration of logs storage instances | `[]` | -| `oncall.enabled` | Enable Grafana OnCall | `false` | +| Name | Description | Value | +| ------------------------------- | --------------------------------------------------------------------------------------------------------- | ------------------------------------------------ | +| `host` | The hostname used to access the grafana externally (defaults to 'grafana' subdomain for the tenant host). 
| `""` | +| `metricsStorages` | Configuration of metrics storage instances | `[]` | +| `logsStorages` | Configuration of logs storage instances | `[]` | +| `alerta.storage` | Persistent Volume size for alerta database | `10Gi` | +| `alerta.storageClassName` | StorageClass used to store the data | `""` | +| `alerta.alerts.telegram.token` | telegram token for your bot | `""` | +| `alerta.alerts.telegram.chatID` | specify multiple ID's separated by comma. Get yours in https://t.me/chatid_echo_bot | `""` | diff --git a/packages/extra/monitoring/templates/alerta/alerta-db.yaml b/packages/extra/monitoring/templates/alerta/alerta-db.yaml new file mode 100644 index 00000000..e06c322c --- /dev/null +++ b/packages/extra/monitoring/templates/alerta/alerta-db.yaml @@ -0,0 +1,16 @@ +--- +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: alerta-db +spec: + instances: 2 + storage: + size: {{ required ".Values.alerta.storage is required" .Values.alerta.storage }} + {{- with .Values.alerta.storageClassName }} + storageClass: {{ .
}} + {{- end }} + + inheritedMetadata: + labels: + policy.cozystack.io/allow-to-apiserver: "true" diff --git a/packages/extra/monitoring/templates/alerta/alerta.yaml b/packages/extra/monitoring/templates/alerta/alerta.yaml new file mode 100644 index 00000000..e723661e --- /dev/null +++ b/packages/extra/monitoring/templates/alerta/alerta.yaml @@ -0,0 +1,214 @@ +{{- $myNS := lookup "v1" "Namespace" "" .Release.Namespace }} +{{- $ingress := index $myNS.metadata.annotations "namespace.cozystack.io/ingress" }} +{{- $host := index $myNS.metadata.annotations "namespace.cozystack.io/host" }} + +{{- $apiKey := randAlphaNum 32 }} +{{- $existingSecret := lookup "v1" "Secret" .Release.Namespace "alerta" }} +{{- if $existingSecret }} +{{- $apiKey = index $existingSecret.data "alerta-api-key" | b64dec }} +{{- end }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: alerta + labels: + app: alerta +type: Opaque +data: + alerta-api-key: {{ $apiKey | b64enc }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: alerta + labels: + app: alerta +data: + config.js: | + // contents of config.js + 'use strict'; + angular.module('config', []) + .constant('config', { + 'endpoint' : "/api", + 'provider' : "basic" + }) + .constant('colors', {}); +--- +apiVersion: v1 +kind: Service +metadata: + name: alerta + labels: + app: alerta +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app: alerta + release: alerta +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: alerta + labels: + app: alerta +spec: + replicas: 1 + selector: + matchLabels: + app: alerta + release: alerta + template: + metadata: + labels: + app: alerta + release: alerta + spec: + containers: + - name: alerta + image: "alerta/alerta-web:9.0.4" + imagePullPolicy: IfNotPresent + env: + - name: ADMIN_USERS + valueFrom: + secretKeyRef: + key: user + name: grafana-admin-password + - name: ADMIN_PASSWORD + valueFrom: + secretKeyRef: + key: password + name: 
grafana-admin-password + - name: ADMIN_KEY + valueFrom: + secretKeyRef: + name: alerta + key: alerta-api-key + - name: ALERTA_API_KEY + valueFrom: + secretKeyRef: + name: alerta + key: alerta-api-key + - name: DATABASE_URL + valueFrom: + secretKeyRef: + name: alerta-db-app + key: uri + + - name: AUTH_REQUIRED + value: "True" + + {{- if and .Values.alerta.alerts.telegram.chatID .Values.alerta.alerts.telegram.token }} + - name: "PLUGINS" + value: "telegram" + - name: TELEGRAM_CHAT_ID + value: "{{ .Values.alerta.alerts.telegram.chatID }}" + - name: TELEGRAM_TOKEN + value: "{{ .Values.alerta.alerts.telegram.token }}" + - name: TELEGRAM_WEBHOOK_URL + value: "https://{{ .Values.host | default (printf "alerta.%s" $host) }}/api/webhooks/telegram?api-key={{ $apiKey }}" + {{- end }} + + ports: + - name: http + containerPort: 8080 + protocol: TCP + livenessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 300 + readinessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 5 + volumeMounts: + - name: alerta-config + mountPath: /app/config.js + subPath: config.js + volumes: + - name: alerta-config + configMap: + name: alerta +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: alerta + labels: + app: alerta + annotations: + acme.cert-manager.io/http01-ingress-class: {{ $ingress }} + cert-manager.io/cluster-issuer: letsencrypt-prod +spec: + ingressClassName: {{ $ingress }} + tls: + - hosts: + - "{{ .Values.host | default (printf "alerta.%s" $host) }}" + secretName: alerta-tls + rules: + - host: "{{ .Values.host | default (printf "alerta.%s" $host) }}" + http: + paths: + - path: / + pathType: ImplementationSpecific + backend: + service: + name: alerta + port: + name: http +--- +apiVersion: v1 +kind: Secret +metadata: + name: alertmanager +type: Opaque +stringData: + alertmanager.yaml: | + global: + resolve_timeout: 5m + route: + group_by: [alertname, namespace, cluster] + group_wait: 15s + group_interval: 15s + repeat_interval: 1h + receiver: 'alerta' + routes: + -
receiver: alerta_no_resolve + repeat_interval: 1m + matchers: + - 'event="Heartbeat"' + - receiver: blackhole + matchers: + - 'severity="none"' + - receiver: blackhole + matchers: + - 'alertname=~"CDIDefaultStorageClassDegraded|CDINoDefaultStorageClass"' + receivers: + - name: blackhole + - name: "alerta" + webhook_configs: + - url: 'http://alerta/api/webhooks/prometheus?api-key={{ $apiKey }}' + send_resolved: true + - name: "alerta_no_resolve" + webhook_configs: + - url: 'http://alerta/api/webhooks/prometheus?api-key={{ $apiKey }}' + send_resolved: false +--- +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMAlertmanager +metadata: + name: alertmanager +spec: + replicaCount: 3 + configSecret: alertmanager + podMetadata: + labels: + policy.cozystack.io/allow-to-apiserver: "true" diff --git a/packages/extra/monitoring/templates/dashboard-resourcemap.yaml b/packages/extra/monitoring/templates/dashboard-resourcemap.yaml index e0828484..f15714da 100644 --- a/packages/extra/monitoring/templates/dashboard-resourcemap.yaml +++ b/packages/extra/monitoring/templates/dashboard-resourcemap.yaml @@ -9,6 +9,7 @@ rules: - ingresses resourceNames: - grafana-ingress + - alerta verbs: ["get", "list", "watch"] - apiGroups: - "" @@ -23,4 +24,5 @@ rules: - services resourceNames: - grafana-service + - alerta verbs: ["get", "list", "watch"] diff --git a/packages/extra/monitoring/templates/grafana/grafana.yaml b/packages/extra/monitoring/templates/grafana/grafana.yaml index a4ae5f33..e98573be 100644 --- a/packages/extra/monitoring/templates/grafana/grafana.yaml +++ b/packages/extra/monitoring/templates/grafana/grafana.yaml @@ -56,7 +56,7 @@ spec: mountPath: /var/lib/grafana containers: - name: grafana - image: grafana/grafana:10.1.0 + image: grafana/grafana:11.2.0 securityContext: allowPrivilegeEscalation: false readOnlyRootFilesystem: false @@ -64,7 +64,7 @@ spec: failureThreshold: 3 env: - name: GF_INSTALL_PLUGINS - value: 
grafana-worldmap-panel,flant-statusmap-panel,grafana-oncall-app,natel-discrete-panel + value: grafana-worldmap-panel,flant-statusmap-panel,natel-discrete-panel - name: ONCALL_API_URL value: http://grafana-oncall-engine:8080 - name: GF_DATABASE_HOST diff --git a/packages/extra/monitoring/templates/oncall/oncall-db.yaml b/packages/extra/monitoring/templates/oncall/oncall-db.yaml deleted file mode 100644 index 048ec173..00000000 --- a/packages/extra/monitoring/templates/oncall/oncall-db.yaml +++ /dev/null @@ -1,15 +0,0 @@ -{{- if .Values.oncall.enabled }} ---- -apiVersion: postgresql.cnpg.io/v1 -kind: Cluster -metadata: - name: grafana-oncall-db -spec: - instances: 2 - storage: - size: 10Gi - - inheritedMetadata: - labels: - policy.cozystack.io/allow-to-apiserver: "true" -{{- end }} diff --git a/packages/extra/monitoring/templates/oncall/oncall-redis.yaml b/packages/extra/monitoring/templates/oncall/oncall-redis.yaml deleted file mode 100644 index da173a81..00000000 --- a/packages/extra/monitoring/templates/oncall/oncall-redis.yaml +++ /dev/null @@ -1,66 +0,0 @@ -{{- if .Values.oncall.enabled }} ---- -apiVersion: databases.spotahome.com/v1 -kind: RedisFailover -metadata: - name: grafana-oncall - labels: - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} -spec: - sentinel: - replicas: 3 - resources: - requests: - cpu: 100m - limits: - memory: 100Mi - redis: - replicas: 2 - resources: - requests: - cpu: 150m - memory: 400Mi - limits: - cpu: 2 - memory: 1000Mi - storage: - persistentVolumeClaim: - metadata: - name: redisfailover-persistent-data - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 5Gi - exporter: - enabled: true - image: oliver006/redis_exporter:v1.55.0-alpine - args: - - --web.telemetry-path - - /metrics - env: - - name: REDIS_EXPORTER_LOG_FORMAT - value: txt - customConfig: - - tcp-keepalive 0 - - loglevel notice - auth: - secretPath: grafana-oncall-redis-password ---- 
-apiVersion: v1 -kind: Secret -metadata: - name: grafana-oncall-redis-password - labels: - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} -data: -{{- $existingSecret := lookup "v1" "Secret" .Release.Namespace "grafana-oncall-redis-password" }} - {{- if $existingSecret }} - password: {{ index $existingSecret.data "password" }} - {{- else }} - password: {{ randAlphaNum 32 | b64enc }} - {{- end }} -{{- end }} diff --git a/packages/extra/monitoring/templates/oncall/oncall-release.yaml b/packages/extra/monitoring/templates/oncall/oncall-release.yaml deleted file mode 100644 index 7796e1a7..00000000 --- a/packages/extra/monitoring/templates/oncall/oncall-release.yaml +++ /dev/null @@ -1,40 +0,0 @@ -{{- if .Values.oncall.enabled }} -{{- $myNS := lookup "v1" "Namespace" "" .Release.Namespace }} -{{- $host := index $myNS.metadata.annotations "namespace.cozystack.io/host" }} -apiVersion: helm.toolkit.fluxcd.io/v2 -kind: HelmRelease -metadata: - name: grafana-oncall-system - labels: - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} -spec: - chart: - spec: - chart: cozy-grafana-oncall - reconcileStrategy: Revision - sourceRef: - kind: HelmRepository - name: cozystack-system - namespace: cozy-system - version: '*' - interval: 1m0s - timeout: 5m0s - values: - oncall: - fullnameOverride: grafana-oncall - externalGrafana: - url: "https://{{ .Values.host | default (printf "grafana.%s" $host) }}/" - - externalPostgresql: - host: grafana-oncall-db-rw - db_name: app - user: app - existingSecret: grafana-oncall-db-app - passwordKey: password - - externalRedis: - host: rfrm-grafana-oncall - existingSecret: {{ .Release.Name }}-oncall-redis-password - passwordKey: password -{{- end }} diff --git a/packages/extra/monitoring/templates/vm/vmalert.yaml b/packages/extra/monitoring/templates/vm/vmalert.yaml index 0e6536d3..457b82b9 100644 --- 
a/packages/extra/monitoring/templates/vm/vmalert.yaml +++ b/packages/extra/monitoring/templates/vm/vmalert.yaml @@ -11,7 +11,7 @@ spec: extraArgs: remoteWrite.disablePathAppend: "true" notifiers: - - url: http://vmalertmanager.{{ $.Release.Namespace }}.svc:9093 + - url: http://vmalertmanager-alertmanager.{{ $.Release.Namespace }}.svc:9093 remoteRead: url: http://vmselect-{{ .name }}.{{ $.Release.Namespace }}.svc:8481/select/0/prometheus remoteWrite: diff --git a/packages/extra/monitoring/templates/vm/vmalertmanager.yaml b/packages/extra/monitoring/templates/vm/vmalertmanager.yaml deleted file mode 100644 index eeb0f971..00000000 --- a/packages/extra/monitoring/templates/vm/vmalertmanager.yaml +++ /dev/null @@ -1,32 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: alertmanager -type: Opaque -stringData: - alertmanager.yaml: | - global: - resolve_timeout: 5m - route: - group_by: - - job - - alertname - group_wait: 30s - group_interval: 5m - repeat_interval: 12h - receiver: 'webhook' - receivers: - - name: 'webhook' - webhook_configs: - - url: http://{{ .Release.Name }}-oncall-engine.{{ .Release.Namespace }}.svc:8080/integrations/v1/alertmanager/Kjb2NWxxSlgGtxz9F4ihovQBB/ ---- -apiVersion: operator.victoriametrics.com/v1beta1 -kind: VMAlertmanager -metadata: - name: alertmanager -spec: - replicaCount: 2 - configSecret: alertmanager - podMetadata: - labels: - policy.cozystack.io/allow-to-apiserver: "true" diff --git a/packages/extra/monitoring/values.schema.json b/packages/extra/monitoring/values.schema.json index eadf8d68..f0bb8b3b 100644 --- a/packages/extra/monitoring/values.schema.json +++ b/packages/extra/monitoring/values.schema.json @@ -23,13 +23,38 @@ "type": "object" } }, - "oncall": { + "alerta": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable Grafana OnCall", - "default": false + "storage": { + "type": "string", + "description": "Persistent Volume size for alerta database", + "default": "10Gi" + }, +
"storageClassName": { + "type": "string", + "description": "StorageClass used to store the data", + "default": "" + }, + "alerts": { + "type": "object", + "properties": { + "telegram": { + "type": "object", + "properties": { + "token": { + "type": "string", + "description": "telegram token for your bot", + "default": "" + }, + "chatID": { + "type": "string", + "description": "specify multiple ID's separated by comma. Get yours in https://t.me/chatid_echo_bot", + "default": "" + } + } + } + } } } } diff --git a/packages/extra/monitoring/values.yaml b/packages/extra/monitoring/values.yaml index 6353fef4..36862e13 100644 --- a/packages/extra/monitoring/values.yaml +++ b/packages/extra/monitoring/values.yaml @@ -25,7 +25,22 @@ logsStorages: storage: 10Gi storageClassName: replicated -## @param oncall.enabled Enable Grafana OnCall -## -oncall: - enabled: false +## Configuration for Alerta +## @param alerta.storage Persistent Volume size for alerta database +## @param alerta.storageClassName StorageClass used to store the data +## +alerta: + storage: 10Gi + storageClassName: "" + + alerts: + ## @param alerta.alerts.telegram.token telegram token for your bot + ## @param alerta.alerts.telegram.chatID specify multiple ID's separated by comma.
Get yours in https://t.me/chatid_echo_bot + ## example: + ## telegram: + ## token: "<telegram-bot-token>" + ## chatID: "-4520856007" + ## + telegram: + token: "" + chatID: "" diff --git a/packages/system/grafana-oncall/Chart.yaml b/packages/system/grafana-oncall/Chart.yaml deleted file mode 100644 index 0f24a040..00000000 --- a/packages/system/grafana-oncall/Chart.yaml +++ /dev/null @@ -1,3 +0,0 @@ -apiVersion: v2 -name: cozy-grafana-oncall -version: 0.0.0 # Placeholder, the actual version will be automatically set during the build process diff --git a/packages/system/grafana-oncall/Makefile b/packages/system/grafana-oncall/Makefile deleted file mode 100644 index 096e34a8..00000000 --- a/packages/system/grafana-oncall/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -NAME=grafana-oncall-system - -include ../../../scripts/package.mk - -update: - rm -rf charts - helm repo add grafana https://grafana.github.io/helm-charts - helm repo update grafana - helm pull grafana/oncall --untar --untardir charts - rm -rf charts/oncall/charts diff --git a/packages/system/grafana-oncall/charts/oncall/Chart.lock b/packages/system/grafana-oncall/charts/oncall/Chart.lock deleted file mode 100644 index cab54bbf..00000000 --- a/packages/system/grafana-oncall/charts/oncall/Chart.lock +++ /dev/null @@ -1,27 +0,0 @@ -dependencies: -- name: cert-manager - repository: https://charts.jetstack.io - version: v1.8.0 -- name: mariadb - repository: https://charts.bitnami.com/bitnami - version: 12.2.5 -- name: postgresql - repository: https://charts.bitnami.com/bitnami - version: 11.9.10 -- name: rabbitmq - repository: https://charts.bitnami.com/bitnami - version: 12.0.0 -- name: redis - repository: https://charts.bitnami.com/bitnami - version: 16.13.2 -- name: grafana - repository: https://grafana.github.io/helm-charts - version: 6.57.1 -- name: ingress-nginx - repository: https://kubernetes.github.io/ingress-nginx - version: 4.1.4 -- name: prometheus - repository:
https://prometheus-community.github.io/helm-charts - version: 25.8.2 -digest: sha256:edc9fef449a694cd319135e37ac84f8247ac9ad0c48ac86099dae4e428beb7b7 -generated: "2024-01-26T17:54:48.132209769Z" diff --git a/packages/system/grafana-oncall/charts/oncall/Chart.yaml b/packages/system/grafana-oncall/charts/oncall/Chart.yaml deleted file mode 100644 index 3f490e21..00000000 --- a/packages/system/grafana-oncall/charts/oncall/Chart.yaml +++ /dev/null @@ -1,39 +0,0 @@ -apiVersion: v2 -appVersion: v1.3.94 -dependencies: -- condition: cert-manager.enabled - name: cert-manager - repository: https://charts.jetstack.io - version: v1.8.0 -- condition: mariadb.enabled - name: mariadb - repository: https://charts.bitnami.com/bitnami - version: 12.2.5 -- condition: postgresql.enabled - name: postgresql - repository: https://charts.bitnami.com/bitnami - version: 11.9.10 -- condition: rabbitmq.enabled - name: rabbitmq - repository: https://charts.bitnami.com/bitnami - version: 12.0.0 -- condition: redis.enabled - name: redis - repository: https://charts.bitnami.com/bitnami - version: 16.13.2 -- condition: grafana.enabled - name: grafana - repository: https://grafana.github.io/helm-charts - version: 6.57.1 -- condition: ingress-nginx.enabled - name: ingress-nginx - repository: https://kubernetes.github.io/ingress-nginx - version: 4.1.4 -- condition: prometheus.enabled - name: prometheus - repository: https://prometheus-community.github.io/helm-charts - version: 25.8.2 -description: Developer-friendly incident response with brilliant Slack integration -name: oncall -type: application -version: 1.3.94 diff --git a/packages/system/grafana-oncall/charts/oncall/README.md b/packages/system/grafana-oncall/charts/oncall/README.md deleted file mode 100644 index 993f0649..00000000 --- a/packages/system/grafana-oncall/charts/oncall/README.md +++ /dev/null @@ -1,431 +0,0 @@ -# Grafana OnCall Helm Chart - -This Grafana OnCall Chart is the best way to operate Grafana OnCall on Kubernetes. 
-It will deploy Grafana OnCall engine and celery workers, along with RabbitMQ cluster, Redis Cluster, and MySQL 5.7 database. -It will also deploy cert manager and nginx ingress controller, as Grafana OnCall backend might need to be externally available -to receive alerts from other monitoring systems. Grafana OnCall engine acts as a backend and can be connected to the -Grafana frontend plugin named Grafana OnCall. -Architecture diagram can be found [here](https://raw.githubusercontent.com/grafana/oncall/dev/docs/img/architecture_diagram.png) - -## Production usage - -**Default helm chart configuration is not intended for production.** -The helm chart includes all the services into a single release, which is not recommended for production usage. -It is recommended to run stateful services such as MySQL and RabbitMQ separately from this release or use managed -PaaS solutions. It will significantly reduce the overhead of managing them. -Here are the instructions on how to set up your own [ingress](#set-up-external-access), [MySQL](#connect-external-mysql), -[RabbitMQ](#connect-external-rabbitmq), [Redis](#connect-external-redis) - -### Cluster requirements - -- ensure you can run x86-64/amd64 workloads. arm64 architecture is currently not supported -- kubernetes version 1.25+ is not supported, if cert-manager is enabled - -## Install - -### Prepare the repo - -```bash -# Add the repository -helm repo add grafana https://grafana.github.io/helm-charts -helm repo update -``` - -### Installing the helm chart - -```bash -# Install the chart -helm install \ - --wait \ - --set base_url=example.com \ - --set grafana."grafana\.ini".server.domain=example.com \ - release-oncall \ - grafana/oncall -``` - -Follow the `helm install` output to finish setting up Grafana OnCall backend and Grafana OnCall frontend plugin e.g. 
- -```bash -👋 Your Grafana OnCall instance has been successfully deployed - - ❗ Set up a DNS record for your domain (use A Record and "@" to point a root domain to the IP address) - Get the external IP address by running the following commands and point example.com to it: - - kubectl get ingress release-oncall -o jsonpath="{.status.loadBalancer.ingress[0].ip}" - - Wait until the dns record got propagated. - NOTE: Check with the following command: nslookup example.com - Try reaching https://example.com/ready/ from the browser, make sure it is not cached locally - - 🦎 Grafana was installed as a part of this helm release. Open https://example.com/grafana/plugins/grafana-oncall-app - The User is admin - Get password by running this command: - - kubectl get secret --namespace default release-oncall-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo - - 🔗 Connect Grafana OnCall Plugin to Grafana OnCall backend: - - Fill the Grafana OnCall Backend URL: - - http://release-oncall-engine:8080 - -🎉🎉🎉 Done! 🎉🎉🎉 -``` - -## Configuration - -You can edit values.yml to make changes to the helm chart configuration and re-deploy the release with the following command: - -```bash -helm upgrade \ - --install \ - --wait \ - --set base_url=example.com \ - --set grafana."grafana\.ini".server.domain=example.com \ - release-oncall \ - grafana/oncall -``` - -### Passwords and external secrets - -As OnCall subcharts are Bitname charts, there is a common approach to secrets. Bundled charts allow specifying passwords -in values.yaml explicitly or as K8s secret value. OnCall chart refers either to secret created in sub-chart or -to specified external secret. -Similarly, if component chart is disabled, the password(s) can be supplied in `external` value -(e.g. externalMysql) explicitly or as K8s secret value. In the first case, the secret is created with the specified -value. In the second case the external secret is used. 
- -- If `.auth.existingSecret` is non-empty, then this secret is used. Secret keys are pre-defined by chart. -- If subchart supports password files and `.customPasswordFiles` dictionary is non-empty, then password files - are used. Dictionary keys are pre-defined per sub-chart. Password files are not supported by OnCall chart and should - not be used with bundled sub-charts. -- Passwords are specified via `auth` section values, e.g. `auth.password`. K8s secret is created. - - If `.auth.forcePassword` is `true`, then passwords MUST be specified. Otherwise, missing passwords - are generated. - -If external component is used instead of the bundled one: - -- If existingSecret within appropriate external component values is non-empty (e.g. `externalMysql.existingSecret`) then - it is used together with corresponding key names, e.g. `externalMysql.passwordKey`. -- Otherwise, corresponding password values are used, e.g. `externalMysql.password`. K8s secret is created by OnCall chart. - -Below is the summary for the dependent charts. - -MySQL/MariaDB: - -```yaml -database: - type: "mysql" # This is default -mariaDB: - enabled: true # Default - auth: - existingSecret: "" - forcePassword: false - # Secret name: `-mariadb` - rootPassword: "" # Secret key: mariadb-root-password - password: "" # Secret key: mariadb-password - replicationPassword: "" # Secret key: mariadb-replication-password -externalMysql: - password: "" - existingSecret: "" - passwordKey: "" -``` - -Postgres: - -```yaml -database: - type: postgresql -mariadb: - enabled: false # Must be set to false for Postgres -postgresql: - enabled: true # Must be set to true for bundled Postgres - auth: - existingSecret: "" - secretKeys: - adminPasswordKey: "" - userPasswordKey: "" # Not needed - replicationPasswordKey: "" # Not needed with disabled replication - # Secret name: `-postgresql` - postgresPassword: "" # password for admin user postgres. As non-admin user is not created, only this one is relevant. 
- password: "" # Not needed - replicationPassword: "" # Not needed with disabled replication -externalPostgresql: - user: "" - password: "" - existingSecret: "" - passwordKey: "" -``` - -Rabbitmq: - -```yaml -rabbitmq: - enabled: true - auth: - existingPasswordSecret: "" # Must contain `rabbitmq-password` key - existingErlangSecret: "" # Must contain `rabbitmq-erlang-cookie` key - # Secret name: `-rabbitmq` - password: "" - erlangCookie: "" -externalRabbitmq: - user: "" - password: "" - existingSecret: "" - passwordKey: "" - usernameKey: "" -``` - -Redis: - -```yaml -redis: - enabled: true - auth: - existingSecret: "" - existingSecretPasswordKey: "" - # Secret name: `-redis` - password: "" -externalRedis: - password: "" - existingSecret: "" - passwordKey: "" -``` - -### Running split ingestion and API services - -You can run a detached service for handling integrations by setting up the following variables: - -```yaml -detached_integrations: - enabled: true -detached_integrations_service: - enabled: true -``` - -This will run an integrations-only service listening by default in port 30003. - -### Set up Slack and Telegram - -You can set up Slack connection via following variables: - -```yaml -oncall: - slack: - enabled: true - commandName: oncall - clientId: ~ - clientSecret: ~ - signingSecret: ~ - existingSecret: "" - clientIdKey: "" - clientSecretKey: "" - signingSecretKey: "" - redirectHost: ~ -``` - -`oncall.slack.commandName` is used for changing default bot slash command, -`oncall`. In slack, it could be called via `/`. - -To set up Telegram token and webhook url use: - -```yaml -oncall: - telegram: - enabled: true - token: ~ - webhookUrl: ~ -``` - -To use Telegram long polling instead of webhook use: - -```yaml -telegramPolling: - enabled: true -``` - -### Set up external access - -Grafana OnCall can be connected to the external monitoring systems or grafana deployed to the other cluster. 
-Nginx Ingress Controller and Cert Manager charts are included in the helm chart with the default configuration. -If you set the DNS A Record pointing to the external IP address of the installation with the Hostname matching -base_url parameter, https will be automatically set up. If grafana is enabled in the chart values, it will also be -available on `https:///grafana/`. See the details in `helm install` output. - -To use a different ingress controller or tls certificate management system, set the following values to -false and edit ingress settings - -```yaml -ingress-nginx: - enabled: false - -cert-manager: - enabled: false - -ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: "nginx" - cert-manager.io/issuer: "letsencrypt-prod" -``` - -### Use PostgreSQL instead of MySQL - -It is possible to use PostgreSQL instead of MySQL. To do so, set mariadb.enabled to `false`, -postgresql.enabled to `true` and database.type to `postgresql`. - -```yaml -mariadb: - enabled: false - -postgresql: - enabled: true - -database: - type: postgresql -``` - -### Connect external MySQL - -It is recommended to use the managed MySQL 5.7 database provided by your cloud provider -Make sure to create the database with the following parameters before installing this chart - -```sql -CREATE DATABASE oncall CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; -``` - -To use an external MySQL instance set mariadb.enabled to `false` and configure the `externalMysql` parameters. 
- -```yaml -mariadb: - enabled: false - -# Make sure to create the database with the following parameters: -# CREATE DATABASE oncall CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; -externalMysql: - host: - port: - db_name: - user: - password: - existingSecret: "" - usernameKey: username - passwordKey: password -``` - -### Connect external PostgreSQL - -To use an external PostgreSQL instance set mariadb.enabled to `false`, -postgresql.enabled to `false`, database.type to `postgresql` and configure -the `externalPostgresql` parameters. - -```yaml -mariadb: - enabled: false - -postgresql: - enabled: false - -database: - type: postgresql - -# Make sure to create the database with the following parameters: -# CREATE DATABASE oncall WITH ENCODING UTF8; -externalPostgresql: - host: - port: - db_name: - user: - password: - existingSecret: "" - passwordKey: password -``` - -### Connect external RabbitMQ - -Option 1. Install RabbitMQ separately into the cluster using the [official documentation](https://www.rabbitmq.com/kubernetes/operator/operator-overview.html) -Option 2. Use managed solution such as [CloudAMPQ](https://www.cloudamqp.com/) - -To use an external RabbitMQ instance set rabbitmq.enabled to `false` and configure the `externalRabbitmq` parameters. - -```yaml -rabbitmq: - enabled: false # Disable the RabbitMQ dependency from the release - -externalRabbitmq: - host: - port: - user: - password: - protocol: - vhost: - existingSecret: "" - passwordKey: password - usernameKey: username -``` - -### Connect external Redis - -To use an external Redis instance set redis.enabled to `false` and configure the `externalRedis` parameters. 
- -```yaml -redis: - enabled: false # Disable the Redis dependency from the release - -externalRedis: - host: - password: - existingSecret: "" - passwordKey: password -``` - -## Update - -```bash -# Add & upgrade the repository -helm repo add grafana https://grafana.github.io/helm-charts -helm repo update - -# Re-deploy -helm upgrade \ - --install \ - --wait \ - --set base_url=example.com \ - --set grafana."grafana\.ini".server.domain=example.com \ - release-oncall \ - grafana/oncall -``` - -After re-deploying, please also update the Grafana OnCall plugin on the plugin version page. -See [Grafana docs](https://grafana.com/docs/grafana/latest/administration/plugin-management/#update-a-plugin) for -more info on updating Grafana plugins. - -## Uninstall - -### Uninstalling the helm chart - -```bash -helm delete release-oncall -``` - -### Clean up PVC's - -```bash -kubectl delete pvc data-release-oncall-mariadb-0 data-release-oncall-rabbitmq-0 \ -redis-data-release-oncall-redis-master-0 redis-data-release-oncall-redis-replicas-0 \ -redis-data-release-oncall-redis-replicas-1 redis-data-release-oncall-redis-replicas-2 -``` - -### Clean up secrets - -```bash -kubectl delete secrets certificate-tls release-oncall-cert-manager-webhook-ca release-oncall-ingress-nginx-admission -``` - -## Troubleshooting - -### Issues during initial configuration - -In the event that you run into issues during initial configuration, it is possible that mismatching versions between -your OnCall backend and UI is the culprit. Ensure that the versions match, and if not, -consider updating your `helm` deployment. 
diff --git a/packages/system/grafana-oncall/charts/oncall/templates/NOTES.txt b/packages/system/grafana-oncall/charts/oncall/templates/NOTES.txt deleted file mode 100644 index 4eee164c..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/NOTES.txt +++ /dev/null @@ -1,41 +0,0 @@ -================================================================= -📞 Grafana OnCall Notes -================================================================= - -👋 Your Grafana OnCall instance has been successfully deployed - -{{- if not .Values.migrate.enabled }} - 🤖 To migrate the database run these commands: - - export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "oncall.name" . }},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=engine" -o jsonpath="{.items[0].metadata.name}") - kubectl exec -it $POD_NAME -c wait-for-db -- bash -c "python manage.py migrate;" -{{- end }} - - ❗ Set up a DNS record for your domain (use A Record and "@" to point a root domain to the IP address) - Get the external IP address by running the following commands and point {{ .Values.base_url }} to it: - - kubectl get ingress {{ include "oncall.fullname" . }} -o jsonpath="{.status.loadBalancer.ingress[0].ip}" - - Wait until the dns record got propagated. - NOTE: Check with the following command: nslookup {{ .Values.base_url }} - Try reaching https://{{ .Values.base_url }}/ready/ from the browser, make sure it is not cached locally - -{{- if .Values.grafana.enabled }} - 🦎 Grafana was installed as a part of this helm release. Open https://{{ .Values.base_url }}/grafana/plugins/grafana-oncall-app - The User is {{ .Values.grafana.adminUser }} - Get password by running this command: - - kubectl get secret --namespace {{ .Release.Namespace }} {{ template "oncall.grafana.fullname" . }} -o jsonpath="{.data.admin-password}" | base64 --decode ; echo - -{{- else }} - 🦎 Grafana was NOT installed as a part of this helm release. 
Open external Grafana, go to "Configuration" - "Plugins" and find Grafana OnCall plugin - NOTE: Make sure your external Grafana is available by the network for the containers installed by this release. -{{- end }} - - 🔗 Connect Grafana OnCall Plugin to Grafana OnCall backend: - - Fill the Grafana OnCall Backend URL: - - http://{{ include "oncall.engine.fullname" . }}:8080 - -🎉🎉🎉 Done! 🎉🎉🎉 diff --git a/packages/system/grafana-oncall/charts/oncall/templates/_env.tpl b/packages/system/grafana-oncall/charts/oncall/templates/_env.tpl deleted file mode 100644 index 56dff3e5..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/_env.tpl +++ /dev/null @@ -1,656 +0,0 @@ -{{- define "snippet.oncall.env" -}} -- name: BASE_URL - value: {{ .Values.base_url_protocol }}://{{ .Values.base_url }} -- name: SECRET_KEY - valueFrom: - secretKeyRef: - name: {{ include "snippet.oncall.secret.name" . }} - key: {{ include "snippet.oncall.secret.secretKey" . | quote }} -- name: MIRAGE_SECRET_KEY - valueFrom: - secretKeyRef: - name: {{ include "snippet.oncall.secret.name" . }} - key: {{ include "snippet.oncall.secret.mirageSecretKey" . | quote }} -- name: MIRAGE_CIPHER_IV - value: {{ .Values.oncall.mirageCipherIV | default "1234567890abcdef" | quote }} -- name: DJANGO_SETTINGS_MODULE - value: "settings.helm" -- name: AMIXR_DJANGO_ADMIN_PATH - value: "admin" -- name: OSS - value: "True" -- name: DETACHED_INTEGRATIONS_SERVER - value: {{ .Values.detached_integrations.enabled | toString | title | quote }} -{{- include "snippet.oncall.uwsgi" . }} -- name: BROKER_TYPE - value: {{ .Values.broker.type | default "rabbitmq" }} -- name: GRAFANA_API_URL - value: {{ include "snippet.grafana.url" . | quote }} -{{- end }} - -{{- define "snippet.oncall.secret.name" -}} -{{ if .Values.oncall.secrets.existingSecret -}} - {{ .Values.oncall.secrets.existingSecret }} -{{- else -}} - {{ include "oncall.fullname" . 
}} -{{- end }} -{{- end }} - -{{- define "snippet.oncall.secret.secretKey" -}} -{{ if .Values.oncall.secrets.existingSecret -}} - {{ required "oncall.secrets.secretKey is required if oncall.secret.existingSecret is not empty" .Values.oncall.secrets.secretKey }} -{{- else -}} - SECRET_KEY -{{- end }} -{{- end }} - -{{- define "snippet.oncall.secret.mirageSecretKey" -}} -{{ if .Values.oncall.secrets.existingSecret -}} - {{ required "oncall.secrets.mirageSecretKey is required if oncall.secret.existingSecret is not empty" .Values.oncall.secrets.mirageSecretKey }} -{{- else -}} - MIRAGE_SECRET_KEY -{{- end }} -{{- end }} - -{{- define "snippet.oncall.uwsgi" -}} -{{- if .Values.uwsgi }} - {{- range $key, $value := .Values.uwsgi }} -- name: UWSGI_{{ $key | upper | replace "-" "_" }} - value: {{ $value | quote }} - {{- end }} -{{- end }} -{{- end }} - -{{- define "snippet.oncall.slack.env" -}} -- name: FEATURE_SLACK_INTEGRATION_ENABLED - value: {{ .Values.oncall.slack.enabled | toString | title | quote }} -{{- if .Values.oncall.slack.enabled }} -- name: SLACK_SLASH_COMMAND_NAME - value: "/{{ .Values.oncall.slack.commandName | default "oncall" }}" -{{- if .Values.oncall.slack.existingSecret }} -- name: SLACK_CLIENT_OAUTH_ID - valueFrom: - secretKeyRef: - name: {{ .Values.oncall.slack.existingSecret }} - key: {{ required "oncall.slack.clientIdKey is required if oncall.slack.existingSecret is not empty" .Values.oncall.slack.clientIdKey | quote }} -- name: SLACK_CLIENT_OAUTH_SECRET - valueFrom: - secretKeyRef: - name: {{ .Values.oncall.slack.existingSecret }} - key: {{ required "oncall.slack.clientSecretKey is required if oncall.slack.existingSecret is not empty" .Values.oncall.slack.clientSecretKey | quote }} -- name: SLACK_SIGNING_SECRET - valueFrom: - secretKeyRef: - name: {{ .Values.oncall.slack.existingSecret }} - key: {{ required "oncall.slack.signingSecretKey is required if oncall.slack.existingSecret is not empty" .Values.oncall.slack.signingSecretKey | quote }} -{{- 
else }} -- name: SLACK_CLIENT_OAUTH_ID - value: {{ .Values.oncall.slack.clientId | default "" | quote }} -- name: SLACK_CLIENT_OAUTH_SECRET - value: {{ .Values.oncall.slack.clientSecret | default "" | quote }} -- name: SLACK_SIGNING_SECRET - value: {{ .Values.oncall.slack.signingSecret | default "" | quote }} -{{- end }} -- name: SLACK_INSTALL_RETURN_REDIRECT_HOST - value: {{ .Values.oncall.slack.redirectHost | default (printf "https://%s" .Values.base_url) | quote }} -{{- end }} -{{- end }} - -{{- define "snippet.oncall.telegram.env" -}} -{{- if .Values.telegramPolling.enabled -}} -{{- $_ := set .Values.oncall.telegram "enabled" true -}} -{{- end -}} -- name: FEATURE_TELEGRAM_INTEGRATION_ENABLED - value: {{ .Values.oncall.telegram.enabled | toString | title | quote }} -{{- if .Values.oncall.telegram.enabled }} -{{- if .Values.telegramPolling.enabled }} -- name: FEATURE_TELEGRAM_LONG_POLLING_ENABLED - value: {{ .Values.telegramPolling.enabled | toString | title | quote }} -{{- end }} -- name: TELEGRAM_WEBHOOK_HOST - value: {{ .Values.oncall.telegram.webhookUrl | default (printf "https://%s" .Values.base_url) | quote }} -{{- if .Values.oncall.telegram.existingSecret }} -- name: TELEGRAM_TOKEN - valueFrom: - secretKeyRef: - name: {{ .Values.oncall.telegram.existingSecret }} - key: {{ required "oncall.telegram.tokenKey is required if oncall.telegram.existingSecret is not empty" .Values.oncall.telegram.tokenKey | quote }} -{{- else }} -- name: TELEGRAM_TOKEN - value: {{ .Values.oncall.telegram.token | default "" | quote }} -{{- end }} -{{- end }} -{{- end }} - -{{- define "snippet.oncall.twilio.env" }} -{{- with .Values.oncall.twilio }} -{{- if .existingSecret }} -- name: TWILIO_ACCOUNT_SID - valueFrom: - secretKeyRef: - name: {{ .existingSecret }} - key: {{ required "oncall.twilio.accountSid is required if oncall.twilio.existingSecret is not empty" .accountSid | quote }} -{{- if .authTokenKey }} -- name: TWILIO_AUTH_TOKEN - valueFrom: - secretKeyRef: - name: {{ 
.existingSecret }} - key: {{ required "oncall.twilio.authTokenKey is required if oncall.twilio.existingSecret is not empty" .authTokenKey | quote }} -{{- end }} -- name: TWILIO_NUMBER - valueFrom: - secretKeyRef: - name: {{ .existingSecret }} - key: {{ required "oncall.twilio.phoneNumberKey is required if oncall.twilio.existingSecret is not empty" .phoneNumberKey | quote }} -- name: TWILIO_VERIFY_SERVICE_SID - valueFrom: - secretKeyRef: - name: {{ .existingSecret }} - key: {{ required "oncall.twilio.verifySidKey is required if oncall.twilio.existingSecret is not empty" .verifySidKey | quote }} -{{- if and .apiKeySidKey .apiKeySecretKey }} -- name: TWILIO_API_KEY_SID - valueFrom: - secretKeyRef: - name: {{ .existingSecret }} - key: {{ required "oncall.twilio.apiKeySidKey is required if oncall.twilio.existingSecret is not empty" .apiKeySidKey | quote }} -- name: TWILIO_API_KEY_SECRET - valueFrom: - secretKeyRef: - name: {{ .existingSecret }} - key: {{ required "oncall.twilio.apiKeySecretKey is required if oncall.twilio.existingSecret is not empty" .apiKeySecretKey | quote }} -{{- end }} -{{- else }} -{{- if .accountSid }} -- name: TWILIO_ACCOUNT_SID - value: {{ .accountSid | quote }} -{{- end }} -{{- if .authToken }} -- name: TWILIO_AUTH_TOKEN - value: {{ .authToken | quote }} -{{- end }} -{{- if .phoneNumber }} -- name: TWILIO_NUMBER - value: {{ .phoneNumber | quote }} -{{- end }} -{{- if .verifySid }} -- name: TWILIO_VERIFY_SERVICE_SID - value: {{ .verifySid | quote }} -{{- end }} -{{- if .apiKeySid }} -- name: TWILIO_API_KEY_SID - value: {{ .apiKeySid | quote }} -{{- end }} -{{- if .apiKeySecret }} -- name: TWILIO_API_KEY_SECRET - value: {{ .apiKeySecret | quote }} -{{- end }} -{{- end }} -{{- if .limitPhone }} -- name: PHONE_NOTIFICATIONS_LIMIT - value: {{ .limitPhone | quote }} -{{- end }} -{{- end }} -{{- end }} - -{{- define "snippet.celery.env" }} -{{- if .Values.celery.worker_queue }} -- name: CELERY_WORKER_QUEUE - value: {{ .Values.celery.worker_queue | 
quote }} -{{- end }} -{{- if .Values.celery.worker_concurrency }} -- name: CELERY_WORKER_CONCURRENCY - value: {{ .Values.celery.worker_concurrency | quote }} -{{- end }} -{{- if .Values.celery.worker_max_tasks_per_child }} -- name: CELERY_WORKER_MAX_TASKS_PER_CHILD - value: {{ .Values.celery.worker_max_tasks_per_child | quote }} -{{- end }} -{{- if .Values.celery.worker_beat_enabled }} -- name: CELERY_WORKER_BEAT_ENABLED - value: {{ .Values.celery.worker_beat_enabled | quote }} -{{- end }} -{{- if .Values.celery.worker_shutdown_interval }} -- name: CELERY_WORKER_SHUTDOWN_INTERVAL - value: {{ .Values.celery.worker_shutdown_interval | quote }} -{{- end }} -{{- end }} - -{{- define "snippet.grafana.url" -}} -{{ if .Values.grafana.enabled -}} - http://{{ include "oncall.grafana.fullname" . }} -{{- else -}} - {{ required "externalGrafana.url is required when not grafana.enabled" .Values.externalGrafana.url }} -{{- end }} -{{- end }} - -{{- define "snippet.mysql.env" -}} -- name: MYSQL_HOST - value: {{ include "snippet.mysql.host" . | quote }} -- name: MYSQL_PORT - value: {{ include "snippet.mysql.port" . | quote }} -- name: MYSQL_DB_NAME - value: {{ include "snippet.mysql.db" . | quote }} -- name: MYSQL_USER -{{- if and (not .Values.mariadb.enabled) .Values.externalMysql.existingSecret .Values.externalMysql.usernameKey (not .Values.externalMysql.user) }} - valueFrom: - secretKeyRef: - name: {{ include "snippet.mysql.password.secret.name" . }} - key: {{ .Values.externalMysql.usernameKey | quote }} -{{- else }} - value: {{ include "snippet.mysql.user" . | quote }} -{{- end }} -- name: MYSQL_PASSWORD - valueFrom: - secretKeyRef: - name: {{ include "snippet.mysql.password.secret.name" . }} - key: {{ include "snippet.mysql.password.secret.key" . | quote }} -{{- if not .Values.mariadb.enabled }} -{{- with .Values.externalMysql.options }} -- name: MYSQL_OPTIONS - value: {{ . 
| quote }} -{{- end }} -{{- end }} -{{- end }} - -{{- define "snippet.mysql.password.secret.name" -}} -{{ if .Values.mariadb.enabled -}} - {{ if .Values.mariadb.auth.existingSecret -}} - {{ .Values.mariadb.auth.existingSecret }} - {{- else -}} - {{ include "oncall.mariadb.fullname" . }} - {{- end }} -{{- else -}} - {{ if .Values.externalMysql.existingSecret -}} - {{ .Values.externalMysql.existingSecret }} - {{- else -}} - {{ include "oncall.fullname" . }}-mysql-external - {{- end }} -{{- end }} -{{- end }} - -{{- define "snippet.mysql.password.secret.key" -}} -{{ if and (not .Values.mariadb.enabled) .Values.externalMysql.existingSecret .Values.externalMysql.passwordKey -}} - {{ .Values.externalMysql.passwordKey }} -{{- else -}} - mariadb-root-password -{{- end }} -{{- end }} - -{{- define "snippet.mysql.host" -}} -{{ if and (not .Values.mariadb.enabled) .Values.externalMysql.host -}} - {{ .Values.externalMysql.host }} -{{- else -}} - {{ include "oncall.mariadb.fullname" . }} -{{- end }} -{{- end }} - -{{- define "snippet.mysql.port" -}} -{{ if and (not .Values.mariadb.enabled) .Values.externalMysql.port -}} - {{ .Values.externalMysql.port }} -{{- else -}} - 3306 -{{- end }} -{{- end }} - -{{- define "snippet.mysql.db" -}} -{{ if and (not .Values.mariadb.enabled) .Values.externalMysql.db_name -}} - {{ .Values.externalMysql.db_name }} -{{- else -}} - {{ .Values.mariadb.auth.database | default "oncall" }} -{{- end }} -{{- end }} - -{{- define "snippet.mysql.user" -}} -{{ if and (not .Values.mariadb.enabled) .Values.externalMysql.user -}} - {{ .Values.externalMysql.user }} -{{- else -}} - {{ .Values.mariadb.auth.username | default "root" }} -{{- end }} -{{- end }} - -{{- define "snippet.postgresql.env" -}} -- name: DATABASE_TYPE - value: {{ .Values.database.type | quote }} -- name: DATABASE_HOST - value: {{ include "snippet.postgresql.host" . | quote }} -- name: DATABASE_PORT - value: {{ include "snippet.postgresql.port" . 
| quote }} -- name: DATABASE_NAME - value: {{ include "snippet.postgresql.db" . | quote }} -- name: DATABASE_USER - value: {{ include "snippet.postgresql.user" . | quote }} -- name: DATABASE_PASSWORD - valueFrom: - secretKeyRef: - name: {{ include "snippet.postgresql.password.secret.name" . }} - key: {{ include "snippet.postgresql.password.secret.key" . | quote }} -{{- if not .Values.postgresql.enabled }} -{{- with .Values.externalPostgresql.options }} -- name: DATABASE_OPTIONS - value: {{ . | quote }} -{{- end }} -{{- end }} -{{- end }} - -{{- define "snippet.sqlite.env" -}} -- name: DATABASE_TYPE - value: sqlite3 -- name: DATABASE_NAME - value: /etc/app/oncall.db -{{- end }} - -{{- define "snippet.postgresql.password.secret.name" -}} -{{ if .Values.postgresql.enabled -}} - {{ if .Values.postgresql.auth.existingSecret -}} - {{ .Values.postgresql.auth.existingSecret }} - {{- else -}} - {{ include "oncall.postgresql.fullname" . }} - {{- end }} -{{- else -}} - {{ if .Values.externalPostgresql.existingSecret -}} - {{ .Values.externalPostgresql.existingSecret }} - {{- else -}} - {{ include "oncall.fullname" . 
}}-postgresql-external - {{- end }} -{{- end }} -{{- end }} - -{{- define "snippet.postgresql.password.secret.key" -}} -{{ if .Values.postgresql.enabled -}} - {{ if .Values.postgresql.auth.existingSecret -}} - {{ required "postgresql.auth.secretKeys.adminPasswordKey is required if database.type=postgres and postgresql.enabled and postgresql.auth.existingSecret" .Values.postgresql.auth.secretKeys.adminPasswordKey }} - {{- else -}} - {{ include "postgresql.userPasswordKey" .Subcharts.postgresql }} - {{- end }} -{{- else -}} - {{ if .Values.externalPostgresql.existingSecret -}} - {{ required "externalPostgresql.passwordKey is required if database.type=postgres and not postgresql.enabled and postgresql.auth.existingSecret" .Values.externalPostgresql.passwordKey }} - {{- else -}} - postgres-password - {{- end }} -{{- end }} -{{- end }} - -{{- define "snippet.postgresql.host" -}} -{{ if not .Values.postgresql.enabled -}} - {{ required "externalPostgresql.host is required if database.type=postgres and not postgresql.enabled" .Values.externalPostgresql.host }} -{{- else -}} - {{ include "oncall.postgresql.fullname" . 
}} -{{- end }} -{{- end }} - -{{- define "snippet.postgresql.port" -}} -{{ if and (not .Values.postgresql.enabled) .Values.externalPostgresql.port -}} - {{ .Values.externalPostgresql.port }} -{{- else -}} - 5432 -{{- end }} -{{- end }} - -{{- define "snippet.postgresql.db" -}} -{{ if not .Values.postgresql.enabled -}} - {{ .Values.externalPostgresql.db_name | default "oncall" }} -{{- else -}} - {{ .Values.postgresql.auth.database | default "oncall" }} -{{- end }} -{{- end }} - -{{- define "snippet.postgresql.user" -}} -{{ if not .Values.postgresql.enabled -}} - {{ .Values.externalPostgresql.user | default "postgres" }} -{{- else -}} - {{ .Values.postgresql.auth.username | default "postgres" }} -{{- end }} -{{- end }} - -{{- define "snippet.rabbitmq.env" }} -- name: RABBITMQ_USERNAME -{{- if and (not .Values.rabbitmq.enabled) .Values.externalRabbitmq.existingSecret .Values.externalRabbitmq.usernameKey (not .Values.externalRabbitmq.user) }} - valueFrom: - secretKeyRef: - name: {{ include "snippet.rabbitmq.password.secret.name" . }} - key: {{ .Values.externalRabbitmq.usernameKey | quote }} -{{- else }} - value: {{ include "snippet.rabbitmq.user" . | quote }} -{{- end }} -- name: RABBITMQ_PASSWORD - valueFrom: - secretKeyRef: - name: {{ include "snippet.rabbitmq.password.secret.name" . }} - key: {{ include "snippet.rabbitmq.password.secret.key" . | quote }} -- name: RABBITMQ_HOST - value: {{ include "snippet.rabbitmq.host" . | quote }} -- name: RABBITMQ_PORT - value: {{ include "snippet.rabbitmq.port" . | quote }} -- name: RABBITMQ_PROTOCOL - value: {{ include "snippet.rabbitmq.protocol" . | quote }} -- name: RABBITMQ_VHOST - value: {{ include "snippet.rabbitmq.vhost" . 
| quote }} -{{- end }} - -{{- define "snippet.rabbitmq.user" -}} -{{ if not .Values.rabbitmq.enabled -}} - {{ required "externalRabbitmq.user is required if not rabbitmq.enabled" .Values.externalRabbitmq.user }} -{{- else -}} - user -{{- end }} -{{- end }} - -{{- define "snippet.rabbitmq.host" -}} -{{ if not .Values.rabbitmq.enabled -}} - {{ required "externalRabbitmq.host is required if not rabbitmq.enabled" .Values.externalRabbitmq.host }} -{{- else -}} - {{ include "oncall.rabbitmq.fullname" . }} -{{- end }} -{{- end }} - -{{- define "snippet.rabbitmq.port" -}} -{{ if and (not .Values.rabbitmq.enabled) .Values.externalRabbitmq.port -}} - {{ required "externalRabbitmq.port is required if not rabbitmq.enabled" .Values.externalRabbitmq.port }} -{{- else -}} - 5672 -{{- end }} -{{- end }} - -{{- define "snippet.rabbitmq.protocol" -}} -{{ if and (not .Values.rabbitmq.enabled) .Values.externalRabbitmq.protocol -}} - {{ .Values.externalRabbitmq.protocol }} -{{- else -}} - amqp -{{- end }} -{{- end }} - -{{- define "snippet.rabbitmq.vhost" -}} -{{ if and (not .Values.rabbitmq.enabled) .Values.externalRabbitmq.vhost -}} - {{ .Values.externalRabbitmq.vhost }} -{{- end }} -{{- end }} - -{{- define "snippet.rabbitmq.password.secret.name" -}} -{{ if .Values.rabbitmq.enabled -}} - {{ if .Values.rabbitmq.auth.existingPasswordSecret -}} - {{ .Values.rabbitmq.auth.existingPasswordSecret }} - {{- else -}} - {{ include "oncall.rabbitmq.fullname" . }} - {{- end }} -{{- else -}} - {{ if .Values.externalRabbitmq.existingSecret -}} - {{ .Values.externalRabbitmq.existingSecret }} - {{- else -}} - {{ include "oncall.fullname" . 
}}-rabbitmq-external - {{- end }} -{{- end }} -{{- end }} - -{{- define "snippet.rabbitmq.password.secret.key" -}} -{{ if and (not .Values.rabbitmq.enabled) .Values.externalRabbitmq.passwordKey -}} - {{ .Values.externalRabbitmq.passwordKey }} -{{- else -}} - rabbitmq-password -{{- end }} -{{- end }} - -{{- define "snippet.redis.protocol" -}} -{{ default "redis" .Values.externalRedis.protocol | quote }} -{{- end }} - -{{- define "snippet.redis.host" -}} -{{ if not .Values.redis.enabled -}} - {{ required "externalRedis.host is required if not redis.enabled" .Values.externalRedis.host | quote }} -{{- else -}} - {{ include "oncall.redis.fullname" . }}-master -{{- end }} -{{- end }} - -{{- define "snippet.redis.port" -}} -{{ default 6379 .Values.externalRedis.port | quote }} -{{- end }} - -{{- define "snippet.redis.database" -}} -{{ default 0 .Values.externalRedis.database | quote }} -{{- end }} - -{{- define "snippet.redis.password.secret.name" -}} -{{ if .Values.redis.enabled -}} - {{ if .Values.redis.auth.existingSecret -}} - {{ .Values.redis.auth.existingSecret }} - {{- else -}} - {{ include "oncall.redis.fullname" . }} - {{- end }} -{{- else -}} - {{ if .Values.externalRedis.existingSecret -}} - {{ .Values.externalRedis.existingSecret }} - {{- else -}} - {{ include "oncall.fullname" . 
}}-redis-external - {{- end }} -{{- end }} -{{- end }} - -{{- define "snippet.redis.password.secret.key" -}} -{{ if .Values.redis.enabled -}} - {{ if .Values.redis.auth.existingSecret -}} - {{ required "redis.auth.existingSecretPasswordKey is required if redis.auth.existingSecret is non-empty" .Values.redis.auth.existingSecretPasswordKey }} - {{- else -}} - redis-password - {{- end }} -{{- else -}} - {{ if .Values.externalRedis.existingSecret -}} - {{ required "externalRedis.passwordKey is required if externalRedis.existingSecret is non-empty" .Values.externalRedis.passwordKey }} - {{- else -}} - redis-password - {{- end }} -{{- end }} -{{- end }} - -{{- define "snippet.redis.env" -}} -- name: REDIS_PROTOCOL - value: {{ include "snippet.redis.protocol" . }} -- name: REDIS_HOST - value: {{ include "snippet.redis.host" . }} -- name: REDIS_PORT - value: {{ include "snippet.redis.port" . }} -- name: REDIS_DATABASE - value: {{ include "snippet.redis.database" . }} -- name: REDIS_USERNAME - value: {{ default "" .Values.externalRedis.username | quote }} -- name: REDIS_PASSWORD - valueFrom: - secretKeyRef: - name: {{ include "snippet.redis.password.secret.name" . }} - key: {{ include "snippet.redis.password.secret.key" . | quote}} -{{- if and (not .Values.redis.enabled) .Values.externalRedis.ssl_options.enabled }} -- name: REDIS_USE_SSL - value: "true" -{{- with .Values.externalRedis.ssl_options.ca_certs }} -- name: REDIS_SSL_CA_CERTS - value: {{ . | quote }} -{{- end }} -{{- with .Values.externalRedis.ssl_options.certfile }} -- name: REDIS_SSL_CERTFILE - value: {{ . | quote }} -{{- end }} -{{- with .Values.externalRedis.ssl_options.keyfile }} -- name: REDIS_SSL_KEYFILE - value: {{ . | quote }} -{{- end }} -{{- with .Values.externalRedis.ssl_options.cert_reqs }} -- name: REDIS_SSL_CERT_REQS - value: {{ . 
| quote }} -{{- end }} -{{- end }} -{{- end }} - -{{- /* -when broker.type != rabbitmq, we do not need to include rabbitmq environment variables -*/}} -{{- define "snippet.broker.env" -}} -{{- include "snippet.redis.env" . }} -{{- if eq .Values.broker.type "rabbitmq" -}} -{{- include "snippet.rabbitmq.env" . }} -{{- end }} -{{- end }} - -{{- define "snippet.db.env" -}} -{{- if eq .Values.database.type "mysql" }} -{{- include "snippet.mysql.env" . }} -{{- else if eq .Values.database.type "postgresql" }} -{{- include "snippet.postgresql.env" . }} -{{- else if eq .Values.database.type "sqlite" -}} -{{- include "snippet.sqlite.env" . }} -{{- else -}} -{{- fail "value for .Values.db.type must be either 'mysql', 'postgresql', or 'sqlite'" }} -{{- end }} -{{- end }} - -{{- define "snippet.oncall.smtp.env" -}} -- name: FEATURE_EMAIL_INTEGRATION_ENABLED - value: {{ .Values.oncall.smtp.enabled | toString | title | quote }} -{{- if .Values.oncall.smtp.enabled }} -- name: EMAIL_HOST - value: {{ .Values.oncall.smtp.host | quote }} -- name: EMAIL_PORT - value: {{ .Values.oncall.smtp.port | default "587" | quote }} -- name: EMAIL_HOST_USER - value: {{ .Values.oncall.smtp.username | quote }} -- name: EMAIL_HOST_PASSWORD - valueFrom: - secretKeyRef: - name: {{ include "oncall.fullname" . }}-smtp - key: smtp-password - optional: true -- name: EMAIL_USE_TLS - value: {{ .Values.oncall.smtp.tls | default true | toString | title | quote }} -- name: EMAIL_FROM_ADDRESS - value: {{ .Values.oncall.smtp.fromEmail | quote }} -- name: EMAIL_NOTIFICATIONS_LIMIT - value: {{ .Values.oncall.smtp.limitEmail | default "200" | quote }} -{{- end }} -{{- end }} - -{{- define "snippet.oncall.exporter.env" -}} -{{ if .Values.oncall.exporter.enabled -}} -- name: FEATURE_PROMETHEUS_EXPORTER_ENABLED - value: {{ .Values.oncall.exporter.enabled | toString | title | quote }} -- name: PROMETHEUS_EXPORTER_SECRET - valueFrom: - secretKeyRef: - name: {{ include "oncall.fullname" . 
}}-exporter - key: exporter-secret - optional: true -{{- else -}} -- name: FEATURE_PROMETHEUS_EXPORTER_ENABLED - value: {{ .Values.oncall.exporter.enabled | toString | title | quote }} -{{- end }} -{{- end }} - -{{- define "snippet.oncall.engine.env" -}} -{{ include "snippet.oncall.env" . }} -{{ include "snippet.oncall.slack.env" . }} -{{ include "snippet.oncall.telegram.env" . }} -{{ include "snippet.oncall.smtp.env" . }} -{{ include "snippet.oncall.twilio.env" . }} -{{ include "snippet.oncall.exporter.env" . }} -{{ include "snippet.db.env" . }} -{{ include "snippet.broker.env" . }} -{{ include "oncall.extraEnvs" . }} -{{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/_helpers.tpl b/packages/system/grafana-oncall/charts/oncall/templates/_helpers.tpl deleted file mode 100644 index 6486bfe5..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/_helpers.tpl +++ /dev/null @@ -1,121 +0,0 @@ -{{/* -Expand the name of the chart. -*/}} -{{- define "oncall.name" -}} -{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Create a default fully qualified app name. -We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -If release name contains chart name it will be used as a full name. -*/}} -{{- define "oncall.fullname" -}} -{{- if .Values.fullnameOverride }} -{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- $name := default .Chart.Name .Values.nameOverride }} -{{- if contains $name .Release.Name }} -{{- .Release.Name | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} -{{- end }} -{{- end }} -{{- end }} - -{{/* -Create chart name and version as used by the chart label. 
-*/}} -{{- define "oncall.chart" -}} -{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Common labels -*/}} -{{- define "oncall.labels" -}} -helm.sh/chart: {{ include "oncall.chart" . }} -{{ include "oncall.selectorLabels" . }} -{{- if .Chart.AppVersion }} -app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} -{{- end }} -app.kubernetes.io/managed-by: {{ .Release.Service }} -{{- end }} - -{{/* -Selector labels -*/}} -{{- define "oncall.selectorLabels" -}} -app.kubernetes.io/name: {{ include "oncall.name" . }} -app.kubernetes.io/instance: {{ .Release.Name }} -{{- end }} - -{{/* -Create the name of the service account to use -*/}} -{{- define "oncall.serviceAccountName" -}} -{{- if .Values.serviceAccount.create }} -{{- default (include "oncall.fullname" .) .Values.serviceAccount.name }} -{{- else }} -{{- default "default" .Values.serviceAccount.name }} -{{- end }} -{{- end }} - -{{/* Generate the fullname of mariadb subchart */}} -{{- define "oncall.mariadb.fullname" -}} -{{- printf "%s-%s" .Release.Name "mariadb" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* Generate the fullname of postgresql subchart */}} -{{- define "oncall.postgresql.fullname" -}} -{{- printf "%s-%s" .Release.Name "postgresql" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{- define "oncall.grafana.fullname" -}} -{{- printf "%s-%s" .Release.Name "grafana" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* Generate the fullname of rabbitmq subchart */}} -{{- define "oncall.rabbitmq.fullname" -}} -{{- printf "%s-%s" .Release.Name "rabbitmq" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* Generate the fullname of redis subchart */}} -{{- define "oncall.redis.fullname" -}} -{{- printf "%s-%s" .Release.Name "redis" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* Generate engine image name */}} -{{- define "oncall.engine.image" -}} -{{- printf "%s:%s" .Values.image.repository (.Values.image.tag | default 
.Chart.AppVersion) }} -{{- end }} - -{{- define "oncall.initContainer" }} -- name: wait-for-db - image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.image.pullPolicy }} - command: ['sh', '-c', "until (python manage.py migrate --check); do echo Waiting for database migrations; sleep 2; done"] - securityContext: - {{ toYaml .Values.init.securityContext | nindent 4 }} - resources: - {{ toYaml .Values.init.resources | nindent 4 }} - env: - {{- include "snippet.oncall.env" . | nindent 4 }} - {{- include "snippet.db.env" . | nindent 4 }} - {{- include "snippet.broker.env" . | nindent 4 }} - {{- include "oncall.extraEnvs" . | nindent 4 }} -{{- end }} - -{{- define "oncall.extraEnvs" -}} -{{- if .Values.env }} - {{- if (kindIs "map" .Values.env) }} - {{- range $key, $value := .Values.env }} -- name: {{ $key }} - value: {{ $value }} - {{- end -}} - {{/* support previous schema */}} - {{- else }} -{{- toYaml .Values.env }} - {{- end }} -{{- end }} -{{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/celery/_helpers.tpl b/packages/system/grafana-oncall/charts/oncall/templates/celery/_helpers.tpl deleted file mode 100644 index 8c37e957..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/celery/_helpers.tpl +++ /dev/null @@ -1,26 +0,0 @@ -{{/* -Maximum of 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -*/}} -{{- define "oncall.celery.name" -}} -{{ include "oncall.name" . | trunc 55 }}-celery -{{- end }} - -{{- define "oncall.celery.fullname" -}} -{{ include "oncall.fullname" . | trunc 55 }}-celery -{{- end }} - -{{/* -Engine common labels -*/}} -{{- define "oncall.celery.labels" -}} -{{ include "oncall.labels" . }} -app.kubernetes.io/component: celery -{{- end }} - -{{/* -Engine selector labels -*/}} -{{- define "oncall.celery.selectorLabels" -}} -{{ include "oncall.selectorLabels" . 
}} -app.kubernetes.io/component: celery -{{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/celery/deployment.yaml b/packages/system/grafana-oncall/charts/oncall/templates/celery/deployment.yaml deleted file mode 100644 index b2498dd1..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/celery/deployment.yaml +++ /dev/null @@ -1,89 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "oncall.celery.fullname" . }} - labels: - {{- include "oncall.celery.labels" . | nindent 4 }} -spec: - replicas: {{ .Values.celery.replicaCount }} - selector: - matchLabels: - {{- include "oncall.celery.selectorLabels" . | nindent 6 }} - template: - metadata: - {{- with .Values.podAnnotations }} - annotations: - random-annotation: {{ randAlphaNum 10 | lower }} - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "oncall.celery.selectorLabels" . | nindent 8 }} - {{- if .Values.celery.podLabels }} - {{- toYaml .Values.celery.podLabels | nindent 8}} - {{- end }} - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ include "oncall.serviceAccountName" . }} - securityContext: - {{- toYaml .Values.podSecurityContext | nindent 8 }} - initContainers: - {{- include "oncall.initContainer" . | indent 8 }} - {{- with .Values.celery.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.celery.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.celery.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.celery.topologySpreadConstraints }} - topologySpreadConstraints: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.celery.priorityClassName }} - priorityClassName: {{ . 
}} - {{- end }} - containers: - - name: {{ .Chart.Name }} - securityContext: - {{- toYaml .Values.securityContext | nindent 12 }} - image: {{ include "oncall.engine.image" . }} - {{- if .Values.oncall.devMode }} - command: ["python", "manage.py", "start_celery"] - {{- else }} - command: ["./celery_with_exporter.sh"] - {{- end }} - imagePullPolicy: {{ .Values.image.pullPolicy }} - env: - {{- include "snippet.celery.env" . | nindent 12 }} - {{- include "snippet.oncall.engine.env" . | nindent 12 }} - {{- if .Values.celery.livenessProbe.enabled }} - livenessProbe: - exec: - command: [ - "bash", - "-c", - "celery -A engine inspect ping -d celery@$HOSTNAME" - ] - initialDelaySeconds: {{ .Values.celery.livenessProbe.initialDelaySeconds }} - periodSeconds: {{ .Values.celery.livenessProbe.periodSeconds }} - timeoutSeconds: {{ .Values.celery.livenessProbe.timeoutSeconds }} - {{- end }} - resources: - {{- toYaml .Values.celery.resources | nindent 12 }} - {{- with .Values.celery.extraVolumeMounts }} - volumeMounts: {{- . | toYaml | nindent 12 }} - {{- end }} - {{- with .Values.celery.extraContainers }} - {{- tpl . $ | nindent 8 }} - {{- end }} - {{- with .Values.celery.extraVolumes }} - volumes: {{- . 
| toYaml | nindent 8 }} - {{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/cert-issuer.yaml b/packages/system/grafana-oncall/charts/oncall/templates/cert-issuer.yaml deleted file mode 100644 index 8b1716f3..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/cert-issuer.yaml +++ /dev/null @@ -1,22 +0,0 @@ -{{- if (index .Values "cert-manager").enabled }} -apiVersion: cert-manager.io/v1 -kind: Issuer -metadata: - name: letsencrypt-prod - annotations: - "helm.sh/hook": post-install,post-upgrade -spec: - acme: - # The ACME server URL - server: https://acme-v02.api.letsencrypt.org/directory - # Email address used for ACME registration - email: no-reply@{{ .Values.base_url }} - # Name of a secret used to store the ACME account private key - privateKeySecretRef: - name: letsencrypt-prod - # Enable the HTTP-01 challenge provider - solvers: - - http01: - ingress: - class: nginx -{{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/engine/_helpers-engine.tpl b/packages/system/grafana-oncall/charts/oncall/templates/engine/_helpers-engine.tpl deleted file mode 100644 index 6d498e93..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/engine/_helpers-engine.tpl +++ /dev/null @@ -1,26 +0,0 @@ -{{/* -Maximum of 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -*/}} -{{- define "oncall.engine.name" -}} -{{ include "oncall.name" . | trunc 55 }}-engine -{{- end }} - -{{- define "oncall.engine.fullname" -}} -{{ include "oncall.fullname" . | trunc 55 }}-engine -{{- end }} - -{{/* -Engine common labels -*/}} -{{- define "oncall.engine.labels" -}} -{{ include "oncall.labels" . }} -app.kubernetes.io/component: engine -{{- end }} - -{{/* -Engien selector labels -*/}} -{{- define "oncall.engine.selectorLabels" -}} -{{ include "oncall.selectorLabels" . 
}} -app.kubernetes.io/component: engine -{{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/engine/deployment.yaml b/packages/system/grafana-oncall/charts/oncall/templates/engine/deployment.yaml deleted file mode 100644 index ccb770df..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/engine/deployment.yaml +++ /dev/null @@ -1,98 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "oncall.engine.fullname" . }} - labels: - {{- include "oncall.engine.labels" . | nindent 4 }} -spec: - replicas: {{ .Values.engine.replicaCount }} - selector: - matchLabels: - {{- include "oncall.engine.selectorLabels" . | nindent 6 }} - strategy: - {{- toYaml .Values.engine.updateStrategy | nindent 4 }} - template: - metadata: - {{- with .Values.podAnnotations }} - annotations: - random-annotation: {{ randAlphaNum 10 | lower }} - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "oncall.engine.selectorLabels" . | nindent 8 }} - {{- if .Values.engine.podLabels }} - {{- toYaml .Values.engine.podLabels | nindent 8}} - {{- end }} - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ include "oncall.serviceAccountName" . }} - securityContext: - {{- toYaml .Values.podSecurityContext | nindent 8 }} - initContainers: - {{- include "oncall.initContainer" . | indent 8 }} - containers: - - name: {{ .Chart.Name }} - securityContext: - {{- toYaml .Values.securityContext | nindent 12 }} - image: {{ include "oncall.engine.image" . }} - imagePullPolicy: {{ .Values.image.pullPolicy }} - {{- if .Values.oncall.devMode }} - command: ["sh", "-c", "uwsgi --disable-logging --py-autoreload 3 --ini uwsgi.ini"] - {{- end }} - ports: - - name: http - containerPort: 8080 - protocol: TCP - env: - {{- include "snippet.oncall.engine.env" . 
| nindent 12 }} - livenessProbe: - httpGet: - path: /health/ - port: http - periodSeconds: 60 - timeoutSeconds: 3 - readinessProbe: - httpGet: - path: /ready/ - port: http - periodSeconds: 60 - timeoutSeconds: 3 - startupProbe: - httpGet: - path: /startupprobe/ - port: http - periodSeconds: 10 - timeoutSeconds: 3 - resources: - {{- toYaml .Values.engine.resources | nindent 12 }} - {{- with .Values.engine.extraVolumeMounts }} - volumeMounts: {{- . | toYaml | nindent 12 }} - {{- end }} - {{- with .Values.engine.extraContainers }} - {{- tpl . $ | nindent 8 }} - {{- end }} - {{- with .Values.engine.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.engine.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.engine.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.engine.topologySpreadConstraints }} - topologySpreadConstraints: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.engine.priorityClassName }} - priorityClassName: {{ . }} - {{- end }} - {{- with .Values.engine.extraVolumes }} - volumes: {{- . | toYaml | nindent 8 }} - {{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/engine/job-migrate.yaml b/packages/system/grafana-oncall/charts/oncall/templates/engine/job-migrate.yaml deleted file mode 100644 index 09782954..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/engine/job-migrate.yaml +++ /dev/null @@ -1,102 +0,0 @@ -{{- if .Values.migrate.enabled -}} -apiVersion: batch/v1 -kind: Job -metadata: - {{- if .Values.migrate.useHook }} - name: {{ printf "%s-migrate" (include "oncall.engine.fullname" .) }} - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "-1" - {{- with .Values.migrate.annotations }} - {{- toYaml . | nindent 4 }} - {{- end }} - {{- else }} - name: {{ printf "%s-migrate-%s" (include "oncall.engine.fullname" .) 
(now | date "2006-01-02-15-04-05") }} - {{- with .Values.migrate.annotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} - {{- end }} - labels: - {{- include "oncall.engine.labels" . | nindent 4 }} -spec: - backoffLimit: 15 - {{- if .Values.migrate.ttlSecondsAfterFinished }} - ttlSecondsAfterFinished: {{ .Values.migrate.ttlSecondsAfterFinished }} - {{- end }} - template: - metadata: - name: {{ printf "%s-migrate-%s" (include "oncall.engine.fullname" .) (now | date "2006-01-02-15-04-05") }} - {{- with .Values.podAnnotations }} - annotations: - random-annotation: {{ randAlphaNum 10 | lower }} - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "oncall.engine.selectorLabels" . | nindent 8 }} - spec: - restartPolicy: Never - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ include "oncall.serviceAccountName" . }} - securityContext: - {{- toYaml .Values.podSecurityContext | nindent 8 }} - {{- with .Values.migrate.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.migrate.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.migrate.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ .Chart.Name }}-migrate - securityContext: - {{- toYaml .Values.securityContext | nindent 12 }} - image: {{ include "oncall.engine.image" . 
}} - imagePullPolicy: {{ .Values.image.pullPolicy }} - command: - - /bin/sh - - -c - {{- if eq .Values.database.type "mysql" }} - - | - until (nc -vz $MYSQL_HOST $MYSQL_PORT); - do - echo "waiting for MySQL"; sleep 1; - done - python manage.py migrate - {{- else if eq .Values.database.type "postgresql" }} - - | - until (nc -vz $DATABASE_HOST $DATABASE_PORT); - do - echo "waiting for PostgreSQL"; sleep 1; - done - python manage.py migrate - {{- else }} - - python manage.py migrate - {{- end }} - env: - {{- include "snippet.oncall.env" . | nindent 12 }} - {{- include "snippet.oncall.smtp.env" . | nindent 12 }} - {{- include "snippet.oncall.exporter.env" . | nindent 12 }} - {{- include "snippet.db.env" . | nindent 12 }} - {{- include "snippet.broker.env" . | nindent 12 }} - {{- include "oncall.extraEnvs" . | nindent 12 }} - resources: - {{- toYaml .Values.migrate.resources | nindent 12 }} - {{- with .Values.migrate.extraVolumeMounts }} - volumeMounts: {{- . | toYaml | nindent 10 }} - {{- end }} - {{- with .Values.migrate.extraContainers }} - {{- tpl . $ | nindent 6 }} - {{- end }} - {{- with .Values.migrate.extraVolumes }} - volumes: {{- . | toYaml | nindent 8 }} - {{- end }} -{{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/engine/service-external.yaml b/packages/system/grafana-oncall/charts/oncall/templates/engine/service-external.yaml deleted file mode 100644 index eec1f0bf..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/engine/service-external.yaml +++ /dev/null @@ -1,24 +0,0 @@ -{{- if .Values.service.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "oncall.engine.fullname" . }}-external - labels: - {{- include "oncall.engine.labels" . | nindent 4 }} - {{- with .Values.service.annotations }} - annotations: - {{- toYaml . 
| nindent 4 }} - {{- end }} -spec: - type: {{ .Values.service.type }} - ports: - - port: {{ .Values.service.port }} - targetPort: http - protocol: TCP - name: http - {{- if and (eq .Values.service.type "NodePort") (.Values.service.nodePort) }} - nodePort: {{ .Values.service.nodePort }} - {{- end }} - selector: - {{- include "oncall.engine.selectorLabels" . | nindent 4 }} -{{- end }} \ No newline at end of file diff --git a/packages/system/grafana-oncall/charts/oncall/templates/engine/service-internal.yaml b/packages/system/grafana-oncall/charts/oncall/templates/engine/service-internal.yaml deleted file mode 100644 index 07785035..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/engine/service-internal.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{ include "oncall.engine.fullname" . }} - labels: - {{- include "oncall.engine.labels" . | nindent 4 }} -spec: - type: ClusterIP - ports: - - port: 8080 - targetPort: http - protocol: TCP - name: http - selector: - {{- include "oncall.engine.selectorLabels" . | nindent 4 }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/ingress-regular.yaml b/packages/system/grafana-oncall/charts/oncall/templates/ingress-regular.yaml deleted file mode 100644 index 9a5357ff..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/ingress-regular.yaml +++ /dev/null @@ -1,65 +0,0 @@ -{{- if .Values.ingress.enabled -}} -{{- $fullName := include "oncall.fullname" . 
-}} -{{- $svcPort := .Values.service.port -}} -{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }} - {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }} - {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}} - {{- end }} -{{- end }} -{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} -apiVersion: networking.k8s.io/v1 -{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} -apiVersion: networking.k8s.io/v1beta1 -{{- else -}} -apiVersion: extensions/v1beta1 -{{- end }} -kind: Ingress -metadata: - name: {{ $fullName }} - labels: - {{- include "oncall.labels" . | nindent 4 }} - {{- with .Values.ingress.annotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} -spec: - {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }} - ingressClassName: {{ .Values.ingress.className }} - {{- end }} - {{- if .Values.ingress.tls }} - tls: - {{- tpl (toYaml .Values.ingress.tls) . | nindent 4 }} - {{- end }} - rules: - - host: {{ .Values.base_url | quote }} - http: - paths: -{{- if .Values.ingress.extraPaths }} -{{ toYaml .Values.ingress.extraPaths | indent 6}} -{{- end }} - - path: / - pathType: Prefix - backend: - service: - name: {{ include "oncall.engine.fullname" . }} - port: - number: 8080 - {{ if .Values.grafana.enabled }} - - path: /grafana - pathType: Prefix - backend: - service: - name: {{ include "oncall.grafana.fullname" . }} - port: - number: 80 - {{- end }} - {{ if .Values.detached_integrations.enabled }} - - path: /integrations - pathType: Prefix - backend: - service: - name: {{ include "oncall.detached_integrations.fullname" . 
}} - port: - number: 8080 - {{- end }} -{{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/integrations/_helpers.tpl b/packages/system/grafana-oncall/charts/oncall/templates/integrations/_helpers.tpl deleted file mode 100644 index 6727ed42..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/integrations/_helpers.tpl +++ /dev/null @@ -1,26 +0,0 @@ -{{/* -Maximum of 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -*/}} -{{- define "oncall.detached_integrations.name" -}} -{{ include "oncall.name" . | trunc 55 }}-integrations -{{- end }} - -{{- define "oncall.detached_integrations.fullname" -}} -{{ include "oncall.fullname" . | trunc 55 }}-integrations -{{- end }} - -{{/* -Integrations common labels -*/}} -{{- define "oncall.detached_integrations.labels" -}} -{{ include "oncall.labels" . }} -app.kubernetes.io/component: integrations -{{- end }} - -{{/* -Integrations selector labels -*/}} -{{- define "oncall.detached_integrations.selectorLabels" -}} -{{ include "oncall.selectorLabels" . }} -app.kubernetes.io/component: integrations -{{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/integrations/deployment.yaml b/packages/system/grafana-oncall/charts/oncall/templates/integrations/deployment.yaml deleted file mode 100644 index 5e08eaf7..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/integrations/deployment.yaml +++ /dev/null @@ -1,99 +0,0 @@ -{{- if .Values.detached_integrations.enabled -}} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "oncall.detached_integrations.fullname" . }} - labels: - {{- include "oncall.detached_integrations.labels" . | nindent 4 }} -spec: - replicas: {{ .Values.detached_integrations.replicaCount }} - selector: - matchLabels: - {{- include "oncall.detached_integrations.selectorLabels" . 
| nindent 6 }} - strategy: - {{- toYaml .Values.detached_integrations.updateStrategy | nindent 4 }} - template: - metadata: - {{- with .Values.podAnnotations }} - annotations: - random-annotation: {{ randAlphaNum 10 | lower }} - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "oncall.detached_integrations.selectorLabels" . | nindent 8 }} - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ include "oncall.serviceAccountName" . }} - securityContext: - {{- toYaml .Values.podSecurityContext | nindent 8 }} - initContainers: - {{- include "oncall.initContainer" . | indent 8 }} - containers: - - name: {{ .Chart.Name }} - securityContext: - {{- toYaml .Values.securityContext | nindent 12 }} - image: {{ include "oncall.engine.image" . }} - imagePullPolicy: {{ .Values.image.pullPolicy }} - {{- if .Values.oncall.devMode }} - command: ["sh", "-c", "uwsgi --disable-logging --py-autoreload 3 --ini uwsgi.ini"] - {{- end }} - ports: - - name: http - containerPort: 8080 - protocol: TCP - env: - {{- include "snippet.oncall.engine.env" . | nindent 12 }} - - name: ROOT_URLCONF - value: "engine.integrations_urls" - livenessProbe: - httpGet: - path: /health/ - port: http - periodSeconds: 60 - timeoutSeconds: 3 - readinessProbe: - httpGet: - path: /ready/ - port: http - periodSeconds: 60 - timeoutSeconds: 3 - startupProbe: - httpGet: - path: /startupprobe/ - port: http - periodSeconds: 10 - timeoutSeconds: 3 - resources: - {{- toYaml .Values.detached_integrations.resources | nindent 12 }} - {{- with .Values.detached_integrations.extraVolumeMounts }} - volumeMounts: {{- . | toYaml | nindent 12 }} - {{- end }} - {{- with .Values.detached_integrations.extraContainers }} - {{- tpl . $ | nindent 8 }} - {{- end }} - {{- with .Values.detached_integrations.nodeSelector }} - nodeSelector: - {{- toYaml . 
| nindent 8 }} - {{- end }} - {{- with .Values.detached_integrations.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.detached_integrations.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.detached_integrations.topologySpreadConstraints }} - topologySpreadConstraints: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.detached_integrations.priorityClassName }} - priorityClassName: {{ . }} - {{- end }} - {{- with .Values.detached_integrations.extraVolumes }} - volumes: {{- . | toYaml | nindent 8 }} - {{- end }} -{{- end -}} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/integrations/service-external.yaml b/packages/system/grafana-oncall/charts/oncall/templates/integrations/service-external.yaml deleted file mode 100644 index 455d4aa0..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/integrations/service-external.yaml +++ /dev/null @@ -1,24 +0,0 @@ -{{- if .Values.detached_integrations_service.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "oncall.detached_integrations.fullname" . }}-external - labels: - {{- include "oncall.detached_integrations.labels" . | nindent 4 }} - {{- with .Values.detached_integrations_service.annotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} -spec: - type: {{ .Values.detached_integrations_service.type }} - ports: - - port: {{ .Values.detached_integrations_service.port }} - targetPort: http - protocol: TCP - name: http - {{- if and (eq .Values.detached_integrations_service.type "NodePort") (.Values.detached_integrations_service.nodePort) }} - nodePort: {{ .Values.detached_integrations_service.nodePort }} - {{- end }} - selector: - {{- include "oncall.detached_integrations.selectorLabels" . 
| nindent 4 }} -{{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/integrations/service-internal.yaml b/packages/system/grafana-oncall/charts/oncall/templates/integrations/service-internal.yaml deleted file mode 100644 index 3527b247..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/integrations/service-internal.yaml +++ /dev/null @@ -1,17 +0,0 @@ -{{- if .Values.detached_integrations.enabled -}} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "oncall.detached_integrations.fullname" . }} - labels: - {{- include "oncall.detached_integrations.labels" . | nindent 4 }} -spec: - type: ClusterIP - ports: - - port: 8080 - targetPort: http - protocol: TCP - name: http - selector: - {{- include "oncall.detached_integrations.selectorLabels" . | nindent 4 }} -{{- end -}} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/secrets.yaml b/packages/system/grafana-oncall/charts/oncall/templates/secrets.yaml deleted file mode 100644 index 821592fa..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/secrets.yaml +++ /dev/null @@ -1,98 +0,0 @@ -{{- if not .Values.oncall.secrets.existingSecret }} -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "oncall.fullname" . }} - labels: - {{- include "oncall.labels" . | nindent 4 }} - {{- if .Values.migrate.useHook }} - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "-5" - {{- end }} -type: Opaque -data: - {{ include "snippet.oncall.secret.secretKey" . }}: {{ randAlphaNum 40 | b64enc | quote }} - {{ include "snippet.oncall.secret.mirageSecretKey" . }}: {{ randAlphaNum 40 | b64enc | quote }} ---- -{{- end }} -{{- if and (eq .Values.database.type "mysql") (not .Values.mariadb.enabled) (not .Values.externalMysql.existingSecret) }} -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "oncall.fullname" . 
}}-mysql-external - {{- if .Values.migrate.useHook }} - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "-5" - {{- end }} -type: Opaque -data: - mariadb-root-password: {{ required "externalMysql.password is required if not mariadb.enabled and not externalMysql.existingSecret" .Values.externalMysql.password | b64enc | quote }} ---- -{{- end }} -{{- if and (not .Values.postgresql.enabled) (eq .Values.database.type "postgresql") (not .Values.externalPostgresql.existingSecret) }} -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "oncall.fullname" . }}-postgresql-external - {{- if .Values.migrate.useHook }} - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "-5" - {{- end }} -type: Opaque -data: - postgres-password: {{ required "externalPostgresql.password is required if not postgresql.enabled and not externalPostgresql.existingSecret" .Values.externalPostgresql.password | b64enc | quote }} ---- -{{- end }} -{{- if and (eq .Values.broker.type "rabbitmq") (not .Values.rabbitmq.enabled) (not .Values.externalRabbitmq.existingSecret) }} -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "oncall.fullname" . }}-rabbitmq-external - {{- if .Values.migrate.useHook }} - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "-5" - {{- end }} -type: Opaque -data: - rabbitmq-password: {{ required "externalRabbitmq.password is required if not rabbitmq.enabled and not externalRabbitmq.existingSecret" .Values.externalRabbitmq.password | b64enc | quote }} ---- -{{- end }} -{{- if and (eq .Values.broker.type "redis") (not .Values.redis.enabled) (not .Values.externalRedis.existingSecret) }} -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "oncall.fullname" . 
}}-redis-external - {{- if .Values.migrate.useHook }} - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "-5" - {{- end }} -type: Opaque -data: - redis-password: {{ required "externalRedis.password is required if not redis.enabled and not externalRedis.existingSecret" .Values.externalRedis.password | b64enc | quote }} ---- -{{- end }} -{{- if and .Values.oncall.smtp.enabled .Values.oncall.smtp.password }} -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "oncall.fullname" . }}-smtp -type: Opaque -data: - smtp-password: {{ .Values.oncall.smtp.password | b64enc | quote }} ---- -{{- end }} -{{- if and .Values.oncall.exporter.enabled .Values.oncall.exporter.authToken }} -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "oncall.fullname" . }}-exporter -type: Opaque -data: - exporter-secret: {{ .Values.oncall.exporter.authToken | b64enc | quote }} ---- -{{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/serviceaccount.yaml b/packages/system/grafana-oncall/charts/oncall/templates/serviceaccount.yaml deleted file mode 100644 index d0a5a9eb..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/serviceaccount.yaml +++ /dev/null @@ -1,18 +0,0 @@ -{{- if .Values.serviceAccount.create -}} -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ include "oncall.serviceAccountName" . }} - labels: - {{- include "oncall.labels" . | nindent 4 }} - {{- if or (.Values.migrate.useHook) (.Values.serviceAccount.annotations) }} - annotations: - {{- if .Values.migrate.useHook }} - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "-5" - {{- end }} - {{- with .Values.serviceAccount.annotations }} - {{- toYaml . 
| nindent 4 }} - {{- end }} - {{- end }} -{{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/telegram-polling/_helpers.tpl b/packages/system/grafana-oncall/charts/oncall/templates/telegram-polling/_helpers.tpl deleted file mode 100644 index d2053dc0..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/telegram-polling/_helpers.tpl +++ /dev/null @@ -1,22 +0,0 @@ -{{/* -Maximum of 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -*/}} -{{- define "oncall.telegramPolling.fullname" -}} -{{ include "oncall.fullname" . | trunc 45 }}-telegram-polling -{{- end }} - -{{/* -Telegram polling common labels -*/}} -{{- define "oncall.telegramPolling.labels" -}} -{{ include "oncall.labels" . }} -app.kubernetes.io/component: telegram-polling -{{- end }} - -{{/* -Telegram polling selector labels -*/}} -{{- define "oncall.telegramPolling.selectorLabels" -}} -{{ include "oncall.selectorLabels" . }} -app.kubernetes.io/component: telegram-polling -{{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/telegram-polling/deployment.yaml b/packages/system/grafana-oncall/charts/oncall/templates/telegram-polling/deployment.yaml deleted file mode 100644 index 2e448897..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/telegram-polling/deployment.yaml +++ /dev/null @@ -1,53 +0,0 @@ -{{- if .Values.telegramPolling.enabled -}} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "oncall.telegramPolling.fullname" . }} - labels: - {{- include "oncall.telegramPolling.labels" . | nindent 4 }} -spec: - replicas: 1 - selector: - matchLabels: - {{- include "oncall.telegramPolling.selectorLabels" . | nindent 6 }} - template: - metadata: - labels: - {{- include "oncall.telegramPolling.selectorLabels" . 
| nindent 8 }} - {{- if .Values.telegramPolling.podLabels }} - {{- toYaml .Values.telegramPolling.podLabels | nindent 8 }} - {{- end }} - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ include "oncall.serviceAccountName" . }} - securityContext: - {{- toYaml .Values.podSecurityContext | nindent 8 }} - initContainers: - {{- include "oncall.initContainer" . | nindent 8 }} - containers: - - name: telegram-polling - securityContext: - {{- toYaml .Values.securityContext | nindent 12 }} - image: {{ include "oncall.engine.image" . }} - imagePullPolicy: {{ .Values.image.pullPolicy }} - command: ['sh', '-c', 'python manage.py start_telegram_polling'] - env: - {{- include "snippet.oncall.env" . | nindent 12 }} - {{- include "snippet.oncall.telegram.env" . | nindent 12 }} - {{- include "snippet.db.env" . | nindent 12 }} - {{- include "snippet.broker.env" . | nindent 12 }} - {{- include "oncall.extraEnvs" . | nindent 12 }} - {{- with .Values.telegramPolling.resources }} - resources: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.telegramPolling.extraVolumeMounts }} - volumeMounts: {{- . | toYaml | nindent 12 }} - {{- end }} - {{- with .Values.telegramPolling.extraVolumes }} - volumes: {{- . 
| toYaml | nindent 8 }} - {{- end }} -{{- end -}} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/ui/_helpers.tpl b/packages/system/grafana-oncall/charts/oncall/templates/ui/_helpers.tpl deleted file mode 100644 index 49dcc2e5..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/ui/_helpers.tpl +++ /dev/null @@ -1,8 +0,0 @@ -{{- define "ui.env" -}} -{{- if .Values.ui.env }} - {{- range $key, $value := .Values.ui.env }} -- name: {{ $key }} - value: "{{ $value }}" - {{- end -}} -{{- end }} -{{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/templates/ui/deployment.yaml b/packages/system/grafana-oncall/charts/oncall/templates/ui/deployment.yaml deleted file mode 100644 index c094368f..00000000 --- a/packages/system/grafana-oncall/charts/oncall/templates/ui/deployment.yaml +++ /dev/null @@ -1,31 +0,0 @@ -{{- if .Values.ui.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: oncall-ui - labels: - app.kubernetes.io/component: oncall-ui -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/component: oncall-ui - template: - metadata: - labels: - app.kubernetes.io/component: oncall-ui - spec: - containers: - - name: oncall-ui - image: "{{ .Values.ui.image.repository }}:{{ .Values.ui.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: IfNotPresent - env: - {{- include "ui.env" . | nindent 12 }} - volumeMounts: - - mountPath: /etc/app - name: hot-reloaded-plugin - volumes: - - name: hot-reloaded-plugin - hostPath: - path: /oncall-plugin -{{- end }} diff --git a/packages/system/grafana-oncall/charts/oncall/values.yaml b/packages/system/grafana-oncall/charts/oncall/values.yaml deleted file mode 100644 index 3306f005..00000000 --- a/packages/system/grafana-oncall/charts/oncall/values.yaml +++ /dev/null @@ -1,719 +0,0 @@ -# Values for configuring the deployment of Grafana OnCall - -# Set the domain name Grafana OnCall will be installed on. 
-# If you want to install grafana as a part of this release make sure to configure grafana.grafana.ini.server.domain too -base_url: example.com -base_url_protocol: https - -## Optionally specify an array of imagePullSecrets. -## Secrets must be manually created in the namespace. -## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ -## e.g: -## imagePullSecrets: -## - name: myRegistryKeySecretName -imagePullSecrets: [] - -image: - # Grafana OnCall docker image repository - repository: grafana/oncall - tag: - pullPolicy: Always - -# Whether to create additional service for external connections -# ClusterIP service is always created -service: - enabled: false - type: LoadBalancer - port: 8080 - annotations: {} - -# Engine pods configuration -engine: - replicaCount: 1 - resources: - {} - # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi - - # Labels for engine pods - podLabels: {} - - ## Deployment update strategy - ## ref: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#strategy - updateStrategy: - rollingUpdate: - maxSurge: 25% - maxUnavailable: 0 - type: RollingUpdate - - ## Affinity for pod assignment - ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity - affinity: {} - - ## Node labels for pod assignment - ## ref: https://kubernetes.io/docs/user-guide/node-selection/ - nodeSelector: {} - - ## Tolerations for pod assignment - ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ - tolerations: [] - - ## Topology spread constraints for pod assignment - ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/ - topologySpreadConstraints: [] - - ## Priority class for the pods - ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/ - priorityClassName: "" - - # Extra containers which runs as sidecar - extraContainers: "" - 
# extraContainers: | - # - name: cloud-sql-proxy - # image: gcr.io/cloud-sql-connectors/cloud-sql-proxy:2.1.2 - # args: - # - --private-ip - # - --port=5432 - # - example:europe-west3:grafana-oncall-db - - # Extra volume mounts for the main app container - extraVolumeMounts: [] - # - mountPath: /mnt/postgres-tls - # name: postgres-tls - # - mountPath: /mnt/redis-tls - # name: redis-tls - - # Extra volumes for the pod - extraVolumes: [] - # - name: postgres-tls - # configMap: - # name: my-postgres-tls - # defaultMode: 0640 - # - name: redis-tls - # configMap: - # name: my-redis-tls - # defaultMode: 0640 - -detached_integrations_service: - enabled: false - type: LoadBalancer - port: 8080 - annotations: {} - -# Integrations pods configuration -detached_integrations: - enabled: false - replicaCount: 1 - resources: - {} - # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi - - ## Deployment update strategy - ## ref: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#strategy - updateStrategy: - rollingUpdate: - maxSurge: 25% - maxUnavailable: 0 - type: RollingUpdate - - ## Affinity for pod assignment - ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity - affinity: {} - - ## Node labels for pod assignment - ## ref: https://kubernetes.io/docs/user-guide/node-selection/ - nodeSelector: {} - - ## Tolerations for pod assignment - ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ - tolerations: [] - - ## Topology spread constraints for pod assignment - ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/ - topologySpreadConstraints: [] - - ## Priority class for the pods - ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/ - priorityClassName: "" - - # Extra containers which runs as sidecar - extraContainers: "" - # extraContainers: | - # - name: cloud-sql-proxy - # 
image: gcr.io/cloud-sql-connectors/cloud-sql-proxy:2.1.2 - # args: - # - --private-ip - # - --port=5432 - # - example:europe-west3:grafana-oncall-db - - # Extra volume mounts for the container - extraVolumeMounts: [] - # - mountPath: /mnt/postgres-tls - # name: postgres-tls - # - mountPath: /mnt/redis-tls - # name: redis-tls - - # Extra volumes for the pod - extraVolumes: [] - # - name: postgres-tls - # configMap: - # name: my-postgres-tls - # defaultMode: 0640 - # - name: redis-tls - # configMap: - # name: my-redis-tls - # defaultMode: 0640 - -# Celery workers pods configuration -celery: - replicaCount: 1 - worker_queue: "default,critical,long,slack,telegram,webhook,celery,grafana" - worker_concurrency: "1" - worker_max_tasks_per_child: "100" - worker_beat_enabled: "True" - ## Restart of the celery workers once in a given interval as an additional precaution to the probes - ## If this setting is enabled TERM signal will be sent to celery workers - ## It will lead to warm shutdown (waiting for the tasks to complete) and restart the container - ## If this setting is set numbers of pod restarts will increase - ## Comment this line out if you want to remove restarts - worker_shutdown_interval: "65m" - livenessProbe: - enabled: true - initialDelaySeconds: 30 - periodSeconds: 300 - timeoutSeconds: 10 - resources: - {} - # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi - - # Labels for celery pods - podLabels: {} - - ## Affinity for pod assignment - ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity - affinity: {} - - ## Node labels for pod assignment - ## ref: https://kubernetes.io/docs/user-guide/node-selection/ - nodeSelector: {} - - ## Tolerations for pod assignment - ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ - tolerations: [] - - ## Topology spread constraints for pod assignment - ## ref: 
https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/ - topologySpreadConstraints: [] - - ## Priority class for the pods - ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/ - priorityClassName: "" - - # Extra containers which runs as sidecar - extraContainers: "" - # extraContainers: | - # - name: cloud-sql-proxy - # image: gcr.io/cloud-sql-connectors/cloud-sql-proxy:2.1.2 - # args: - # - --private-ip - # - --port=5432 - # - example:europe-west3:grafana-oncall-db - - # Extra volume mounts for the main container - extraVolumeMounts: [] - # - mountPath: /mnt/postgres-tls - # name: postgres-tls - # - mountPath: /mnt/redis-tls - # name: redis-tls - - # Extra volumes for the pod - extraVolumes: [] - # - name: postgres-tls - # configMap: - # name: my-postgres-tls - # defaultMode: 0640 - # - name: redis-tls - # configMap: - # name: my-redis-tls - # defaultMode: 0640 - -# Telegram polling pod configuration -telegramPolling: - enabled: false - resources: - {} - # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi - - # Labels for telegram-polling pods - podLabels: {} - - # Extra volume mounts for the main container - extraVolumeMounts: [] - # - mountPath: /mnt/postgres-tls - # name: postgres-tls - # - mountPath: /mnt/redis-tls - # name: redis-tls - - # Extra volumes for the pod - extraVolumes: [] - # - name: postgres-tls - # configMap: - # name: my-postgres-tls - # defaultMode: 0640 - # - name: redis-tls - # configMap: - # name: my-redis-tls - # defaultMode: 0640 - -oncall: - # this is intended to be used for local development. 
In short, it will mount the ./engine dir into - # any backend related containers, to allow hot-reloading + also run the containers with slightly modified - # startup commands (which configures the hot-reloading) - devMode: false - - # Override default MIRAGE_CIPHER_IV (must be 16 bytes long) - # For existing installation, this should not be changed. - # mirageCipherIV: 1234567890abcdef - # oncall secrets - secrets: - # Use existing secret. (secretKey and mirageSecretKey is required) - existingSecret: "" - # The key in the secret containing secret key - secretKey: "" - # The key in the secret containing mirage secret key - mirageSecretKey: "" - # Slack configures the Grafana Oncall Slack ChatOps integration. - slack: - # Enable the Slack ChatOps integration for the Oncall Engine. - enabled: false - # Sets the Slack bot slash-command - commandName: oncall - # clientId configures the Slack app OAuth2 client ID. - # api.slack.com/apps/ -> Basic Information -> App Credentials -> Client ID - clientId: ~ - # clientSecret configures the Slack app OAuth2 client secret. - # api.slack.com/apps/ -> Basic Information -> App Credentials -> Client Secret - clientSecret: ~ - # signingSecret - configures the Slack app signature secret used to sign - # requests comming from Slack. - # api.slack.com/apps/ -> Basic Information -> App Credentials -> Signing Secret - signingSecret: ~ - # Use existing secret for clientId, clientSecret and signingSecret. - # clientIdKey, clientSecretKey and signingSecretKey are required - existingSecret: "" - # The key in the secret containing OAuth2 client ID - clientIdKey: "" - # The key in the secret containing OAuth2 client secret - clientSecretKey: "" - # The key in the secret containing the Slack app signature secret - signingSecretKey: "" - # OnCall external URL - redirectHost: ~ - telegram: - enabled: false - token: ~ - webhookUrl: ~ - # Use existing secret. 
(tokenKey is required) - existingSecret: "" - # The key in the secret containing Telegram token - tokenKey: "" - smtp: - enabled: true - host: ~ - port: ~ - username: ~ - password: ~ - tls: ~ - fromEmail: ~ - exporter: - enabled: false - authToken: ~ - twilio: - # Twilio account SID/username to allow OnCall to send SMSes and make phone calls - accountSid: "" - # Twilio password to allow OnCall to send SMSes and make calls - authToken: "" - # Number from which you will receive calls and SMS - # (NOTE: must be quoted, otherwise would be rendered as float value) - phoneNumber: "" - # SID of Twilio service for number verification. You can create a service in Twilio web interface. - # twilio.com -> verify -> create new service - verifySid: "" - # Twilio API key SID/username to allow OnCall to send SMSes and make phone calls - apiKeySid: "" - # Twilio API key secret/password to allow OnCall to send SMSes and make phone calls - apiKeySecret: "" - # Use existing secret for authToken, phoneNumber, verifySid, apiKeySid and apiKeySecret. - existingSecret: "" - # Twilio password to allow OnCall to send SMSes and make calls - # The key in the secret containing the auth token - authTokenKey: "" - # The key in the secret containing the phone number - phoneNumberKey: "" - # The key in the secret containing verify service sid - verifySidKey: "" - # The key in the secret containing api key sid - apiKeySidKey: "" - # The key in the secret containing the api key secret - apiKeySecretKey: "" - # Phone notifications limit (the only non-secret value). 
- # TODO: rename to phoneNotificationLimit - limitPhone: - -# Whether to run django database migrations automatically -migrate: - enabled: true - # TTL can be unset by setting ttlSecondsAfterFinished: "" - ttlSecondsAfterFinished: 20 - # use a helm hook to manage the migration job - useHook: false - annotations: {} - - ## Affinity for pod assignment - ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity - affinity: {} - - ## Node labels for pod assignment - ## ref: https://kubernetes.io/docs/user-guide/node-selection/ - nodeSelector: {} - - ## Tolerations for pod assignment - ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ - tolerations: [] - - # Extra containers which runs as sidecar - extraContainers: "" - # extraContainers: | - # - name: cloud-sql-proxy - # image: gcr.io/cloud-sql-connectors/cloud-sql-proxy:2.1.2 - # args: - # - --private-ip - # - --port=5432 - # - example:europe-west3:grafana-oncall-db - resources: - {} - # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi - - # Extra volume mounts for the main container - extraVolumeMounts: [] - # - mountPath: /mnt/postgres-tls - # name: postgres-tls - # - mountPath: /mnt/redis-tls - # name: redis-tls - - # Extra volumes for the pod - extraVolumes: [] - # - name: postgres-tls - # configMap: - # name: my-postgres-tls - # defaultMode: 0640 - # - name: redis-tls - # configMap: - # name: my-redis-tls - # defaultMode: 0640 - -# Sets environment variables with name capitalized and prefixed with UWSGI_, -# and dashes are substituted with underscores. 
-# see more: https://uwsgi-docs.readthedocs.io/en/latest/Configuration.html#environment-variables -# Set null to disable all UWSGI environment variables -uwsgi: - listen: 1024 - -# Additional env variables to add to deployments -env: {} - -# Enable ingress object for external access to the resources -ingress: - enabled: true - # className: "" - annotations: - kubernetes.io/ingress.class: "nginx" - cert-manager.io/issuer: "letsencrypt-prod" - tls: - - hosts: - - "{{ .Values.base_url }}" - secretName: certificate-tls - # Extra paths to prepend to the host configuration. If using something - # like an ALB ingress controller, you may want to configure SSL redirects - extraPaths: [] - # - path: /* - # backend: - # serviceName: ssl-redirect - # servicePort: use-annotation - ## Or for k8s > 1.19 - # - path: /* - # pathType: Prefix - # backend: - # service: - # name: ssl-redirect - # port: - # name: use-annotation - -# Whether to install ingress controller -ingress-nginx: - enabled: true - -# Install cert-manager as a part of the release -cert-manager: - enabled: true - # Instal CRD resources - installCRDs: true - webhook: - timeoutSeconds: 30 - # cert-manager tries to use the already used port, changing to another one - # https://github.com/cert-manager/cert-manager/issues/3237 - # https://cert-manager.io/docs/installation/compatibility/ - securePort: 10260 - # Fix self-checks https://github.com/jetstack/cert-manager/issues/4286 - podDnsPolicy: None - podDnsConfig: - nameservers: - - 8.8.8.8 - - 1.1.1.1 - -database: - # can be either mysql or postgresql - type: mysql - -# MySQL is included into this release for the convenience. 
-# It is recommended to host it separately from this release -# Set mariadb.enabled = false and configure externalMysql -mariadb: - enabled: true - auth: - database: oncall - existingSecret: - primary: - extraEnvVars: - - name: MARIADB_COLLATE - value: utf8mb4_unicode_ci - - name: MARIADB_CHARACTER_SET - value: utf8mb4 - secondary: - extraEnvVars: - - name: MARIADB_COLLATE - value: utf8mb4_unicode_ci - - name: MARIADB_CHARACTER_SET - value: utf8mb4 - -# Make sure to create the database with the following parameters: -# CREATE DATABASE oncall CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; -externalMysql: - host: - port: - db_name: - user: - password: - # Use an existing secret for the mysql password. - existingSecret: - # The key in the secret containing the mysql username - usernameKey: - # The key in the secret containing the mysql password - passwordKey: - # Extra options (see example below) - # Reference: https://pymysql.readthedocs.io/en/latest/modules/connections.html - options: - # options: >- - # ssl_verify_cert=true - # ssl_verify_identity=true - # ssl_ca=/mnt/mysql-tls/ca.crt - # ssl_cert=/mnt/mysql-tls/client.crt - # ssl_key=/mnt/mysql-tls/client.key - -# PostgreSQL is included into this release for the convenience. 
-# It is recommended to host it separately from this release -# Set postgresql.enabled = false and configure externalPostgresql -postgresql: - enabled: false - auth: - database: oncall - existingSecret: - -# Make sure to create the database with the following parameters: -# CREATE DATABASE oncall WITH ENCODING UTF8; -externalPostgresql: - host: - port: - db_name: - user: - password: - # Use an existing secret for the database password - existingSecret: - # The key in the secret containing the database password - passwordKey: - # Extra options (see example below) - # Reference: https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS - options: - # options: >- - # sslmode=verify-full - # sslrootcert=/mnt/postgres-tls/ca.crt - # sslcert=/mnt/postgres-tls/client.crt - # sslkey=/mnt/postgres-tls/client.key - -# RabbitMQ is included into this release for the convenience. -# It is recommended to host it separately from this release -# Set rabbitmq.enabled = false and configure externalRabbitmq -rabbitmq: - enabled: true - auth: - existingPasswordSecret: - -broker: - type: rabbitmq - -externalRabbitmq: - host: - port: - user: - password: - protocol: - vhost: - # Use an existing secret for the rabbitmq password - existingSecret: - # The key in the secret containing the rabbitmq password - passwordKey: "" - # The key in the secret containing the rabbitmq username - usernameKey: username - -# Redis is included into this release for the convenience. 
-# It is recommended to host it separately from this release -redis: - enabled: true - auth: - existingSecret: - -externalRedis: - protocol: - host: - port: - database: - username: - password: - # Use an existing secret for the redis password - existingSecret: - # The key in the secret containing the redis password - passwordKey: - - # SSL options - ssl_options: - enabled: false - # CA certificate - ca_certs: - # Client SSL certs - certfile: - keyfile: - # SSL verification mode: "cert_none" | "cert_optional" | "cert_required" - cert_reqs: - -# Grafana is included into this release for the convenience. -# It is recommended to host it separately from this release -grafana: - enabled: true - grafana.ini: - server: - domain: example.com - root_url: "%(protocol)s://%(domain)s/grafana" - serve_from_sub_path: true - persistence: - enabled: true - # Disable psp as PodSecurityPolicy is deprecated in v1.21+, unavailable in v1.25+ - rbac: - pspEnabled: false - plugins: - - grafana-oncall-app - -externalGrafana: - # Example: https://grafana.mydomain.com - url: - -nameOverride: "" -fullnameOverride: "" - -serviceAccount: - # Specifies whether a service account should be created - create: true - # Annotations to add to the service account - annotations: {} - # The name of the service account to use. 
- # If not set and create is true, a name is generated using the fullname template - name: "" - -podAnnotations: {} - -podSecurityContext: - {} - # fsGroup: 2000 - -securityContext: - {} - # capabilities: - # drop: - # - ALL - # readOnlyRootFilesystem: true - # runAsNonRoot: true - # runAsGroup: 2000 - # runAsUser: 1000 - -init: - securityContext: - {} - # allowPrivilegeEscalation: false - # capabilities: - # drop: - # - ALL - # privileged: false - # readOnlyRootFilesystem: true - # runAsGroup: 2000 - # runAsNonRoot: true - # runAsUser: 1000 - resources: - {} - # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi - -ui: - # this is intended to be used for local development. In short, it will spin up an additional container - # running the plugin frontend, such that hot reloading can be enabled - enabled: false - image: - repository: oncall/ui - tag: dev - # Additional env vars for the ui container - env: {} - -prometheus: - enabled: false - # extraScrapeConfigs: | - # - job_name: 'oncall-exporter' - # metrics_path: /metrics/ - # static_configs: - # - targets: - # - oncall-dev-engine.default.svc.cluster.local:8080 diff --git a/packages/system/grafana-oncall/values.yaml b/packages/system/grafana-oncall/values.yaml deleted file mode 100644 index a660e0c1..00000000 --- a/packages/system/grafana-oncall/values.yaml +++ /dev/null @@ -1,19 +0,0 @@ -oncall: - database: - type: "postgresql" - broker: - type: "redis" - ingress: - enabled: false - cert-manager: - enabled: false - mariadb: - enabled: false - postgresql: - enabled: false - rabbitmq: - enabled: false - redis: - enabled: false - grafana: - enabled: false diff --git a/packages/system/monitoring/Makefile b/packages/system/monitoring/Makefile index b0425ded..61780ca7 100644 --- a/packages/system/monitoring/Makefile +++ b/packages/system/monitoring/Makefile @@ -19,3 +19,26 @@ update: helm repo add fluent https://fluent.github.io/helm-charts helm repo update fluent helm pull 
fluent/fluent-bit --untar --untardir charts + # alerts from victoria-metrics-k8s-stack + helm repo add vm https://victoriametrics.github.io/helm-charts/ + helm repo update vm + helm pull vm/victoria-metrics-k8s-stack --untar --untardir charts + rm -rf charts/victoria-metrics-k8s-stack/charts + rm -rf charts/victoria-metrics-k8s-stack/hack + rm -rf charts/victoria-metrics-k8s-stack/templates/victoria-metrics-operator + rm -rf charts/victoria-metrics-k8s-stack/templates/grafana + rm -rf charts/victoria-metrics-k8s-stack/templates/ingress.yaml + rm -rf charts/victoria-metrics-k8s-stack/files/dashboards + rm -f charts/victoria-metrics-k8s-stack/templates/servicemonitors.yaml + rm -f charts/victoria-metrics-k8s-stack/templates/serviceaccount.yaml + rm -f charts/victoria-metrics-k8s-stack/templates/rules/additionalVictoriaMetricsRules.yml + sed -i '/ namespace:/d' charts/victoria-metrics-k8s-stack/templates/rules/rule.yaml + sed -i 's|job="apiserver"|job="kube-apiserver"|g' `grep -rl 'job="apiserver"' charts/victoria-metrics-k8s-stack/files/rules/generated` + sed -i 's|severity: info|severity: informational|g' `grep -rl 'severity: info' ./charts/victoria-metrics-k8s-stack/files/rules/generated` + sed -i ./charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml \ + -e '/Watchdog/,/severity:/s/severity: none/severity: ok/' \ + -e '/InfoInhibitor/,/severity:/s/severity: none/severity: major/' + sed -i 's|severity: none|severity: ok|g' ./charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml # fallback for any remaining none severities; must run after the targeted rewrites + # TODO + rm -f charts/victoria-metrics-k8s-stack/files/rules/generated/alertmanager.rules.yaml + rm -f charts/victoria-metrics-k8s-stack/files/rules/generated/vm*.yaml diff --git a/packages/system/monitoring/alerts/general.yaml b/packages/system/monitoring/alerts/general.yaml deleted file mode 100644 index adf82635..00000000 --- a/packages/system/monitoring/alerts/general.yaml +++ /dev/null @@ -1,57 +0,0 @@ -apiVersion:
operator.victoriametrics.com/v1beta1 -kind: VMRule -metadata: - name: alerts-general - namespace: cozy-monitoring -spec: - groups: - - name: coreos.general - labels: - alertname: foo - rules: - - alert: DeadMansSwitch - expr: vector(1) - labels: - severity_level: "4" - annotations: - description: This is a DeadMansSwitch meant to ensure that the entire Alerting - pipeline is functional. - summary: Alerting DeadMansSwitch - - record: fd_utilization - expr: process_open_fds / process_max_fds - - alert: FdExhaustionClose - expr: max(predict_linear(fd_utilization{pod!=""}[1h], 3600 * 4)) BY (job, namespace, pod) > 1 - for: 10m - labels: - severity_level: "4" - annotations: - description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} instance - will exhaust in file/socket descriptors within the next 4 hours' - summary: file descriptors soon exhausted - - alert: FdExhaustionClose - expr: max(predict_linear(fd_utilization{pod=""}[1h], 3600 * 4)) BY (job, instance) > 1 - for: 10m - labels: - severity_level: "4" - annotations: - description: '{{ $labels.job }}: {{ $labels.instance }} instance - will exhaust in file/socket descriptors within the next 4 hours' - summary: file descriptors soon exhausted - - alert: FdExhaustionClose - expr: max(predict_linear(fd_utilization{pod!=""}[10m], 3600)) BY (job, namespace, pod) > 1 - for: 10m - labels: - severity_level: "3" - annotations: - description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} instance - will exhaust in file/socket descriptors within the next hour' - summary: file descriptors soon exhausted - - alert: FdExhaustionClose - expr: max(predict_linear(fd_utilization{pod=""}[10m], 3600)) BY (job, instance) > 1 - for: 10m - labels: - severity_level: "3" - annotations: - description: '{{ $labels.job }}: {{ $labels.instance }} instance - will exhaust in file/socket descriptors within the next hour' - summary: file descriptors soon exhausted diff --git 
a/packages/system/monitoring/alerts/kube-dns.yaml b/packages/system/monitoring/alerts/kube-dns.yaml deleted file mode 100644 index 4310707b..00000000 --- a/packages/system/monitoring/alerts/kube-dns.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: operator.victoriametrics.com/v1beta1 -kind: VMRule -metadata: - name: alerts-node - namespace: cozy-monitoring -spec: - groups: - - name: kubernetes.dns - rules: - - alert: KubernetesDnsTargetDown - expr: absent(up{job="kube-dns"} == 1) - for: 5m - labels: - severity_level: "5" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: "markdown" - description: |- - Prometheus is unable to collect metrics from kube-dns. Thus its status is unknown. - - To debug the problem, use the following commands: - 1. `kubectl -n kube-system describe deployment -l k8s-app=kube-dns` - 2. `kubectl -n kube-system describe pod -l k8s-app=kube-dns` - summary: Kube-dns or CoreDNS are not under monitoring. diff --git a/packages/system/monitoring/alerts/kube-state-metrics.yaml b/packages/system/monitoring/alerts/kube-state-metrics.yaml deleted file mode 100644 index a671283f..00000000 --- a/packages/system/monitoring/alerts/kube-state-metrics.yaml +++ /dev/null @@ -1,50 +0,0 @@ -apiVersion: operator.victoriametrics.com/v1beta1 -kind: VMRule -metadata: - name: alerts-kube-state-metrics - namespace: cozy-monitoring -spec: - groups: - - name: kube-state-metrics - rules: - - alert: DeploymentGenerationMismatch - expr: max(kube_deployment_status_observed_generation != kube_deployment_metadata_generation) by (namespace, deployment) - for: 15m - labels: - severity_level: "4" - annotations: - plk_protocol_version: "1" - description: Observed deployment generation does not match expected one for - deployment {{$labels.namespace}}/{{$labels.deployment}} - summary: Deployment is outdated - - alert: KubeStateMetricsDown - expr: absent(up{job="kube-state-metrics"} == 1) - for: 5m - labels: - severity_level: "3" - annotations: - 
plk_protocol_version: "1" - plk_markup_format: markdown - description: |- - There are no metrics about cluster resources for 5 minutes. - - Most alerts an monitroing panels aren't working. - - To debug the problem: - 1. Check kube-state-metrics pods: `kubectl -n d8-monitoring describe pod -l app=kube-state-metrics` - 2. Check its logs: `kubectl -n d8-monitoring describe deploy kube-state-metrics` - summary: > - Kube-state-metrics is not working in the cluster. - - name: kubernetes.rules - rules: - - alert: KubePodCrashLooping - expr: | - max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="kube-state-metrics"}[5m]) >= 1 - annotations: - description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container - }}) is in waiting state (reason: "CrashLoopBackOff").' - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping - summary: Pod is crash looping. - for: 15m - labels: - severity: warning diff --git a/packages/system/monitoring/alerts/kubelet.yaml b/packages/system/monitoring/alerts/kubelet.yaml deleted file mode 100644 index d0719f2d..00000000 --- a/packages/system/monitoring/alerts/kubelet.yaml +++ /dev/null @@ -1,63 +0,0 @@ -apiVersion: operator.victoriametrics.com/v1beta1 -kind: VMRule -metadata: - name: alerts-kubelet - namespace: cozy-monitoring -spec: - groups: - - name: coreos.kubelet - rules: - - alert: K8SNodeNotReady - expr: min(kube_node_status_condition{condition="Ready",status="true"}) BY (node) == 0 and - min(kube_node_spec_unschedulable == 0) by (node) - for: 1m - labels: - severity_level: "3" - annotations: - plk_protocol_version: "1" - description: The Kubelet on {{ $labels.node }} has not checked in with the API, - or has set itself to NotReady, for more than 10 minutes - summary: Node status is NotReady - - alert: K8SManyNodesNotReady - expr: count(kube_node_status_condition{condition="Ready",status="true"} == 0 and on (node) 
kube_node_spec_unschedulable == 0) > 1 - and (count(kube_node_status_condition{condition="Ready",status="true"} == 0 and on (node) kube_node_spec_unschedulable == 0) / - count(kube_node_status_condition{condition="Ready",status="true"} and on (node) kube_node_spec_unschedulable == 0)) > 0.2 - for: 1m - labels: - severity_level: "3" - annotations: - plk_protocol_version: "1" - description: '{{ $value }}% of Kubernetes nodes are not ready' - summary: Too many nodes are not ready - - alert: K8SKubeletDown - expr: (count(up{job="kubelet"} == 0) or absent(up{job="kubelet"} == 1)) / count(up{job="kubelet"}) * 100 > 3 - for: 10m - labels: - severity_level: "4" - tier: "cluster" - annotations: - plk_protocol_version: "1" - plk_group_for__target_down: "TargetDown,prometheus=deckhouse,job=kubelet,kubernetes=~kubernetes" - description: Prometheus failed to scrape {{ $value }}% of kubelets. - summary: A few kubelets cannot be scraped - - alert: K8SKubeletDown - expr: (count(up{job="kubelet"} == 0) or absent(up{job="kubelet"} == 1)) / count(up{job="kubelet"}) * 100 > 10 - for: 30m - labels: - severity_level: "3" - tier: "cluster" - annotations: - plk_protocol_version: "1" - plk_group_for__target_down: "TargetDown,prometheus=deckhouse,job=kubelet,kubernetes=~kubernetes" - description: Prometheus failed to scrape {{ $value }}% of kubelets. 
- summary: Many kubelets cannot be scraped - - alert: K8SKubeletTooManyPods - expr: kubelet_running_pods > on(node) (kube_node_status_capacity{resource="pods",unit="integer"}) * 0.9 - for: 10m - labels: - severity_level: "7" - annotations: - plk_protocol_version: "1" - description: Kubelet {{ $labels.node }} is running {{ $value }} pods, close - to the limit of {{ printf "kube_node_status_capacity{resource=\"pods\",unit=\"integer\",node=\"%s\"}" $labels.node | query | first | value }} - summary: Kubelet is close to pod limit diff --git a/packages/system/monitoring/alerts/node-disk-usage.yaml b/packages/system/monitoring/alerts/node-disk-usage.yaml deleted file mode 100644 index e7ceff1f..00000000 --- a/packages/system/monitoring/alerts/node-disk-usage.yaml +++ /dev/null @@ -1,357 +0,0 @@ -apiVersion: operator.victoriametrics.com/v1beta1 -kind: VMRule -metadata: - name: alerts-node - namespace: cozy-monitoring -spec: - groups: - - name: kubernetes.node.disk_inodes_usage - rules: - - alert: KubeletNodeFSInodesUsage - expr: | - ( - max by (node, mountpoint) (node_filesystem_files_free / node_filesystem_files) * 100 < - max by (node, mountpoint) (kubelet_eviction_nodefs_inodes{type="soft"}) - ) - for: 10m - labels: - severity_level: "9" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - description: | - Soft eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. 
- - Threshold at: {{ printf "kubelet_eviction_nodefs_inodes{type=\"soft\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}% - - Currently at: {{ .Value }}% - summary: Soft eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. - - - alert: KubeletNodeFSInodesUsage - expr: | - ( - max by (node, mountpoint) (node_filesystem_files_free / node_filesystem_files) * 100 < - max by (node, mountpoint) (kubelet_eviction_nodefs_inodes{type="hard"} + 5) - ) - for: 5m - labels: - severity_level: "7" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - description: | - Close to hard eviction threshold of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint. - - Threshold at: {{ printf "kubelet_eviction_nodefs_inodes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}% - - Currently at: {{ .Value }}% - summary: > - Close to hard eviction threshold of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint. 
- - - alert: KubeletNodeFSInodesUsage - expr: | - ( - max by (node, mountpoint) (node_filesystem_files_free / node_filesystem_files) * 100 < - max by (node, mountpoint) (kubelet_eviction_nodefs_inodes{type="hard"}) - ) - labels: - severity_level: "6" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - description: | - Hard eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. - - Threshold at: {{ printf "kubelet_eviction_nodefs_inodes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}% - - Currently at: {{ .Value }}% - summary: Hard eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. - - - alert: KubeletNodeFSInodesUsage - expr: | - ( - ( - max by (node, mountpoint) (node_filesystem_files_free) - ) == 0 - ) - * (max by (node, mountpoint) ({__name__=~"kubelet_eviction_nodefs_inodes"})) - labels: - severity_level: "5" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - summary: No more free inodes on nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint. 
- - - alert: KubeletImageFSInodesUsage - expr: | - ( - max by (node, mountpoint) (node_filesystem_files_free / node_filesystem_files) * 100 < - max by (node, mountpoint) (kubelet_eviction_imagefs_inodes{type="soft"}) - ) - for: 10m - labels: - severity_level: "9" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - description: | - Soft eviction of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. - - Threshold at: {{ printf "kubelet_eviction_imagefs_inodes{type=\"soft\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}% - - Currently at: {{ .Value }}% - summary: Soft eviction of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. - - - alert: KubeletImageFSInodesUsage - expr: | - ( - max by (node, mountpoint) (node_filesystem_files_free / node_filesystem_files) * 100 < - max by (node, mountpoint) (kubelet_eviction_imagefs_inodes{type="hard"} + 5) - ) - for: 5m - labels: - severity_level: "7" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - description: | - Close to hard eviction threshold of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint. 
- - Threshold at: {{ printf "kubelet_eviction_imagefs_inodes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}% - - Currently at: {{ .Value }}% - summary: Close to hard eviction threshold of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint. - - - alert: KubeletImageFSInodesUsage - expr: | - ( - max by (node, mountpoint) (node_filesystem_files_free / node_filesystem_files) * 100 < - max by (node, mountpoint) (kubelet_eviction_imagefs_inodes{type="hard"}) - ) - labels: - severity_level: "6" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - description: | - Hard eviction of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. - - Threshold at: {{ printf "kubelet_eviction_imagefs_inodes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}% - - Currently at: {{ .Value }}% - summary: Hard eviction of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. 
- - - alert: KubeletImageFSInodesUsage - expr: | - ( - ( - max by (node, mountpoint) (node_filesystem_files_free) - ) == 0 - ) - * (max by (node, mountpoint) ({__name__=~"kubelet_eviction_imagefs_inodes"})) - labels: - severity_level: "5" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - summary: No more free inodes on imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint. - - - name: kubernetes.node.disk_bytes_usage - rules: - - alert: KubeletNodeFSBytesUsage - expr: | - ( - max by (node, mountpoint) (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < - max by (node, mountpoint) (kubelet_eviction_nodefs_bytes{type="soft"}) - ) - for: 10m - labels: - severity_level: "9" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - description: | - Soft eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. - - Threshold at: {{ printf "kubelet_eviction_nodefs_bytes{type=\"soft\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}% - - Currently at: {{ .Value }}% - summary: Soft eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. 
- - - alert: KubeletNodeFSBytesUsage - expr: | - ( - max by (node, mountpoint) (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < - max by (node, mountpoint) (kubelet_eviction_nodefs_bytes{type="hard"} + 5) - ) - for: 5m - labels: - severity_level: "7" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - description: | - Close to hard eviction threshold of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint. - - Threshold at: {{ printf "kubelet_eviction_nodefs_bytes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}% - - Currently at: {{ .Value }}% - summary: Close to hard eviction threshold of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint. - - - alert: KubeletNodeFSBytesUsage - expr: | - ( - max by (node, mountpoint) (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < - max by (node, mountpoint) (kubelet_eviction_nodefs_bytes{type="hard"}) - ) - labels: - severity_level: "6" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - description: | - Hard eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. 
- - Threshold at: {{ printf "kubelet_eviction_nodefs_bytes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}% - - Currently at: {{ .Value }}% - summary: Hard eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. - - - alert: KubeletNodeFSBytesUsage - expr: | - ( - ( - max by (node, mountpoint) (node_filesystem_avail_bytes) - ) == 0 - ) - * (max by (node, mountpoint) ({__name__=~"kubelet_eviction_nodefs_bytes"})) - labels: - severity_level: "5" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - summary: No more free space on nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint. - - - alert: KubeletImageFSBytesUsage - expr: | - ( - max by (node, mountpoint) (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < - max by (node, mountpoint) (kubelet_eviction_imagefs_bytes{type="soft"}) - ) - for: 10m - labels: - severity_level: "9" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - description: | - Soft eviction of imagefs (filesystem that the container runtime uses for storing images and container writable layers) on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. 
- - Threshold at: {{ printf "kubelet_eviction_imagefs_bytes{type=\"soft\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}% - - Currently at: {{ .Value }}% - summary: Soft eviction of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. - - - alert: KubeletImageFSBytesUsage - expr: | - ( - max by (node, mountpoint) (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < - max by (node, mountpoint) (kubelet_eviction_imagefs_bytes{type="hard"} + 5) - ) - for: 5m - labels: - severity_level: "7" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - description: | - Close to hard eviction threshold of imagefs (filesystem that the container runtime uses for storing images and container writable layers) on node {{$labels.node}} mountpoint {{$labels.mountpoint}}. - - Threshold at: {{ printf "kubelet_eviction_imagefs_bytes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}% - - Currently at: {{ .Value }}% - summary: Close to hard eviction threshold of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint. 
- - - alert: KubeletImageFSBytesUsage - expr: | - ( - max by (node, mountpoint) (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < - max by (node, mountpoint) (kubelet_eviction_imagefs_bytes{type="hard"}) - ) - labels: - severity_level: "6" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - description: | - Hard eviction of imagefs (filesystem that the container runtime uses for storing images and container writable layers) on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. - - Threshold at: {{ printf "kubelet_eviction_imagefs_bytes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}% - - Currently at: {{ .Value }}% - summary: Hard eviction of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress. - - - alert: KubeletImageFSBytesUsage - expr: | - ( - ( - max by (node, mountpoint) (node_filesystem_avail_bytes) - ) == 0 - ) - * (max by (node, mountpoint) ({__name__=~"kubelet_eviction_imagefs_bytes"})) - labels: - severity_level: "5" - tier: cluster - annotations: - plk_protocol_version: "1" - plk_markup_format: markdown - plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - description: | - No more free bytes on imagefs (filesystem that the container runtime uses for storing images and container writable layers) on node {{$labels.node}} mountpoint {{$labels.mountpoint}}. 
- summary: No more free bytes on imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint. diff --git a/packages/system/monitoring/alerts/node.yaml b/packages/system/monitoring/alerts/node.yaml deleted file mode 100644 index 88b6f72b..00000000 --- a/packages/system/monitoring/alerts/node.yaml +++ /dev/null @@ -1,36 +0,0 @@ -apiVersion: operator.victoriametrics.com/v1beta1 -kind: VMRule -metadata: - name: alerts-node - namespace: cozy-monitoring -spec: - groups: - - name: coreos.node - rules: - - record: node:node_cpu:rate:sum - expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[3m])) - BY (node) - - record: node:node_filesystem_usage:sum - expr: sum((node_filesystem_size{mountpoint="/"} - node_filesystem_free{mountpoint="/"})) - BY (node) - - record: node:node_network_receive_bytes:rate:sum - expr: sum(rate(node_network_receive_bytes[3m])) BY (node) - - record: node:node_network_transmit_bytes:rate:sum - expr: sum(rate(node_network_transmit_bytes[3m])) BY (node) - - record: node:node_cpu:ratio - expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(node) - GROUP_LEFT() count(sum(node_cpu) BY (node, cpu)) BY (node) - - record: cluster:node_cpu:sum_rate5m - expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[5m])) - - record: cluster:node_cpu:ratio - expr: cluster:node_cpu:rate5m / count(sum(node_cpu) BY (node, cpu)) - - alert: NodeExporterDown - expr: absent(up{job="node-exporter"} == 1) - for: 10m - labels: - severity_level: "3" - annotations: - plk_protocol_version: "1" - description: Prometheus could not scrape a node-exporter for more than 10m, - or node-exporters have disappeared from discovery - summary: Prometheus could not scrape a node-exporter diff --git a/packages/system/monitoring/alerts/pod-status.yaml b/packages/system/monitoring/alerts/pod-status.yaml deleted file mode 100644 index 9070d2e8..00000000 --- a/packages/system/monitoring/alerts/pod-status.yaml +++ /dev/null @@ -1,34 +0,0 @@ -apiVersion: 
operator.victoriametrics.com/v1beta1 -kind: VMRule -metadata: - name: alerts-node - namespace: cozy-monitoring -spec: - groups: - - name: kubernetes.pod_status_incorrect - rules: - - alert: PodStatusIsIncorrect - expr: > - (count by (node, namespace, pod) (kube_pod_status_ready{condition="true"} == 0) * on (namespace, pod) group_left(node) (max by (namespace, node, pod) (kube_pod_info))) - and - ( - (count by (namespace, pod) (kube_pod_container_status_ready==1) * on (namespace, pod) group_left(node) (max by (namespace, node, pod) (kube_pod_info))) - unless - (count by (namespace, pod) (kube_pod_container_status_ready==0) * on (namespace, pod) group_left(node) (max by (namespace, node, pod) (kube_pod_info))) - ) - for: 10m - annotations: - plk_markup_format: markdown - plk_protocol_version: "1" - plk_create_group_if_not_exists__node_have_pods_with_incorrect_status: "NodeHavePodsWithIncorrectStatus,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - plk_grouped_by__node_have_pods_with_incorrect_status: "NodeHavePodsWithIncorrectStatus,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes" - description: | - There is a {{ $labels.namespace }}/{{ $labels.pod }} Pod in the cluster that runs on the {{ $labels.node }} and listed as NotReady while all the Pod's containers are Ready. - - This could be due to the [Kubernetes bug](https://github.com/kubernetes/kubernetes/issues/80968). - - The recommended course of action: - 1. Find all the Pods having this state: `kubectl get pod -o json --all-namespaces | jq '.items[] | select(.status.phase == "Running") | select(.status.conditions[] | select(.type == "ContainersReady" and .status == "True")) | select(.status.conditions[] | select(.type == "Ready" and .status == "False")) | "\(.spec.nodeName)/\(.metadata.namespace)/\(.metadata.name)"'`; - 2. 
Find all the Nodes affected: `kubectl get pod -o json --all-namespaces | jq '.items[] | select(.status.phase == "Running") | select(.status.conditions[] | select(.type == "ContainersReady" and .status == "True")) | select(.status.conditions[] | select(.type == "Ready" and .status == "False")) | .spec.nodeName' -r | sort | uniq -c`; - 3. Restart `kubelet` on each Node: `systemctl restart kubelet`. - summary: The state of the {{ $labels.namespace }}/{{ $labels.pod }} Pod running on the {{ $labels.node }} Node is incorrect. You need to restart `kubelet`. diff --git a/packages/system/grafana-oncall/charts/oncall/.helmignore b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/.helmignore similarity index 87% rename from packages/system/grafana-oncall/charts/oncall/.helmignore rename to packages/system/monitoring/charts/victoria-metrics-k8s-stack/.helmignore index fd6e5fd3..8148a4ff 100644 --- a/packages/system/grafana-oncall/charts/oncall/.helmignore +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/.helmignore @@ -22,5 +22,5 @@ *.tmproj .vscode/ -# exclude helm unit tests -tests/ +# Ignore img folder used for documentation +img/ diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/CHANGELOG.md b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/CHANGELOG.md new file mode 100644 index 00000000..d044b526 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/CHANGELOG.md @@ -0,0 +1,688 @@ +## Next release + +- TODO + +## 0.25.17 + +**Release date:** 2024-09-20 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Added VMAuth to k8s stack. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/829) +- Fixed ETCD dashboard +- Use path prefix from args as a default path prefix for ingress. 
Related [issue](https://github.com/VictoriaMetrics/helm-charts/issues/1260) +- Allow using vmalert without notifiers configuration. Note that it is required to use `.vmalert.spec.extraArgs["notifiers.blackhole"]: true` in order to start vmalert with a blackhole configuration. + +## 0.25.16 + +**Release date:** 2024-09-10 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Do not truncate servicemonitor, datasources, rules, dashboard, alertmanager & vmalert templates names +- Use service label for node-exporter instead of podLabel. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1458) +- Added common chart to a k8s-stack. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1456) +- Fixed value of custom alertmanager configSecret. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1461) + +## 0.25.15 + +**Release date:** 2024-09-05 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Drop empty endpoints param from scrape configuration +- Fixed proto when TLS is enabled. 
See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1449) + +## 0.25.14 + +**Release date:** 2024-09-04 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- fixed alertmanager templates + +## 0.25.13 + +**Release date:** 2024-09-04 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Use operator's own service monitor + +## 0.25.12 + +**Release date:** 2024-09-03 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Fixed dashboards rendering. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1414) +- Fixed service monitor label name. + +## 0.25.11 + +**Release date:** 2024-09-03 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Merged ingress templates +- Removed custom VMServiceScrape for operator +- Added ability to override default Prometheus-compatible datasources with all available parameters. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/860). +- Do not use `grafana.dashboards` and `grafana.dashboardProviders`. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1312). +- Migrated Node Exporter dashboard into chart +- Deprecated `grafana.sidecar.jsonData`, `grafana.provisionDefaultDatasource` in favour of `grafana.sidecar.datasources.default` slice of datasources.
+- Fail if no notifiers are set, do not set `notifiers` to null if empty + +## 0.25.10 + +**Release date:** 2024-08-31 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- fixed ingress extraPaths and externalVM urls rendering + +## 0.25.9 + +**Release date:** 2024-08-31 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- fixed vmalert ingress name typo +- Added ability to override default Prometheus-compatible datasources with all available parameters. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/860). +- Do not use `grafana.dashboards` and `grafana.dashboardProviders`. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1312). + +## 0.25.8 + +**Release date:** 2024-08-30 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- fixed external notifiers rendering, when alertmanager is disabled. 
See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1378) + +## 0.25.7 + +**Release date:** 2024-08-30 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- fixed extra rules template context + +## 0.25.6 + +**Release date:** 2024-08-29 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +**Update note**: Update `kubeProxy.spec` to `kubeProxy.vmScrape.spec` + +**Update note**: Update `kubeScheduler.spec` to `kubeScheduler.vmScrape.spec` + +**Update note**: Update `kubeEtcd.spec` to `kubeEtcd.vmScrape.spec` + +**Update note**: Update `coreDns.spec` to `coreDns.vmScrape.spec` + +**Update note**: Update `kubeDns.spec` to `kubeDns.vmScrape.spec` + +**Update note**: Update `kubeProxy.spec` to `kubeProxy.vmScrape.spec` + +**Update note**: Update `kubeControllerManager.spec` to `kubeControllerManager.vmScrape.spec` + +**Update note**: Update `kubeApiServer.spec` to `kubeApiServer.vmScrape.spec` + +**Update note**: Update `kubelet.spec` to `kubelet.vmScrape.spec` + +**Update note**: Update `kube-state-metrics.spec` to `kube-state-metrics.vmScrape.spec` + +**Update note**: Update `prometheus-node-exporter.spec` to `prometheus-node-exporter.vmScrape.spec` + +**Update note**: Update `grafana.spec` to `grafana.vmScrape.spec` + +- bump version of VM components to [v1.103.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.103.0) +- Added `dashboards.` bool flag to enable dashboard even if component it is for is not installed. 
+- Allow extra `vmalert.notifiers` without dropping default notifier if `alertmanager.enabled: true` +- Do not drop default notifier, when vmalert.additionalNotifierConfigs is set +- Replaced static url proto with a template, which selects proto depending on a present tls configuration +- Moved kubernetes components monitoring config from `spec` config to `vmScrape.spec` +- Merged servicemonitor templates + +## 0.25.5 + +**Release date:** 2024-08-26 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- TODO + +## 0.25.4 + +**Release date:** 2024-08-26 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- updates operator to [v0.47.2](https://github.com/VictoriaMetrics/operator/releases/tag/v0.47.2) +- kube-state-metrics - 5.16.4 -> 5.25.1 +- prometheus-node-exporter - 4.27.0 -> 4.29.0 +- grafana - 8.3.8 -> 8.4.7 +- added configurable `.Values.global.clusterLabel` to all alerting and recording rules `by` and `on` expressions + +## 0.25.3 + +**Release date:** 2024-08-23 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- updated operator to v0.47.1 release +- Build `app.kubernetes.io/instance` label consistently. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1282) + +## 0.25.2 + +**Release date:** 2024-08-21 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- fixed vmalert ingress name. 
See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1271) +- fixed alertmanager ingress host template rendering. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1270) + +## 0.25.1 + +**Release date:** 2024-08-21 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Added `.Values.global.license` configuration +- Fixed extraLabels rendering. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1248) +- Fixed vmalert url to alertmanager by including its path prefix +- Removed `networking.k8s.io/v1beta1/Ingress` and `extensions/v1beta1/Ingress` support +- Fixed kubedns servicemonitor template. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1255) + +## 0.25.0 + +**Release date:** 2024-08-16 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +**Update note**: it requires to update CRD dependency manually before upgrade + +**Update note**: requires Helm 3.14+ + +- Moved dashboards templating logic out of sync script to Helm template +- Allow to disable default grafana datasource +- Synchronize Etcd dashboards and rules with mixin provided by Etcd +- Add alerting rules for VictoriaMetrics operator. +- Updated alerting rules for VictoriaMetrics components. +- Fixed exact rule annotations propagation to other rules. 
+- Set minimal kubernetes version to 1.25 +- updates operator to v0.47.0 version + +## 0.24.5 + +**Release date:** 2024-08-01 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- bump version of VM components to [v1.102.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.102.1) + +## 0.24.4 + +**Release date:** 2024-08-01 + +![AppVersion: v1.102.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Update dependencies: grafana -> 8.3.6. +- Added `.Values.defaultRules.alerting` and `.Values.defaultRules.recording` to set up common properties for all alerting and recording rules + +## 0.24.3 + +**Release date:** 2024-07-23 + +![AppVersion: v1.102.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- bump version of VM components to [v1.102.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.102.0) + +## 0.24.2 + +**Release date:** 2024-07-15 + +![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- fix vmalertmanager configuration when using `.VMAlertmanagerSpec.ConfigRawYaml`. See [this pull request](https://github.com/VictoriaMetrics/helm-charts/pull/1136). 
+ +## 0.24.1 + +**Release date:** 2024-07-10 + +![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- updates operator to v0.46.4 + +## 0.24.0 + +**Release date:** 2024-07-10 + +![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- added ability to override alerting rules labels and annotations: +- globally - `.Values.defaultRules.rule.spec.labels` (before it was `.Values.defaultRules.additionalRuleLabels`) and `.Values.defaultRules.rule.spec.annotations` +- for all rules in a group - `.Values.defaultRules.groups..rules.spec.labels` and `.Values.defaultRules.groups..rules.spec.annotations` +- for each rule individually - `.Values.defaultRules.rules..spec.labels` and `.Values.defaultRules.rules..spec.annotations` +- changed `.Values.defaultRules.rules.` to `.Values.defaultRules.groups..create` +- changed `.Values.defaultRules.appNamespacesTarget` to `.Values.defaultRules.groups..targetNamespace` +- changed `.Values.defaultRules.params` to `.Values.defaultRules.group.spec.params` with ability to override it at `.Values.defaultRules.groups..spec.params` + +## 0.23.6 + +**Release date:** 2024-07-08 + +![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- added ability to override alerting rules labels and annotations: +- globally - `.Values.defaultRules.rule.spec.labels` (before it was `.Values.defaultRules.additionalRuleLabels`) and `.Values.defaultRules.rule.spec.annotations` +- for all rules in a group - `.Values.defaultRules.groups..rules.spec.labels` and 
`.Values.defaultRules.groups..rules.spec.annotations` +- for each rule individually - `.Values.defaultRules.rules..spec.labels` and `.Values.defaultRules.rules..spec.annotations` +- changed `.Values.defaultRules.rules.` to `.Values.defaultRules.groups..create` +- changed `.Values.defaultRules.appNamespacesTarget` to `.Values.defaultRules.groups..targetNamespace` +- changed `.Values.defaultRules.params` to `.Values.defaultRules.group.spec.params` with ability to override it at `.Values.defaultRules.groups..spec.params` + +## 0.23.5 + +**Release date:** 2024-07-04 + +![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Support configuring vmalert `-notifier.config` with `.Values.vmalert.additionalNotifierConfigs`. + +## 0.23.4 + +**Release date:** 2024-07-02 + +![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Add `extraObjects` to allow deploying additional resources with the chart release. + +## 0.23.3 + +**Release date:** 2024-06-26 + +![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Enable [conversion of Prometheus CRDs](https://docs.victoriametrics.com/operator/migration/#objects-conversion) by default. See [this](https://github.com/VictoriaMetrics/helm-charts/pull/1069) pull request for details. 
+- use bitnami/kubectl image for cleanup instead of deprecated gcr.io/google_containers/hyperkube + +## 0.23.2 + +**Release date:** 2024-06-14 + +![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Do not add `cluster` external label at VMAgent by default. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/774) for the details. + +## 0.23.1 + +**Release date:** 2024-06-10 + +![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- updates operator to v0.45.0 release +- sync latest vm alerts and dashboards. + +## 0.23.0 + +**Release date:** 2024-05-30 + +![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- sync latest etcd v3.5.x rules from [upstream](https://github.com/etcd-io/etcd/blob/release-3.5/contrib/mixin/mixin.libsonnet). +- add Prometheus operator CRDs as an optional dependency. See [this PR](https://github.com/VictoriaMetrics/helm-charts/pull/1022) and [related issue](https://github.com/VictoriaMetrics/helm-charts/issues/341) for the details. + +## 0.22.1 + +**Release date:** 2024-05-14 + +![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- fix missing serviceaccounts patch permission in VM operator, see [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1012) for details. 
+ +## 0.22.0 + +**Release date:** 2024-05-10 + +![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- bump version of VM operator to [0.44.0](https://github.com/VictoriaMetrics/operator/releases/tag/v0.44.0) + +## 0.21.3 + +**Release date:** 2024-04-26 + +![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- bump version of VM components to [v1.101.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.101.0) + +## 0.21.2 + +**Release date:** 2024-04-23 + +![AppVersion: v1.100.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.100.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- bump version of VM operator to [0.43.3](https://github.com/VictoriaMetrics/operator/releases/tag/v0.43.3) + +## 0.21.1 + +**Release date:** 2024-04-18 + +![AppVersion: v1.100.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.100.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +## 0.21.0 + +**Release date:** 2024-04-18 + +![AppVersion: v1.100.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.100.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- TODO + +- bump version of VM operator to [0.43.0](https://github.com/VictoriaMetrics/operator/releases/tag/v0.43.0) +- updates CRDs definitions. 
+ +## 0.20.1 + +**Release date:** 2024-04-16 + +![AppVersion: v1.100.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.100.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- upgraded dashboards and alerting rules, added values file for local (Minikube) setup +- bump version of VM components to [v1.100.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.100.1) + +## 0.20.0 + +**Release date:** 2024-04-02 + +![AppVersion: v1.99.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.99.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- bump version of VM operator to [0.42.3](https://github.com/VictoriaMetrics/operator/releases/tag/v0.42.3) + +## 0.19.4 + +**Release date:** 2024-03-05 + +![AppVersion: v1.99.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.99.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- bump version of VM components to [v1.99.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.99.0) + +## 0.19.3 + +**Release date:** 2024-03-05 + +![AppVersion: v1.98.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.98.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Commented default configuration for alertmanager. It simplifies configuration and makes it more explicit. See this [issue](https://github.com/VictoriaMetrics/helm-charts/issues/473) for details. +- Allow enabling/disabling default k8s rules when installing. See [#904](https://github.com/VictoriaMetrics/helm-charts/pull/904) by @passie. 
+ +## 0.19.2 + +**Release date:** 2024-02-26 + +![AppVersion: v1.98.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.98.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Fix templating of VMAgent `remoteWrite` in case both `VMSingle` and `VMCluster` are disabled. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/865) for details. + +## 0.19.1 + +**Release date:** 2024-02-21 + +![AppVersion: v1.98.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.98.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Update dependencies: victoria-metrics-operator -> 0.28.1, grafana -> 7.3.1. +- Update victoriametrics CRD resources yaml. + +## 0.19.0 + +**Release date:** 2024-02-09 + +![AppVersion: v1.97.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.97.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Do not store original labels in `vmagent`'s memory by default. This reduces memory usage of `vmagent` but makes `vmagent`'s debugging UI less informative. See [this docs](https://docs.victoriametrics.com/vmagent/#relabel-debug) for details on relabeling debug. +- Update dependencies: kube-state-metrics -> 5.16.0, prometheus-node-exporter -> 4.27.0, grafana -> 7.3.0. +- Update victoriametrics CRD resources yaml. +- Update builtin dashboards and rules. 
+ +## 0.18.12 + +**Release date:** 2024-02-01 + +![AppVersion: v1.97.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.97.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- bump version of VM components to [v1.97.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.97.1) +- Fix helm lint when ingress resources enabled - split templates of resources per kind. See [#820](https://github.com/VictoriaMetrics/helm-charts/pull/820) by @MemberIT. + +## 0.18.11 + +**Release date:** 2023-12-15 + +![AppVersion: v1.96.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.96.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Fix missing `.Values.defaultRules.rules.vmcluster` value. See [#801](https://github.com/VictoriaMetrics/helm-charts/pull/801) by @MemberIT. + +## 0.18.10 + +**Release date:** 2023-12-12 + +![AppVersion: v1.96.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.96.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- bump version of VM components to [v1.96.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.96.0) +- Add optional allowCrossNamespaceImport to GrafanaDashboard(s) (#788) + +## 0.18.9 + +**Release date:** 2023-12-08 + +![AppVersion: v1.95.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.95.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Properly use variable from values file for Grafana datasource type. (#769) +- Update dashboards from upstream sources. 
(#780) + +## 0.18.8 + +**Release date:** 2023-11-16 + +![AppVersion: v1.95.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.95.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- bump version of VM components to [v1.95.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.95.1) + +## 0.18.7 + +**Release date:** 2023-11-15 + +![AppVersion: v1.95.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.95.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- bump version of VM components to [v1.95.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.95.0) +- Support adding extra group parameters for default vmrules. (#752) + +## 0.18.6 + +**Release date:** 2023-11-01 + +![AppVersion: v1.94.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.94.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Fix kube scheduler default scraping port from 10251 to 10259, Kubernetes changed it since 1.23.0. See [this pr](https://github.com/VictoriaMetrics/helm-charts/pull/736) for details. 
+- Bump version of operator chart to [0.27.4](https://github.com/VictoriaMetrics/helm-charts/releases/tag/victoria-metrics-operator-0.27.4) + +## 0.18.5 + +**Release date:** 2023-10-08 + +![AppVersion: v1.94.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.94.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Update operator chart to [v0.27.3](https://github.com/VictoriaMetrics/helm-charts/releases/tag/victoria-metrics-operator-0.27.3) for fixing [#708](https://github.com/VictoriaMetrics/helm-charts/issues/708) + +## 0.18.4 + +**Release date:** 2023-10-04 + +![AppVersion: v1.94.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.94.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Update dependencies: [victoria-metrics-operator -> 0.27.2](https://github.com/VictoriaMetrics/helm-charts/releases/tag/victoria-metrics-operator-0.27.2), prometheus-node-exporter -> 4.23.2, grafana -> 6.59.5. 
+ +## 0.18.3 + +**Release date:** 2023-10-04 + +![AppVersion: v1.94.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.94.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- bump version of VM components to [v1.94.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.94.0) + +## 0.18.2 + +**Release date:** 2023-09-28 + +![AppVersion: v1.93.5](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.5&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Fix behavior of `vmalert.remoteWriteVMAgent` - remoteWrite.url for VMAlert is correctly generated considering endpoint, name, port and http.pathPrefix of VMAgent + +## 0.18.1 + +**Release date:** 2023-09-21 + +![AppVersion: v1.93.5](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.5&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Bump version of VM components to [v1.93.5](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.93.5) + +## 0.18.0 + +**Release date:** 2023-09-12 + +![AppVersion: v1.93.4](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.4&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Bump version of `grafana` helm-chart to `6.59.*` +- Bump version of `prometheus-node-exporter` helm-chart to `4.23.*` +- Bump version of `kube-state-metrics` helm-chart to `0.59.*` +- Update alerting rules +- Update grafana dashboards +- Add `make` commands `sync-rules` and `sync-dashboards` +- Add support of VictoriaMetrics datasource + +## 0.17.8 + +**Release date:** 2023-09-11 + +![AppVersion: v1.93.4](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.4&color=success&logo=) +![Helm: 
v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Bump version of VM components to [v1.93.4](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.93.4) +- Bump version of operator chart to [0.27.0](https://github.com/VictoriaMetrics/helm-charts/releases/tag/victoria-metrics-operator-0.27.0) + +## 0.17.7 + +**Release date:** 2023-09-07 + +![AppVersion: v1.93.3](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.3&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Bump version of operator helm-chart to `0.26.2` + +## 0.17.6 + +**Release date:** 2023-09-04 + +![AppVersion: v1.93.3](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.3&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Move `cleanupCRD` option to victoria-metrics-operator chart (#593) +- Disable `honorTimestamps` for cadvisor scrape job by default (#617) +- For vmalert all replicas of alertmanager are added to notifiers (only if alertmanager is enabled) (#619) +- Add `grafanaOperatorDashboardsFormat` option (#615) +- Fix query expression for memory calculation in `k8s-views-global` dashboard (#636) +- Bump version of Victoria Metrics components to `v1.93.3` +- Bump version of operator helm-chart to `0.26.0` + +## 0.17.5 + +**Release date:** 2023-08-23 + +![AppVersion: v1.93.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Update VictoriaMetrics components from v1.93.0 to v1.93.1 + +## 0.17.4 + +**Release date:** 2023-08-12 + +![AppVersion: v1.93.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + 
+- Update VictoriaMetrics components from v1.92.1 to v1.93.0 +- delete an obsolete parameter remaining by mistake (see ) (#602) + +## 0.17.3 + +**Release date:** 2023-07-28 + +![AppVersion: v1.92.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.92.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Update VictoriaMetrics components from v1.92.0 to v1.92.1 (#599) + +## 0.17.2 + +**Release date:** 2023-07-27 + +![AppVersion: v1.92.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.92.0&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Update VictoriaMetrics components from v1.91.3 to v1.92.0 diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.lock b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.lock new file mode 100644 index 00000000..c059a99c --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.lock @@ -0,0 +1,24 @@ +dependencies: +- name: victoria-metrics-common + repository: https://victoriametrics.github.io/helm-charts + version: 0.0.11 +- name: victoria-metrics-operator + repository: https://victoriametrics.github.io/helm-charts + version: 0.34.8 +- name: kube-state-metrics + repository: https://prometheus-community.github.io/helm-charts + version: 5.25.1 +- name: prometheus-node-exporter + repository: https://prometheus-community.github.io/helm-charts + version: 4.39.0 +- name: grafana + repository: https://grafana.github.io/helm-charts + version: 8.4.9 +- name: crds + repository: "" + version: 0.0.0 +- name: prometheus-operator-crds + repository: https://prometheus-community.github.io/helm-charts + version: 11.0.0 +digest: sha256:11b119ebabf4ff0ea2951e7c72f51d0223dc3f50fb061a43b01fe7856491b836 +generated: "2024-09-12T11:50:51.935071545Z" diff --git 
a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.yaml new file mode 100644 index 00000000..90e1012d --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.yaml @@ -0,0 +1,66 @@ +annotations: + artifacthub.io/category: monitoring-logging + artifacthub.io/changes: | + - Added VMAuth to k8s stack. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/829) + - Fixed ETCD dashboard + - Use path prefix from args as a default path prefix for ingress. Related [issue](https://github.com/VictoriaMetrics/helm-charts/issues/1260) + - 'Allow using vmalert without notifiers configuration. Note that it is required to use `.vmalert.spec.extraArgs["notifiers.blackhole"]: true` in order to start vmalert with a blackhole configuration.' + artifacthub.io/license: Apache-2.0 + artifacthub.io/links: | + - name: Sources + url: https://docs.victoriametrics.com/vmgateway + - name: Charts repo + url: https://victoriametrics.github.io/helm-charts/ + - name: Docs + url: https://docs.victoriametrics.com + artifacthub.io/operator: "true" +apiVersion: v2 +appVersion: v1.102.1 +dependencies: +- name: victoria-metrics-common + repository: https://victoriametrics.github.io/helm-charts + version: 0.0.* +- condition: victoria-metrics-operator.enabled + name: victoria-metrics-operator + repository: https://victoriametrics.github.io/helm-charts + version: 0.34.* +- condition: kube-state-metrics.enabled + name: kube-state-metrics + repository: https://prometheus-community.github.io/helm-charts + version: 5.25.* +- condition: prometheus-node-exporter.enabled + name: prometheus-node-exporter + repository: https://prometheus-community.github.io/helm-charts + version: 4.39.* +- condition: grafana.enabled + name: grafana + repository: https://grafana.github.io/helm-charts + version: 8.4.* +- condition: crds.enabled + name: crds + repository: "" + version: 0.0.0 +- 
condition: prometheus-operator-crds.enabled + name: prometheus-operator-crds + repository: https://prometheus-community.github.io/helm-charts + version: 11.0.* +description: Kubernetes monitoring on VictoriaMetrics stack. Includes VictoriaMetrics + Operator, Grafana dashboards, ServiceScrapes and VMRules +home: https://github.com/VictoriaMetrics/helm-charts +icon: https://avatars.githubusercontent.com/u/43720803?s=200&v=4 +keywords: +- victoriametrics +- operator +- monitoring +- kubernetes +- observability +- tsdb +- metrics +- metricsql +- timeseries +kubeVersion: '>=1.25.0-0' +name: victoria-metrics-k8s-stack +sources: +- https://github.com/VictoriaMetrics/helm-charts +type: application +version: 0.25.17 diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md new file mode 100644 index 00000000..d95202e0 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md @@ -0,0 +1,2576 @@ +![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![Version: 0.25.17](https://img.shields.io/badge/Version-0.25.17-informational?style=flat-square) +[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/victoriametrics)](https://artifacthub.io/packages/helm/victoriametrics/victoria-metrics-k8s-stack) + +Kubernetes monitoring on VictoriaMetrics stack. Includes VictoriaMetrics Operator, Grafana dashboards, ServiceScrapes and VMRules + +* [Overview](#Overview) +* [Configuration](#Configuration) +* [Prerequisites](#Prerequisites) +* [Dependencies](#Dependencies) +* [Quick Start](#How-to-install) +* [Uninstall](#How-to-uninstall) +* [Version Upgrade](#Upgrade-guide) +* [Troubleshooting](#Troubleshooting) +* [Values](#Parameters) + +## Overview +This chart is an All-in-one solution to start monitoring kubernetes cluster. 
+It installs multiple dependency charts like [grafana](https://github.com/grafana/helm-charts/tree/main/charts/grafana), [node-exporter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter), [kube-state-metrics](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics) and [victoria-metrics-operator](https://github.com/VictoriaMetrics/helm-charts/tree/master/charts/victoria-metrics-operator). +Also it installs Custom Resources like [VMSingle](https://docs.victoriametrics.com/operator/quick-start#vmsingle), [VMCluster](https://docs.victoriametrics.com/operator/quick-start#vmcluster), [VMAgent](https://docs.victoriametrics.com/operator/quick-start#vmagent), [VMAlert](https://docs.victoriametrics.com/operator/quick-start#vmalert). + +By default, the operator [converts all existing prometheus-operator API objects](https://docs.victoriametrics.com/operator/quick-start#migration-from-prometheus-operator-objects) into corresponding VictoriaMetrics Operator objects. + +To enable metrics collection for kubernetes this chart installs multiple scrape configurations for kubernetes components like kubelet and kube-proxy, etc. Metrics collection is done by [VMAgent](https://docs.victoriametrics.com/operator/quick-start#vmagent). So if you want to ship metrics to an external VictoriaMetrics database you can disable VMSingle installation by setting `vmsingle.enabled` to `false` and setting `vmagent.vmagentSpec.remoteWrite.url` to your external VictoriaMetrics database. + +This chart also installs a bunch of dashboards and recording rules from the [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus) project. + +![Overview](img/k8s-stack-overview.png) + +## Configuration + +Configuration of this chart is done through helm values. + +### Dependencies + +Dependencies can be enabled or disabled by setting `enabled` to `true` or `false` in `values.yaml` file.
+ +**!Important:** for dependency charts anything that you can find in values.yaml of dependency chart can be configured in this chart under key for that dependency. For example if you want to configure `grafana` you can find all possible configuration options in [values.yaml](https://github.com/grafana/helm-charts/blob/main/charts/grafana/values.yaml) and you should set them in values for this chart under grafana: key. For example if you want to configure `grafana.persistence.enabled` you should set it in values.yaml like this: +```yaml +################################################# +### dependencies ##### +################################################# +# Grafana dependency chart configuration. For possible values refer to https://github.com/grafana/helm-charts/tree/main/charts/grafana#configuration +grafana: + enabled: true + persistence: + type: pvc + enabled: false +``` + +### VictoriaMetrics components + +This chart installs multiple VictoriaMetrics components using Custom Resources that are managed by [victoria-metrics-operator](https://docs.victoriametrics.com/operator/design) +Each resource can be configured using `spec` of that resource from API docs of [victoria-metrics-operator](https://docs.victoriametrics.com/operator/api). For example if you want to configure `VMAgent` you can find all possible configuration options in [API docs](https://docs.victoriametrics.com/operator/api#vmagent) and you should set them in values for this chart under `vmagent.spec` key. 
For example if you want to configure `remoteWrite.url` you should set it in values.yaml like this: +```yaml +vmagent: + spec: + remoteWrite: + - url: "https://insert.vmcluster.domain.com/insert/0/prometheus/api/v1/write" +``` + +### ArgoCD issues + +#### Operator self signed certificates +When deploying K8s stack using ArgoCD without Cert Manager (`.Values.victoria-metrics-operator.admissionWebhooks.certManager.enabled: false`) +it will rerender operator's webhook certificates on each sync since Helm `lookup` function is not respected by ArgoCD. +To prevent this please update your K8s stack Application `spec.syncPolicy` and `spec.ignoreDifferences` with the following: + +```yaml +apiVersion: argoproj.io/v1alpha1 +kind: Application +... +spec: + ... + syncPolicy: + syncOptions: + # https://argo-cd.readthedocs.io/en/stable/user-guide/sync-options/#respect-ignore-difference-configs + # argocd must also ignore difference during apply stage + # otherwise it'll silently override changes and cause a problem + - RespectIgnoreDifferences=true + ignoreDifferences: + - group: "" + kind: Secret + name: <fullname>-validation + namespace: kube-system + jsonPointers: + - /data + - group: admissionregistration.k8s.io + kind: ValidatingWebhookConfiguration + name: <fullname>-admission + jqPathExpressions: + - '.webhooks[]?.clientConfig.caBundle' +``` +where `<fullname>` is output of `{{ include "vm-operator.fullname" }}` for your setup + +#### `metadata.annotations: Too long: must have at most 262144 bytes` on dashboards + +If one of the dashboard ConfigMaps is failing with error `Too long: must have at most 262144 bytes`, please make sure you've added `argocd.argoproj.io/sync-options: ServerSideApply=true` annotation to your dashboards: + +```yaml +grafana: + sidecar: + dashboards: + additionalDashboardAnnotations: + argocd.argoproj.io/sync-options: ServerSideApply=true +``` + +argocd.argoproj.io/sync-options: ServerSideApply=true + +### Rules and dashboards + +This chart by default installs multiple dashboards and 
recording rules from [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus) +you can disable dashboards with `defaultDashboardsEnabled: false` and `experimentalDashboardsEnabled: false` +and rules can be configured under `defaultRules` + +### Prometheus scrape configs +This chart installs multiple scrape configurations for kubernetes monitoring. They are configured under `#ServiceMonitors` section in `values.yaml` file. For example if you want to configure scrape config for `kubelet` you should set it in values.yaml like this: +```yaml +kubelet: + enabled: true + # spec for VMNodeScrape crd + # https://docs.victoriametrics.com/operator/api#vmnodescrapespec + spec: + interval: "30s" +``` + +### Using externally managed Grafana + +If you want to use an externally managed Grafana instance but still want to use the dashboards provided by this chart you can set + `grafana.enabled` to `false` and set `defaultDashboardsEnabled` to `true`. This will install the dashboards + but will not install Grafana. + +For example: +```yaml +defaultDashboardsEnabled: true + +grafana: + enabled: false +``` + +This will create ConfigMaps with dashboards to be imported into Grafana. + +If additional configuration for labels or annotations is needed in order to import dashboard to an existing Grafana you can +set `.grafana.sidecar.dashboards.additionalDashboardLabels` or `.grafana.sidecar.dashboards.additionalDashboardAnnotations` in `values.yaml`: + +For example: +```yaml +defaultDashboardsEnabled: true + +grafana: + enabled: false + sidecar: + dashboards: + additionalDashboardLabels: + key: value + additionalDashboardAnnotations: + key: value +``` + +## Prerequisites + +* Install the follow packages: ``git``, ``kubectl``, ``helm``, ``helm-docs``. See this [tutorial](../../REQUIREMENTS.md). 
+ +* Add dependency chart repositories + +```console +helm repo add grafana https://grafana.github.io/helm-charts +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +``` + +* PV support on underlying infrastructure. + +## How to install + +Access a Kubernetes cluster. + +### Setup chart repository (can be omitted for OCI repositories) + +Add a chart helm repository with follow commands: + +```console +helm repo add vm https://victoriametrics.github.io/helm-charts/ + +helm repo update +``` +List versions of `vm/victoria-metrics-k8s-stack` chart available to installation: + +```console +helm search repo vm/victoria-metrics-k8s-stack -l +``` + +### Install `victoria-metrics-k8s-stack` chart + +Export default values of `victoria-metrics-k8s-stack` chart to file `values.yaml`: + + - For HTTPS repository + + ```console + helm show values vm/victoria-metrics-k8s-stack > values.yaml + ``` + - For OCI repository + + ```console + helm show values oci://ghcr.io/victoriametrics/helm-charts/victoria-metrics-k8s-stack > values.yaml + ``` + +Change the values according to the need of the environment in ``values.yaml`` file. 
+ +Test the installation with command: + + - For HTTPS repository + + ```console + helm install vmks vm/victoria-metrics-k8s-stack -f values.yaml -n NAMESPACE --debug --dry-run + ``` + + - For OCI repository + + ```console + helm install vmks oci://ghcr.io/victoriametrics/helm-charts/victoria-metrics-k8s-stack -f values.yaml -n NAMESPACE --debug --dry-run + ``` + +Install chart with command: + + - For HTTPS repository + + ```console + helm install vmks vm/victoria-metrics-k8s-stack -f values.yaml -n NAMESPACE + ``` + + - For OCI repository + + ```console + helm install vmks oci://ghcr.io/victoriametrics/helm-charts/victoria-metrics-k8s-stack -f values.yaml -n NAMESPACE + ``` + +Get the pods lists by running this commands: + +```console +kubectl get pods -A | grep 'vmks' +``` + +Get the application by running this command: + +```console +helm list -f vmks -n NAMESPACE +``` + +See the history of versions of `vmks` application with command. + +```console +helm history vmks -n NAMESPACE +``` + +### Install locally (Minikube) + +To run VictoriaMetrics stack locally it's possible to use [Minikube](https://github.com/kubernetes/minikube). To avoid dashboards and alert rules issues please follow the steps below: + +Run Minikube cluster + +``` +minikube start --container-runtime=containerd --extra-config=scheduler.bind-address=0.0.0.0 --extra-config=controller-manager.bind-address=0.0.0.0 +``` + +Install helm chart + +``` +helm install [RELEASE_NAME] vm/victoria-metrics-k8s-stack -f values.yaml -f values.minikube.yaml -n NAMESPACE --debug --dry-run +``` + +## How to uninstall + +Remove application with command. + +```console +helm uninstall vmks -n NAMESPACE +``` + +CRDs created by this chart are not removed by default and should be manually cleaned up: + +```console +kubectl get crd | grep victoriametrics.com | awk '{print $1 }' | xargs -i kubectl delete crd {} +``` + +## Troubleshooting + +- If you cannot install helm chart with error `configmap already exist`. 
It could happen because of name collisions, if you set too long a release name. + Kubernetes by default allows only 63 symbols in resource names and all resource names are trimmed by helm to 63 symbols. + To mitigate it, use a shorter name for helm chart release name, like: +```bash +# stack - is short enough +helm upgrade -i stack vm/victoria-metrics-k8s-stack +``` + Or use override for helm chart release name: +```bash +helm upgrade -i some-very-long-name vm/victoria-metrics-k8s-stack --set fullnameOverride=stack +``` + +## Upgrade guide + +Usually, helm upgrade doesn't require manual actions. Just execute the command: + +```console +$ helm upgrade [RELEASE_NAME] vm/victoria-metrics-k8s-stack +``` + +But a release with a CRD update can only be patched manually with kubectl. +Since helm does not perform a CRD update, we recommend that you always perform this when updating the helm-charts version: + +```console +# 1. check the changes in CRD +$ helm show crds vm/victoria-metrics-k8s-stack --version [YOUR_CHART_VERSION] | kubectl diff -f - + +# 2. apply the changes (update CRD) +$ helm show crds vm/victoria-metrics-k8s-stack --version [YOUR_CHART_VERSION] | kubectl apply -f - --server-side +``` + +All other upgrades requiring manual actions are listed below: + +### Upgrade to 0.13.0 + +- node-exporter starting from version 4.0.0 is using the Kubernetes recommended labels. Therefore you have to delete the daemonset before you upgrade. + +```bash +kubectl delete daemonset -l app=prometheus-node-exporter +``` +- scrape configuration for kubernetes components was moved from `vmServiceScrape.spec` section to `spec` section. If you previously modified scrape configuration you need to update your `values.yaml` + +- `grafana.defaultDashboardsEnabled` was renamed to `defaultDashboardsEnabled` (moved to top level). 
You may need to update it in your `values.yaml` + +### Upgrade to 0.6.0 + + All `CRD` must be updated to the latest version with the command: + +```bash +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/helm-charts/master/charts/victoria-metrics-k8s-stack/crds/crd.yaml + +``` + +### Upgrade to 0.4.0 + + All `CRD` must be updated to the `v1` version with the command: + +```bash +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/helm-charts/master/charts/victoria-metrics-k8s-stack/crds/crd.yaml + +``` + +### Upgrade from 0.2.8 to 0.2.9 + + Update `VMAgent` crd + +command: +```bash +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.16.0/config/crd/bases/operator.victoriametrics.com_vmagents.yaml +``` + + ### Upgrade from 0.2.5 to 0.2.6 + +New CRDs added to the operator - `VMUser` and `VMAuth`; new fields added to existing CRDs. +Manual commands: +```bash +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmusers.yaml +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmauths.yaml +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmalerts.yaml +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmagents.yaml +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmsingles.yaml +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmclusters.yaml +``` + +## Documentation of Helm Chart + +Install ``helm-docs`` following the instructions on this [tutorial](../../REQUIREMENTS.md). + +Generate docs with ``helm-docs`` command. 
+ +```bash +cd charts/victoria-metrics-k8s-stack + +helm-docs +``` + +The markdown generation is entirely go template driven. The tool parses metadata from charts and generates a number of sub-templates that can be referenced in a template file (by default ``README.md.gotmpl``). If no template file is provided, the tool has a default internal template that will generate a reasonably formatted README. + +## Parameters + +The following tables lists the configurable parameters of the chart and their default values. + +Change the values according to the need of the environment in ``victoria-metrics-k8s-stack/values.yaml`` file. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyTypeDefaultDescription
additionalVictoriaMetricsMapstring
+null
+
+
alertmanager.annotationsobject
+{}
+
+
alertmanager.configobject
+receivers:
+    - name: blackhole
+route:
+    receiver: blackhole
+templates:
+    - /etc/vm/configs/**/*.tmpl
+
+

alertmanager configuration

+
alertmanager.enabledbool
+true
+
+
alertmanager.ingressobject
+annotations: {}
+enabled: false
+extraPaths: []
+hosts:
+    - alertmanager.domain.com
+labels: {}
+path: '{{ .Values.alertmanager.spec.routePrefix | default "/" }}'
+pathType: Prefix
+tls: []
+
+

alertmanager ingress configuration

+
alertmanager.monzoTemplate.enabledbool
+true
+
+
alertmanager.specobject
+configSecret: ""
+externalURL: ""
+image:
+    tag: v0.25.0
+port: "9093"
+routePrefix: /
+selectAllByDefault: true
+
+

Full spec for the VMAlertmanager CRD. Allowed values are described here

+
alertmanager.spec.configSecretstring
+""
+
+

If this is defined, it will be used for the alertmanager configuration and the config parameter will be ignored

+
alertmanager.templateFilesobject
+{}
+
+

extra alert templates

+
argocdReleaseOverridestring
+""
+
+

For correct operation, set the value ‘argocdReleaseOverride=$ARGOCD_APP_NAME’

+
coreDns.enabledbool
+true
+
+
coreDns.service.enabledbool
+true
+
+
coreDns.service.portint
+9153
+
+
coreDns.service.selector.k8s-appstring
+kube-dns
+
+
coreDns.service.targetPortint
+9153
+
+
coreDns.vmScrapeobject
+spec:
+    endpoints:
+        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+          port: http-metrics
+    jobLabel: jobLabel
+    namespaceSelector:
+        matchNames:
+            - kube-system
+
+

spec for VMServiceScrape crd https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec

+
crds.enabledbool
+true
+
+
dashboardsobject
+node-exporter-full: true
+operator: false
+vmalert: false
+
+

Enable dashboards even if their dependencies are not installed

+
dashboards.node-exporter-fullbool
+true
+
+

In ArgoCD, with client-side apply this dashboard reaches the annotations size limit and causes k8s issues unless server-side apply is used. See this issue

+
defaultDashboardsEnabledbool
+true
+
+

Create default dashboards

+
defaultRulesobject
+alerting:
+    spec:
+        annotations: {}
+        labels: {}
+annotations: {}
+create: true
+group:
+    spec:
+        params: {}
+groups:
+    alertmanager:
+        create: true
+        rules: {}
+    etcd:
+        create: true
+        rules: {}
+    general:
+        create: true
+        rules: {}
+    k8sContainerCpuUsageSecondsTotal:
+        create: true
+        rules: {}
+    k8sContainerMemoryCache:
+        create: true
+        rules: {}
+    k8sContainerMemoryRss:
+        create: true
+        rules: {}
+    k8sContainerMemorySwap:
+        create: true
+        rules: {}
+    k8sContainerMemoryWorkingSetBytes:
+        create: true
+        rules: {}
+    k8sContainerResource:
+        create: true
+        rules: {}
+    k8sPodOwner:
+        create: true
+        rules: {}
+    kubeApiserver:
+        create: true
+        rules: {}
+    kubeApiserverAvailability:
+        create: true
+        rules: {}
+    kubeApiserverBurnrate:
+        create: true
+        rules: {}
+    kubeApiserverHistogram:
+        create: true
+        rules: {}
+    kubeApiserverSlos:
+        create: true
+        rules: {}
+    kubePrometheusGeneral:
+        create: true
+        rules: {}
+    kubePrometheusNodeRecording:
+        create: true
+        rules: {}
+    kubeScheduler:
+        create: true
+        rules: {}
+    kubeStateMetrics:
+        create: true
+        rules: {}
+    kubelet:
+        create: true
+        rules: {}
+    kubernetesApps:
+        create: true
+        rules: {}
+        targetNamespace: .*
+    kubernetesResources:
+        create: true
+        rules: {}
+    kubernetesStorage:
+        create: true
+        rules: {}
+        targetNamespace: .*
+    kubernetesSystem:
+        create: true
+        rules: {}
+    kubernetesSystemApiserver:
+        create: true
+        rules: {}
+    kubernetesSystemControllerManager:
+        create: true
+        rules: {}
+    kubernetesSystemKubelet:
+        create: true
+        rules: {}
+    kubernetesSystemScheduler:
+        create: true
+        rules: {}
+    node:
+        create: true
+        rules: {}
+    nodeNetwork:
+        create: true
+        rules: {}
+    vmHealth:
+        create: true
+        rules: {}
+    vmagent:
+        create: true
+        rules: {}
+    vmcluster:
+        create: true
+        rules: {}
+    vmoperator:
+        create: true
+        rules: {}
+    vmsingle:
+        create: true
+        rules: {}
+labels: {}
+recording:
+    spec:
+        annotations: {}
+        labels: {}
+rule:
+    spec:
+        annotations: {}
+        labels: {}
+rules: {}
+runbookUrl: https://runbooks.prometheus-operator.dev/runbooks
+
+

Create default rules for monitoring the cluster

+
defaultRules.alertingobject
+spec:
+    annotations: {}
+    labels: {}
+
+

Common properties for VMRules alerts

+
defaultRules.alerting.spec.annotationsobject
+{}
+
+

Additional annotations for VMRule alerts

+
defaultRules.alerting.spec.labelsobject
+{}
+
+

Additional labels for VMRule alerts

+
defaultRules.annotationsobject
+{}
+
+

Annotations for default rules

+
defaultRules.groupobject
+spec:
+    params: {}
+
+

Common properties for VMRule groups

+
defaultRules.group.spec.paramsobject
+{}
+
+

Optional HTTP URL parameters added to each rule request

+
defaultRules.groups.etcd.rulesobject
+{}
+
+

Common properties for all rules in a group

+
defaultRules.labelsobject
+{}
+
+

Labels for default rules

+
defaultRules.recordingobject
+spec:
+    annotations: {}
+    labels: {}
+
+

Common properties for VMRules recording rules

+
defaultRules.recording.spec.annotationsobject
+{}
+
+

Additional annotations for VMRule recording rules

+
defaultRules.recording.spec.labelsobject
+{}
+
+

Additional labels for VMRule recording rules

+
defaultRules.ruleobject
+spec:
+    annotations: {}
+    labels: {}
+
+

Common properties for all VMRules

+
defaultRules.rule.spec.annotationsobject
+{}
+
+

Additional annotations for all VMRules

+
defaultRules.rule.spec.labelsobject
+{}
+
+

Additional labels for all VMRules

+
defaultRules.rulesobject
+{}
+
+

Per rule properties

+
defaultRules.runbookUrlstring
+https://runbooks.prometheus-operator.dev/runbooks
+
+

Runbook url prefix for default rules

+
experimentalDashboardsEnabledbool
+true
+
+

Create experimental dashboards

+
externalVM.read.urlstring
+""
+
+
externalVM.write.urlstring
+""
+
+
extraObjectslist
+[]
+
+

Add extra objects dynamically to this chart

+
fullnameOverridestring
+""
+
+
global.clusterLabelstring
+cluster
+
+
global.license.keystring
+""
+
+
global.license.keyRefobject
+{}
+
+
grafana.additionalDataSourceslist
+[]
+
+
grafana.defaultDashboardsTimezonestring
+utc
+
+
grafana.defaultDatasourceTypestring
+prometheus
+
+
grafana.enabledbool
+true
+
+
grafana.forceDeployDatasourcebool
+false
+
+
grafana.ingress.annotationsobject
+{}
+
+
grafana.ingress.enabledbool
+false
+
+
grafana.ingress.extraPathslist
+[]
+
+
grafana.ingress.hosts[0]string
+grafana.domain.com
+
+
grafana.ingress.labelsobject
+{}
+
+
grafana.ingress.pathstring
+/
+
+
grafana.ingress.pathTypestring
+Prefix
+
+
grafana.ingress.tlslist
+[]
+
+
grafana.sidecar.dashboards.additionalDashboardAnnotationsobject
+{}
+
+
grafana.sidecar.dashboards.additionalDashboardLabelsobject
+{}
+
+
grafana.sidecar.dashboards.defaultFolderNamestring
+default
+
+
grafana.sidecar.dashboards.enabledbool
+true
+
+
grafana.sidecar.dashboards.folderstring
+/var/lib/grafana/dashboards
+
+
grafana.sidecar.dashboards.multiclusterbool
+false
+
+
grafana.sidecar.dashboards.provider.namestring
+default
+
+
grafana.sidecar.dashboards.provider.orgidint
+1
+
+
grafana.sidecar.datasources.createVMReplicasDatasourcesbool
+false
+
+
grafana.sidecar.datasources.defaultlist
+- isDefault: true
+  name: VictoriaMetrics
+- isDefault: false
+  name: VictoriaMetrics (DS)
+  type: victoriametrics-datasource
+
+

list of default prometheus compatible datasource configurations. VM url will be added to each of them in templates and type will be set to defaultDatasourceType if not defined

+
grafana.sidecar.datasources.enabledbool
+true
+
+
grafana.sidecar.datasources.initDatasourcesbool
+true
+
+
grafana.vmScrapeobject
+enabled: true
+spec:
+    endpoints:
+        - port: '{{ .Values.grafana.service.portName }}'
+    selector:
+        matchLabels:
+            app.kubernetes.io/name: '{{ include "grafana.name" .Subcharts.grafana }}'
+
+

grafana VM scrape config

+
grafana.vmScrape.specobject
+endpoints:
+    - port: '{{ .Values.grafana.service.portName }}'
+selector:
+    matchLabels:
+        app.kubernetes.io/name: '{{ include "grafana.name" .Subcharts.grafana }}'
+
+

Scrape configuration for Grafana

+
grafanaOperatorDashboardsFormatobject
+allowCrossNamespaceImport: false
+enabled: false
+instanceSelector:
+    matchLabels:
+        dashboards: grafana
+
+

Create dashboards as CRDs (requires grafana-operator to be installed)

+
kube-state-metrics.enabledbool
+true
+
+
kube-state-metrics.vmScrapeobject
+enabled: true
+spec:
+    endpoints:
+        - honorLabels: true
+          metricRelabelConfigs:
+            - action: labeldrop
+              regex: (uid|container_id|image_id)
+          port: http
+    jobLabel: app.kubernetes.io/name
+    selector:
+        matchLabels:
+            app.kubernetes.io/instance: '{{ include "vm.release" . }}'
+            app.kubernetes.io/name: '{{ include "kube-state-metrics.name" (index .Subcharts "kube-state-metrics") }}'
+
+

Scrape configuration for Kube State Metrics

+
kubeApiServer.enabledbool
+true
+
+
kubeApiServer.vmScrapeobject
+spec:
+    endpoints:
+        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+          port: https
+          scheme: https
+          tlsConfig:
+            caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+            serverName: kubernetes
+    jobLabel: component
+    namespaceSelector:
+        matchNames:
+            - default
+    selector:
+        matchLabels:
+            component: apiserver
+            provider: kubernetes
+
+

spec for VMServiceScrape crd https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec

+
kubeControllerManager.enabledbool
+true
+
+
kubeControllerManager.endpointslist
+[]
+
+
kubeControllerManager.service.enabledbool
+true
+
+
kubeControllerManager.service.portint
+10257
+
+
kubeControllerManager.service.selector.componentstring
+kube-controller-manager
+
+
kubeControllerManager.service.targetPortint
+10257
+
+
kubeControllerManager.vmScrapeobject
+spec:
+    endpoints:
+        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+          port: http-metrics
+          scheme: https
+          tlsConfig:
+            caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+            serverName: kubernetes
+    jobLabel: jobLabel
+    namespaceSelector:
+        matchNames:
+            - kube-system
+
+

spec for VMServiceScrape crd https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec

+
kubeDns.enabledbool
+false
+
+
kubeDns.service.enabledbool
+false
+
+
kubeDns.service.ports.dnsmasq.portint
+10054
+
+
kubeDns.service.ports.dnsmasq.targetPortint
+10054
+
+
kubeDns.service.ports.skydns.portint
+10055
+
+
kubeDns.service.ports.skydns.targetPortint
+10055
+
+
kubeDns.service.selector.k8s-appstring
+kube-dns
+
+
kubeDns.vmScrapeobject
+spec:
+    endpoints:
+        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+          port: http-metrics-dnsmasq
+        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+          port: http-metrics-skydns
+    jobLabel: jobLabel
+    namespaceSelector:
+        matchNames:
+            - kube-system
+
+

spec for VMServiceScrape crd https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec

+
kubeEtcd.enabledbool
+true
+
+
kubeEtcd.endpointslist
+[]
+
+
kubeEtcd.service.enabledbool
+true
+
+
kubeEtcd.service.portint
+2379
+
+
kubeEtcd.service.selector.componentstring
+etcd
+
+
kubeEtcd.service.targetPortint
+2379
+
+
kubeEtcd.vmScrapeobject
+spec:
+    endpoints:
+        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+          port: http-metrics
+          scheme: https
+          tlsConfig:
+            caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+    jobLabel: jobLabel
+    namespaceSelector:
+        matchNames:
+            - kube-system
+
+

spec for VMServiceScrape crd https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec

+
kubeProxy.enabledbool
+false
+
+
kubeProxy.endpointslist
+[]
+
+
kubeProxy.service.enabledbool
+true
+
+
kubeProxy.service.portint
+10249
+
+
kubeProxy.service.selector.k8s-appstring
+kube-proxy
+
+
kubeProxy.service.targetPortint
+10249
+
+
kubeProxy.vmScrapeobject
+spec:
+    endpoints:
+        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+          port: http-metrics
+          scheme: https
+          tlsConfig:
+            caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+    jobLabel: jobLabel
+    namespaceSelector:
+        matchNames:
+            - kube-system
+
+

spec for VMServiceScrape crd https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec

+
kubeScheduler.enabledbool
+true
+
+
kubeScheduler.endpointslist
+[]
+
+
kubeScheduler.service.enabledbool
+true
+
+
kubeScheduler.service.portint
+10259
+
+
kubeScheduler.service.selector.componentstring
+kube-scheduler
+
+
kubeScheduler.service.targetPortint
+10259
+
+
kubeScheduler.vmScrapeobject
+spec:
+    endpoints:
+        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+          port: http-metrics
+          scheme: https
+          tlsConfig:
+            caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+    jobLabel: jobLabel
+    namespaceSelector:
+        matchNames:
+            - kube-system
+
+

spec for VMServiceScrape crd https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec

+
kubelet.enabledbool
+true
+
+
kubelet.vmScrapeobject
+kind: VMNodeScrape
+spec:
+    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+    honorLabels: true
+    honorTimestamps: false
+    interval: 30s
+    metricRelabelConfigs:
+        - action: labeldrop
+          regex: (uid)
+        - action: labeldrop
+          regex: (id|name)
+        - action: drop
+          regex: (rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count)
+          source_labels:
+            - __name__
+    relabelConfigs:
+        - action: labelmap
+          regex: __meta_kubernetes_node_label_(.+)
+        - sourceLabels:
+            - __metrics_path__
+          targetLabel: metrics_path
+        - replacement: kubelet
+          targetLabel: job
+    scheme: https
+    scrapeTimeout: 5s
+    tlsConfig:
+        caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        insecureSkipVerify: true
+
+

spec for VMNodeScrape crd https://docs.victoriametrics.com/operator/api.html#vmnodescrapespec

+
kubelet.vmScrapes.cadvisorobject
+enabled: true
+spec:
+    path: /metrics/cadvisor
+
+

Enable scraping /metrics/cadvisor from kubelet’s service

+
kubelet.vmScrapes.kubelet.specobject
+{}
+
+
kubelet.vmScrapes.probesobject
+enabled: true
+spec:
+    path: /metrics/probes
+
+

Enable scraping /metrics/probes from kubelet’s service

+
nameOverridestring
+""
+
+
prometheus-node-exporter.enabledbool
+true
+
+
prometheus-node-exporter.extraArgs[0]string
+--collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
+
+
prometheus-node-exporter.extraArgs[1]string
+--collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
+
+
prometheus-node-exporter.service.labels.jobLabelstring
+node-exporter
+
+
prometheus-node-exporter.vmScrapeobject
+enabled: true
+spec:
+    endpoints:
+        - metricRelabelConfigs:
+            - action: drop
+              regex: /var/lib/kubelet/pods.+
+              source_labels:
+                - mountpoint
+          port: metrics
+    jobLabel: jobLabel
+    selector:
+        matchLabels:
+            app.kubernetes.io/name: '{{ include "prometheus-node-exporter.name" (index .Subcharts "prometheus-node-exporter") }}'
+
+

node exporter VM scrape config

+
prometheus-node-exporter.vmScrape.specobject
+endpoints:
+    - metricRelabelConfigs:
+        - action: drop
+          regex: /var/lib/kubelet/pods.+
+          source_labels:
+            - mountpoint
+      port: metrics
+jobLabel: jobLabel
+selector:
+    matchLabels:
+        app.kubernetes.io/name: '{{ include "prometheus-node-exporter.name" (index .Subcharts "prometheus-node-exporter") }}'
+
+

Scrape configuration for Node Exporter

+
prometheus-operator-crds.enabledbool
+false
+
+
serviceAccount.annotationsobject
+{}
+
+

Annotations to add to the service account

+
serviceAccount.createbool
+true
+
+

Specifies whether a service account should be created

+
serviceAccount.namestring
+""
+
+

If not set and create is true, a name is generated using the fullname template

+
tenantstring
+"0"
+
+
victoria-metrics-operatorobject
+crd:
+    cleanup:
+        enabled: true
+        image:
+            pullPolicy: IfNotPresent
+            repository: bitnami/kubectl
+    create: false
+enabled: true
+operator:
+    disable_prometheus_converter: false
+serviceMonitor:
+    enabled: true
+
+

Also check out the possible ENV variables for configuring operator behaviour here: https://docs.victoriametrics.com/operator/vars

+
victoria-metrics-operator.crd.cleanupobject
+enabled: true
+image:
+    pullPolicy: IfNotPresent
+    repository: bitnami/kubectl
+
+

tells helm to clean up vm cr resources when uninstalling

+
victoria-metrics-operator.crd.createbool
+false
+
+

we disable crd creation by operator chart as we create them in this chart

+
victoria-metrics-operator.operator.disable_prometheus_converterbool
+false
+
+

By default, operator converts prometheus-operator objects.

+
vmagent.additionalRemoteWriteslist
+[]
+
+

remoteWrite configuration of VMAgent, allowed parameters defined in a spec

+
vmagent.annotationsobject
+{}
+
+
vmagent.enabledbool
+true
+
+
vmagent.ingressobject
+annotations: {}
+enabled: false
+extraPaths: []
+hosts:
+    - vmagent.domain.com
+labels: {}
+path: ""
+pathType: Prefix
+tls: []
+
+

vmagent ingress configuration

+
vmagent.ingress.extraPathslist
+[]
+
+

Extra paths to prepend to every host configuration. This is useful when working with annotation based services.

+
vmagent.specobject
+externalLabels: {}
+extraArgs:
+    promscrape.dropOriginalLabels: "true"
+    promscrape.streamParse: "true"
+image:
+    tag: v1.103.0
+port: "8429"
+scrapeInterval: 20s
+selectAllByDefault: true
+
+

full spec for VMAgent CRD. Allowed values described here

+
vmalert.additionalNotifierConfigsobject
+{}
+
+
vmalert.annotationsobject
+{}
+
+
vmalert.enabledbool
+true
+
+
vmalert.ingressobject
+annotations: {}
+enabled: false
+extraPaths: []
+hosts:
+    - vmalert.domain.com
+labels: {}
+path: ""
+pathType: Prefix
+tls: []
+
+

vmalert ingress config

+
vmalert.remoteWriteVMAgentbool
+false
+
+
vmalert.specobject
+evaluationInterval: 15s
+externalLabels: {}
+extraArgs:
+    http.pathPrefix: /
+image:
+    tag: v1.103.0
+port: "8080"
+selectAllByDefault: true
+
+

full spec for VMAlert CRD. Allowed values described here

+
vmalert.templateFilesobject
+{}
+
+

extra vmalert annotation templates

+
vmauth.annotationsobject
+{}
+
+
vmauth.enabledbool
+false
+
+
vmauth.specobject
+discover_backend_ips: true
+port: "8427"
+
+

full spec for VMAuth CRD. Allowed values described here

+
vmcluster.annotationsobject
+{}
+
+
vmcluster.enabledbool
+false
+
+
vmcluster.ingress.insert.annotationsobject
+{}
+
+
vmcluster.ingress.insert.enabledbool
+false
+
+
vmcluster.ingress.insert.extraPathslist
+[]
+
+
vmcluster.ingress.insert.hosts[0]string
+vminsert.domain.com
+
+
vmcluster.ingress.insert.labelsobject
+{}
+
+
vmcluster.ingress.insert.pathstring
+'{{ dig "extraArgs" "http.pathPrefix" "/" .Values.vmcluster.spec.vminsert }}'
+
+
vmcluster.ingress.insert.pathTypestring
+Prefix
+
+
vmcluster.ingress.insert.tlslist
+[]
+
+
vmcluster.ingress.select.annotationsobject
+{}
+
+
vmcluster.ingress.select.enabledbool
+false
+
+
vmcluster.ingress.select.extraPathslist
+[]
+
+
vmcluster.ingress.select.hosts[0]string
+vmselect.domain.com
+
+
vmcluster.ingress.select.labelsobject
+{}
+
+
vmcluster.ingress.select.pathstring
+'{{ dig "extraArgs" "http.pathPrefix" "/" .Values.vmcluster.spec.vmselect }}'
+
+
vmcluster.ingress.select.pathTypestring
+Prefix
+
+
vmcluster.ingress.select.tlslist
+[]
+
+
vmcluster.ingress.storage.annotationsobject
+{}
+
+
vmcluster.ingress.storage.enabledbool
+false
+
+
vmcluster.ingress.storage.extraPathslist
+[]
+
+
vmcluster.ingress.storage.hosts[0]string
+vmstorage.domain.com
+
+
vmcluster.ingress.storage.labelsobject
+{}
+
+
vmcluster.ingress.storage.pathstring
+""
+
+
vmcluster.ingress.storage.pathTypestring
+Prefix
+
+
vmcluster.ingress.storage.tlslist
+[]
+
+
vmcluster.specobject
+replicationFactor: 2
+retentionPeriod: "1"
+vminsert:
+    extraArgs: {}
+    image:
+        tag: v1.103.0-cluster
+    port: "8480"
+    replicaCount: 2
+    resources: {}
+vmselect:
+    cacheMountPath: /select-cache
+    extraArgs: {}
+    image:
+        tag: v1.103.0-cluster
+    port: "8481"
+    replicaCount: 2
+    resources: {}
+    storage:
+        volumeClaimTemplate:
+            spec:
+                resources:
+                    requests:
+                        storage: 2Gi
+vmstorage:
+    image:
+        tag: v1.103.0-cluster
+    replicaCount: 2
+    resources: {}
+    storage:
+        volumeClaimTemplate:
+            spec:
+                resources:
+                    requests:
+                        storage: 10Gi
+    storageDataPath: /vm-data
+
+

full spec for VMCluster CRD. Allowed values described here

+
vmcluster.spec.retentionPeriodstring
+"1"
+
+

Data retention period. Possible unit characters: h(ours), d(ays), w(eeks), y(ears); if no unit character is specified, the value is in months. The minimum retention period is 24h. See these docs

+
vmsingle.annotationsobject
+{}
+
+
vmsingle.enabledbool
+true
+
+
vmsingle.ingress.annotationsobject
+{}
+
+
vmsingle.ingress.enabledbool
+false
+
+
vmsingle.ingress.extraPathslist
+[]
+
+
vmsingle.ingress.hosts[0]string
+vmsingle.domain.com
+
+
vmsingle.ingress.labelsobject
+{}
+
+
vmsingle.ingress.pathstring
+""
+
+
vmsingle.ingress.pathTypestring
+Prefix
+
+
vmsingle.ingress.tlslist
+[]
+
+
vmsingle.specobject
+extraArgs: {}
+image:
+    tag: v1.103.0
+port: "8429"
+replicaCount: 1
+retentionPeriod: "1"
+storage:
+    accessModes:
+        - ReadWriteOnce
+    resources:
+        requests:
+            storage: 20Gi
+
+

full spec for VMSingle CRD. Allowed values described here

+
vmsingle.spec.retentionPeriodstring
+"1"
+
+

Data retention period. Possible unit characters: h(ours), d(ays), w(eeks), y(ears); if no unit character is specified, the value is in months. The minimum retention period is 24h. See these docs

+
+ diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md.gotmpl b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md.gotmpl new file mode 100644 index 00000000..7ac63b6e --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md.gotmpl @@ -0,0 +1,300 @@ +{{ template "chart.typeBadge" . }} {{ template "chart.versionBadge" . }} +[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/victoriametrics)](https://artifacthub.io/packages/helm/victoriametrics/victoria-metrics-k8s-stack) + +{{ template "chart.description" . }} + +* [Overview](#Overview) +* [Configuration](#Configuration) +* [Prerequisites](#Prerequisites) +* [Dependencies](#Dependencies) +* [Quick Start](#How-to-install) +* [Uninstall](#How-to-uninstall) +* [Version Upgrade](#Upgrade-guide) +* [Troubleshooting](#Troubleshooting) +* [Values](#Parameters) + + +## Overview +This chart is an All-in-one solution to start monitoring kubernetes cluster. +It installs multiple dependency charts like [grafana](https://github.com/grafana/helm-charts/tree/main/charts/grafana), [node-exporter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter), [kube-state-metrics](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics) and [victoria-metrics-operator](https://github.com/VictoriaMetrics/helm-charts/tree/master/charts/victoria-metrics-operator). +Also it installs Custom Resources like [VMSingle](https://docs.victoriametrics.com/operator/quick-start#vmsingle), [VMCluster](https://docs.victoriametrics.com/operator/quick-start#vmcluster), [VMAgent](https://docs.victoriametrics.com/operator/quick-start#vmagent), [VMAlert](https://docs.victoriametrics.com/operator/quick-start#vmalert). 
+ +By default, the operator [converts all existing prometheus-operator API objects](https://docs.victoriametrics.com/operator/quick-start#migration-from-prometheus-operator-objects) into corresponding VictoriaMetrics Operator objects. + +To enable metrics collection for kubernetes this chart installs multiple scrape configurations for kubernetes components like kubelet and kube-proxy, etc. Metrics collection is done by [VMAgent](https://docs.victoriametrics.com/operator/quick-start#vmagent). So if you want to ship metrics to external VictoriaMetrics database you can disable VMSingle installation by setting `vmsingle.enabled` to `false` and setting `vmagent.spec.remoteWrite[].url` to your external VictoriaMetrics database. + +This chart also installs a bunch of dashboards and recording rules from [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus) project. + +![Overview](img/k8s-stack-overview.png) + + +## Configuration + +Configuration of this chart is done through helm values. + +### Dependencies + +Dependencies can be enabled or disabled by setting `enabled` to `true` or `false` in `values.yaml` file. + +**!Important:** for dependency charts anything that you can find in values.yaml of dependency chart can be configured in this chart under key for that dependency. For example if you want to configure `grafana` you can find all possible configuration options in [values.yaml](https://github.com/grafana/helm-charts/blob/main/charts/grafana/values.yaml) and you should set them in values for this chart under grafana: key. For example if you want to configure `grafana.persistence.enabled` you should set it in values.yaml like this: +```yaml +################################################# +### dependencies ##### +################################################# +# Grafana dependency chart configuration. 
For possible values refer to https://github.com/grafana/helm-charts/tree/main/charts/grafana#configuration +grafana: + enabled: true + persistence: + type: pvc + enabled: false +``` + +### VictoriaMetrics components + +This chart installs multiple VictoriaMetrics components using Custom Resources that are managed by [victoria-metrics-operator](https://docs.victoriametrics.com/operator/design) +Each resource can be configured using `spec` of that resource from API docs of [victoria-metrics-operator](https://docs.victoriametrics.com/operator/api). For example if you want to configure `VMAgent` you can find all possible configuration options in [API docs](https://docs.victoriametrics.com/operator/api#vmagent) and you should set them in values for this chart under `vmagent.spec` key. For example if you want to configure `remoteWrite.url` you should set it in values.yaml like this: +```yaml +vmagent: + spec: + remoteWrite: + - url: "https://insert.vmcluster.domain.com/insert/0/prometheus/api/v1/write" +``` + +### ArgoCD issues + +#### Operator self signed certificates +When deploying K8s stack using ArgoCD without Cert Manager (`.Values.victoria-metrics-operator.admissionWebhooks.certManager.enabled: false`) +it will rerender operator's webhook certificates on each sync since Helm `lookup` function is not respected by ArgoCD. +To prevent this please update your K8s stack Application `spec.syncPolicy` and `spec.ignoreDifferences` with the following: + +```yaml +apiVersion: argoproj.io/v1alpha1 +kind: Application +... +spec: + ... 
+ syncPolicy: + syncOptions: + # https://argo-cd.readthedocs.io/en/stable/user-guide/sync-options/#respect-ignore-difference-configs + # argocd must also ignore difference during apply stage + # otherwise it'll silently override changes and cause a problem + - RespectIgnoreDifferences=true + ignoreDifferences: + - group: "" + kind: Secret + name: <fullname>-validation + namespace: kube-system + jsonPointers: + - /data + - group: admissionregistration.k8s.io + kind: ValidatingWebhookConfiguration + name: <fullname>-admission + jqPathExpressions: + - '.webhooks[]?.clientConfig.caBundle' +``` +where `<fullname>` is output of `{{"{{"}} include "vm-operator.fullname" {{"}}"}}` for your setup + +#### `metadata.annotations: Too long: must have at most 262144 bytes` on dashboards + +If one of dashboards ConfigMap is failing with error `Too long: must have at most 262144 bytes`, please make sure you've added `argocd.argoproj.io/sync-options: ServerSideApply=true` annotation to your dashboards: + +```yaml +grafana: + sidecar: + dashboards: + additionalDashboardAnnotations: + argocd.argoproj.io/sync-options: ServerSideApply=true +``` + +argocd.argoproj.io/sync-options: ServerSideApply=true + +### Rules and dashboards + +This chart by default installs multiple dashboards and recording rules from [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus) +you can disable dashboards with `defaultDashboardsEnabled: false` and `experimentalDashboardsEnabled: false` +and rules can be configured under `defaultRules` + +### Prometheus scrape configs +This chart installs multiple scrape configurations for kubernetes monitoring. They are configured under `#ServiceMonitors` section in `values.yaml` file. 
For example if you want to configure scrape config for `kubelet` you should set it in values.yaml like this: +```yaml +kubelet: + enabled: true + # spec for VMNodeScrape crd + # https://docs.victoriametrics.com/operator/api#vmnodescrapespec + spec: + interval: "30s" +``` + +### Using externally managed Grafana + +If you want to use an externally managed Grafana instance but still want to use the dashboards provided by this chart you can set + `grafana.enabled` to `false` and set `defaultDashboardsEnabled` to `true`. This will install the dashboards + but will not install Grafana. + +For example: +```yaml +defaultDashboardsEnabled: true + +grafana: + enabled: false +``` + +This will create ConfigMaps with dashboards to be imported into Grafana. + +If additional configuration for labels or annotations is needed in order to import dashboards into an existing Grafana you can +set `.grafana.sidecar.dashboards.additionalDashboardLabels` or `.grafana.sidecar.dashboards.additionalDashboardAnnotations` in `values.yaml`: + +For example: +```yaml +defaultDashboardsEnabled: true + +grafana: + enabled: false + sidecar: + dashboards: + additionalDashboardLabels: + key: value + additionalDashboardAnnotations: + key: value +``` + +## Prerequisites + +* Install the following packages: ``git``, ``kubectl``, ``helm``, ``helm-docs``. See this [tutorial](../../REQUIREMENTS.md). + +* Add dependency chart repositories + +```console +helm repo add grafana https://grafana.github.io/helm-charts +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +``` + +* PV support on underlying infrastructure. + +{{ include "chart.installSection" . }} + +### Install locally (Minikube) + +To run VictoriaMetrics stack locally it's possible to use [Minikube](https://github.com/kubernetes/minikube). 
To avoid dashboards and alert rules issues please follow the steps below: + +Run Minikube cluster + +``` +minikube start --container-runtime=containerd --extra-config=scheduler.bind-address=0.0.0.0 --extra-config=controller-manager.bind-address=0.0.0.0 +``` + +Install helm chart + +``` +helm install [RELEASE_NAME] vm/victoria-metrics-k8s-stack -f values.yaml -f values.minikube.yaml -n NAMESPACE --debug --dry-run +``` + +{{ include "chart.uninstallSection" . }} + +CRDs created by this chart are not removed by default and should be manually cleaned up: + +```console +kubectl get crd | grep victoriametrics.com | awk '{print $1 }' | xargs -i kubectl delete crd {} +``` + +## Troubleshooting + +- If you cannot install the helm chart and get the error `configmap already exist`, it could be caused by name collisions when the release name is too long. + Kubernetes by default allows only 63 symbols in resource names and all resource names are trimmed by helm to 63 symbols. + To mitigate it, use a shorter name for the helm chart release name, like: +```bash +# stack - is short enough +helm upgrade -i stack vm/victoria-metrics-k8s-stack +``` + Or use override for helm chart release name: +```bash +helm upgrade -i some-very-long-name vm/victoria-metrics-k8s-stack --set fullnameOverride=stack +``` + + +## Upgrade guide + +Usually, helm upgrade doesn't require manual actions. Just execute command: + +```console +$ helm upgrade [RELEASE_NAME] vm/victoria-metrics-k8s-stack +``` + +But release with CRD update can only be patched manually with kubectl. +Since helm does not perform a CRD update, we recommend that you always perform this when updating the helm-charts version: + +```console +# 1. check the changes in CRD +$ helm show crds vm/victoria-metrics-k8s-stack --version [YOUR_CHART_VERSION] | kubectl diff -f - + +# 2. 
apply the changes (update CRD) +$ helm show crds vm/victoria-metrics-k8s-stack --version [YOUR_CHART_VERSION] | kubectl apply -f - --server-side +``` + +All other upgrades requiring manual actions are listed below: + + +### Upgrade to 0.13.0 + + +- node-exporter starting from version 4.0.0 is using the Kubernetes recommended labels. Therefore you have to delete the daemonset before you upgrade. + +```bash +kubectl delete daemonset -l app=prometheus-node-exporter +``` +- scrape configuration for kubernetes components was moved from `vmServiceScrape.spec` section to `spec` section. If you previously modified scrape configuration you need to update your `values.yaml` + +- `grafana.defaultDashboardsEnabled` was renamed to `defaultDashboardsEnabled` (moved to top level). You may need to update it in your `values.yaml` + + +### Upgrade to 0.6.0 + + + All `CRD` must be updated to the latest version with the command: + +```bash +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/helm-charts/master/charts/victoria-metrics-k8s-stack/crds/crd.yaml + +``` + +### Upgrade to 0.4.0 + + All `CRD` must be updated to `v1` version with the command: + +```bash +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/helm-charts/master/charts/victoria-metrics-k8s-stack/crds/crd.yaml + +``` + +### Upgrade from 0.2.8 to 0.2.9 + + Update `VMAgent` crd + +command: +```bash +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.16.0/config/crd/bases/operator.victoriametrics.com_vmagents.yaml +``` + + ### Upgrade from 0.2.5 to 0.2.6 + +New CRDs added to operator - `VMUser` and `VMAuth`; new fields added to existing CRDs. 
+Manual commands: +```bash +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmusers.yaml +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmauths.yaml +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmalerts.yaml +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmagents.yaml +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmsingles.yaml +kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmclusters.yaml +``` + +{{ include "chart.helmDocs" . }} + +## Parameters + +The following tables lists the configurable parameters of the chart and their default values. + +Change the values according to the need of the environment in ``victoria-metrics-k8s-stack/values.yaml`` file. + +{{ template "chart.valuesTableHtml" . }} diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_GUIDE.md b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_GUIDE.md new file mode 100644 index 00000000..12e01eb5 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_GUIDE.md @@ -0,0 +1,40 @@ +# Release process guidance + +## Update version for VictoriaMetrics kubernetes monitoring stack + +1. Update dependency requirements in [Chart.yml](https://github.com/VictoriaMetrics/helm-charts/blob/master/charts/victoria-metrics-k8s-stack/Chart.yaml) +2. Apply changes via `helm dependency update` +3. Update image tag in chart values: + +
+ + ```console + make sync-rules + make sync-dashboards + ``` +
+4. Bump version of the victoria-metrics-k8s-stack [Chart.yml](https://github.com/VictoriaMetrics/helm-charts/blob/master/charts/victoria-metrics-k8s-stack/Chart.yaml) +5. Run linter: + +
+ + ```console + make lint + ``` + +
+6. Render templates locally to check for errors: + +
+ + ```console + helm template vm-k8s-stack ./charts/victoria-metrics-k8s-stack --output-dir out --values ./charts/victoria-metrics-k8s-stack/values.yaml --debug + ``` + +
+7. Test updated chart by installing it to your kubernetes cluster. +8. Update docs with + ```console + helm-docs + ``` +9. Commit the changes and send a [PR](https://github.com/VictoriaMetrics/helm-charts/pulls) diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_NOTES.md b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_NOTES.md new file mode 100644 index 00000000..0a413896 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_NOTES.md @@ -0,0 +1,12 @@ +# Release notes for version 0.25.17 + +**Release date:** 2024-09-20 + +![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) +![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) + +- Added VMAuth to k8s stack. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/829) +- Fixed ETCD dashboard +- Use path prefix from args as a default path prefix for ingress. Related [issue](https://github.com/VictoriaMetrics/helm-charts/issues/1260) +- Allow using vmalert without notifiers configuration. Note that it is required to use `.vmalert.spec.extraArgs["notifiers.blackhole"]: true` in order to start vmalert with a blackhole configuration. 
+ diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/_changelog.md b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/_changelog.md new file mode 100644 index 00000000..79e80b88 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/_changelog.md @@ -0,0 +1,13 @@ +--- +weight: 1 +title: CHANGELOG +menu: + docs: + weight: 1 + identifier: helm-victoriametrics-k8s-stack-changelog + parent: helm-victoriametrics-k8s-stack +url: /helm/victoriametrics-k8s-stack/changelog +aliases: + - /helm/victoriametrics-k8s-stack/changelog/index.html +--- +{{% content "CHANGELOG.md" %}} diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/_index.md b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/_index.md new file mode 100644 index 00000000..d23dc833 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/_index.md @@ -0,0 +1,13 @@ +--- +weight: 9 +title: VictoriaMetrics K8s Stack +menu: + docs: + parent: helm + weight: 9 + identifier: helm-victoriametrics-k8s-stack +url: /helm/victoriametrics-k8s-stack +aliases: + - /helm/victoriametrics-k8s-stack/index.html +--- +{{% content "README.md" %}} diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/etcd.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/etcd.yaml new file mode 100644 index 00000000..9484dd86 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/etcd.yaml @@ -0,0 +1,165 @@ +condition: '{{ .Values.kubeEtcd.enabled }}' +name: etcd +rules: +- alert: etcdMembersDown + annotations: + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": members are down ({{`{{`}} $value {{`}}`}}).' + summary: 'etcd cluster members are down.' 
+ condition: '{{ true }}' + expr: |- + max without (endpoint) ( + sum without (instance) (up{job=~".*etcd.*"} == bool 0) + or + count without (To) ( + sum without (instance) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[120s])) > 0.01 + ) + ) + > 0 + for: 10m + labels: + severity: critical +- alert: etcdInsufficientMembers + annotations: + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).' + summary: 'etcd cluster has insufficient number of members.' + condition: '{{ true }}' + expr: sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"}) without (instance) + 1) / 2) + for: 3m + labels: + severity: critical +- alert: etcdNoLeader + annotations: + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member {{`{{`}} $labels.instance {{`}}`}} has no leader.' + summary: 'etcd cluster has no leader.' + condition: '{{ true }}' + expr: etcd_server_has_leader{job=~".*etcd.*"} == 0 + for: 1m + labels: + severity: critical +- alert: etcdHighNumberOfLeaderChanges + annotations: + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.' + summary: 'etcd cluster has high number of leader changes.' + condition: '{{ true }}' + expr: increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 4 + for: 5m + labels: + severity: warning +- alert: etcdHighNumberOfFailedGRPCRequests + annotations: + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' 
+ summary: 'etcd cluster has high number of failed grpc requests.' + condition: '{{ true }}' + expr: |- + 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) + / + sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) + > 1 + for: 10m + labels: + severity: warning +- alert: etcdHighNumberOfFailedGRPCRequests + annotations: + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: 'etcd cluster has high number of failed grpc requests.' + condition: '{{ true }}' + expr: |- + 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) + / + sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) + > 5 + for: 5m + labels: + severity: critical +- alert: etcdGRPCRequestsSlow + annotations: + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile of gRPC requests is {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}} for {{`{{`}} $labels.grpc_method {{`}}`}} method.' + summary: 'etcd grpc requests are slow' + condition: '{{ true }}' + expr: |- + histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type)) + > 0.15 + for: 10m + labels: + severity: critical +- alert: etcdMemberCommunicationSlow + annotations: + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member communication with {{`{{`}} $labels.To {{`}}`}} is taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' 
+ summary: 'etcd cluster member communication is slow.' + condition: '{{ true }}' + expr: |- + histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m])) + > 0.15 + for: 10m + labels: + severity: warning +- alert: etcdHighNumberOfFailedProposals + annotations: + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last 30 minutes on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: 'etcd cluster has high number of proposal failures.' + condition: '{{ true }}' + expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5 + for: 15m + labels: + severity: warning +- alert: etcdHighFsyncDurations + annotations: + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fsync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: 'etcd cluster 99th percentile fsync durations are too high.' + condition: '{{ true }}' + expr: |- + histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) + > 0.5 + for: 10m + labels: + severity: warning +- alert: etcdHighFsyncDurations + annotations: + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fsync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: 'etcd cluster 99th percentile fsync durations are too high.' + condition: '{{ true }}' + expr: |- + histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) + > 1 + for: 10m + labels: + severity: critical +- alert: etcdHighCommitDurations + annotations: + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile commit durations {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: 'etcd cluster 99th percentile commit durations are too high.' 
+ condition: '{{ true }}' + expr: |- + histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m])) + > 0.25 + for: 10m + labels: + severity: warning +- alert: etcdDatabaseQuotaLowSpace + annotations: + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": database size exceeds the defined quota on etcd instance {{`{{`}} $labels.instance {{`}}`}}, please defrag or increase the quota as the writes to etcd will be disabled when it is full.' + summary: 'etcd cluster database is running full.' + condition: '{{ true }}' + expr: (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > 95 + for: 10m + labels: + severity: critical +- alert: etcdExcessiveDatabaseGrowth + annotations: + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": Predicting running out of disk space in the next four hours, based on write observations within the past four hours on etcd instance {{`{{`}} $labels.instance {{`}}`}}, please check as it might be disruptive.' + summary: 'etcd cluster database growing very fast.' + condition: '{{ true }}' + expr: predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60) > etcd_server_quota_backend_bytes{job=~".*etcd.*"} + for: 10m + labels: + severity: warning +- alert: etcdDatabaseHighFragmentationRatio + annotations: + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": database size in use on instance {{`{{`}} $labels.instance {{`}}`}} is {{`{{`}} $value | humanizePercentage {{`}}`}} of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.' + runbook_url: 'https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation' + summary: 'etcd database size in use is less than 50% of the actual allocated storage.' 
+ condition: '{{ true }}' + expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5 and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600 + for: 10m + labels: + severity: warning diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml new file mode 100644 index 00000000..ae0fa110 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml @@ -0,0 +1,53 @@ +condition: '{{ true }}' +name: general.rules +rules: +- alert: TargetDown + annotations: + description: '{{`{{`}} printf "%.4g" $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.service {{`}}`}} targets in {{`{{`}} $labels.namespace {{`}}`}} namespace are down.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/general/targetdown' + summary: 'One or more targets are unreachable.' + condition: '{{ true }}' + expr: 100 * (count(up == 0) BY (job,namespace,service,{{ .Values.global.clusterLabel }}) / count(up) BY (job,namespace,service,{{ .Values.global.clusterLabel }})) > 10 + for: 10m + labels: + severity: warning +- alert: Watchdog + annotations: + description: 'This is an alert meant to ensure that the entire alerting pipeline is functional. + + This alert is always firing, therefore it should always be firing in Alertmanager + + and always fire against a receiver. There are integrations with various notification + + mechanisms that send a notification when this alert is not firing. For example the + + "DeadMansSnitch" integration in PagerDuty. + + ' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/general/watchdog' + summary: 'An alert that should always be firing to certify that Alertmanager is working properly.' 
+ condition: '{{ true }}' + expr: vector(1) + labels: + severity: ok +- alert: InfoInhibitor + annotations: + description: 'This is an alert that is used to inhibit info alerts. + + By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with + + other alerts. + + This alert fires whenever there''s a severity="info" alert, and stops firing when another alert with a + + severity of ''warning'' or ''critical'' starts firing on the same namespace. + + This alert should be routed to a null receiver and configured to inhibit alerts with severity="info". + + ' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/general/infoinhibitor' + summary: 'Info-level alert inhibition.' + condition: '{{ true }}' + expr: ALERTS{severity = "info"} == 1 unless on (namespace,{{ .Values.global.clusterLabel }}) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1 + labels: + severity: major diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_usage_seconds_total.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_usage_seconds_total.yaml new file mode 100644 index 00000000..85ed9d09 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_usage_seconds_total.yaml @@ -0,0 +1,11 @@ +condition: '{{ true }}' +name: k8s.rules.container_cpu_usage_seconds_total +rules: +- condition: '{{ true }}' + expr: |- + sum by (namespace,pod,container,{{ .Values.global.clusterLabel }}) ( + irate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m]) + ) * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left(node) topk by (namespace,pod,{{ .Values.global.clusterLabel }}) ( + 1, max by (namespace,pod,node,{{ .Values.global.clusterLabel }}) (kube_pod_info{node!=""}) + ) + 
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_cache.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_cache.yaml new file mode 100644 index 00000000..3b22ddd6 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_cache.yaml @@ -0,0 +1,10 @@ +condition: '{{ true }}' +name: k8s.rules.container_memory_cache +rules: +- condition: '{{ true }}' + expr: |- + container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left(node) topk by (namespace,pod,{{ .Values.global.clusterLabel }}) (1, + max by (namespace,pod,node,{{ .Values.global.clusterLabel }}) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_cache diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_rss.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_rss.yaml new file mode 100644 index 00000000..6a5f6df3 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_rss.yaml @@ -0,0 +1,10 @@ +condition: '{{ true }}' +name: k8s.rules.container_memory_rss +rules: +- condition: '{{ true }}' + expr: |- + container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left(node) topk by (namespace,pod,{{ .Values.global.clusterLabel }}) (1, + max by (namespace,pod,node,{{ .Values.global.clusterLabel }}) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_rss diff --git 
a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_swap.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_swap.yaml new file mode 100644 index 00000000..481e7711 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_swap.yaml @@ -0,0 +1,10 @@ +condition: '{{ true }}' +name: k8s.rules.container_memory_swap +rules: +- condition: '{{ true }}' + expr: |- + container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left(node) topk by (namespace,pod,{{ .Values.global.clusterLabel }}) (1, + max by (namespace,pod,node,{{ .Values.global.clusterLabel }}) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_swap diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_working_set_bytes.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_working_set_bytes.yaml new file mode 100644 index 00000000..f5ca9504 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_working_set_bytes.yaml @@ -0,0 +1,10 @@ +condition: '{{ true }}' +name: k8s.rules.container_memory_working_set_bytes +rules: +- condition: '{{ true }}' + expr: |- + container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left(node) topk by (namespace,pod,{{ .Values.global.clusterLabel }}) (1, + max by (namespace,pod,node,{{ .Values.global.clusterLabel }}) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_working_set_bytes diff --git 
a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_resource.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_resource.yaml new file mode 100644 index 00000000..260a20e2 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_resource.yaml @@ -0,0 +1,79 @@ +condition: '{{ true }}' +name: k8s.rules.container_resource +rules: +- condition: '{{ true }}' + expr: |- + kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} * on (namespace,pod,{{ .Values.global.clusterLabel }}) + group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests +- condition: '{{ true }}' + expr: |- + sum by (namespace,{{ .Values.global.clusterLabel }}) ( + sum by (namespace,pod,{{ .Values.global.clusterLabel }}) ( + max by (namespace,pod,container,{{ .Values.global.clusterLabel }}) ( + kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} + ) * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_memory:kube_pod_container_resource_requests:sum +- condition: '{{ true }}' + expr: |- + kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} * on (namespace,pod,{{ .Values.global.clusterLabel }}) + group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests +- condition: '{{ true }}' + expr: |- + sum by (namespace,{{ .Values.global.clusterLabel }}) ( + sum by (namespace,pod,{{ 
.Values.global.clusterLabel }}) ( + max by (namespace,pod,container,{{ .Values.global.clusterLabel }}) ( + kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} + ) * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_cpu:kube_pod_container_resource_requests:sum +- condition: '{{ true }}' + expr: |- + kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on (namespace,pod,{{ .Values.global.clusterLabel }}) + group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits +- condition: '{{ true }}' + expr: |- + sum by (namespace,{{ .Values.global.clusterLabel }}) ( + sum by (namespace,pod,{{ .Values.global.clusterLabel }}) ( + max by (namespace,pod,container,{{ .Values.global.clusterLabel }}) ( + kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} + ) * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_memory:kube_pod_container_resource_limits:sum +- condition: '{{ true }}' + expr: |- + kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on (namespace,pod,{{ .Values.global.clusterLabel }}) + group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits +- condition: '{{ true }}' + expr: |- + sum by (namespace,{{ .Values.global.clusterLabel }}) ( + sum by (namespace,pod,{{ .Values.global.clusterLabel }}) ( + max by (namespace,pod,container,{{ 
.Values.global.clusterLabel }}) ( + kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} + ) * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_cpu:kube_pod_container_resource_limits:sum diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.pod_owner.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.pod_owner.yaml new file mode 100644 index 00000000..3cefefe0 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.pod_owner.yaml @@ -0,0 +1,54 @@ +condition: '{{ true }}' +name: k8s.rules.pod_owner +rules: +- condition: '{{ true }}' + expr: |- + max by (namespace,workload,pod,{{ .Values.global.clusterLabel }}) ( + label_replace( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"}, + "replicaset", "$1", "owner_name", "(.*)" + ) * on (replicaset,namespace,{{ .Values.global.clusterLabel }}) group_left(owner_name) topk by (replicaset,namespace,{{ .Values.global.clusterLabel }}) ( + 1, max by (replicaset,namespace,owner_name,{{ .Values.global.clusterLabel }}) ( + kube_replicaset_owner{job="kube-state-metrics"} + ) + ), + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: deployment + record: namespace_workload_pod:kube_pod_owner:relabel +- condition: '{{ true }}' + expr: |- + max by (namespace,workload,pod,{{ .Values.global.clusterLabel }}) ( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: daemonset + record: namespace_workload_pod:kube_pod_owner:relabel +- condition: '{{ true }}' + expr: |- + max by (namespace,workload,pod,{{ .Values.global.clusterLabel }}) ( + 
label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: statefulset + record: namespace_workload_pod:kube_pod_owner:relabel +- condition: '{{ true }}' + expr: |- + max by (namespace,workload,pod,{{ .Values.global.clusterLabel }}) ( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="Job"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: job + record: namespace_workload_pod:kube_pod_owner:relabel diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-availability.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-availability.rules.yaml new file mode 100644 index 00000000..aab98ce1 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-availability.rules.yaml @@ -0,0 +1,128 @@ +condition: '{{ .Values.kubeApiServer.enabled }}' +interval: 3m +name: kube-apiserver-availability.rules +rules: +- condition: '{{ true }}' + expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30 + record: code_verb:apiserver_request_total:increase30d +- condition: '{{ true }}' + expr: sum by (code,{{ .Values.global.clusterLabel }}) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) + labels: + verb: read + record: code:apiserver_request_total:increase30d +- condition: '{{ true }}' + expr: sum by (code,{{ .Values.global.clusterLabel }}) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + labels: + verb: write + record: code:apiserver_request_total:increase30d +- condition: '{{ true }}' + expr: sum by (verb,scope,{{ .Values.global.clusterLabel }}) (increase(apiserver_request_sli_duration_seconds_count{job="kube-apiserver"}[1h])) + record: 
cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h +- condition: '{{ true }}' + expr: sum by (verb,scope,{{ .Values.global.clusterLabel }}) (avg_over_time(cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h[30d]) * 24 * 30) + record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d +- condition: '{{ true }}' + expr: sum by (verb,scope,le,{{ .Values.global.clusterLabel }}) (increase(apiserver_request_sli_duration_seconds_bucket[1h])) + record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h +- condition: '{{ true }}' + expr: sum by (verb,scope,le,{{ .Values.global.clusterLabel }}) (avg_over_time(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h[30d]) * 24 * 30) + record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d +- condition: '{{ true }}' + expr: |- + 1 - ( + ( + # write too slow + sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + - + sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) + ) + + ( + # read too slow + sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"}) + - + ( + ( + sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) + or + vector(0) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) + + + sum by ({{ .Values.global.clusterLabel }}) 
(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) + ) + ) + + # errors + sum by ({{ .Values.global.clusterLabel }}) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (code:apiserver_request_total:increase30d) + labels: + verb: all + record: apiserver_request:availability30d +- condition: '{{ true }}' + expr: |- + 1 - ( + sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"}) + - + ( + # too slow + ( + sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) + or + vector(0) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) + + + sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) + ) + + + # errors + sum by ({{ .Values.global.clusterLabel }}) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (code:apiserver_request_total:increase30d{verb="read"}) + labels: + verb: read + record: apiserver_request:availability30d +- condition: '{{ true }}' + expr: |- + 1 - ( + ( + # too slow + sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + - + sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) + ) + + + # errors + sum by ({{ .Values.global.clusterLabel }}) 
(code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (code:apiserver_request_total:increase30d{verb="write"}) + labels: + verb: write + record: apiserver_request:availability30d +- condition: '{{ true }}' + expr: sum by (code,resource,{{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[5m])) + labels: + verb: read + record: code_resource:apiserver_request_total:rate5m +- condition: '{{ true }}' + expr: sum by (code,resource,{{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + labels: + verb: write + record: code_resource:apiserver_request_total:rate5m +- condition: '{{ true }}' + expr: sum by (code,verb,{{ .Values.global.clusterLabel }}) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h])) + record: code_verb:apiserver_request_total:increase1h +- condition: '{{ true }}' + expr: sum by (code,verb,{{ .Values.global.clusterLabel }}) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h])) + record: code_verb:apiserver_request_total:increase1h +- condition: '{{ true }}' + expr: sum by (code,verb,{{ .Values.global.clusterLabel }}) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h])) + record: code_verb:apiserver_request_total:increase1h +- condition: '{{ true }}' + expr: sum by (code,verb,{{ .Values.global.clusterLabel }}) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) + record: code_verb:apiserver_request_total:increase1h diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-burnrate.rules.yaml 
b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-burnrate.rules.yaml new file mode 100644 index 00000000..6a87f5a7 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-burnrate.rules.yaml @@ -0,0 +1,318 @@ +condition: '{{ .Values.kubeApiServer.enabled }}' +name: kube-apiserver-burnrate.rules +rules: +- condition: '{{ true }}' + expr: |- + ( + ( + # too slow + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1d])) + - + ( + ( + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1d])) + or + vector(0) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1d])) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1d])) + ) + ) + + + # errors + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[1d])) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[1d])) + labels: + verb: read + record: apiserver_request:burnrate1d +- condition: '{{ true }}' + expr: |- + ( + ( + # too slow + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1h])) + - + ( + ( + sum by ({{ 
.Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1h])) + or + vector(0) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1h])) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1h])) + ) + ) + + + # errors + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[1h])) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[1h])) + labels: + verb: read + record: apiserver_request:burnrate1h +- condition: '{{ true }}' + expr: |- + ( + ( + # too slow + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[2h])) + - + ( + ( + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[2h])) + or + vector(0) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[2h])) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[2h])) + ) + ) + + + # errors + sum by ({{ 
.Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[2h])) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[2h])) + labels: + verb: read + record: apiserver_request:burnrate2h +- condition: '{{ true }}' + expr: |- + ( + ( + # too slow + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[30m])) + - + ( + ( + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[30m])) + or + vector(0) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[30m])) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[30m])) + ) + ) + + + # errors + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[30m])) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[30m])) + labels: + verb: read + record: apiserver_request:burnrate30m +- condition: '{{ true }}' + expr: |- + ( + ( + # too slow + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[3d])) + - + ( + ( + sum by ({{ .Values.global.clusterLabel }}) 
(rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[3d])) + or + vector(0) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[3d])) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[3d])) + ) + ) + + + # errors + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[3d])) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[3d])) + labels: + verb: read + record: apiserver_request:burnrate3d +- condition: '{{ true }}' + expr: |- + ( + ( + # too slow + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m])) + - + ( + ( + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[5m])) + or + vector(0) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[5m])) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[5m])) + ) + ) + + + # errors + sum by ({{ .Values.global.clusterLabel }}) 
(rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[5m])) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[5m])) + labels: + verb: read + record: apiserver_request:burnrate5m +- condition: '{{ true }}' + expr: |- + ( + ( + # too slow + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[6h])) + - + ( + ( + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[6h])) + or + vector(0) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[6h])) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[6h])) + ) + ) + + + # errors + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[6h])) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[6h])) + labels: + verb: read + record: apiserver_request:burnrate6h +- condition: '{{ true }}' + expr: |- + ( + ( + # too slow + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1d])) + - + sum by ({{ .Values.global.clusterLabel }}) 
(rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1d])) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) + labels: + verb: write + record: apiserver_request:burnrate1d +- condition: '{{ true }}' + expr: |- + ( + ( + # too slow + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1h])) + - + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1h])) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) + labels: + verb: write + record: apiserver_request:burnrate1h +- condition: '{{ true }}' + expr: |- + ( + ( + # too slow + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[2h])) + - + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[2h])) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) + ) + / + 
sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) + labels: + verb: write + record: apiserver_request:burnrate2h +- condition: '{{ true }}' + expr: |- + ( + ( + # too slow + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[30m])) + - + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[30m])) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) + labels: + verb: write + record: apiserver_request:burnrate30m +- condition: '{{ true }}' + expr: |- + ( + ( + # too slow + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[3d])) + - + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[3d])) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) + labels: + verb: write + record: apiserver_request:burnrate3d +- condition: '{{ true }}' + expr: |- + ( + ( + # too slow + sum by ({{ .Values.global.clusterLabel }}) 
(rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m])) + - + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[5m])) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + labels: + verb: write + record: apiserver_request:burnrate5m +- condition: '{{ true }}' + expr: |- + ( + ( + # too slow + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[6h])) + - + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[6h])) + ) + + + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) + ) + / + sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) + labels: + verb: write + record: apiserver_request:burnrate6h diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-histogram.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-histogram.rules.yaml new file mode 100644 index 00000000..4ba999e2 --- /dev/null +++ 
b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-histogram.rules.yaml @@ -0,0 +1,15 @@ +condition: '{{ .Values.kubeApiServer.enabled }}' +name: kube-apiserver-histogram.rules +rules: +- condition: '{{ true }}' + expr: histogram_quantile(0.99, sum by (le,resource,{{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 + labels: + quantile: '0.99' + verb: read + record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile +- condition: '{{ true }}' + expr: histogram_quantile(0.99, sum by (le,resource,{{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 + labels: + quantile: '0.99' + verb: write + record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-slos.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-slos.yaml new file mode 100644 index 00000000..ed75cccd --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-slos.yaml @@ -0,0 +1,63 @@ +condition: '{{ .Values.kubeApiServer.enabled }}' +name: kube-apiserver-slos +rules: +- alert: KubeAPIErrorBudgetBurn + annotations: + description: 'The API server is burning too much error budget.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn' + summary: 'The API server is burning too much error budget.' 
+ condition: '{{ true }}' + expr: |- + sum(apiserver_request:burnrate1h) > (14.40 * 0.01000) + and + sum(apiserver_request:burnrate5m) > (14.40 * 0.01000) + for: 2m + labels: + long: 1h + severity: critical + short: 5m +- alert: KubeAPIErrorBudgetBurn + annotations: + description: 'The API server is burning too much error budget.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn' + summary: 'The API server is burning too much error budget.' + condition: '{{ true }}' + expr: |- + sum(apiserver_request:burnrate6h) > (6.00 * 0.01000) + and + sum(apiserver_request:burnrate30m) > (6.00 * 0.01000) + for: 15m + labels: + long: 6h + severity: critical + short: 30m +- alert: KubeAPIErrorBudgetBurn + annotations: + description: 'The API server is burning too much error budget.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn' + summary: 'The API server is burning too much error budget.' + condition: '{{ true }}' + expr: |- + sum(apiserver_request:burnrate1d) > (3.00 * 0.01000) + and + sum(apiserver_request:burnrate2h) > (3.00 * 0.01000) + for: 1h + labels: + long: 1d + severity: warning + short: 2h +- alert: KubeAPIErrorBudgetBurn + annotations: + description: 'The API server is burning too much error budget.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn' + summary: 'The API server is burning too much error budget.' 
+ condition: '{{ true }}' + expr: |- + sum(apiserver_request:burnrate3d) > (1.00 * 0.01000) + and + sum(apiserver_request:burnrate6h) > (1.00 * 0.01000) + for: 3h + labels: + long: 3d + severity: warning + short: 6h diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-general.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-general.rules.yaml new file mode 100644 index 00000000..68295151 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-general.rules.yaml @@ -0,0 +1,9 @@ +condition: '{{ true }}' +name: kube-prometheus-general.rules +rules: +- condition: '{{ true }}' + expr: count without(instance, pod, node) (up == 1) + record: count:up1 +- condition: '{{ true }}' + expr: count without(instance, pod, node) (up == 0) + record: count:up0 diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-node-recording.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-node-recording.rules.yaml new file mode 100644 index 00000000..ee7e514e --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-node-recording.rules.yaml @@ -0,0 +1,21 @@ +condition: '{{ true }}' +name: kube-prometheus-node-recording.rules +rules: +- condition: '{{ true }}' + expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance) + record: instance:node_cpu:rate:sum +- condition: '{{ true }}' + expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) + record: instance:node_network_receive_bytes:rate:sum +- condition: '{{ true }}' + expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) + record: instance:node_network_transmit_bytes:rate:sum +- condition: '{{ true }}' + expr: 
sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance) + record: instance:node_cpu:ratio +- condition: '{{ true }}' + expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) + record: cluster:node_cpu:sum_rate5m +- condition: '{{ true }}' + expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu)) + record: cluster:node_cpu:ratio diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-scheduler.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-scheduler.rules.yaml new file mode 100644 index 00000000..51cdb0e4 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-scheduler.rules.yaml @@ -0,0 +1,48 @@ +condition: '{{ .Values.kubeScheduler.enabled }}' +name: kube-scheduler.rules +rules: +- condition: '{{ true }}' + expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.99' + record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile +- condition: '{{ true }}' + expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.99' + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile +- condition: '{{ true }}' + expr: histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.99' + record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile +- condition: '{{ true }}' + expr: histogram_quantile(0.9, 
sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.9' + record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile +- condition: '{{ true }}' + expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.9' + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile +- condition: '{{ true }}' + expr: histogram_quantile(0.9, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.9' + record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile +- condition: '{{ true }}' + expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.5' + record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile +- condition: '{{ true }}' + expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.5' + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile +- condition: '{{ true }}' + expr: histogram_quantile(0.5, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) + labels: + quantile: '0.5' + record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-state-metrics.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-state-metrics.yaml new file mode 100644 index 00000000..e392b7a0 --- /dev/null +++ 
b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-state-metrics.yaml @@ -0,0 +1,55 @@ +condition: '{{ true }}' +name: kube-state-metrics +rules: +- alert: KubeStateMetricsListErrors + annotations: + description: 'kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricslisterrors' + summary: 'kube-state-metrics is experiencing errors in list operations.' + condition: '{{ true }}' + expr: |- + (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) by ({{ .Values.global.clusterLabel }}) + / + sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])) by ({{ .Values.global.clusterLabel }})) + > 0.01 + for: 15m + labels: + severity: critical +- alert: KubeStateMetricsWatchErrors + annotations: + description: 'kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricswatcherrors' + summary: 'kube-state-metrics is experiencing errors in watch operations.' + condition: '{{ true }}' + expr: |- + (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m])) by ({{ .Values.global.clusterLabel }}) + / + sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])) by ({{ .Values.global.clusterLabel }})) + > 0.01 + for: 15m + labels: + severity: critical +- alert: KubeStateMetricsShardingMismatch + annotations: + description: 'kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all.' 
+ runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricsshardingmismatch' + summary: 'kube-state-metrics sharding is misconfigured.' + condition: '{{ true }}' + expr: stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }}) != 0 + for: 15m + labels: + severity: critical +- alert: KubeStateMetricsShardsMissing + annotations: + description: 'kube-state-metrics shards are missing, some Kubernetes objects are not being exposed.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricsshardsmissing' + summary: 'kube-state-metrics shards are missing.' + condition: '{{ true }}' + expr: |- + 2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }}) - 1 + - + sum( 2 ^ max by (shard_ordinal,{{ .Values.global.clusterLabel }}) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) ) by ({{ .Values.global.clusterLabel }}) + != 0 + for: 15m + labels: + severity: critical diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubelet.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubelet.rules.yaml new file mode 100644 index 00000000..98ea1a57 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubelet.rules.yaml @@ -0,0 +1,18 @@ +condition: '{{ .Values.kubelet.enabled }}' +name: kubelet.rules +rules: +- condition: '{{ true }}' + expr: histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance,le,{{ .Values.global.clusterLabel }}) * on (instance,{{ .Values.global.clusterLabel }}) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) + labels: + quantile: '0.99' + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile +- condition: '{{ true }}' + expr: 
histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance,le,{{ .Values.global.clusterLabel }}) * on (instance,{{ .Values.global.clusterLabel }}) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) + labels: + quantile: '0.9' + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile +- condition: '{{ true }}' + expr: histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance,le,{{ .Values.global.clusterLabel }}) * on (instance,{{ .Values.global.clusterLabel }}) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) + labels: + quantile: '0.5' + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-apps.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-apps.yaml new file mode 100644 index 00000000..4e398e37 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-apps.yaml @@ -0,0 +1,257 @@ +condition: '{{ true }}' +name: kubernetes-apps +rules: +- alert: KubePodCrashLooping + annotations: + description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: "CrashLoopBackOff").' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepodcrashlooping' + summary: 'Pod is crash looping.' 
+ condition: '{{ true }}' + expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}[5m]) >= 1 + for: 15m + labels: + severity: warning +- alert: KubePodNotReady + annotations: + description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepodnotready' + summary: 'Pod has been in a non-ready state for more than 15 minutes.' + condition: '{{ true }}' + expr: |- + sum by (namespace,pod,{{ .Values.global.clusterLabel }}) ( + max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( + kube_pod_status_phase{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}", phase=~"Pending|Unknown|Failed"} + ) * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left(owner_kind) topk by (namespace,pod,{{ .Values.global.clusterLabel }}) ( + 1, max by (namespace,pod,owner_kind,{{ .Values.global.clusterLabel }}) (kube_pod_owner{owner_kind!="Job"}) + ) + ) > 0 + for: 15m + labels: + severity: warning +- alert: KubeDeploymentGenerationMismatch + annotations: + description: 'Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.' 
+ runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentgenerationmismatch' + summary: 'Deployment generation mismatch due to possible roll-back' + condition: '{{ true }}' + expr: |- + kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + != + kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + for: 15m + labels: + severity: warning +- alert: KubeDeploymentReplicasMismatch + annotations: + description: 'Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentreplicasmismatch' + summary: 'Deployment has not matched the expected number of replicas.' + condition: '{{ true }}' + expr: |- + ( + kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + > + kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + ) and ( + changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}[10m]) + == + 0 + ) + for: 15m + labels: + severity: warning +- alert: KubeDeploymentRolloutStuck + annotations: + description: 'Rollout of deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} is not progressing for longer than 15 minutes.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentrolloutstuck' + summary: 'Deployment rollout is not progressing.' 
+ condition: '{{ true }}' + expr: |- + kube_deployment_status_condition{condition="Progressing", status="false",job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + != 0 + for: 15m + labels: + severity: warning +- alert: KubeStatefulSetReplicasMismatch + annotations: + description: 'StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetreplicasmismatch' + summary: 'StatefulSet has not matched the expected number of replicas.' + condition: '{{ true }}' + expr: |- + ( + kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + != + kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + ) and ( + changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}[10m]) + == + 0 + ) + for: 15m + labels: + severity: warning +- alert: KubeStatefulSetGenerationMismatch + annotations: + description: 'StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetgenerationmismatch' + summary: 'StatefulSet generation mismatch due to possible roll-back' + condition: '{{ true }}' + expr: |- + kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + != + kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + for: 15m + labels: + severity: warning +- alert: KubeStatefulSetUpdateNotRolledOut + annotations: + description: 'StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.' 
+ runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetupdatenotrolledout' + summary: 'StatefulSet update has not been rolled out.' + condition: '{{ true }}' + expr: |- + ( + max by (namespace,statefulset,{{ .Values.global.clusterLabel }}) ( + kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + unless + kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + ) + * + ( + kube_statefulset_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + != + kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + ) + ) and ( + changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}[5m]) + == + 0 + ) + for: 15m + labels: + severity: warning +- alert: KubeDaemonSetRolloutStuck + annotations: + description: 'DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15 minutes.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetrolloutstuck' + summary: 'DaemonSet rollout is stuck.' 
+ condition: '{{ true }}' + expr: |- + ( + ( + kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + != + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + ) or ( + kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + != + 0 + ) or ( + kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + != + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + ) or ( + kube_daemonset_status_number_available{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + != + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + ) + ) and ( + changes(kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}[5m]) + == + 0 + ) + for: 15m + labels: + severity: warning +- alert: KubeContainerWaiting + annotations: + description: 'pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecontainerwaiting' + summary: 'Pod container waiting longer than 1 hour' + condition: '{{ true }}' + expr: sum by (namespace,pod,container,{{ .Values.global.clusterLabel }}) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}) > 0 + for: 1h + labels: + severity: warning +- alert: KubeDaemonSetNotScheduled + annotations: + description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.' 
+ runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetnotscheduled' + summary: 'DaemonSet pods are not scheduled.' + condition: '{{ true }}' + expr: |- + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + - + kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} > 0 + for: 10m + labels: + severity: warning +- alert: KubeDaemonSetMisScheduled + annotations: + description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetmisscheduled' + summary: 'DaemonSet pods are misscheduled.' + condition: '{{ true }}' + expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} > 0 + for: 15m + labels: + severity: warning +- alert: KubeJobNotCompleted + annotations: + description: 'Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubejobnotcompleted' + summary: 'Job did not complete in time' + condition: '{{ true }}' + expr: |- + time() - max by (namespace,job_name,{{ .Values.global.clusterLabel }}) (kube_job_status_start_time{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + and + kube_job_status_active{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} > 0) > 43200 + labels: + severity: warning +- alert: KubeJobFailed + annotations: + description: 'Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert.' 
+ runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubejobfailed' + summary: 'Job failed to complete.' + condition: '{{ true }}' + expr: kube_job_failed{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} > 0 + for: 15m + labels: + severity: warning +- alert: KubeHpaReplicasMismatch + annotations: + description: 'HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubehpareplicasmismatch' + summary: 'HPA has not matched desired number of replicas.' + condition: '{{ true }}' + expr: |- + (kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + != + kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}) + and + (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + > + kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}) + and + (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + < + kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}) + and + changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}[15m]) == 0 + for: 15m + labels: + severity: warning +- alert: KubeHpaMaxedOut + annotations: + description: 'HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes.' 
+ runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubehpamaxedout' + summary: 'HPA is running at max replicas' + condition: '{{ true }}' + expr: |- + kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + == + kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} + for: 15m + labels: + severity: warning diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-resources.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-resources.yaml new file mode 100644 index 00000000..cf32b91d --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-resources.yaml @@ -0,0 +1,113 @@ +condition: '{{ true }}' +name: kubernetes-resources +rules: +- alert: KubeCPUOvercommit + annotations: + description: 'Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted CPU resource requests for Pods by {{`{{`}} $value {{`}}`}} CPU shares and cannot tolerate node failure.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecpuovercommit' + summary: 'Cluster has overcommitted CPU resource requests.' 
+ condition: '{{ true }}' + expr: |- + sum(namespace_cpu:kube_pod_container_resource_requests:sum{}) by ({{ .Values.global.clusterLabel }}) - (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by ({{ .Values.global.clusterLabel }}) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by ({{ .Values.global.clusterLabel }})) > 0 + and + (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by ({{ .Values.global.clusterLabel }}) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by ({{ .Values.global.clusterLabel }})) > 0 + for: 10m + labels: + severity: warning +- alert: KubeMemoryOvercommit + annotations: + description: 'Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted memory resource requests for Pods by {{`{{`}} $value | humanize {{`}}`}} bytes and cannot tolerate node failure.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubememoryovercommit' + summary: 'Cluster has overcommitted memory resource requests.' + condition: '{{ true }}' + expr: |- + sum(namespace_memory:kube_pod_container_resource_requests:sum{}) by ({{ .Values.global.clusterLabel }}) - (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }}) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }})) > 0 + and + (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }}) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }})) > 0 + for: 10m + labels: + severity: warning +- alert: KubeCPUQuotaOvercommit + annotations: + description: 'Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted CPU resource requests for Namespaces.' 
+ runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecpuquotaovercommit' + summary: 'Cluster has overcommitted CPU resource requests.' + condition: '{{ true }}' + expr: |- + sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(cpu|requests.cpu)"})) by ({{ .Values.global.clusterLabel }}) + / + sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }}) + > 1.5 + for: 5m + labels: + severity: warning +- alert: KubeMemoryQuotaOvercommit + annotations: + description: 'Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted memory resource requests for Namespaces.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubememoryquotaovercommit' + summary: 'Cluster has overcommitted memory resource requests.' + condition: '{{ true }}' + expr: |- + sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(memory|requests.memory)"})) by ({{ .Values.global.clusterLabel }}) + / + sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }}) + > 1.5 + for: 5m + labels: + severity: warning +- alert: KubeQuotaAlmostFull + annotations: + description: 'Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubequotaalmostfull' + summary: 'Namespace quota is going to be full.' 
+ condition: '{{ true }}' + expr: |- + kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + > 0.9 < 1 + for: 15m + labels: + severity: informational +- alert: KubeQuotaFullyUsed + annotations: + description: 'Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubequotafullyused' + summary: 'Namespace quota is fully used.' + condition: '{{ true }}' + expr: |- + kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + == 1 + for: 15m + labels: + severity: informational +- alert: KubeQuotaExceeded + annotations: + description: 'Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubequotaexceeded' + summary: 'Namespace quota has exceeded the limits.' + condition: '{{ true }}' + expr: |- + kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + > 1 + for: 15m + labels: + severity: warning +- alert: CPUThrottlingHigh + annotations: + description: '{{`{{`}} $value | humanizePercentage {{`}}`}} throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container {{`}}`}} in pod {{`{{`}} $labels.pod {{`}}`}}.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/cputhrottlinghigh' + summary: 'Processes experience elevated CPU throttling.' 
+ condition: '{{ true }}' + expr: |- + sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container,pod,namespace,{{ .Values.global.clusterLabel }}) + / + sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container,pod,namespace,{{ .Values.global.clusterLabel }}) + > ( 25 / 100 ) + for: 15m + labels: + severity: informational diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-storage.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-storage.yaml new file mode 100644 index 00000000..1e6703c0 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-storage.yaml @@ -0,0 +1,101 @@ +condition: '{{ true }}' +name: kubernetes-storage +rules: +- alert: KubePersistentVolumeFillingUp + annotations: + description: 'The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} is only {{`{{`}} $value | humanizePercentage {{`}}`}} free.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumefillingup' + summary: 'PersistentVolume is filling up.' 
+ condition: '{{ true }}' + expr: |- + ( + kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} + / + kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} + ) < 0.03 + and + kubelet_volume_stats_used_bytes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} > 0 + unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: 1m + labels: + severity: critical +- alert: KubePersistentVolumeFillingUp + annotations: + description: 'Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} is available.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumefillingup' + summary: 'PersistentVolume is filling up.' 
+ condition: '{{ true }}' + expr: |- + ( + kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} + / + kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} + ) < 0.15 + and + kubelet_volume_stats_used_bytes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} > 0 + and + predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 + unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: 1h + labels: + severity: warning +- alert: KubePersistentVolumeInodesFillingUp + annotations: + description: 'The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} only has {{`{{`}} $value | humanizePercentage {{`}}`}} free inodes.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumeinodesfillingup' + summary: 'PersistentVolumeInodes are filling up.' 
+ condition: '{{ true }}' + expr: |- + ( + kubelet_volume_stats_inodes_free{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} + / + kubelet_volume_stats_inodes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} + ) < 0.03 + and + kubelet_volume_stats_inodes_used{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} > 0 + unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: 1m + labels: + severity: critical +- alert: KubePersistentVolumeInodesFillingUp + annotations: + description: 'Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} is expected to run out of inodes within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} of its inodes are free.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumeinodesfillingup' + summary: 'PersistentVolumeInodes are filling up.' 
+ condition: '{{ true }}' + expr: |- + ( + kubelet_volume_stats_inodes_free{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} + / + kubelet_volume_stats_inodes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} + ) < 0.15 + and + kubelet_volume_stats_inodes_used{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} > 0 + and + predict_linear(kubelet_volume_stats_inodes_free{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 + unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: 1h + labels: + severity: warning +- alert: KubePersistentVolumeErrors + annotations: + description: 'The persistent volume {{`{{`}} $labels.persistentvolume {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} has status {{`{{`}} $labels.phase {{`}}`}}.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumeerrors' + summary: 'PersistentVolume is having issues with provisioning.' 
+ condition: '{{ true }}' + expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0 + for: 5m + labels: + severity: critical diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-apiserver.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-apiserver.yaml new file mode 100644 index 00000000..6621da32 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-apiserver.yaml @@ -0,0 +1,62 @@ +condition: '{{ true }}' +name: kubernetes-system-apiserver +rules: +- alert: KubeClientCertificateExpiration + annotations: + description: 'A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7 days.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeclientcertificateexpiration' + summary: 'Client certificate is about to expire.' + condition: '{{ true }}' + expr: apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on (job,{{ .Values.global.clusterLabel }}) histogram_quantile(0.01, sum by (job,le,{{ .Values.global.clusterLabel }}) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800 + for: 5m + labels: + severity: warning +- alert: KubeClientCertificateExpiration + annotations: + description: 'A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24 hours.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeclientcertificateexpiration' + summary: 'Client certificate is about to expire.'
+ condition: '{{ true }}' + expr: apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on (job,{{ .Values.global.clusterLabel }}) histogram_quantile(0.01, sum by (job,le,{{ .Values.global.clusterLabel }}) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400 + for: 5m + labels: + severity: critical +- alert: KubeAggregatedAPIErrors + annotations: + description: 'Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeaggregatedapierrors' + summary: 'Kubernetes aggregated API has reported errors.' + condition: '{{ true }}' + expr: sum by (name,namespace,{{ .Values.global.clusterLabel }})(increase(aggregator_unavailable_apiservice_total{job="kube-apiserver"}[10m])) > 4 + labels: + severity: warning +- alert: KubeAggregatedAPIDown + annotations: + description: 'Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeaggregatedapidown' + summary: 'Kubernetes aggregated API is down.' + condition: '{{ true }}' + expr: (1 - max by (name,namespace,{{ .Values.global.clusterLabel }})(avg_over_time(aggregator_unavailable_apiservice{job="kube-apiserver"}[10m]))) * 100 < 85 + for: 5m + labels: + severity: warning +- alert: KubeAPIDown + annotations: + description: 'KubeAPI has disappeared from Prometheus target discovery.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapidown' + summary: 'Target disappeared from Prometheus target discovery.' 
+ condition: '{{ .Values.kubeApiServer.enabled }}' + expr: absent(up{job="kube-apiserver"} == 1) + for: 15m + labels: + severity: critical +- alert: KubeAPITerminatedRequests + annotations: + description: 'The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapiterminatedrequests' + summary: 'The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.' + condition: '{{ true }}' + expr: sum(rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="kube-apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m])) ) > 0.20 + for: 5m + labels: + severity: warning diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-controller-manager.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-controller-manager.yaml new file mode 100644 index 00000000..e53aebed --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-controller-manager.yaml @@ -0,0 +1,13 @@ +condition: '{{ .Values.kubeControllerManager.enabled }}' +name: kubernetes-system-controller-manager +rules: +- alert: KubeControllerManagerDown + annotations: + description: 'KubeControllerManager has disappeared from Prometheus target discovery.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecontrollermanagerdown' + summary: 'Target disappeared from Prometheus target discovery.' 
+ condition: '{{ .Values.kubeControllerManager.enabled }}' + expr: absent(up{job="kube-controller-manager"} == 1) + for: 15m + labels: + severity: critical diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-kubelet.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-kubelet.yaml new file mode 100644 index 00000000..77af3f58 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-kubelet.yaml @@ -0,0 +1,136 @@ +condition: '{{ true }}' +name: kubernetes-system-kubelet +rules: +- alert: KubeNodeNotReady + annotations: + description: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than 15 minutes.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubenodenotready' + summary: 'Node is not ready.' + condition: '{{ true }}' + expr: kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 + for: 15m + labels: + severity: warning +- alert: KubeNodeUnreachable + annotations: + description: '{{`{{`}} $labels.node {{`}}`}} is unreachable and some workloads may be rescheduled.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubenodeunreachable' + summary: 'Node is unreachable.' + condition: '{{ true }}' + expr: (kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) == 1 + for: 15m + labels: + severity: warning +- alert: KubeletTooManyPods + annotations: + description: 'Kubelet ''{{`{{`}} $labels.node {{`}}`}}'' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity.' 
+ runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubelettoomanypods' + summary: 'Kubelet is running at capacity.' + condition: '{{ true }}' + expr: |- + count by (node,{{ .Values.global.clusterLabel }}) ( + (kube_pod_status_phase{job="kube-state-metrics",phase="Running"} == 1) * on (instance,pod,namespace,{{ .Values.global.clusterLabel }}) group_left(node) topk by (instance,pod,namespace,{{ .Values.global.clusterLabel }}) (1, kube_pod_info{job="kube-state-metrics"}) + ) + / + max by (node,{{ .Values.global.clusterLabel }}) ( + kube_node_status_capacity{job="kube-state-metrics",resource="pods"} != 1 + ) > 0.95 + for: 15m + labels: + severity: informational +- alert: KubeNodeReadinessFlapping + annotations: + description: 'The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubenodereadinessflapping' + summary: 'Node readiness status is flapping.' + condition: '{{ true }}' + expr: sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m])) by (node,{{ .Values.global.clusterLabel }}) > 2 + for: 15m + labels: + severity: warning +- alert: KubeletPlegDurationHigh + annotations: + description: 'The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletplegdurationhigh' + summary: 'Kubelet Pod Lifecycle Event Generator is taking too long to relist.' + condition: '{{ true }}' + expr: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 + for: 5m + labels: + severity: warning +- alert: KubeletPodStartUpLatencyHigh + annotations: + description: 'Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.' 
+ runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletpodstartuplatencyhigh' + summary: 'Kubelet Pod startup latency is too high.' + condition: '{{ true }}' + expr: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance,le,{{ .Values.global.clusterLabel }})) * on (instance,{{ .Values.global.clusterLabel }}) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 + for: 15m + labels: + severity: warning +- alert: KubeletClientCertificateExpiration + annotations: + description: 'Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletclientcertificateexpiration' + summary: 'Kubelet client certificate is about to expire.' + condition: '{{ true }}' + expr: kubelet_certificate_manager_client_ttl_seconds < 604800 + labels: + severity: warning +- alert: KubeletClientCertificateExpiration + annotations: + description: 'Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletclientcertificateexpiration' + summary: 'Kubelet client certificate is about to expire.' + condition: '{{ true }}' + expr: kubelet_certificate_manager_client_ttl_seconds < 86400 + labels: + severity: critical +- alert: KubeletServerCertificateExpiration + annotations: + description: 'Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletservercertificateexpiration' + summary: 'Kubelet server certificate is about to expire.' 
+ condition: '{{ true }}' + expr: kubelet_certificate_manager_server_ttl_seconds < 604800 + labels: + severity: warning +- alert: KubeletServerCertificateExpiration + annotations: + description: 'Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletservercertificateexpiration' + summary: 'Kubelet server certificate is about to expire.' + condition: '{{ true }}' + expr: kubelet_certificate_manager_server_ttl_seconds < 86400 + labels: + severity: critical +- alert: KubeletClientCertificateRenewalErrors + annotations: + description: 'Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its client certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes).' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletclientcertificaterenewalerrors' + summary: 'Kubelet has failed to renew its client certificate.' + condition: '{{ true }}' + expr: increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0 + for: 15m + labels: + severity: warning +- alert: KubeletServerCertificateRenewalErrors + annotations: + description: 'Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its server certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes).' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletservercertificaterenewalerrors' + summary: 'Kubelet has failed to renew its server certificate.' + condition: '{{ true }}' + expr: increase(kubelet_server_expiration_renew_errors[5m]) > 0 + for: 15m + labels: + severity: warning +- alert: KubeletDown + annotations: + description: 'Kubelet has disappeared from Prometheus target discovery.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletdown' + summary: 'Target disappeared from Prometheus target discovery.' 
+ condition: '{{ .Values.kubelet.enabled }}' + expr: absent(up{job="kubelet", metrics_path="/metrics"} == 1) + for: 15m + labels: + severity: critical diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-scheduler.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-scheduler.yaml new file mode 100644 index 00000000..1a0983f2 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-scheduler.yaml @@ -0,0 +1,13 @@ +condition: '{{ .Values.kubeScheduler.enabled }}' +name: kubernetes-system-scheduler +rules: +- alert: KubeSchedulerDown + annotations: + description: 'KubeScheduler has disappeared from Prometheus target discovery.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeschedulerdown' + summary: 'Target disappeared from Prometheus target discovery.' + condition: '{{ .Values.kubeScheduler.enabled }}' + expr: absent(up{job="kube-scheduler"} == 1) + for: 15m + labels: + severity: critical diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system.yaml new file mode 100644 index 00000000..677f4929 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system.yaml @@ -0,0 +1,27 @@ +condition: '{{ true }}' +name: kubernetes-system +rules: +- alert: KubeVersionMismatch + annotations: + description: 'There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeversionmismatch' + summary: 'Different semantic versions of Kubernetes components running.' 
+ condition: '{{ true }}' + expr: count by ({{ .Values.global.clusterLabel }}) (count by (git_version,{{ .Values.global.clusterLabel }}) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) > 1 + for: 15m + labels: + severity: warning +- alert: KubeClientErrors + annotations: + description: 'Kubernetes API server client ''{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}'' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeclienterrors' + summary: 'Kubernetes API server client is experiencing errors.' + condition: '{{ true }}' + expr: |- + (sum(rate(rest_client_requests_total{job="kube-apiserver",code=~"5.."}[5m])) by (instance,job,namespace,{{ .Values.global.clusterLabel }}) + / + sum(rate(rest_client_requests_total{job="kube-apiserver"}[5m])) by (instance,job,namespace,{{ .Values.global.clusterLabel }})) + > 0.01 + for: 15m + labels: + severity: warning diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.rules.yaml new file mode 100644 index 00000000..77cfa5a1 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.rules.yaml @@ -0,0 +1,76 @@ +condition: '{{ true }}' +name: node-exporter.rules +rules: +- condition: '{{ true }}' + expr: |- + count without (cpu, mode) ( + node_cpu_seconds_total{job="node-exporter",mode="idle"} + ) + record: instance:node_num_cpu:sum +- condition: '{{ true }}' + expr: |- + 1 - avg without (cpu) ( + sum without (mode) (rate(node_cpu_seconds_total{job="node-exporter", mode=~"idle|iowait|steal"}[5m])) + ) + record: instance:node_cpu_utilisation:rate5m +- condition: '{{ true }}' + expr: |- + ( + node_load1{job="node-exporter"} + / +
instance:node_num_cpu:sum{job="node-exporter"} + ) + record: instance:node_load1_per_cpu:ratio +- condition: '{{ true }}' + expr: |- + 1 - ( + ( + node_memory_MemAvailable_bytes{job="node-exporter"} + or + ( + node_memory_Buffers_bytes{job="node-exporter"} + + + node_memory_Cached_bytes{job="node-exporter"} + + + node_memory_MemFree_bytes{job="node-exporter"} + + + node_memory_Slab_bytes{job="node-exporter"} + ) + ) + / + node_memory_MemTotal_bytes{job="node-exporter"} + ) + record: instance:node_memory_utilisation:ratio +- condition: '{{ true }}' + expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) + record: instance:node_vmstat_pgmajfault:rate5m +- condition: '{{ true }}' + expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) + record: instance_device:node_disk_io_time_seconds:rate5m +- condition: '{{ true }}' + expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) + record: instance_device:node_disk_io_time_weighted_seconds:rate5m +- condition: '{{ true }}' + expr: |- + sum without (device) ( + rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_receive_bytes_excluding_lo:rate5m +- condition: '{{ true }}' + expr: |- + sum without (device) ( + rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_transmit_bytes_excluding_lo:rate5m +- condition: '{{ true }}' + expr: |- + sum without (device) ( + rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_receive_drop_excluding_lo:rate5m +- condition: '{{ true }}' + expr: |- + sum without (device) ( + rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m]) + ) + record: 
instance:node_network_transmit_drop_excluding_lo:rate5m diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.yaml new file mode 100644 index 00000000..3df8ed27 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.yaml @@ -0,0 +1,336 @@ +condition: '{{ true }}' +name: node-exporter +rules: +- alert: NodeFilesystemSpaceFillingUp + annotations: + description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemspacefillingup' + summary: 'Filesystem is predicted to run out of space within the next 24 hours.' + condition: '{{ true }}' + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15 + and + predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: warning +- alert: NodeFilesystemSpaceFillingUp + annotations: + description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up fast.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemspacefillingup' + summary: 'Filesystem is predicted to run out of space within the next 4 hours.' 
+ condition: '{{ true }}' + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10 + and + predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: critical +- alert: NodeFilesystemAlmostOutOfSpace + annotations: + description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutofspace' + summary: 'Filesystem has less than 5% space left.' + condition: '{{ true }}' + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 30m + labels: + severity: warning +- alert: NodeFilesystemAlmostOutOfSpace + annotations: + description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutofspace' + summary: 'Filesystem has less than 3% space left.' 
+ condition: '{{ true }}' + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 30m + labels: + severity: critical +- alert: NodeFilesystemFilesFillingUp + annotations: + description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemfilesfillingup' + summary: 'Filesystem is predicted to run out of inodes within the next 24 hours.' + condition: '{{ true }}' + expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40 + and + predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: warning +- alert: NodeFilesystemFilesFillingUp + annotations: + description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up fast.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemfilesfillingup' + summary: 'Filesystem is predicted to run out of inodes within the next 4 hours.' 
+ condition: '{{ true }}' + expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20 + and + predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: critical +- alert: NodeFilesystemAlmostOutOfFiles + annotations: + description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutoffiles' + summary: 'Filesystem has less than 5% inodes left.' + condition: '{{ true }}' + expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: warning +- alert: NodeFilesystemAlmostOutOfFiles + annotations: + description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutoffiles' + summary: 'Filesystem has less than 3% inodes left.' 
+ condition: '{{ true }}' + expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: critical +- alert: NodeNetworkReceiveErrs + annotations: + description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} receive errors in the last two minutes.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodenetworkreceiveerrs' + summary: 'Network interface is reporting many receive errors.' + condition: '{{ true }}' + expr: rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m]) > 0.01 + for: 1h + labels: + severity: warning +- alert: NodeNetworkTransmitErrs + annotations: + description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} transmit errors in the last two minutes.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodenetworktransmiterrs' + summary: 'Network interface is reporting many transmit errors.' + condition: '{{ true }}' + expr: rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m]) > 0.01 + for: 1h + labels: + severity: warning +- alert: NodeHighNumberConntrackEntriesUsed + annotations: + description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of conntrack entries are used.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodehighnumberconntrackentriesused' + summary: 'Number of conntrack are getting close to the limit.' 
+ condition: '{{ true }}' + expr: (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit) > 0.75 + labels: + severity: warning +- alert: NodeTextFileCollectorScrapeError + annotations: + description: 'Node Exporter text file collector on {{`{{`}} $labels.instance {{`}}`}} failed to scrape.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodetextfilecollectorscrapeerror' + summary: 'Node Exporter text file collector failed to scrape.' + condition: '{{ true }}' + expr: node_textfile_scrape_error{job="node-exporter"} == 1 + labels: + severity: warning +- alert: NodeClockSkewDetected + annotations: + description: 'Clock at {{`{{`}} $labels.instance {{`}}`}} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodeclockskewdetected' + summary: 'Clock skew detected.' + condition: '{{ true }}' + expr: |- + ( + node_timex_offset_seconds{job="node-exporter"} > 0.05 + and + deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0 + ) + or + ( + node_timex_offset_seconds{job="node-exporter"} < -0.05 + and + deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0 + ) + for: 10m + labels: + severity: warning +- alert: NodeClockNotSynchronising + annotations: + description: 'Clock at {{`{{`}} $labels.instance {{`}}`}} is not synchronising. Ensure NTP is configured on this host.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodeclocknotsynchronising' + summary: 'Clock not synchronising.' + condition: '{{ true }}' + expr: |- + min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0 + and + node_timex_maxerror_seconds{job="node-exporter"} >= 16 + for: 10m + labels: + severity: warning +- alert: NodeRAIDDegraded + annotations: + description: 'RAID array ''{{`{{`}} $labels.device {{`}}`}}'' at {{`{{`}} $labels.instance {{`}}`}} is in degraded state due to one or more disks failures. 
Number of spare drives is insufficient to fix issue automatically.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/noderaiddegraded' + summary: 'RAID Array is degraded.' + condition: '{{ true }}' + expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) > 0 + for: 15m + labels: + severity: critical +- alert: NodeRAIDDiskFailure + annotations: + description: 'At least one device in RAID array at {{`{{`}} $labels.instance {{`}}`}} failed. Array ''{{`{{`}} $labels.device {{`}}`}}'' needs attention and possibly a disk swap.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/noderaiddiskfailure' + summary: 'Failed device in RAID array.' + condition: '{{ true }}' + expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} > 0 + labels: + severity: warning +- alert: NodeFileDescriptorLimit + annotations: + description: 'File descriptors limit at {{`{{`}} $labels.instance {{`}}`}} is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefiledescriptorlimit' + summary: 'Kernel is predicted to exhaust file descriptors limit soon.' + condition: '{{ true }}' + expr: |- + ( + node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70 + ) + for: 15m + labels: + severity: warning +- alert: NodeFileDescriptorLimit + annotations: + description: 'File descriptors limit at {{`{{`}} $labels.instance {{`}}`}} is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefiledescriptorlimit' + summary: 'Kernel is predicted to exhaust file descriptors limit soon.' 
+ condition: '{{ true }}' + expr: |- + ( + node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90 + ) + for: 15m + labels: + severity: critical +- alert: NodeCPUHighUsage + annotations: + description: 'CPU usage at {{`{{`}} $labels.instance {{`}}`}} has been above 90% for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%. + + ' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodecpuhighusage' + summary: 'High CPU usage.' + condition: '{{ true }}' + expr: sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter", mode!="idle"}[2m]))) * 100 > 90 + for: 15m + labels: + severity: informational +- alert: NodeSystemSaturation + annotations: + description: 'System load per core at {{`{{`}} $labels.instance {{`}}`}} has been above 2 for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}. + + This might indicate this instance resources saturation and can cause it becoming unresponsive. + + ' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodesystemsaturation' + summary: 'System saturated, load per core is very high.' + condition: '{{ true }}' + expr: |- + node_load1{job="node-exporter"} + / count without (cpu, mode) (node_cpu_seconds_total{job="node-exporter", mode="idle"}) > 2 + for: 15m + labels: + severity: warning +- alert: NodeMemoryMajorPagesFaults + annotations: + description: 'Memory major pages are occurring at very high rate at {{`{{`}} $labels.instance {{`}}`}}, 500 major page faults per second for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}. + + Please check that there is enough memory available at this instance. + + ' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodememorymajorpagesfaults' + summary: 'Memory major page faults are occurring at very high rate.' 
+ condition: '{{ true }}' + expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) > 500 + for: 15m + labels: + severity: warning +- alert: NodeMemoryHighUtilization + annotations: + description: 'Memory is filling up at {{`{{`}} $labels.instance {{`}}`}}, has been above 90% for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%. + + ' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodememoryhighutilization' + summary: 'Host is running out of memory.' + condition: '{{ true }}' + expr: 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} * 100) > 90 + for: 15m + labels: + severity: warning +- alert: NodeDiskIOSaturation + annotations: + description: 'Disk IO queue (aqu-sq) is high on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}}, has been above 10 for the last 30 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}. + + This symptom might indicate disk saturation. + + ' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodediskiosaturation' + summary: 'Disk IO queue is high.' + condition: '{{ true }}' + expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) > 10 + for: 30m + labels: + severity: warning +- alert: NodeSystemdServiceFailed + annotations: + description: 'Systemd service {{`{{`}} $labels.name {{`}}`}} has entered failed state at {{`{{`}} $labels.instance {{`}}`}}' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodesystemdservicefailed' + summary: 'Systemd service has entered failed state.' 
+ condition: '{{ true }}' + expr: node_systemd_unit_state{job="node-exporter", state="failed"} == 1 + for: 5m + labels: + severity: warning +- alert: NodeBondingDegraded + annotations: + description: 'Bonding interface {{`{{`}} $labels.master {{`}}`}} on {{`{{`}} $labels.instance {{`}}`}} is in degraded state due to one or more slave failures.' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodebondingdegraded' + summary: 'Bonding interface is degraded' + condition: '{{ true }}' + expr: (node_bonding_slaves - node_bonding_active) != 0 + for: 5m + labels: + severity: warning diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-network.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-network.yaml new file mode 100644 index 00000000..d785e205 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-network.yaml @@ -0,0 +1,13 @@ +condition: '{{ true }}' +name: node-network +rules: +- alert: NodeNetworkInterfaceFlapping + annotations: + description: 'Network interface "{{`{{`}} $labels.device {{`}}`}}" changing its up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}' + runbook_url: '{{ .Values.defaultRules.runbookUrl }}/general/nodenetworkinterfaceflapping' + summary: 'Network interface is often changing its status' + condition: '{{ true }}' + expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 + for: 2m + labels: + severity: warning diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node.rules.yaml new file mode 100644 index 00000000..80e9fb18 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node.rules.yaml @@ -0,0 +1,44 @@ +condition: '{{ true }}' 
+name: node.rules +rules: +- condition: '{{ true }}' + expr: |- + topk by (namespace,pod,{{ .Values.global.clusterLabel }}) (1, + max by (node,namespace,pod,{{ .Values.global.clusterLabel }}) ( + label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)") + )) + record: 'node_namespace_pod:kube_pod_info:' +- condition: '{{ true }}' + expr: |- + count by (node,{{ .Values.global.clusterLabel }}) ( + node_cpu_seconds_total{mode="idle",job="node-exporter"} + * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left(node) + topk by (namespace,pod,{{ .Values.global.clusterLabel }}) (1, node_namespace_pod:kube_pod_info:) + ) + record: node:node_num_cpu:sum +- condition: '{{ true }}' + expr: |- + sum( + node_memory_MemAvailable_bytes{job="node-exporter"} or + ( + node_memory_Buffers_bytes{job="node-exporter"} + + node_memory_Cached_bytes{job="node-exporter"} + + node_memory_MemFree_bytes{job="node-exporter"} + + node_memory_Slab_bytes{job="node-exporter"} + ) + ) by ({{ .Values.global.clusterLabel }}) + record: :node_memory_MemAvailable_bytes:sum +- condition: '{{ true }}' + expr: |- + avg by (node,{{ .Values.global.clusterLabel }}) ( + sum without (mode) ( + rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",job="node-exporter"}[5m]) + ) + ) + record: node:node_cpu_utilization:ratio_rate5m +- condition: '{{ true }}' + expr: |- + avg by ({{ .Values.global.clusterLabel }}) ( + node:node_cpu_utilization:ratio_rate5m + ) + record: cluster:node_cpu:ratio_rate5m diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/_helpers.tpl b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/_helpers.tpl new file mode 100644 index 00000000..4429e725 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/_helpers.tpl @@ -0,0 +1,458 @@ +{{- /* Expand the name of the chart. 
*/ -}} +{{- define "victoria-metrics-k8s-stack.name" -}} + {{- $Chart := (.helm).Chart | default .Chart -}} + {{- $Values := (.helm).Values | default .Values -}} + {{- default $Chart.Name $Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end }} + +{{- /* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/ -}} +{{- define "victoria-metrics-k8s-stack.fullname" -}} + {{- $Values := (.helm).Values | default .Values -}} + {{- $Chart := (.helm).Chart | default .Chart -}} + {{- $Release := (.helm).Release | default .Release -}} + {{- $fullname := "" -}} + {{- if .appKey -}} + {{- $appKey := ternary (list .appKey) .appKey (kindIs "string" .appKey) -}} + {{- $values := $Values -}} + {{- $global := (index $Values.global $Chart.Name) | default dict -}} + {{- range $ak := $appKey }} + {{- $values = (index $values $ak) | default dict -}} + {{- $global = (index $global $ak) | default dict -}} + {{- if $values.name -}} + {{- $fullname = $values.name -}} + {{- else if $global.name -}} + {{- $fullname = $global.name -}} + {{- end -}} + {{- end }} + {{- end -}} + {{- if empty $fullname -}} + {{- if $Values.fullnameOverride -}} + {{- $fullname = $Values.fullnameOverride -}} + {{- else if (dig $Chart.Name "fullnameOverride" "" ($Values.global)) -}} + {{- $fullname = (dig $Chart.Name "fullnameOverride" "" ($Values.global)) -}} + {{- else -}} + {{- $name := default $Chart.Name $Values.nameOverride -}} + {{- if contains $name $Release.Name -}} + {{- $fullname = $Release.Name -}} + {{- else -}} + {{- $fullname = (printf "%s-%s" $Release.Name $name) }} + {{- end -}} + {{- end }} + {{- end -}} + {{- $fullname | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- /* Create chart name and version as used by the chart label. 
*/ -}} +{{- define "victoria-metrics-k8s-stack.chart" -}} + {{- $Chart := (.helm).Chart | default .Chart -}} + {{- printf "%s-%s" $Chart.Name $Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end }} + +{{- /* Create the name of the service account to use */ -}} +{{- define "victoria-metrics-k8s-stack.serviceAccountName" -}} + {{- $Values := (.helm).Values | default .Values -}} + {{- if $Values.serviceAccount.create -}} + {{- default (include "victoria-metrics-k8s-stack.fullname" .) $Values.serviceAccount.name -}} + {{- else -}} + {{- default "default" $Values.serviceAccount.name -}} + {{- end }} +{{- end }} + +{{- /* Common labels */ -}} +{{- define "victoria-metrics-k8s-stack.labels" -}} + {{- $Release := (.helm).Release | default .Release -}} + {{- $Chart := (.helm).Chart | default .Chart -}} + {{- $labels := (fromYaml (include "victoria-metrics-k8s-stack.selectorLabels" .)) -}} + {{- $_ := set $labels "helm.sh/chart" (include "victoria-metrics-k8s-stack.chart" .) -}} + {{- $_ := set $labels "app.kubernetes.io/managed-by" $Release.Service -}} + {{- with $Chart.AppVersion }} + {{- $_ := set $labels "app.kubernetes.io/version" . -}} + {{- end -}} + {{- toYaml $labels -}} +{{- end }} + +{{- define "vm.release" -}} + {{- $Release := (.helm).Release | default .Release -}} + {{- $Values := (.helm).Values | default .Values -}} + {{- default $Release.Name $Values.argocdReleaseOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- /* Selector labels */ -}} +{{- define "victoria-metrics-k8s-stack.selectorLabels" -}} + {{- $labels := .extraLabels | default dict -}} + {{- $_ := set $labels "app.kubernetes.io/name" (include "victoria-metrics-k8s-stack.name" .) -}} + {{- $_ := set $labels "app.kubernetes.io/instance" (include "vm.release" .) 
-}} + {{- toYaml $labels -}} +{{- end }} + +{{- /* Create the name for VM service */ -}} +{{- define "vm.service" -}} + {{- $Values := (.helm).Values | default .Values -}} + {{- $name := (include "victoria-metrics-k8s-stack.fullname" .) -}} + {{- with .appKey -}} + {{- $prefix := . -}} + {{- if kindIs "slice" $prefix }} + {{- $prefix = last $prefix -}} + {{- end -}} + {{- $prefix = ternary $prefix (printf "vm%s" $prefix) (hasPrefix "vm" $prefix) -}} + {{- $name = printf "%s-%s" $prefix $name -}} + {{- end -}} + {{- if hasKey . "appIdx" -}} + {{- $name = (printf "%s-%d.%s" $name .appIdx $name) -}} + {{- end -}} + {{- $name -}} +{{- end }} + +{{- define "vm.url" -}} + {{- $name := (include "vm.service" .) -}} + {{- $Release := (.helm).Release | default .Release -}} + {{- $Values := (.helm).Values | default .Values -}} + {{- $ns := include "vm.namespace" . -}} + {{- $proto := "http" -}} + {{- $port := 80 -}} + {{- $path := .appRoute | default "/" -}} + {{- $isSecure := false -}} + {{- if .appSecure -}} + {{- $isSecure = .appSecure -}} + {{- end -}} + {{- if .appKey -}} + {{- $appKey := ternary (list .appKey) .appKey (kindIs "string" .appKey) -}} + {{- $spec := $Values -}} + {{- range $ak := $appKey -}} + {{- if hasKey $spec $ak -}} + {{- $spec = (index $spec $ak) -}} + {{- end -}} + {{- if hasKey $spec "spec" -}} + {{- $spec = $spec.spec -}} + {{- end -}} + {{- end -}} + {{- $isSecure = (eq ($spec.extraArgs).tls "true") | default $isSecure -}} + {{- $proto = (ternary "https" "http" $isSecure) -}} + {{- $port = (ternary 443 80 $isSecure) -}} + {{- $port = $spec.port | default $port -}} + {{- $path = dig "http.pathPrefix" $path ($spec.extraArgs | default dict) -}} + {{- end -}} + {{- printf "%s://%s.%s.svc:%d%s" $proto $name $ns (int $port) $path -}} +{{- end -}} + +{{- define "vm.read.endpoint" -}} + {{- $ctx := . 
-}} + {{- $Values := (.helm).Values | default .Values -}} + {{- $endpoint := default dict -}} + {{- if $Values.vmsingle.enabled -}} + {{- $_ := set $ctx "appKey" "vmsingle" -}} + {{- $_ := set $endpoint "url" (include "vm.url" $ctx) -}} + {{- else if $Values.vmcluster.enabled -}} + {{- $_ := set $ctx "appKey" (list "vmcluster" "vmselect") -}} + {{- $baseURL := (trimSuffix "/" (include "vm.url" $ctx)) -}} + {{- $tenant := ($Values.tenant | default 0) -}} + {{- $_ := set $endpoint "url" (printf "%s/select/%d/prometheus" $baseURL (int $tenant)) -}} + {{- else if $Values.externalVM.read.url -}} + {{- $endpoint = $Values.externalVM.read -}} + {{- end -}} + {{- toYaml $endpoint -}} +{{- end }} + +{{- define "vm.write.endpoint" -}} + {{- $ctx := . -}} + {{- $Values := (.helm).Values | default .Values -}} + {{- $endpoint := default dict -}} + {{- if $Values.vmsingle.enabled -}} + {{- $_ := set $ctx "appKey" "vmsingle" -}} + {{- $baseURL := (trimSuffix "/" (include "vm.url" $ctx)) -}} + {{- $_ := set $endpoint "url" (printf "%s/api/v1/write" $baseURL) -}} + {{- else if $Values.vmcluster.enabled -}} + {{- $_ := set $ctx "appKey" (list "vmcluster" "vminsert") -}} + {{- $baseURL := (trimSuffix "/" (include "vm.url" $ctx)) -}} + {{- $tenant := ($Values.tenant | default 0) -}} + {{- $_ := set $endpoint "url" (printf "%s/insert/%d/prometheus/api/v1/write" $baseURL (int $tenant)) -}} + {{- else if $Values.externalVM.write.url -}} + {{- $endpoint = $Values.externalVM.write -}} + {{- end -}} + {{- toYaml $endpoint -}} +{{- end -}} + +{{- /* VMAlert remotes: renders remoteWrite, remoteRead and datasource endpoints plus either a notifierConfigRef or one notifier URL per Alertmanager replica. Values are always read through $Values so the helper agrees with its own (.helm).Values fallback. */ -}} +{{- define "vm.alert.remotes" -}} + {{- $Values := (.helm).Values | default .Values -}} + {{- $remotes := default dict -}} + {{- $fullname := (include "victoria-metrics-k8s-stack.fullname" .) -}} + {{- $ctx := dict "helm" . -}} + {{- $remoteWrite := (include "vm.write.endpoint" $ctx | fromYaml) -}} + {{- if $Values.vmalert.remoteWriteVMAgent -}} + {{- $ctx := dict "helm" .
"appKey" "vmagent" -}} + {{- $remoteWrite = dict "url" (printf "%s/api/v1/write" (include "vm.url" $ctx)) -}} + {{- end -}} + {{- $ctx := dict "helm" . -}} + {{- $remoteRead := (fromYaml (include "vm.read.endpoint" $ctx)) -}} + {{- $_ := set $remotes "remoteWrite" $remoteWrite -}} + {{- $_ := set $remotes "remoteRead" $remoteRead -}} + {{- $_ := set $remotes "datasource" $remoteRead -}} + {{- if $Values.vmalert.additionalNotifierConfigs }} + {{- $configName := printf "%s-vmalert-additional-notifier" $fullname -}} + {{- $notifierConfigRef := dict "name" $configName "key" "notifier-configs.yaml" -}} + {{- $_ := set $remotes "notifierConfigRef" $notifierConfigRef -}} + {{- else if $Values.alertmanager.enabled -}} + {{- $notifiers := default list -}} + {{- $appSecure := (not (empty ((($Values.alertmanager).spec).webConfig).tls_server_config)) -}} + {{- $ctx := dict "helm" . "appKey" "alertmanager" "appSecure" $appSecure "appRoute" (($Values.alertmanager).spec).routePrefix -}} + {{- $alertManagerReplicas := ($Values.alertmanager.spec.replicaCount | default 1 | int) -}} + {{- range until $alertManagerReplicas -}} + {{- $_ := set $ctx "appIdx" . -}} + {{- $notifiers = append $notifiers (dict "url" (include "vm.url" $ctx)) -}} + {{- end }} + {{- $_ := set $remotes "notifiers" $notifiers -}} + {{- end -}} + {{- toYaml $remotes -}} +{{- end -}} + +{{- /* VMAlert templates */ -}} +{{- define "vm.alert.templates" -}} + {{- $Values := (.helm).Values | default .Values}} + {{- $cms := ($Values.vmalert.spec.configMaps | default list) -}} + {{- if $Values.vmalert.templateFiles -}} + {{- $fullname := (include "victoria-metrics-k8s-stack.fullname" .)
-}} + {{- $cms = append $cms (printf "%s-vmalert-extra-tpl" $fullname) -}} + {{- end -}} + {{- $output := dict "configMaps" (compact $cms) -}} + {{- toYaml $output -}} +{{- end -}} + +{{- define "vm.license.global" -}} + {{- $license := (deepCopy (.Values.global).license) | default dict -}} + {{- if $license.key -}} + {{- if hasKey $license "keyRef" -}} + {{- $_ := unset $license "keyRef" -}} + {{- end -}} + {{- else if ($license.keyRef).name -}} + {{- if hasKey $license "key" -}} + {{- $_ := unset $license "key" -}} + {{- end -}} + {{- else -}} + {{- $license = default dict -}} + {{- end -}} + {{- toYaml $license -}} +{{- end -}} + +{{- /* VMAlert spec: merges the values-provided vmalert.spec (minus notifiers) with the generated remotes, template configMaps, extraArgs and license, and fails rendering when no notifier source is configured. */ -}} +{{- define "vm.alert.spec" -}} + {{- $Values := (.helm).Values | default .Values }} + {{- $extraArgs := dict "remoteWrite.disablePathAppend" "true" -}} + {{- if $Values.vmalert.templateFiles -}} + {{- $ruleTmpl := (printf "/etc/vm/configs/%s-vmalert-extra-tpl/*.tmpl" (include "victoria-metrics-k8s-stack.fullname" .)) -}} + {{- $_ := set $extraArgs "rule.templates" $ruleTmpl -}} + {{- end -}} + {{- $vmAlertRemotes := (include "vm.alert.remotes" . | fromYaml) -}} + {{- $vmAlertTemplates := (include "vm.alert.templates" . | fromYaml) -}} + {{- $spec := dict "extraArgs" $extraArgs -}} + {{- with (include "vm.license.global" .) -}} + {{- $_ := set $spec "license" (fromYaml .) -}} + {{- end -}} + {{- with concat ($vmAlertRemotes.notifiers | default list) ($Values.vmalert.spec.notifiers | default list) }} + {{- $_ := set $vmAlertRemotes "notifiers" . }} + {{- end }} + {{- $spec := deepCopy (omit $Values.vmalert.spec "notifiers") | mergeOverwrite $vmAlertRemotes | mergeOverwrite $vmAlertTemplates | mergeOverwrite $spec }} + {{- if not (or (hasKey $spec "notifier") (hasKey $spec "notifiers") (hasKey $spec "notifierConfigRef") (hasKey $spec.extraArgs "notifier.blackhole")) }} + {{- fail "Neither `notifier`, `notifiers` nor `notifierConfigRef` is set for vmalert.
If it's intentionally please consider setting `.vmalert.spec.extraArgs.['notifier.blackhole']` to `'true'`"}} + {{- end }} + {{- tpl (deepCopy (omit $Values.vmalert.spec "notifiers") | mergeOverwrite $vmAlertRemotes | mergeOverwrite $vmAlertTemplates | mergeOverwrite $spec | toYaml) . -}} +{{- end }} + +{{- /* VM Agent remoteWrites */ -}} +{{- define "vm.agent.remote.write" -}} + {{- $Values := (.helm).Values | default .Values }} + {{- $remoteWrites := $Values.vmagent.additionalRemoteWrites | default list -}} + {{- if or $Values.vmsingle.enabled $Values.vmcluster.enabled $Values.externalVM.write.url -}} + {{- $ctx := dict "helm" . -}} + {{- $remoteWrites = append $remoteWrites (fromYaml (include "vm.write.endpoint" $ctx)) -}} + {{- end -}} + {{- toYaml (dict "remoteWrite" $remoteWrites) -}} +{{- end -}} + +{{- /* VMAgent spec */ -}} +{{- define "vm.agent.spec" -}} + {{- $Values := (.helm).Values | default .Values }} + {{- $spec := (include "vm.agent.remote.write" . | fromYaml) -}} + {{- with (include "vm.license.global" .) -}} + {{- $_ := set $spec "license" (fromYaml .) -}} + {{- end -}} + {{- tpl (deepCopy $Values.vmagent.spec | mergeOverwrite $spec | toYaml) . -}} +{{- end }} + +{{- /* VMAuth spec */ -}} +{{- define "vm.auth.spec" -}} + {{- $ctx := . 
-}} + {{- $Values := (.helm).Values | default .Values }} + {{- $unauthorizedAccessConfig := default list }} + {{- if $Values.vmsingle.enabled -}} + {{- $_ := set $ctx "appKey" (list "vmsingle") -}} + {{- $url := (include "vm.url" $ctx) }} + {{- $srcPath := clean (printf "%s/.*" (urlParse $url).path) }} + {{- $unauthorizedAccessConfig = append $unauthorizedAccessConfig (dict "src_paths" (list $srcPath) "url_prefix" (list $url)) }} + {{- else if $Values.vmcluster.enabled -}} + {{- $_ := set $ctx "appKey" (list "vmcluster" "vminsert") -}} + {{- $writeUrl := (include "vm.url" $ctx) }} + {{- $writeSrcPath := clean (printf "%s/insert/.*" (urlParse $writeUrl).path) }} + {{- $unauthorizedAccessConfig = append $unauthorizedAccessConfig (dict "src_paths" (list $writeSrcPath) "url_prefix" (list $writeUrl)) }} + {{- $_ := set $ctx "appKey" (list "vmcluster" "vmselect") -}} + {{- $readUrl := (include "vm.url" $ctx) }} + {{- $readSrcPath := clean (printf "%s/select/.*" (urlParse $readUrl).path) }} + {{- $unauthorizedAccessConfig = append $unauthorizedAccessConfig (dict "src_paths" (list $readSrcPath) "url_prefix" (list $readUrl)) }} + {{- else if or $Values.externalVM.read.url $Values.externalVM.write.url }} + {{- with $Values.externalVM.read.url }} + {{- $srcPath := regexReplaceAll "(.*)/api/.*" (clean (printf "%s/.*" (urlParse .).path)) "${1}" }} + {{- $unauthorizedAccessConfig = append $unauthorizedAccessConfig (dict "src_paths" (list $srcPath) "url_prefix" (list .)) }} + {{- end -}} + {{- with $Values.externalVM.write.url }} + {{- $srcPath := regexReplaceAll "(.*)/api/.*" (clean (printf "%s/.*" (urlParse .).path)) "${1}" }} + {{- $unauthorizedAccessConfig = append $unauthorizedAccessConfig (dict "src_paths" (list $srcPath) "url_prefix" (list .)) }} + {{- end -}} + {{- end -}} + {{- $spec := $Values.vmauth.spec }} + {{- $_ := set $spec "unauthorizedAccessConfig" (concat $unauthorizedAccessConfig ($spec.unauthorizedAccessConfig | default list)) }} + {{- with (include 
"vm.license.global" .) -}} + {{- $_ := set $spec "license" (fromYaml .) -}} + {{- end -}} + {{- tpl (toYaml $spec) . -}} +{{- end -}} + +{{- /* Alertmanager spec */ -}} +{{- define "vm.alertmanager.spec" -}} + {{- $Values := (.helm).Values | default .Values }} + {{- $fullname := (include "victoria-metrics-k8s-stack.fullname" .) -}} + {{- $spec := $Values.alertmanager.spec -}} + {{- if and (not $Values.alertmanager.spec.configRawYaml) (not $Values.alertmanager.spec.configSecret) -}} + {{- $_ := set $spec "configSecret" (printf "%s-alertmanager" $fullname) -}} + {{- end -}} + {{- $templates := default list -}} + {{- if $Values.alertmanager.monzoTemplate.enabled -}} + {{- $configMap := (printf "%s-alertmanager-monzo-tpl" $fullname) -}} + {{- $templates = append $templates (dict "name" $configMap "key" "monzo.tmpl") -}} + {{- end -}} + {{- $configMap := (printf "%s-alertmanager-extra-tpl" $fullname) -}} + {{- range $key, $value := (.Values.alertmanager.templateFiles | default dict) -}} + {{- $templates = append $templates (dict "name" $configMap "key" $key) -}} + {{- end -}} + {{- $_ := set $spec "templates" $templates -}} + {{- toYaml $spec -}} +{{- end -}} + +{{- /* Single spec */ -}} +{{- define "vm.single.spec" -}} + {{- $Values := (.helm).Values | default .Values }} + {{- $extraArgs := default dict -}} + {{- if $Values.vmalert.enabled }} + {{- $ctx := dict "helm" . "appKey" "vmalert" -}} + {{- $_ := set $extraArgs "vmalert.proxyURL" (include "vm.url" $ctx) -}} + {{- end -}} + {{- $spec := dict "extraArgs" $extraArgs -}} + {{- with (include "vm.license.global" .) -}} + {{- $_ := set $spec "license" (fromYaml .) -}} + {{- end -}} + {{- tpl (deepCopy $Values.vmsingle.spec | mergeOverwrite $spec | toYaml) . -}} +{{- end }} + +{{- /* Cluster spec */ -}} +{{- define "vm.select.spec" -}} + {{- $Values := (.helm).Values | default .Values }} + {{- $extraArgs := default dict -}} + {{- if $Values.vmalert.enabled -}} + {{- $ctx := dict "helm" .
"appKey" "vmalert" -}} + {{- $_ := set $extraArgs "vmalert.proxyURL" (include "vm.url" $ctx) -}} + {{- end -}} + {{- $spec := dict "extraArgs" $extraArgs -}} + {{- toYaml $spec -}} +{{- end -}} + +{{- define "vm.cluster.spec" -}} + {{- $Values := (.helm).Values | default .Values }} + {{- $spec := (include "vm.select.spec" . | fromYaml) -}} + {{- $clusterSpec := (deepCopy $Values.vmcluster.spec) -}} + {{- with (include "vm.license.global" .) -}} + {{- $_ := set $clusterSpec "license" (fromYaml .) -}} + {{- end -}} + {{- tpl ($clusterSpec | mergeOverwrite (dict "vmselect" $spec) | toYaml) . -}} +{{- end -}} + +{{- define "vm.data.source.enabled" -}} + {{- $Values := (.helm).Values | default .Values -}} + {{- $grafana := $Values.grafana -}} + {{- $isEnabled := false -}} + {{- if $grafana.plugins -}} + {{- range $value := $grafana.plugins -}} + {{- if contains "victoriametrics-datasource" $value -}} + {{- $isEnabled = true -}} + {{- end }} + {{- end }} + {{- end }} + {{- $unsignedPlugins := ((index $grafana "grafana.ini").plugins).allow_loading_unsigned_plugins | default "" -}} + {{- $allowUnsigned := contains "victoriametrics-datasource" $unsignedPlugins -}} + {{- ternary "true" "" (and $isEnabled $allowUnsigned) -}} +{{- end -}} + +{{- /* Datasources */ -}} +{{- define "vm.data.sources" -}} + {{- $Values := (.helm).Values | default .Values }} + {{- $grafana := $Values.grafana -}} + {{- $datasources := $Values.grafana.additionalDataSources | default list -}} + {{- $vmDatasource := "victoriametrics-datasource" -}} + {{- $allowVMDatasource := (ternary false true (empty (include "vm.data.source.enabled" .))) -}} + {{- if or $Values.vmsingle.enabled $Values.vmcluster.enabled -}} + {{- $ctx := dict "helm" . 
-}} + {{- $readEndpoint:= (include "vm.read.endpoint" $ctx | fromYaml) -}} + {{- $defaultDatasources := default list -}} + {{- range $ds := $grafana.sidecar.datasources.default }} + {{- if not $ds.type -}} + {{- $_ := set $ds "type" $Values.grafana.defaultDatasourceType }} + {{- end -}} + {{- if or (ne $ds.type $vmDatasource) $allowVMDatasource -}} + {{- $_ := set $ds "url" $readEndpoint.url -}} + {{- $defaultDatasources = append $defaultDatasources $ds -}} + {{- end -}} + {{- end }} + {{- $datasources = concat $datasources $defaultDatasources -}} + {{- if and $grafana.sidecar.datasources.createVMReplicasDatasources $defaultDatasources -}} + {{- range $id := until (int $Values.vmsingle.spec.replicaCount) -}} + {{- $_ := set $ctx "appIdx" $id -}} + {{- $readEndpoint := (include "vm.read.endpoint" $ctx | fromYaml) -}} + {{- range $ds := $defaultDatasources -}} + {{- $ds = (deepCopy $ds) -}} + {{- $_ := set $ds "url" $readEndpoint.url -}} + {{- $_ := set $ds "name" (printf "%s-%d" $ds.name $id) -}} + {{- $_ := set $ds "isDefault" false -}} + {{- $datasources = append $datasources $ds -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- toYaml $datasources -}} +{{- end }} + +{{- /* VMRule name */ -}} +{{- define "victoria-metrics-k8s-stack.rulegroup.name" -}} + {{- printf "%s-%s" (include "victoria-metrics-k8s-stack.fullname" .) (.name | replace "_" "") -}} +{{- end -}} + +{{- /* VMRule labels */ -}} +{{- define "victoria-metrics-k8s-stack.rulegroup.labels" -}} + {{- $Values := (.helm).Values | default .Values }} + {{- $labels := (fromYaml (include "victoria-metrics-k8s-stack.labels" .)) -}} + {{- $_ := set $labels "app" (include "victoria-metrics-k8s-stack.name" .) 
-}} + {{- $labels = mergeOverwrite $labels (deepCopy $Values.defaultRules.labels) -}} + {{- toYaml $labels -}} +{{- end }} + +{{- /* VMRule key */ -}} +{{- define "victoria-metrics-k8s-stack.rulegroup.key" -}} + {{- without (regexSplit "[-_.]" .name -1) "exporter" "rules" | join "-" | camelcase | untitle -}} +{{- end -}} + +{{- /* VMAlertmanager name */ -}} +{{- define "victoria-metrics-k8s-stack.alertmanager.name" -}} + {{- $Values := (.helm).Values | default .Values }} + {{- $Values.alertmanager.name | default (printf "%s-%s" "vmalertmanager" (include "victoria-metrics-k8s-stack.fullname" .) | trunc 63 | trimSuffix "-") -}} +{{- end -}} diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/extra-objects.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/extra-objects.yaml new file mode 100644 index 00000000..a9bb3b6b --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/extra-objects.yaml @@ -0,0 +1,4 @@ +{{ range .Values.extraObjects }} +--- +{{ tpl (toYaml .) 
$ }} +{{ end }} diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/rules/rule.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/rules/rule.yaml new file mode 100644 index 00000000..afa2900c --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/rules/rule.yaml @@ -0,0 +1,121 @@ +{{- if .Values.defaultRules.create }} + +{{- /* +Default rules alias +*/}} +{{- $defaultRules := .Values.defaultRules -}} + +{{- /* +Exact rules overrides +*/}} +{{- $exactRules := index $defaultRules "rules" | default dict }} + +{{- /* +Create custom template context +*/}} +{{- $ctx := (dict "Values" (deepCopy .Values) "Release" (deepCopy .Release) "Chart" (deepCopy .Chart) "Template" (deepCopy .Template)) -}} + +{{- /* +Loop through all rules files, that were crafted with hack/sync_rules.py +*/}} +{{- range $groupFile, $_ := .Files.Glob "files/rules/**.yaml" -}} + +{{- /* +Get group name from file +*/}} +{{- $groupBase := base $groupFile -}} +{{- $groupNameRaw := trimSuffix (ext $groupBase) $groupBase -}} + +{{- /* +Create context for templating +*/}} +{{- $_ := set $ctx "name" $groupNameRaw -}} + +{{- /* +Create sanitized group name retrieved from file +*/}} +{{- $groupName := include "victoria-metrics-k8s-stack.rulegroup.key" $ctx -}} + +{{- /* +Merge common group with a group data of a current iteration +*/}} + +{{- $group := mergeOverwrite (deepCopy (dig "group" (default dict) $defaultRules)) (dig "groups" $groupName (default dict) $defaultRules) -}} + +{{- /* +Get group data from file +*/}} +{{- $groupCtx := mergeOverwrite (deepCopy $ctx) $group }} +{{- $groupData := fromYaml (tpl ($.Files.Get $groupFile) $groupCtx) -}} + +{{- /* +Save rules spec from file +*/}} +{{- $rulesSpec := $groupData.rules -}} + +{{- /* +Delete rules from group +*/}} +{{- $_ := unset $groupData "rules" -}} + +{{- /* +Save condition for group from file +*/}} +{{- $groupCondition := (eq $groupData.condition "true") 
-}} + +{{- /* +Delete condition from group +*/}} +{{- $_ := unset $groupData "condition" -}} + +{{- /* +Merge group spec +*/}} +{{- $groupSpec := mergeOverwrite (deepCopy $groupData) (dig "spec" (default dict) $group) -}} + +{{- /* +Filter out ignore rules +*/}} +{{- $commonRule := dig "rule" (default dict) $defaultRules }} +{{- $commonInGroupRule := dig "rules" (default dict) $group }} +{{- $filteredRulesSpec := default list }} +{{- range $_, $ruleSpec := $rulesSpec }} + {{- $ruleName := $ruleSpec.alert | default "" }} + {{- $ruleKey := (hasKey $ruleSpec "record" | ternary "recording" "alerting") -}} + {{- $ruleCondition := (eq $ruleSpec.condition "true") }} + {{- $_ := unset $ruleSpec "condition" }} + {{- $exactRule := index $exactRules $ruleName | default dict }} + {{- $defaultRule := deepCopy (index $defaultRules $ruleKey) }} + {{- $resultRule := mergeOverwrite (deepCopy $commonRule) $defaultRule $commonInGroupRule $exactRule }} + {{- if (and (dig "create" true $resultRule) $ruleCondition) }} + {{- $ruleSpec := mergeOverwrite (deepCopy $ruleSpec) (dig "spec" (default dict) $resultRule) }} + {{- $filteredRulesSpec = append $filteredRulesSpec $ruleSpec }} + {{- end }} +{{- end -}} +{{- $rulesSpec = $filteredRulesSpec }} + +{{- /* +Check if group is enabled +*/}} +{{- if (and $rulesSpec (dig "create" true $group) $groupCondition) }} +--- +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: {{ include "victoria-metrics-k8s-stack.rulegroup.name" $ctx }} + {{- with (include "victoria-metrics-k8s-stack.rulegroup.labels" $ctx) }} + labels: {{ . | nindent 4 }} + {{- end }} + {{- with $.Values.defaultRules.annotations }} + annotations: {{ toYaml . 
| nindent 4 }} + {{- end }} +spec: + groups: + - {{ toYaml $groupSpec | indent 4 | trim }} + rules: + {{- range $ruleSpec := $rulesSpec }} + - {{ toYaml $ruleSpec | indent 6 | trim }} + {{- end }} +{{- end }} +{{- end }} +{{- end }} diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/todo.md b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/todo.md new file mode 100644 index 00000000..33f0a470 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/todo.md @@ -0,0 +1,26 @@ +### plans + +* [x] VMCluster +* [x] VMSingle +* [x] VMAgent +* [x] VMAlert +* [x] AlertManager +* [x] Annotations +* [x] ServiceScrapes + * [x] Nodeexporter + * [x] Grafana + * [x] kube-state-metrics + * [x] kube-mixin + * [x] core-dns +* [x] Grafana DS +* [x] Dashboards + * [x] Nodeexporter + * [x] kube-state-metrics + * [x] kube-mixin +* [x] Rules + * [x] kube-mixin + * [x] kube-prometheus + * [x] victoria-metrics +* [ ] ServiceAccounts stuff +* [ ] SelectorOverride for ServiceScrapes +* [ ] helm hook to uninstall CRD objects before chart removal diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.minikube.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.minikube.yaml new file mode 100644 index 00000000..f4438de3 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.minikube.yaml @@ -0,0 +1,38 @@ +vmagent: + spec: + volumes: + - hostPath: + path: /var/lib/minikube/certs/etcd + type: DirectoryOrCreate + name: etcd-certs + volumeMounts: + - mountPath: /var/lib/minikube/certs/etcd + name: etcd-certs +kubeScheduler: + spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + port: http-metrics + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecureSkipVerify: true +kubeControllerManager: + spec: + endpoints: + - bearerTokenFile:
/var/run/secrets/kubernetes.io/serviceaccount/token + port: http-metrics + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecureSkipVerify: true +kubeEtcd: + spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + port: http-metrics + scheme: https + tlsConfig: + caFile: /var/lib/minikube/certs/etcd/ca.crt + certFile: /var/lib/minikube/certs/etcd/peer.crt + keyFile: /var/lib/minikube/certs/etcd/peer.key diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.yaml new file mode 100644 index 00000000..778f7e00 --- /dev/null +++ b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.yaml @@ -0,0 +1,1233 @@ +global: + clusterLabel: cluster + license: + key: "" + keyRef: {} + # name: secret-license + # key: license + +nameOverride: "" +fullnameOverride: "" +tenant: "0" +# -- If this chart is used in "Argocd" with "releaseName" field then +# -- VMServiceScrapes couldn't select the proper services. +# -- For correct working need set value 'argocdReleaseOverride=$ARGOCD_APP_NAME' +argocdReleaseOverride: "" + +# -- victoria-metrics-operator dependency chart configuration. 
+# -- For possible values refer to https://github.com/VictoriaMetrics/helm-charts/tree/master/charts/victoria-metrics-operator#parameters +# -- also checkout here possible ENV variables to configure operator behaviour https://docs.victoriametrics.com/operator/vars +victoria-metrics-operator: + enabled: true + serviceMonitor: + enabled: true + crd: + # -- we disable crd creation by operator chart as we create them in this chart + create: false + + # -- tells helm to clean up vm cr resources when uninstalling + cleanup: + enabled: true + image: + repository: bitnami/kubectl + # use image tag that matches k8s API version by default + # tag: 1.29.6 + pullPolicy: IfNotPresent + operator: + # -- By default, operator converts prometheus-operator objects. + disable_prometheus_converter: false + +serviceAccount: + # -- Specifies whether a service account should be created + create: true + # -- Annotations to add to the service account + annotations: {} + # -- The name of the service account to use. 
+ # -- If not set and create is true, a name is generated using the fullname template + name: "" + +# -- Enable dashboards even when their dependency is not installed +dashboards: + vmalert: false + operator: false + # -- in ArgoCD using client-side apply this dashboard reaches annotations size limit and causes k8s issues without server side apply + # See [this issue](https://github.com/VictoriaMetrics/helm-charts/tree/disable-node-exporter-dashboard-by-default/charts/victoria-metrics-k8s-stack#metadataannotations-too-long-must-have-at-most-262144-bytes-on-dashboards) + node-exporter-full: true + +# -- Create default rules for monitoring the cluster +defaultRules: + create: true + + # -- Common properties for VMRule groups + group: + spec: + # -- Optional HTTP URL parameters added to each rule request + params: {} + + # -- Common properties for all VMRules + rule: + spec: + # -- Additional labels for all VMRules + labels: {} + # -- Additional annotations for all VMRules + annotations: {} + + # -- Common properties for VMRules alerts + alerting: + spec: + # -- Additional labels for VMRule alerts + labels: {} + # -- Additional annotations for VMRule alerts + annotations: {} + + # -- Common properties for VMRules recording rules + recording: + spec: + # -- Additional labels for VMRule recording rules + labels: {} + # -- Additional annotations for VMRule recording rules + annotations: {} + + # -- Per rule properties + rules: {} + # CPUThrottlingHigh: + # create: true + # spec: + # for: 15m + # labels: + # severity: critical + groups: + etcd: + create: true + # -- Common properties for all rules in a group + rules: {} + # spec: + # annotations: + # dashboard: https://example.com/dashboard/1 + general: + create: true + rules: {} + k8sContainerMemoryRss: + create: true + rules: {} + k8sContainerMemoryCache: + create: true + rules: {} + k8sContainerCpuUsageSecondsTotal: + create: true + rules: {} + k8sPodOwner: + create: true + rules: {} + k8sContainerResource: + create: true
+ rules: {} + k8sContainerMemoryWorkingSetBytes: + create: true + rules: {} + k8sContainerMemorySwap: + create: true + rules: {} + kubeApiserver: + create: true + rules: {} + kubeApiserverAvailability: + create: true + rules: {} + kubeApiserverBurnrate: + create: true + rules: {} + kubeApiserverHistogram: + create: true + rules: {} + kubeApiserverSlos: + create: true + rules: {} + kubelet: + create: true + rules: {} + kubePrometheusGeneral: + create: true + rules: {} + kubePrometheusNodeRecording: + create: true + rules: {} + kubernetesApps: + create: true + rules: {} + targetNamespace: ".*" + kubernetesResources: + create: true + rules: {} + kubernetesStorage: + create: true + rules: {} + targetNamespace: ".*" + kubernetesSystem: + create: true + rules: {} + kubernetesSystemKubelet: + create: true + rules: {} + kubernetesSystemApiserver: + create: true + rules: {} + kubernetesSystemControllerManager: + create: true + rules: {} + kubeScheduler: + create: true + rules: {} + kubernetesSystemScheduler: + create: true + rules: {} + kubeStateMetrics: + create: true + rules: {} + nodeNetwork: + create: true + rules: {} + node: + create: true + rules: {} + vmagent: + create: true + rules: {} + vmsingle: + create: true + rules: {} + vmcluster: + create: true + rules: {} + vmHealth: + create: true + rules: {} + vmoperator: + create: true + rules: {} + alertmanager: + create: true + rules: {} + + # -- Runbook url prefix for default rules + runbookUrl: https://runbooks.prometheus-operator.dev/runbooks + + # -- Labels for default rules + labels: {} + # -- Annotations for default rules + annotations: {} + +# -- Create default dashboards +defaultDashboardsEnabled: true + +# -- Create experimental dashboards +experimentalDashboardsEnabled: true + +# -- Create dashboards as CRDs (requires grafana-operator to be installed) +grafanaOperatorDashboardsFormat: + enabled: false + instanceSelector: + matchLabels: + dashboards: "grafana" + allowCrossNamespaceImport: false + +## -- Provide
custom recording or alerting rules to be deployed into the cluster. +additionalVictoriaMetricsMap: +# rule-name: +# groups: +# - name: my_group +# rules: +# - record: my_record +# expr: 100 * my_record + +externalVM: + read: + url: "" + # bearerTokenSecret: + # name: dbaas-read-access-token + # key: bearerToken + write: + url: "" + # bearerTokenSecret: + # name: dbaas-read-access-token + # key: bearerToken + +############## + +# Configures vmsingle params +vmsingle: + annotations: {} + enabled: true + # -- full spec for VMSingle CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmsinglespec) + spec: + port: "8429" + image: + tag: v1.103.0 + # -- Data retention period. Possible units character: h(ours), d(ays), w(eeks), y(ears), if no unit character specified - month. The minimum retention period is 24h. See these [docs](https://docs.victoriametrics.com/single-server-victoriametrics/#retention) + retentionPeriod: "1" + replicaCount: 1 + extraArgs: {} + storage: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi + ingress: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: "" + # pathType is only for k8s > 1.19 + pathType: Prefix + + hosts: + - vmsingle.domain.com + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services.
+ extraPaths: [] + # - path: /* + # backend: + # serviceName: ssl-redirect + # servicePort: use-annotation + ## Or for k8s > 1.19 + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: vmsingle-ingress-tls + # hosts: + # - vmsingle.domain.com + +vmcluster: + enabled: false + annotations: {} + # -- full spec for VMCluster CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmclusterspec) + spec: + # -- Data retention period. Possible units character: h(ours), d(ays), w(eeks), y(ears), if no unit character specified - month. The minimum retention period is 24h. See these [docs](https://docs.victoriametrics.com/single-server-victoriametrics/#retention) + retentionPeriod: "1" + replicationFactor: 2 + vmstorage: + image: + tag: v1.103.0-cluster + replicaCount: 2 + storageDataPath: "/vm-data" + storage: + volumeClaimTemplate: + spec: + resources: + requests: + storage: 10Gi + resources: + {} + # limits: + # cpu: "1" + # memory: 1500Mi + vmselect: + port: "8481" + image: + tag: v1.103.0-cluster + replicaCount: 2 + cacheMountPath: "/select-cache" + extraArgs: {} + storage: + volumeClaimTemplate: + spec: + resources: + requests: + storage: 2Gi + resources: + {} + # limits: + # cpu: "1" + # memory: "1000Mi" + # requests: + # cpu: "0.5" + # memory: "500Mi" + vminsert: + port: "8480" + image: + tag: v1.103.0-cluster + replicaCount: 2 + extraArgs: {} + resources: + {} + # limits: + # cpu: "1" + # memory: 1000Mi + # requests: + # cpu: "0.5" + # memory: "500Mi" + + ingress: + storage: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # 
kubernetes.io/tls-acme: "true" + labels: {} + path: "" + # pathType is only for k8s > 1.19 + pathType: Prefix + + hosts: + - vmstorage.domain.com + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # backend: + # serviceName: ssl-redirect + # servicePort: use-annotation + ## Or for k8s > 1.19 + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: vmstorage-ingress-tls + # hosts: + # - vmstorage.domain.com + select: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: '{{ dig "extraArgs" "http.pathPrefix" "/" .Values.vmcluster.spec.vmselect }}' + # pathType is only for k8s > 1.19 + pathType: Prefix + + hosts: + - vmselect.domain.com + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. 
+ extraPaths: [] + # - path: /* + # backend: + # serviceName: ssl-redirect + # servicePort: use-annotation + ## Or for k8s > 1.19 + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: vmselect-ingress-tls + # hosts: + # - vmselect.domain.com + insert: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: '{{ dig "extraArgs" "http.pathPrefix" "/" .Values.vmcluster.spec.vminsert }}' + # pathType is only for k8s > 1.19 + pathType: Prefix + + hosts: + - vminsert.domain.com + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # backend: + # serviceName: ssl-redirect + # servicePort: use-annotation + ## Or for k8s > 1.19 + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: vminsert-ingress-tls + # hosts: + # - vminsert.domain.com + +alertmanager: + enabled: true + annotations: {} + # -- (object) full spec for VMAlertmanager CRD. 
Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmalertmanagerspec) + spec: + port: "9093" + selectAllByDefault: true + image: + tag: v0.25.0 + externalURL: "" + routePrefix: / + + # -- (string) if this one defined, it will be used for alertmanager configuration and config parameter will be ignored + configSecret: "" + # -- (object) alertmanager configuration + config: + templates: + - "/etc/vm/configs/**/*.tmpl" + route: + # group_by: ["alertgroup", "job"] + # group_wait: 30s + # group_interval: 5m + # repeat_interval: 12h + receiver: "blackhole" + ## routes: + ################################################### + ## Duplicate code_owner routes to teams + ## These will send alerts to team channels but continue + ## processing through the rest of the tree to handled by on-call + # - matchers: + # - code_owner_channel!="" + # - severity=~"info|warning|critical" + # group_by: ["code_owner_channel", "alertgroup", "job"] + # receiver: slack-code-owners + # ################################################### + # ## Standard on-call routes + # - matchers: + # - severity=~"info|warning|critical" + # receiver: slack-monitoring + # continue: true + + # inhibit_rules: + # - target_matchers: + # - severity=~"warning|info" + # source_matchers: + # - severity=critical + # equal: + # - cluster + # - namespace + # - alertname + # - target_matchers: + # - severity=info + # source_matchers: + # - severity=warning + # equal: + # - cluster + # - namespace + # - alertname + # - target_matchers: + # - severity=info + # source_matchers: + # - alertname=InfoInhibitor + # equal: + # - cluster + # - namespace + + receivers: + - name: blackhole + # - name: "slack-monitoring" + # slack_configs: + # - channel: "#channel" + # send_resolved: true + # title: '{{ template "slack.monzo.title" . }}' + # icon_emoji: '{{ template "slack.monzo.icon_emoji" . }}' + # color: '{{ template "slack.monzo.color" . }}' + # text: '{{ template "slack.monzo.text" . 
}}' + # actions: + # - type: button + # text: "Runbook :green_book:" + # url: "{{ (index .Alerts 0).Annotations.runbook_url }}" + # - type: button + # text: "Query :mag:" + # url: "{{ (index .Alerts 0).GeneratorURL }}" + # - type: button + # text: "Dashboard :grafana:" + # url: "{{ (index .Alerts 0).Annotations.dashboard }}" + # - type: button + # text: "Silence :no_bell:" + # url: '{{ template "__alert_silence_link" . }}' + # - type: button + # text: '{{ template "slack.monzo.link_button_text" . }}' + # url: "{{ .CommonAnnotations.link_url }}" + # - name: slack-code-owners + # slack_configs: + # - channel: "#{{ .CommonLabels.code_owner_channel }}" + # send_resolved: true + # title: '{{ template "slack.monzo.title" . }}' + # icon_emoji: '{{ template "slack.monzo.icon_emoji" . }}' + # color: '{{ template "slack.monzo.color" . }}' + # text: '{{ template "slack.monzo.text" . }}' + # actions: + # - type: button + # text: "Runbook :green_book:" + # url: "{{ (index .Alerts 0).Annotations.runbook }}" + # - type: button + # text: "Query :mag:" + # url: "{{ (index .Alerts 0).GeneratorURL }}" + # - type: button + # text: "Dashboard :grafana:" + # url: "{{ (index .Alerts 0).Annotations.dashboard }}" + # - type: button + # text: "Silence :no_bell:" + # url: '{{ template "__alert_silence_link" . }}' + # - type: button + # text: '{{ template "slack.monzo.link_button_text" . }}' + # url: "{{ .CommonAnnotations.link_url }}" + # + # better alert templates for slack + # source https://gist.github.com/milesbxf/e2744fc90e9c41b47aa47925f8ff6512 + monzoTemplate: + enabled: true + + # -- (object) extra alert templates + templateFiles: + {} + # template_1.tmpl: |- + # {{ define "hello" -}} + # hello, Victoria! 
+ # {{- end }} + # template_2.tmpl: "" + + # -- (object) alertmanager ingress configuration + ingress: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: '{{ .Values.alertmanager.spec.routePrefix | default "/" }}' + # pathType is only for k8s > 1.19 + pathType: Prefix + + hosts: + - alertmanager.domain.com + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # backend: + # serviceName: ssl-redirect + # servicePort: use-annotation + ## Or for k8s > 1.19 + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: alertmanager-ingress-tls + # hosts: + # - alertmanager.domain.com + +vmalert: + annotations: {} + enabled: true + + # Controls whether VMAlert should use VMAgent or VMInsert as a target for remotewrite + remoteWriteVMAgent: false + # -- (object) full spec for VMAlert CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmalertspec) + spec: + port: "8080" + selectAllByDefault: true + image: + tag: v1.103.0 + evaluationInterval: 15s + extraArgs: + http.pathPrefix: "/" + + # External labels to add to all generated recording rules and alerts + externalLabels: {} + + # -- (object) extra vmalert annotation templates + templateFiles: + {} + # template_1.tmpl: |- + # {{ define "hello" -}} + # hello, Victoria! 
+ # {{- end }} + # template_2.tmpl: "" + + ## additionalNotifierConfigs allows to configure static notifiers, discover notifiers via Consul and DNS, + ## see specification in https://docs.victoriametrics.com/vmalert/#notifier-configuration-file. + ## This configuration will be created as separate secret and mounted to vmalert pod. + additionalNotifierConfigs: {} + # dns_sd_configs: + # - names: + # - my.domain.com + # type: 'A' + # port: 9093 + # -- (object) vmalert ingress config + ingress: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: "" + # pathType is only for k8s > 1.19 + pathType: Prefix + + hosts: + - vmalert.domain.com + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # backend: + # serviceName: ssl-redirect + # servicePort: use-annotation + ## Or for k8s > 1.19 + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: vmalert-ingress-tls + # hosts: + # - vmalert.domain.com + +vmauth: + enabled: false + annotations: {} + # -- (object) full spec for VMAuth CRD. 
Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmauthspec) + spec: + discover_backend_ips: true + port: "8427" + +vmagent: + enabled: true + annotations: {} + # -- remoteWrite configuration of VMAgent, allowed parameters defined in a [spec](https://docs.victoriametrics.com/operator/api#vmagentremotewritespec) + additionalRemoteWrites: + [] + #- url: http://some-remote-write/api/v1/write + # -- (object) full spec for VMAgent CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmagentspec) + spec: + port: "8429" + selectAllByDefault: true + image: + tag: v1.103.0 + scrapeInterval: 20s + externalLabels: {} + # For multi-cluster setups it is useful to use "cluster" label to identify the metrics source. + # For example: + # cluster: cluster-name + extraArgs: + promscrape.streamParse: "true" + # Do not store original labels in vmagent's memory by default. This reduces the amount of memory used by vmagent + # but makes vmagent debugging UI less informative. See: https://docs.victoriametrics.com/vmagent/#relabel-debug + promscrape.dropOriginalLabels: "true" + # -- (object) vmagent ingress configuration + ingress: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: "" + # pathType is only for k8s > 1.19 + pathType: Prefix + + hosts: + - vmagent.domain.com + # -- Extra paths to prepend to every host configuration. This is useful when working with annotation based services. 
+ extraPaths: [] + # - path: /* + # backend: + # serviceName: ssl-redirect + # servicePort: use-annotation + ## Or for k8s > 1.19 + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: vmagent-ingress-tls + # hosts: + # - vmagent.domain.com + +################################################# +### dependencies ##### +################################################# +# Grafana dependency chart configuration. For possible values refer to https://github.com/grafana/helm-charts/tree/main/charts/grafana#configuration +grafana: + enabled: true + ## all values for grafana helm chart can be specified here + sidecar: + datasources: + enabled: true + # -- list of default prometheus compatible datasource configurations. + # VM `url` will be added to each of them in templates and `type` will be set to defaultDatasourceType if not defined + default: + - name: VictoriaMetrics + isDefault: true + - name: VictoriaMetrics (DS) + isDefault: false + type: victoriametrics-datasource + initDatasources: true + createVMReplicasDatasources: false + dashboards: + provider: + name: default + orgid: 1 + additionalDashboardLabels: {} + folder: /var/lib/grafana/dashboards + defaultFolderName: default + additionalDashboardAnnotations: {} + enabled: true + multicluster: false + + ## ForceDeployDatasource Create datasource configmap even if grafana deployment has been disabled + forceDeployDatasource: false + + ## Configure additional grafana datasources (passed through tpl) + ## ref: http://docs.grafana.org/administration/provisioning/#datasources + additionalDataSources: [] + # - name: prometheus-sample + # access: proxy + # basicAuth: true + # basicAuthPassword: pass + # basicAuthUser: daco + # editable: false + # jsonData: + # tlsSkipVerify: true + # orgId: 1 + # type: prometheus + # url: https://{{ printf "%s-prometheus.svc" .Release.Name }}:9090 + # version: 1 + + defaultDashboardsTimezone: utc + + # 
Enabling VictoriaMetrics Datasource in Grafana. See more details here: https://github.com/VictoriaMetrics/grafana-datasource/blob/main/README.md#victoriametrics-datasource-for-grafana + # Note that Grafana will need internet access to install the datasource plugin. + # Uncomment the block below, if you want to enable VictoriaMetrics Datasource in Grafana: + #plugins: + # - "https://github.com/VictoriaMetrics/grafana-datasource/releases/download/v0.5.0/victoriametrics-datasource-v0.5.0.zip;victoriametrics-datasource" + #grafana.ini: + # plugins: + # # Why VictoriaMetrics datasource is unsigned: https://github.com/VictoriaMetrics/grafana-datasource/blob/main/README.md#why-victoriametrics-datasource-is-unsigned + # allow_loading_unsigned_plugins: victoriametrics-datasource + + # Change datasource type in dashboards from Prometheus to VictoriaMetrics. + # you can use `victoriametrics-datasource` instead of `prometheus` if enabled VictoriaMetrics Datasource above + defaultDatasourceType: "prometheus" + + ingress: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: / + # pathType is only for k8s > 1.19 + pathType: Prefix + + hosts: + - grafana.domain.com + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. 
+ extraPaths: [] + # - path: /* + # backend: + # serviceName: ssl-redirect + # servicePort: use-annotation + ## Or for k8s > 1.19 + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: grafana-ingress-tls + # hosts: + # - grafana.domain.com + + # -- grafana VM scrape config + vmScrape: + # whether we should create a service scrape resource for grafana + enabled: true + + # -- [Scrape configuration](https://docs.victoriametrics.com/operator/api#vmservicescrapespec) for Grafana + spec: + selector: + matchLabels: + app.kubernetes.io/name: '{{ include "grafana.name" .Subcharts.grafana }}' + endpoints: + - port: '{{ .Values.grafana.service.portName }}' + +# prometheus-node-exporter dependency chart configuration. For possible values refer to https://github.com/prometheus-community/helm-charts/blob/main/charts/prometheus-node-exporter/values.yaml +prometheus-node-exporter: + enabled: true + + # all values for prometheus-node-exporter helm chart can be specified here + service: + ## Add the 'node-exporter' label to be used by serviceMonitor to match standard common usage in rules and grafana dashboards + ## + labels: + jobLabel: node-exporter + extraArgs: + - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/) + - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$ + # -- node exporter VM scrape config + vmScrape: + # whether we should create a service scrape resource for node-exporter + enabled: true + + # -- [Scrape configuration](https://docs.victoriametrics.com/operator/api#vmservicescrapespec) for Node Exporter + spec: + jobLabel: jobLabel + selector: + matchLabels: + app.kubernetes.io/name: '{{ include "prometheus-node-exporter.name" (index .Subcharts 
"prometheus-node-exporter") }}' + endpoints: + - port: metrics + metricRelabelConfigs: + - action: drop + source_labels: [mountpoint] + regex: "/var/lib/kubelet/pods.+" +# kube-state-metrics dependency chart configuration. For possible values refer to https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-state-metrics/values.yaml +kube-state-metrics: + enabled: true + ## all values for kube-state-metrics helm chart can be specified here + + # -- [Scrape configuration](https://docs.victoriametrics.com/operator/api#vmservicescrapespec) for Kube State Metrics + vmScrape: + enabled: true + spec: + selector: + matchLabels: + app.kubernetes.io/name: '{{ include "kube-state-metrics.name" (index .Subcharts "kube-state-metrics") }}' + app.kubernetes.io/instance: '{{ include "vm.release" . }}' + endpoints: + - port: http + honorLabels: true + metricRelabelConfigs: + - action: labeldrop + regex: (uid|container_id|image_id) + jobLabel: app.kubernetes.io/name + +################################################# +### Service Monitors ##### +################################################# +# Component scraping the kubelets +kubelet: + enabled: true + vmScrapes: + # -- Enable scraping /metrics/cadvisor from kubelet's service + cadvisor: + enabled: true + spec: + path: /metrics/cadvisor + # -- Enable scraping /metrics/probes from kubelet's service + probes: + enabled: true + spec: + path: /metrics/probes + kubelet: + spec: {} + # -- spec for VMNodeScrape crd + # https://docs.victoriametrics.com/operator/api.html#vmnodescrapespec + vmScrape: + kind: VMNodeScrape + spec: + scheme: "https" + honorLabels: true + interval: "30s" + scrapeTimeout: "5s" + tlsConfig: + insecureSkipVerify: true + caFile: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token" + # drop high cardinality label and useless metrics for cadvisor and kubelet + metricRelabelConfigs: + - action: labeldrop + regex: (uid) + - 
action: labeldrop + regex: (id|name) + - action: drop + source_labels: [__name__] + regex: (rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count) + relabelConfigs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - sourceLabels: [__metrics_path__] + targetLabel: metrics_path + - targetLabel: "job" + replacement: "kubelet" + # ignore timestamps of cadvisor's metrics by default + # more info here https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4697#issuecomment-1656540535 + honorTimestamps: false +# Component scraping the kube api server +kubeApiServer: + enabled: true + # -- spec for VMServiceScrape crd + # https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec + vmScrape: + spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # bearerTokenSecret: + # key: "" + port: https + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + serverName: kubernetes + jobLabel: component + namespaceSelector: + matchNames: + - default + selector: + matchLabels: + component: apiserver + provider: kubernetes + +# Component scraping the kube controller manager +kubeControllerManager: + enabled: true + + ## If your kube controller manager is not deployed as a pod, specify IPs it can be found on + ## + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + ## If using kubeControllerManager.endpoints only the port and targetPort are used + ## + service: + enabled: true + port: 10257 + targetPort: 10257 + selector: + component: kube-controller-manager + + # -- spec for VMServiceScrape crd + # https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # bearerTokenSecret: + # key: "" + 
port: http-metrics + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + serverName: kubernetes + +# Component scraping kubeDns. Use either this or coreDns +kubeDns: + enabled: false + # -- + service: + enabled: false + ports: + dnsmasq: + port: 10054 + targetPort: 10054 + skydns: + port: 10055 + targetPort: 10055 + selector: + k8s-app: kube-dns + # -- spec for VMServiceScrape crd + # https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: [kube-system] + endpoints: + - port: http-metrics-dnsmasq + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + - port: http-metrics-skydns + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + +# Component scraping coreDns. Use either this or kubeDns +coreDns: + enabled: true + service: + enabled: true + port: 9153 + targetPort: 9153 + selector: + k8s-app: kube-dns + + # -- spec for VMServiceScrape crd + # https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: [kube-system] + endpoints: + - port: http-metrics + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + +# Component scraping etcd +kubeEtcd: + enabled: true + + ## If your etcd is not deployed as a pod, specify IPs it can be found on + ## + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + ## Etcd service. 
If using kubeEtcd.endpoints only the port and targetPort are used + ## + service: + enabled: true + port: 2379 + targetPort: 2379 + selector: + component: etcd + + # -- spec for VMServiceScrape crd + # https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: [kube-system] + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # bearerTokenSecret: + # key: "" + port: http-metrics + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + +# Component scraping kube scheduler +kubeScheduler: + enabled: true + + ## If your kube scheduler is not deployed as a pod, specify IPs it can be found on + ## + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + ## If using kubeScheduler.endpoints only the port and targetPort are used + ## + service: + enabled: true + port: 10259 + targetPort: 10259 + selector: + component: kube-scheduler + + # -- spec for VMServiceScrape crd + # https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: [kube-system] + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # bearerTokenSecret: + # key: "" + port: http-metrics + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + +# Component scraping kube proxy +kubeProxy: + enabled: false + + ## If your kube proxy is not deployed as a pod, specify IPs it can be found on + ## + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + service: + enabled: true + port: 10249 + targetPort: 10249 + selector: + k8s-app: kube-proxy + + # -- spec for VMServiceScrape crd + # https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: [kube-system] + endpoints: + - 
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # bearerTokenSecret: + # key: "" + port: http-metrics + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + +## install vm operator crds +crds: + enabled: true + +## install prometheus operator crds +prometheus-operator-crds: + enabled: false + +# -- Add extra objects dynamically to this chart +extraObjects: [] diff --git a/packages/system/monitoring/templates/cadvisor-scrape.yaml b/packages/system/monitoring/templates/cadvisor-scrape.yaml index 4961b7b0..a3f2698a 100644 --- a/packages/system/monitoring/templates/cadvisor-scrape.yaml +++ b/packages/system/monitoring/templates/cadvisor-scrape.yaml @@ -21,6 +21,8 @@ spec: relabelConfigs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) + - action: labeldrop + regex: '.*node_kubevirt_io.*' - sourceLabels: [__metrics_path__] targetLabel: metrics_path - replacement: cadvisor @@ -30,3 +32,4 @@ spec: tlsConfig: caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt insecureSkipVerify: true + diff --git a/packages/system/monitoring/templates/kubelet-scrape.yaml b/packages/system/monitoring/templates/kubelet-scrape.yaml index 33c5c6ed..bb06b8d0 100644 --- a/packages/system/monitoring/templates/kubelet-scrape.yaml +++ b/packages/system/monitoring/templates/kubelet-scrape.yaml @@ -21,6 +21,8 @@ spec: relabelConfigs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) + - action: labeldrop + regex: '.*node_kubevirt_io.*' - sourceLabels: [__metrics_path__] targetLabel: metrics_path - replacement: kubelet @@ -51,6 +53,8 @@ spec: relabelConfigs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) + - action: labeldrop + regex: '.*node_kubevirt_io.*' - sourceLabels: - __metrics_path__ targetLabel: metrics_path diff --git a/packages/system/monitoring/templates/vmagent.yaml b/packages/system/monitoring/templates/vmagent.yaml index 1c686b28..46dc9e7b 100644 --- 
a/packages/system/monitoring/templates/vmagent.yaml +++ b/packages/system/monitoring/templates/vmagent.yaml @@ -8,6 +8,7 @@ spec: cluster: cozystack extraArgs: promscrape.streamParse: "true" + promscrape.maxScrapeSize: 32MB remoteWrite: - url: http://vminsert-shortterm.tenant-root.svc:8480/insert/0/prometheus/api/v1/write - url: http://vminsert-longterm.tenant-root.svc:8480/insert/0/prometheus/api/v1/write diff --git a/packages/system/monitoring/values.yaml b/packages/system/monitoring/values.yaml index 6bd9064d..1b505397 100644 --- a/packages/system/monitoring/values.yaml +++ b/packages/system/monitoring/values.yaml @@ -1,3 +1,7 @@ +global: + victoria-metrics-k8s-stack: + fullnameOverride: alerts + metrics-server: defaultArgs: - --cert-dir=/tmp @@ -62,3 +66,19 @@ fluent-bit: Operation lift Nested_under kubernetes Add_prefix kubernetes_ + +victoria-metrics-k8s-stack: + externalVM: + read: + url: "http://vmselect-shortterm.tenant-root.svc:8480/select/0/prometheus/api/v1/read" + write: + url: "http://vminsert-shortterm.tenant-root.svc:8480/insert/0/prometheus/api/v1/write" + + grafana: + enabled: false + kube-state-metrics: + enabled: false + prometheus-node-exporter: + enabled: false + alertmanager: + name: vmalertmanager-alertmanager From 54fd61cd43e78034a24ce252798d60e5bbba311c Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Thu, 26 Sep 2024 11:37:32 +0200 Subject: [PATCH 03/41] Update dashboards (#353) Signed-off-by: Andrei Kvapil ## Summary by CodeRabbit - **New Features** - Added a new data source configuration for Prometheus. - Introduced new panels for network metrics in Kubernetes dashboards. - New "Bar gauge" panel type added to the Kubernetes global views. - Enhanced visualizations with new properties for displaying metrics. - **Bug Fixes** - Updated Prometheus expressions to improve data filtering and accuracy. - **Version Updates** - Upgraded Grafana and plugin versions across multiple dashboard configurations. 
- **Improvements** - Enhanced dashboard layouts and usability with new visualization options. - Adjusted configurations for better performance and clarity in monitoring metrics. Signed-off-by: Andrei Kvapil --- .../control-plane/deprecated-resources.json | 19 + dashboards/dotdc/k8s-system-coredns.json | 229 +- dashboards/dotdc/k8s-views-global.json | 842 ++- dashboards/dotdc/k8s-views-namespaces.json | 708 ++- dashboards/dotdc/k8s-views-pods.json | 269 +- dashboards/ingress/namespace-detail.json | 6 +- dashboards/main/controller.json | 5628 ++++++++++------- dashboards/main/namespace.json | 4632 ++++++++------ dashboards/main/namespaces.json | 4539 +++++++------ dashboards/main/node.json | 2479 +++----- dashboards/main/pod.json | 5004 ++++++++------- .../victoria-metrics/backupmanager.json | 77 +- dashboards/victoria-metrics/operator.json | 1612 ++--- .../victoriametrics-cluster.json | 1805 ++++-- .../victoria-metrics/victoriametrics.json | 989 ++- dashboards/victoria-metrics/vmagent.json | 1339 +++- dashboards/victoria-metrics/vmalert.json | 473 +- hack/download-dashboards.sh | 2 +- 18 files changed, 18651 insertions(+), 12001 deletions(-) diff --git a/dashboards/control-plane/deprecated-resources.json b/dashboards/control-plane/deprecated-resources.json index 4841d949..92f9404f 100644 --- a/dashboards/control-plane/deprecated-resources.json +++ b/dashboards/control-plane/deprecated-resources.json @@ -590,6 +590,25 @@ "skipUrlSync": false, "sort": 0, "type": "query" + }, + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "hide": 2, + "includeAll": false, + "label": "Prometheus", + "multi": false, + "name": "ds_prometheus", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" } ] }, diff --git a/dashboards/dotdc/k8s-system-coredns.json b/dashboards/dotdc/k8s-system-coredns.json index 3d8dbc53..9fb20bae 100644 --- 
a/dashboards/dotdc/k8s-system-coredns.json +++ b/dashboards/dotdc/k8s-system-coredns.json @@ -120,9 +120,11 @@ "fields": "", "values": false }, - "textMode": "value_and_name" + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": true }, - "pluginVersion": "10.0.1", + "pluginVersion": "10.4.1", "targets": [ { "datasource": { @@ -130,7 +132,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "up{job=\"coredns\", instance=~\"$instance\"}", + "expr": "up{job=~\"$job\", instance=~\"$instance\", cluster=~\"$cluster\"}", "interval": "", "legendFormat": "{{ instance }}", "refId": "A" @@ -150,6 +152,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -163,6 +166,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -225,7 +229,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "rate(process_cpu_seconds_total{job=\"coredns\", instance=~\"$instance\"}[$__rate_interval])", + "expr": "rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])", "interval": "$resolution", "legendFormat": "{{ instance }}", "refId": "A" @@ -245,6 +249,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -258,6 +263,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -319,7 +325,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "process_resident_memory_bytes{job=\"coredns\", instance=~\"$instance\"}", + "expr": "process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\", cluster=~\"$cluster\"}", "interval": "", "legendFormat": "{{ instance }}", "refId": "A" @@ -339,6 +345,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, 
"axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -352,6 +359,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -413,7 +421,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "sum(rate(coredns_dns_requests_total{instance=~\"$instance\",proto=\"$protocol\"}[$__rate_interval]))", + "expr": "sum(rate(coredns_dns_requests_total{instance=~\"$instance\",proto=\"$protocol\", cluster=~\"$cluster\"}[$__rate_interval]))", "interval": "$resolution", "legendFormat": "total $protocol requests", "refId": "A" @@ -433,6 +441,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -446,6 +455,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -507,7 +517,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "sum(rate(coredns_dns_request_size_bytes_sum{instance=~\"$instance\",proto=\"$protocol\"}[$__rate_interval])) by (proto) / sum(rate(coredns_dns_request_size_bytes_count{instance=~\"$instance\",proto=\"$protocol\"}[$__rate_interval])) by (proto)", + "expr": "sum(rate(coredns_dns_request_size_bytes_sum{instance=~\"$instance\",proto=\"$protocol\", cluster=~\"$cluster\"}[$__rate_interval])) by (proto) / sum(rate(coredns_dns_request_size_bytes_count{instance=~\"$instance\",proto=\"$protocol\", cluster=~\"$cluster\"}[$__rate_interval])) by (proto)", "interval": "$resolution", "legendFormat": "average $protocol packet size", "refId": "A" @@ -527,6 +537,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -540,6 +551,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -601,7 +613,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": 
"sum(rate(coredns_dns_requests_total{instance=~\"$instance\"}[$__rate_interval])) by (type)", + "expr": "sum(rate(coredns_dns_requests_total{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)", "interval": "$resolution", "legendFormat": "{{ type }}", "refId": "A" @@ -621,6 +633,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -634,6 +647,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -695,7 +709,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "sum(rate(coredns_dns_responses_total{instance=~\"$instance\"}[$__rate_interval])) by (rcode)", + "expr": "sum(rate(coredns_dns_responses_total{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (rcode)", "interval": "$resolution", "legendFormat": "{{ rcode }}", "refId": "A" @@ -715,6 +729,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -728,6 +743,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -789,7 +805,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "sum(rate(coredns_forward_requests_total[$__rate_interval]))", + "expr": "sum(rate(coredns_forward_requests_total{cluster=~\"$cluster\"}[$__rate_interval]))", "interval": "$resolution", "legendFormat": "total forward requests", "refId": "A" @@ -809,6 +825,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -822,6 +839,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -883,7 +901,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": 
"sum(rate(coredns_forward_responses_total{rcode=~\"SERVFAIL|REFUSED\"}[$__rate_interval])) by (rcode)", + "expr": "sum(rate(coredns_forward_responses_total{rcode=~\"SERVFAIL|REFUSED\", cluster=~\"$cluster\"}[$__rate_interval])) by (rcode)", "interval": "$resolution", "legendFormat": "{{ rcode }}", "refId": "A" @@ -903,6 +921,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -916,6 +935,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -977,7 +997,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "sum(rate(coredns_cache_hits_total{instance=~\"$instance\"}[$__rate_interval])) by (type)", + "expr": "sum(rate(coredns_cache_hits_total{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)", "interval": "$resolution", "legendFormat": "{{ type }}", "refId": "A" @@ -988,7 +1008,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "sum(rate(coredns_cache_misses_total{instance=~\"$instance\"}[$__rate_interval])) by (type)", + "expr": "sum(rate(coredns_cache_misses_total{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)", "interval": "$resolution", "legendFormat": "misses", "refId": "B" @@ -1008,6 +1028,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1021,6 +1042,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1082,7 +1104,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "sum(coredns_cache_entries) by (type)", + "expr": "sum(coredns_cache_entries{cluster=~\"$cluster\"}) by (type)", "interval": "", "legendFormat": "{{ type }}", "refId": "A" @@ -1143,7 +1165,8 @@ "layout": "auto" }, "tooltip": { - "show": true, + "mode": "single", + "showColorScale": 
false, "yHistogram": false }, "yAxis": { @@ -1152,7 +1175,7 @@ "unit": "s" } }, - "pluginVersion": "10.0.1", + "pluginVersion": "10.4.1", "targets": [ { "datasource": { @@ -1160,7 +1183,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(increase(coredns_dns_request_duration_seconds_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le)", + "expr": "sum(increase(coredns_dns_request_duration_seconds_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le)", "format": "heatmap", "legendFormat": "{{le}}", "range": true, @@ -1196,85 +1219,6 @@ "x": 12, "y": 43 }, - "id": 30, - "options": { - "calculate": false, - "cellGap": 1, - "color": { - "exponent": 0.5, - "fill": "dark-orange", - "mode": "scheme", - "reverse": false, - "scale": "exponential", - "scheme": "RdYlBu", - "steps": 64 - }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" - }, - "filterValues": { - "le": 1e-9 - }, - "legend": { - "show": true - }, - "rowsFrame": { - "layout": "auto" - }, - "tooltip": { - "show": true, - "yHistogram": false - }, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "s" - } - }, - "pluginVersion": "10.0.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "editorMode": "code", - "expr": "sum(increase(coredns_forward_request_duration_seconds_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le)", - "format": "heatmap", - "legendFormat": "{{le}}", - "range": true, - "refId": "A" - } - ], - "title": "CoreDNS - Forward request duration", - "type": "heatmap" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 53 - }, "id": 28, "options": { "calculate": false, @@ -1301,7 +1245,8 @@ 
"layout": "auto" }, "tooltip": { - "show": true, + "mode": "single", + "showColorScale": false, "yHistogram": false }, "yAxis": { @@ -1310,7 +1255,7 @@ "unit": "decbytes" } }, - "pluginVersion": "10.0.1", + "pluginVersion": "10.4.1", "targets": [ { "datasource": { @@ -1318,7 +1263,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(increase(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\", le!=\"0\"}[$__rate_interval])) by (le)", + "expr": "sum(increase(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\", le!=\"0\", cluster=~\"$cluster\"}[$__rate_interval])) by (le)", "format": "heatmap", "legendFormat": "{{le}}", "range": true, @@ -1351,7 +1296,7 @@ "gridPos": { "h": 10, "w": 12, - "x": 12, + "x": 0, "y": 53 }, "id": 29, @@ -1380,7 +1325,8 @@ "layout": "auto" }, "tooltip": { - "show": true, + "mode": "single", + "showColorScale": false, "yHistogram": false }, "yAxis": { @@ -1389,7 +1335,7 @@ "unit": "decbytes" } }, - "pluginVersion": "10.0.1", + "pluginVersion": "10.4.1", "targets": [ { "datasource": { @@ -1397,7 +1343,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(increase(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\", le!=\"0\"}[$__rate_interval])) by (le)", + "expr": "sum(increase(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\", le!=\"0\", cluster=~\"$cluster\"}[$__rate_interval])) by (le)", "format": "heatmap", "legendFormat": "{{le}}", "range": true, @@ -1409,8 +1355,7 @@ } ], "refresh": "30s", - "schemaVersion": 38, - "style": "dark", + "schemaVersion": 39, "tags": [ "Kubernetes", "Prometheus" @@ -1435,6 +1380,34 @@ "skipUrlSync": false, "type": "datasource" }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(kube_node_info,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": 
[], + "query": { + "qryType": 1, + "query": "label_values(kube_node_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, { "current": { "selected": false, @@ -1445,7 +1418,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(up{job=\"coredns\"}, instance)", + "definition": "label_values(up{job=\"$job\", cluster=\"$cluster\"},instance)", "hide": 0, "includeAll": true, "label": "", @@ -1453,8 +1426,9 @@ "name": "instance", "options": [], "query": { - "query": "label_values(up{job=\"coredns\"}, instance)", - "refId": "StandardVariableQuery" + "qryType": 1, + "query": "label_values(up{job=\"$job\", cluster=\"$cluster\"},instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, "regex": "", @@ -1476,7 +1450,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(coredns_dns_requests_total, proto)", + "definition": "label_values(coredns_dns_requests_total{cluster=\"$cluster\"}, proto)", "hide": 0, "includeAll": false, "label": "", @@ -1484,7 +1458,7 @@ "name": "protocol", "options": [], "query": { - "query": "label_values(coredns_dns_requests_total, proto)", + "query": "label_values(coredns_dns_requests_total{cluster=\"$cluster\"}, proto)", "refId": "StandardVariableQuery" }, "refresh": 1, @@ -1498,7 +1472,7 @@ }, { "current": { - "selected": true, + "selected": false, "text": "30s", "value": "30s" }, @@ -1542,6 +1516,37 @@ "queryValue": "", "skipUrlSync": false, "type": "custom" + }, + { + "current": { + "selected": true, + "text": [ + "coredns" + ], + "value": [ + "coredns" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(coredns_build_info{cluster=\"$cluster\"},job)", + "hide": 0, + "includeAll": false, + "multi": true, + "name": "job", + "options": [], + "query": { + "qryType": 1, + "query": 
"label_values(coredns_build_info{cluster=\"$cluster\"},job)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" } ] }, @@ -1553,6 +1558,6 @@ "timezone": "", "title": "Kubernetes / System / CoreDNS", "uid": "k8s_system_coredns", - "version": 13, + "version": 18, "weekStart": "" } diff --git a/dashboards/dotdc/k8s-views-global.json b/dashboards/dotdc/k8s-views-global.json index d0ec2980..aad7e8de 100644 --- a/dashboards/dotdc/k8s-views-global.json +++ b/dashboards/dotdc/k8s-views-global.json @@ -9,25 +9,25 @@ "pluginName": "Prometheus" } ], - "__elements": [], + "__elements": {}, "__requires": [ + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" + }, { "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "8.3.4" + "version": "10.3.1" }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" + "version": "1.0.0" }, { "type": "panel", @@ -37,8 +37,8 @@ }, { "type": "panel", - "id": "gauge", - "name": "Gauge", + "id": "timeseries", + "name": "Time series", "version": "" } ], @@ -75,7 +75,7 @@ "collapsed": false, "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${datasource}" }, "gridPos": { "h": 1, @@ -127,8 +127,10 @@ "id": 77, "options": { "displayMode": "lcd", + "maxVizHeight": 300, "minVizHeight": 10, "minVizWidth": 0, + "namePlacement": "auto", "orientation": "horizontal", "reduceOptions": { "calcs": [ @@ -138,9 +140,10 @@ "values": false }, "showUnfilled": true, + "sizing": "auto", "valueMode": "color" }, - "pluginVersion": "10.0.3", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { @@ -149,11 +152,11 @@ }, "editorMode": "code", "exemplar": true, - "expr": "avg(1-rate(node_cpu_seconds_total{mode=\"idle\"}[$__rate_interval]))", + "expr": "avg(sum by (instance, cpu) 
(rate(node_cpu_seconds_total{mode!~\"idle|iowait|steal\", cluster=\"$cluster\", job=\"$job\"}[$__rate_interval])))", "interval": "", - "legendFormat": "Real", + "legendFormat": "Real Linux", "range": true, - "refId": "A" + "refId": "Real Linux" }, { "datasource": { @@ -161,11 +164,25 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\"}) / sum(machine_cpu_cores)", + "exemplar": true, + "expr": "avg(sum by (core) (rate(windows_cpu_time_total{mode!=\"idle\", cluster=\"$cluster\"}[$__rate_interval])))", + "hide": false, + "interval": "", + "legendFormat": "Real Windows", + "range": true, + "refId": "Real Windows" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\", cluster=\"$cluster\"}) / sum(machine_cpu_cores{cluster=\"$cluster\"})", "hide": false, "legendFormat": "Requests", "range": true, - "refId": "B" + "refId": "Requests" }, { "datasource": { @@ -173,14 +190,49 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_container_resource_limits{resource=\"cpu\"}) / sum(machine_cpu_cores)", + "expr": "sum(kube_pod_container_resource_limits{resource=\"cpu\", cluster=\"$cluster\"}) / sum(machine_cpu_cores{cluster=\"$cluster\"})", "hide": false, "legendFormat": "Limits", "range": true, - "refId": "C" + "refId": "Limits" } ], "title": "Global CPU Usage", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "Real", + "mode": "reduceRow", + "reduce": { + "include": [ + "Real Linux", + "Real Windows" + ], + "reducer": "mean" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Real Linux": true, + "Real Windows": true, + "Time": true + }, + "indexByName": { + "Limits": 5, + "Real": 1, + "Real Linux": 2, + "Real Windows": 3, + "Requests": 4, + "Time": 0 + }, + "renameByName": {} + } + } + ], "type": "bargauge" }, { @@ 
-223,8 +275,10 @@ "id": 78, "options": { "displayMode": "lcd", + "maxVizHeight": 300, "minVizHeight": 10, "minVizWidth": 0, + "namePlacement": "auto", "orientation": "horizontal", "reduceOptions": { "calcs": [ @@ -234,10 +288,11 @@ "values": false }, "showUnfilled": true, + "sizing": "auto", "text": {}, "valueMode": "color" }, - "pluginVersion": "10.0.3", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { @@ -246,11 +301,12 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / sum(node_memory_MemTotal_bytes)", + "expr": "sum(node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"$job\"} - node_memory_MemAvailable_bytes{cluster=\"$cluster\", job=\"$job\"}) / sum(node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"$job\"})", + "hide": false, "interval": "", - "legendFormat": "Real", + "legendFormat": "Real Linux", "range": true, - "refId": "A" + "refId": "Real Linux" }, { "datasource": { @@ -258,11 +314,24 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\"}) / sum(machine_memory_bytes)", + "exemplar": true, + "expr": "sum(windows_memory_available_bytes{cluster=\"$cluster\"} + windows_memory_cache_bytes{cluster=\"$cluster\"}) / sum(windows_os_visible_memory_bytes{cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Real Windows", + "range": true, + "refId": "Real Windows" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\", cluster=\"$cluster\"}) / sum(machine_memory_bytes{cluster=\"$cluster\"})", "hide": false, "legendFormat": "Requests", "range": true, - "refId": "B" + "refId": "Requests" }, { "datasource": { @@ -270,14 +339,50 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_container_resource_limits{resource=\"memory\"}) / sum(machine_memory_bytes)", + 
"expr": "sum(kube_pod_container_resource_limits{resource=\"memory\", cluster=\"$cluster\"}) / sum(machine_memory_bytes{cluster=\"$cluster\"})", "hide": false, "legendFormat": "Limits", "range": true, - "refId": "C" + "refId": "Limits" } ], "title": "Global RAM Usage", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "Real", + "mode": "reduceRow", + "reduce": { + "include": [ + "Real Linux", + "Real Windows" + ], + "reducer": "mean" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Real Linux": true, + "Real Windows": true, + "Time": true + }, + "includeByName": {}, + "indexByName": { + "Limits": 5, + "Real": 3, + "Real Linux": 1, + "Real Windows": 2, + "Requests": 4, + "Time": 0 + }, + "renameByName": {} + } + } + ], "type": "bargauge" }, { @@ -313,6 +418,7 @@ "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "last" @@ -320,20 +426,24 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "value" + "textMode": "value", + "wideLayout": true }, - "pluginVersion": "10.0.3", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "count(count by (node) (kube_node_info))", + "expr": "count(count by (node) (kube_node_info{cluster=\"$cluster\"}))", "interval": "", "legendFormat": "", + "range": true, "refId": "A" } ], @@ -351,11 +461,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -364,6 +476,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -431,7 +544,7 @@ "uid": "${datasource}" }, 
"exemplar": true, - "expr": "sum(kube_namespace_labels)", + "expr": "sum(kube_namespace_labels{cluster=\"$cluster\"})", "interval": "", "legendFormat": "Namespaces", "refId": "A" @@ -441,7 +554,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_pod_container_status_running)", + "expr": "sum(kube_pod_container_status_running{cluster=\"$cluster\"})", "interval": "", "legendFormat": "Running Containers", "refId": "B" @@ -451,7 +564,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_pod_status_phase{phase=\"Running\"})", + "expr": "sum(kube_pod_status_phase{phase=\"Running\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Running Pods", "refId": "O" @@ -461,7 +574,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_service_info)", + "expr": "sum(kube_service_info{cluster=\"$cluster\"})", "interval": "", "legendFormat": "Services", "refId": "C" @@ -471,7 +584,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_endpoint_info)", + "expr": "sum(kube_endpoint_info{cluster=\"$cluster\"})", "interval": "", "legendFormat": "Endpoints", "refId": "D" @@ -481,7 +594,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_ingress_info)", + "expr": "sum(kube_ingress_info{cluster=\"$cluster\"})", "interval": "", "legendFormat": "Ingresses", "refId": "E" @@ -491,7 +604,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_deployment_labels)", + "expr": "sum(kube_deployment_labels{cluster=\"$cluster\"})", "interval": "", "legendFormat": "Deployments", "refId": "F" @@ -501,7 +614,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_statefulset_labels)", + "expr": "sum(kube_statefulset_labels{cluster=\"$cluster\"})", "interval": "", "legendFormat": "Statefulsets", "refId": "G" @@ -511,7 +624,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_daemonset_labels)", + "expr": 
"sum(kube_daemonset_labels{cluster=\"$cluster\"})", "interval": "", "legendFormat": "Daemonsets", "refId": "H" @@ -521,7 +634,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_persistentvolumeclaim_info)", + "expr": "sum(kube_persistentvolumeclaim_info{cluster=\"$cluster\"})", "interval": "", "legendFormat": "Persistent Volume Claims", "refId": "I" @@ -531,7 +644,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_hpa_labels)", + "expr": "sum(kube_hpa_labels{cluster=\"$cluster\"})", "interval": "", "legendFormat": "Horizontal Pod Autoscalers", "refId": "J" @@ -541,7 +654,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_configmap_info)", + "expr": "sum(kube_configmap_info{cluster=\"$cluster\"})", "interval": "", "legendFormat": "Configmaps", "refId": "K" @@ -551,7 +664,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_secret_info)", + "expr": "sum(kube_secret_info{cluster=\"$cluster\"})", "interval": "", "legendFormat": "Secrets", "refId": "L" @@ -561,7 +674,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_networkpolicy_labels)", + "expr": "sum(kube_networkpolicy_labels{cluster=\"$cluster\"})", "interval": "", "legendFormat": "Network Policies", "refId": "M" @@ -572,7 +685,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "count(count by (node) (kube_node_info))", + "expr": "count(count by (node) (kube_node_info{cluster=\"$cluster\"}))", "hide": false, "interval": "", "legendFormat": "Nodes", @@ -615,6 +728,7 @@ "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "last" @@ -622,17 +736,19 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "value" + "textMode": "value", + "wideLayout": true }, - "pluginVersion": "10.0.3", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": 
"${datasource}" }, - "expr": "count(kube_namespace_created)", + "expr": "count(kube_namespace_created{cluster=\"$cluster\"})", "interval": "", "legendFormat": "", "refId": "A" @@ -675,17 +791,20 @@ "graphMode": "none", "justifyMode": "center", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ - "mean" + "lastNotNull" ], "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "10.0.3", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { @@ -694,11 +813,11 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(1-rate(node_cpu_seconds_total{mode=\"idle\"}[$__rate_interval]))", + "expr": "sum(rate(node_cpu_seconds_total{mode!~\"idle|iowait|steal\", cluster=\"$cluster\", job=\"$job\"}[$__rate_interval]))", "interval": "", - "legendFormat": "Real", + "legendFormat": "Real Linux", "range": true, - "refId": "A" + "refId": "Real Linux" }, { "datasource": { @@ -706,11 +825,25 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\"})", + "exemplar": true, + "expr": "sum(rate(windows_cpu_time_total{mode!=\"idle\", cluster=\"$cluster\"}[$__rate_interval]))", + "hide": false, + "interval": "", + "legendFormat": "Real Windows", + "range": true, + "refId": "Real Windows" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\", cluster=\"$cluster\"})", "hide": false, "legendFormat": "Requests", "range": true, - "refId": "B" + "refId": "Requests" }, { "datasource": { @@ -718,11 +851,11 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_container_resource_limits{resource=\"cpu\"})", + "expr": "sum(kube_pod_container_resource_limits{resource=\"cpu\", cluster=\"$cluster\"})", "hide": false, "legendFormat": "Limits", "range": 
true, - "refId": "C" + "refId": "Limits" }, { "datasource": { @@ -730,14 +863,54 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(machine_cpu_cores)", + "expr": "sum(machine_cpu_cores{cluster=\"$cluster\"})", "hide": false, "legendFormat": "Total", "range": true, - "refId": "D" + "refId": "Total" } ], "title": "CPU Usage", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "Real", + "mode": "reduceRow", + "reduce": { + "include": [ + "Real Linux", + "Real Windows" + ], + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Real Linux": true, + "Real Windows": true, + "Time": true, + "Total Linux": true, + "Total Windows": true + }, + "indexByName": { + "Limits": 5, + "Real": 3, + "Real Linux": 1, + "Real Windows": 2, + "Requests": 4, + "Time": 0, + "Total": 8, + "Total Linux": 6, + "Total Windows": 7 + }, + "renameByName": {} + } + } + ], "type": "stat" }, { @@ -774,17 +947,20 @@ "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ - "mean" + "lastNotNull" ], "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "10.0.3", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { @@ -793,11 +969,11 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)", + "expr": "sum(node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"$job\"} - node_memory_MemAvailable_bytes{cluster=\"$cluster\", job=\"$job\"})", "interval": "", - "legendFormat": "Real", + "legendFormat": "Real Linux", "range": true, - "refId": "A" + "refId": "Real Linux" }, { "datasource": { @@ -805,11 +981,25 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\"})", + "exemplar": true, + "expr": 
"sum(windows_os_visible_memory_bytes{cluster=\"$cluster\"} - windows_memory_available_bytes{cluster=\"$cluster\"} - windows_memory_cache_bytes{cluster=\"$cluster\"})", + "hide": false, + "interval": "", + "legendFormat": "Real Windows", + "range": true, + "refId": "Real Windows" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\", cluster=\"$cluster\"})", "hide": false, "legendFormat": "Requests", "range": true, - "refId": "B" + "refId": "Requests" }, { "datasource": { @@ -817,11 +1007,11 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_container_resource_limits{resource=\"memory\"})", + "expr": "sum(kube_pod_container_resource_limits{resource=\"memory\", cluster=\"$cluster\"})", "hide": false, "legendFormat": "Limits", "range": true, - "refId": "C" + "refId": "Limits" }, { "datasource": { @@ -829,14 +1019,51 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(machine_memory_bytes)", + "expr": "sum(machine_memory_bytes{cluster=\"$cluster\"})", "hide": false, "legendFormat": "Total", "range": true, - "refId": "D" + "refId": "Total" } ], "title": "RAM Usage", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "Real", + "mode": "reduceRow", + "reduce": { + "include": [ + "Real Linux", + "Real Windows" + ], + "reducer": "mean" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Real Linux": true, + "Real Windows": true, + "Time": true + }, + "includeByName": {}, + "indexByName": { + "Limits": 5, + "Real": 3, + "Real Linux": 1, + "Real Windows": 2, + "Requests": 4, + "Time": 0, + "Total": 6 + }, + "renameByName": {} + } + } + ], "type": "stat" }, { @@ -872,6 +1099,7 @@ "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "last" @@ -879,17 +1107,19 @@ "fields": "", "values": 
false }, + "showPercentChange": false, "text": {}, - "textMode": "value" + "textMode": "value", + "wideLayout": true }, - "pluginVersion": "10.0.3", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_pod_status_phase{phase=\"Running\"})", + "expr": "sum(kube_pod_status_phase{phase=\"Running\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "", "refId": "A" @@ -902,7 +1132,7 @@ "collapsed": false, "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${datasource}" }, "gridPos": { "h": 1, @@ -927,11 +1157,13 @@ "seriesBy": "last" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "CPU %", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "scheme", @@ -940,6 +1172,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineStyle": { "fill": "solid" @@ -1008,14 +1241,43 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "avg(1-rate(node_cpu_seconds_total{mode=\"idle\"}[$__rate_interval]))", + "expr": "avg(sum by (instance, cpu) (rate(node_cpu_seconds_total{mode!~\"idle|iowait|steal\", cluster=\"$cluster\", job=\"$job\"}[$__rate_interval])))", "interval": "$resolution", - "legendFormat": "CPU usage in %", - "refId": "A" + "legendFormat": "Linux", + "range": true, + "refId": "Linux" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "1 - avg(rate(windows_cpu_time_total{cluster=\"$cluster\",mode=\"idle\"}[$__rate_interval]))", + "hide": false, + "interval": "$resolution", + "legendFormat": "Windows", + "range": true, + "refId": "Windows" } ], "title": "Cluster CPU Utilization", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "CPU usage in 
%", + "mode": "reduceRow", + "reduce": { + "reducer": "mean" + }, + "replaceFields": true + } + } + ], "type": "timeseries" }, { @@ -1029,11 +1291,13 @@ "mode": "continuous-GrYlRd" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "MEMORY", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "scheme", @@ -1042,6 +1306,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1113,14 +1378,41 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / sum(node_memory_MemTotal_bytes)", + "expr": "sum(node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"$job\"} - node_memory_MemAvailable_bytes{cluster=\"$cluster\", job=\"$job\"}) / sum(node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"$job\"})", "interval": "$resolution", - "legendFormat": "Memory usage in %", + "legendFormat": "Linux", "range": true, - "refId": "A" + "refId": "Linux" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(windows_os_visible_memory_bytes{cluster=\"$cluster\"} - windows_memory_available_bytes{cluster=\"$cluster\"}) / sum(windows_os_visible_memory_bytes{cluster=\"$cluster\"})", + "hide": false, + "interval": "$resolution", + "legendFormat": "Windows", + "range": true, + "refId": "Windows" } ], "title": "Cluster Memory Utilization", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "Memory usage in %", + "mode": "reduceRow", + "reduce": { + "reducer": "mean" + }, + "replaceFields": true + } + } + ], "type": "timeseries" }, { @@ -1134,11 +1426,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "CPU CORES", "axisPlacement": 
"auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1147,6 +1441,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineStyle": { "fill": "solid" @@ -1210,17 +1505,21 @@ "sort": "none" } }, - "pluginVersion": "8.3.3", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "sum(rate(container_cpu_usage_seconds_total{image!=\"\"}[$__rate_interval])) by (namespace)", + "expr": "sum(rate(container_cpu_usage_seconds_total{image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (namespace)\n+ on (namespace)\n(sum(rate(windows_container_cpu_usage_seconds_total{container_id!=\"\", cluster=\"$cluster\"}[$__rate_interval]) * on (container_id) group_left (container, pod, namespace) max by ( container, container_id, pod, namespace) (kube_pod_container_info{container_id!=\"\", cluster=\"$cluster\"}) OR kube_namespace_created{cluster=\"$cluster\"} * 0) by (namespace))", + "format": "time_series", + "hide": false, "interval": "$resolution", "legendFormat": "{{ namespace }}", + "range": true, "refId": "A" } ], @@ -1238,11 +1537,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1251,6 +1552,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1317,10 +1619,12 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{image!=\"\"}) by (namespace)", + "expr": "sum(container_memory_working_set_bytes{image!=\"\", cluster=\"$cluster\"}) by (namespace)\n+ on 
(namespace)\n(sum(windows_container_memory_usage_commit_bytes{container_id!=\"\", cluster=\"$cluster\"} * on (container_id) group_left (container, pod, namespace) max by ( container, container_id, pod, namespace) (kube_pod_container_info{container_id!=\"\", cluster=\"$cluster\"}) OR kube_namespace_created{cluster=\"$cluster\"} * 0) by (namespace))", "interval": "$resolution", "legendFormat": "{{ namespace }}", + "range": true, "refId": "A" } ], @@ -1338,11 +1642,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "CPU %", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1351,6 +1657,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1418,11 +1725,27 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "avg(1-rate(node_cpu_seconds_total{mode=\"idle\"}[$__rate_interval])) by (instance)", + "expr": "avg(sum by (instance, cpu) (rate(node_cpu_seconds_total{mode!~\"idle|iowait|steal\", cluster=\"$cluster\", job=\"$job\"}[$__rate_interval]))) by (instance)", "interval": "$resolution", "legendFormat": "{{ node }}", - "refId": "A" + "range": true, + "refId": "Linux" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(sum by (instance,core) (rate(windows_cpu_time_total{mode!=\"idle\", cluster=\"$cluster\"}[$__rate_interval]))) by (instance)", + "hide": false, + "interval": "$resolution", + "legendFormat": "{{ node }}", + "range": true, + "refId": "Windows" } ], "title": "CPU Utilization by instance", @@ -1439,11 +1762,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "MEMORY", "axisPlacement": 
"auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1452,6 +1777,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1520,11 +1846,26 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) by (instance)", + "expr": "sum(node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"$job\"} - node_memory_MemAvailable_bytes{cluster=\"$cluster\", job=\"$job\"}) by (instance)", + "hide": false, "interval": "$resolution", - "legendFormat": "{{ node}}", + "legendFormat": "{{ instance }}", "range": true, - "refId": "A" + "refId": "Linux" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(windows_os_visible_memory_bytes{cluster=\"$cluster\"} - windows_memory_available_bytes{cluster=\"$cluster\"}) by (instance)", + "hide": false, + "interval": "$resolution", + "legendFormat": "{{ instance }}", + "range": true, + "refId": "Windows" } ], "title": "Memory Utilization by instance", @@ -1542,11 +1883,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "SECONDS", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1555,6 +1898,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineStyle": { "fill": "solid" @@ -1627,7 +1971,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{image!=\"\"}[$__rate_interval])) by (namespace) > 0", + "expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (namespace) > 0", "interval": "$resolution", "legendFormat": "{{ 
namespace }}", "range": true, @@ -1649,11 +1993,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "NB", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1662,6 +2008,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineStyle": { "fill": "solid" @@ -1734,7 +2081,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(rate(node_cpu_core_throttles_total[$__rate_interval])) by (instance)", + "expr": "sum(rate(node_cpu_core_throttles_total{cluster=\"$cluster\", job=\"$job\"}[$__rate_interval])) by (instance)", "interval": "$resolution", "legendFormat": "{{ instance }}", "range": true, @@ -1768,11 +2115,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1781,6 +2130,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1849,7 +2199,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(kube_pod_status_qos_class) by (qos_class)", + "expr": "sum(kube_pod_status_qos_class{cluster=\"$cluster\"}) by (qos_class)", "interval": "", "legendFormat": "{{ qos_class }} pods", "range": true, @@ -1861,7 +2211,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_info)", + "expr": "sum(kube_pod_info{cluster=\"$cluster\"})", "hide": false, "legendFormat": "Total pods", "range": true, @@ -1882,11 +2232,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": 
"line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1895,6 +2247,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1963,7 +2316,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(kube_pod_status_reason) by (reason)", + "expr": "sum(kube_pod_status_reason{cluster=\"$cluster\"}) by (reason)", "interval": "", "legendFormat": "{{ reason }}", "range": true, @@ -1985,11 +2338,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "points", "fillOpacity": 25, "gradientMode": "opacity", @@ -1998,6 +2353,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -2066,7 +2422,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(increase(container_oom_events_total[$__rate_interval])) by (namespace) > 0", + "expr": "sum(increase(container_oom_events_total{cluster=\"$cluster\"}[$__rate_interval])) by (namespace) > 0", "interval": "", "legendFormat": "{{ namespace }}", "range": true, @@ -2088,11 +2444,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "points", "fillOpacity": 25, "gradientMode": "opacity", @@ -2101,6 +2459,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -2169,7 +2528,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(increase(kube_pod_container_status_restarts_total[$__rate_interval])) by (namespace) > 0", + "expr": "sum(increase(kube_pod_container_status_restarts_total{cluster=\"$cluster\"}[$__rate_interval])) by (namespace) > 0", "interval": "", 
"legendFormat": "{{ namespace }}", "range": true, @@ -2183,7 +2542,7 @@ "collapsed": false, "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${datasource}" }, "gridPos": { "h": 1, @@ -2208,11 +2567,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "BANDWIDTH", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -2221,6 +2582,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -2251,7 +2613,7 @@ } ] }, - "unit": "bytes" + "unit": "binBps" }, "overrides": [] }, @@ -2283,11 +2645,11 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lxc.*|veth.*\"}[$__rate_interval])) by (device)", + "expr": "sum(rate(node_network_receive_bytes_total{device!~\"(veth|azv|lxc).*\", cluster=\"$cluster\", job=\"$job\"}[$__rate_interval])) by (device)", "interval": "$resolution", "legendFormat": "Received : {{ device }}", "range": true, - "refId": "A" + "refId": "Linux Received" }, { "datasource": { @@ -2296,11 +2658,39 @@ }, "editorMode": "code", "exemplar": true, - "expr": "- sum(rate(node_network_transmit_bytes_total{device!~\"lxc.*|veth.*\"}[$__rate_interval])) by (device)", + "expr": "- sum(rate(node_network_transmit_bytes_total{device!~\"(veth|azv|lxc).*\", cluster=\"$cluster\", job=\"$job\"}[$__rate_interval])) by (device)", "interval": "$resolution", "legendFormat": "Transmitted : {{ device }}", "range": true, - "refId": "B" + "refId": "Linux Transmitted" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(windows_net_bytes_received_total{cluster=\"$cluster\"}[$__rate_interval])) by (nic)", + "hide": false, + "interval": "$resolution", + "legendFormat": "Received : 
{{ nic }}", + "range": true, + "refId": "Windows Received" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "- sum(rate(windows_net_bytes_sent_total{cluster=\"$cluster\"}[$__rate_interval])) by (nic)", + "hide": false, + "interval": "$resolution", + "legendFormat": "Transmitted : {{ device }}", + "range": true, + "refId": "Windows Transmitted" } ], "title": "Global Network Utilization by device", @@ -2317,11 +2707,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "DROPPED PACKETS", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -2330,6 +2722,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -2392,11 +2785,11 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(rate(node_network_receive_drop_total[$__rate_interval]))", + "expr": "sum(rate(node_network_receive_drop_total{cluster=\"$cluster\", job=\"$job\"}[$__rate_interval]))", "interval": "$resolution", - "legendFormat": "Packets dropped (receive)", + "legendFormat": "Linux Packets dropped (receive)", "range": true, - "refId": "A" + "refId": "Linux Packets dropped (receive)" }, { "datasource": { @@ -2405,14 +2798,87 @@ }, "editorMode": "code", "exemplar": true, - "expr": "- sum(rate(node_network_transmit_drop_total[$__rate_interval]))", + "expr": "- sum(rate(node_network_transmit_drop_total{cluster=\"$cluster\", job=\"$job\"}[$__rate_interval]))", "interval": "$resolution", - "legendFormat": "Packets dropped (transmit)", + "legendFormat": "Linux Packets dropped (transmit)", "range": true, - "refId": "B" + "refId": "Linux Packets dropped (transmit)" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": 
true, + "expr": "sum(rate(windows_net_packets_received_discarded_total{cluster=\"$cluster\"}[$__rate_interval]))", + "hide": false, + "interval": "$resolution", + "legendFormat": "Windows Packets dropped (receive)", + "range": true, + "refId": "Windows Packets dropped (receive)" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "- sum(rate(windows_net_packets_outbound_discarded_total{cluster=\"$cluster\"}[$__rate_interval]))", + "hide": false, + "interval": "$resolution", + "legendFormat": "Windows Packets dropped (transmit)", + "range": true, + "refId": "Windows Packets dropped (transmit)" } ], "title": "Network Saturation - Packets dropped", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "Packets dropped (receive)", + "mode": "reduceRow", + "reduce": { + "include": [ + "Linux Packets dropped (receive)", + "Windows Packets dropped (receive)" + ], + "reducer": "mean" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "Packets dropped (transmit)", + "mode": "reduceRow", + "reduce": { + "include": [ + "Linux Packets dropped (transmit)", + "Windows Packets dropped (transmit)" + ], + "reducer": "mean" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Linux Packets dropped (receive)": true, + "Linux Packets dropped (transmit)": true, + "Time": false, + "Windows Packets dropped (receive)": true, + "Windows Packets dropped (transmit)": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + } + ], "type": "timeseries" }, { @@ -2426,11 +2892,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "BANDWIDTH", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -2439,6 +2907,7 @@ "tooltip": false, "viz": false }, + 
"insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -2469,7 +2938,7 @@ } ] }, - "unit": "bytes" + "unit": "binBps" }, "overrides": [] }, @@ -2501,7 +2970,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(rate(container_network_receive_bytes_total[$__rate_interval])) by (namespace)", + "expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\"}[$__rate_interval])) by (namespace)\n+ on (namespace)\n(sum(rate(windows_container_network_receive_bytes_total{container_id!=\"\", cluster=\"$cluster\"}[$__rate_interval]) * on (container_id) group_left (container, pod, namespace) max by ( container, container_id, pod, namespace) (kube_pod_container_info{container_id!=\"\", cluster=\"$cluster\"}) OR kube_namespace_created{cluster=\"$cluster\"} * 0) by (namespace))", "interval": "$resolution", "legendFormat": "Received : {{ namespace }}", "range": true, @@ -2513,7 +2982,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "- sum(rate(container_network_transmit_bytes_total[$__rate_interval])) by (namespace)", + "expr": "- (sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\"}[$__rate_interval])) by (namespace)\n+ on (namespace)\n(sum(rate(windows_container_network_transmit_bytes_total{container_id!=\"\", cluster=\"$cluster\"}[$__rate_interval]) * on (container_id) group_left (container, pod, namespace) max by ( container, container_id, pod, namespace) (kube_pod_container_info{container_id!=\"\", cluster=\"$cluster\"}) OR kube_namespace_created{cluster=\"$cluster\"} * 0) by (namespace)))", "hide": false, "interval": "$resolution", "legendFormat": "Transmitted : {{ namespace }}", @@ -2535,11 +3004,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "BANDWIDTH", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ 
-2548,6 +3019,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -2578,7 +3050,7 @@ } ] }, - "unit": "bytes" + "unit": "binBps" }, "overrides": [] }, @@ -2610,11 +3082,11 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(rate(node_network_receive_bytes_total[$__rate_interval])) by (instance)", + "expr": "sum(rate(node_network_receive_bytes_total{cluster=\"$cluster\", job=\"$job\"}[$__rate_interval])) by (instance)", "interval": "$resolution", "legendFormat": "Received bytes in {{ instance }}", "range": true, - "refId": "A" + "refId": "Linux Received bytes" }, { "datasource": { @@ -2622,12 +3094,39 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "- sum(rate(node_network_transmit_bytes_total[$__rate_interval])) by (instance)", + "expr": "- sum(rate(node_network_transmit_bytes_total{cluster=\"$cluster\", job=\"$job\"}[$__rate_interval])) by (instance)", "hide": false, "interval": "$resolution", "legendFormat": "Transmitted bytes in {{ instance }}", "range": true, - "refId": "B" + "refId": "Linux Transmitted bytes" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(windows_net_bytes_received_total{cluster=\"$cluster\"}[$__rate_interval])) by (instance)", + "hide": false, + "interval": "$resolution", + "legendFormat": "Received bytes in {{ instance }}", + "range": true, + "refId": "Windows Received bytes" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "- sum(rate(windows_net_bytes_sent_total{cluster=\"$cluster\"}[$__rate_interval])) by (instance)", + "hide": false, + "interval": "$resolution", + "legendFormat": "Transmitted bytes in {{ instance }}", + "range": true, + "refId": "Windows Transmitted bytes" } ], "title": "Total Network Received (with all virtual devices) by instance", @@ -2645,11 +3144,13 @@ 
"mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "BANDWIDTH", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -2658,6 +3159,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -2688,7 +3190,7 @@ } ] }, - "unit": "bytes" + "unit": "binBps" }, "overrides": [] }, @@ -2720,7 +3222,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lxc.*|veth.*|lo\"}[$__rate_interval])) by (instance)", + "expr": "sum(rate(node_network_receive_bytes_total{device!~\"(veth|azv|lxc|lo).*\", cluster=\"$cluster\", job=\"$job\"}[$__rate_interval])) by (instance)", "interval": "$resolution", "legendFormat": "Received bytes in {{ instance }}", "range": true, @@ -2732,12 +3234,40 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "- sum(rate(node_network_transmit_bytes_total{device!~\"lxc.*|veth.*|lo\"}[$__rate_interval])) by (instance)", + "expr": "- sum(rate(node_network_transmit_bytes_total{device!~\"(veth|azv|lxc|lo).*\", cluster=\"$cluster\", job=\"$job\"}[$__rate_interval])) by (instance)", "hide": false, "interval": "$resolution", "legendFormat": "Transmitted bytes in {{ instance }}", "range": true, "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(windows_net_bytes_received_total{nic!~\".*Virtual.*\",cluster=\"$cluster\"}[$__rate_interval])) by (instance)", + "hide": false, + "interval": "$resolution", + "legendFormat": "Received bytes in {{ instance }}", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "- 
sum(rate(windows_net_bytes_sent_total{nic!~\".*Virtual.*\",cluster=\"$cluster\"}[$__rate_interval])) by (instance)", + "hide": false, + "interval": "$resolution", + "legendFormat": "Transmitted bytes in {{ instance }}", + "range": true, + "refId": "D" } ], "title": "Network Received (without loopback) by instance", @@ -2755,11 +3285,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "BANDWIDTH", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -2768,6 +3300,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -2798,7 +3331,7 @@ } ] }, - "unit": "bytes" + "unit": "binBps" }, "overrides": [] }, @@ -2830,7 +3363,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(rate(node_network_receive_bytes_total{device=\"lo\"}[$__rate_interval])) by (instance)", + "expr": "sum(rate(node_network_receive_bytes_total{device=\"lo\", cluster=\"$cluster\", job=\"$job\"}[$__rate_interval])) by (instance)", "interval": "$resolution", "legendFormat": "Received bytes in {{ instance }}", "range": true, @@ -2842,7 +3375,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "- sum(rate(node_network_transmit_bytes_total{device=\"lo\"}[$__rate_interval])) by (instance)", + "expr": "- sum(rate(node_network_transmit_bytes_total{device=\"lo\", cluster=\"$cluster\", job=\"$job\"}[$__rate_interval])) by (instance)", "hide": false, "interval": "$resolution", "legendFormat": "Transmitted bytes in {{ instance }}", @@ -2855,8 +3388,7 @@ } ], "refresh": "30s", - "schemaVersion": 38, - "style": "dark", + "schemaVersion": 39, "tags": [ "Kubernetes", "Prometheus" @@ -2866,8 +3398,8 @@ { "current": { "selected": false, - "text": "Prometheus", - "value": "Prometheus" + "text": "", + "value": "" }, "hide": 0, "includeAll": false, @@ -2881,9 
+3413,32 @@ "skipUrlSync": false, "type": "datasource" }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(kube_node_info,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_node_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, { "current": { - "selected": true, + "selected": false, "text": "30s", "value": "30s" }, @@ -2927,6 +3482,33 @@ "queryValue": "", "skipUrlSync": false, "type": "custom" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(node_cpu_seconds_total{cluster=\"$cluster\"},job)", + "hide": 0, + "includeAll": false, + "multi": true, + "name": "job", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_cpu_seconds_total{cluster=\"$cluster\"},job)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" } ] }, @@ -2938,6 +3520,6 @@ "timezone": "", "title": "Kubernetes / Views / Global", "uid": "k8s_views_global", - "version": 31, + "version": 40, "weekStart": "" } diff --git a/dashboards/dotdc/k8s-views-namespaces.json b/dashboards/dotdc/k8s-views-namespaces.json index 06ff0b9a..6771236c 100644 --- a/dashboards/dotdc/k8s-views-namespaces.json +++ b/dashboards/dotdc/k8s-views-namespaces.json @@ -9,31 +9,37 @@ "pluginName": "Prometheus" } ], - "__elements": [], + "__elements": {}, "__requires": [ + { + "type": "panel", + "id": "gauge", + "name": "Gauge", + "version": "" + }, { "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "8.3.4" + "version": "10.3.1" }, { "type": "datasource", "id": 
"prometheus", "name": "Prometheus", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" + "version": "1.0.0" }, { "type": "panel", "id": "stat", "name": "Stat", "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" } ], "annotations": { @@ -122,19 +128,22 @@ }, "id": 46, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "lastNotNull" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, + "sizing": "auto", "text": {} }, - "pluginVersion": "10.0.3", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { @@ -143,7 +152,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", image!=\"\"}[$__rate_interval])) / sum(machine_cpu_cores)", + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) / sum(machine_cpu_cores{cluster=\"$cluster\"})", "instant": true, "interval": "", "legendFormat": "", @@ -194,6 +203,8 @@ }, "id": 48, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -204,9 +215,10 @@ }, "showThresholdLabels": false, "showThresholdMarkers": true, + "sizing": "auto", "text": {} }, - "pluginVersion": "10.0.3", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { @@ -214,7 +226,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\", image!=\"\"}) / sum(machine_memory_bytes)", + "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\", image!=\"\", cluster=\"$cluster\"}) / sum(machine_memory_bytes{cluster=\"$cluster\"})", "interval": "", "legendFormat": "", "refId": "A" @@ -234,11 +246,13 @@ "mode": "palette-classic" }, "custom": { + 
"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -247,6 +261,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -313,7 +328,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_pod_container_status_running{namespace=~\"$namespace\"})", + "expr": "sum(kube_pod_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Running Pods", "refId": "A" @@ -323,7 +338,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_service_info{namespace=~\"$namespace\"})", + "expr": "sum(kube_service_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Services", "refId": "B" @@ -333,7 +348,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_ingress_info{namespace=~\"$namespace\"})", + "expr": "sum(kube_ingress_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Ingresses", "refId": "C" @@ -343,7 +358,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_deployment_labels{namespace=~\"$namespace\"})", + "expr": "sum(kube_deployment_labels{namespace=~\"$namespace\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Deployments", "refId": "D" @@ -353,7 +368,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_statefulset_labels{namespace=~\"$namespace\"})", + "expr": "sum(kube_statefulset_labels{namespace=~\"$namespace\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Statefulsets", "refId": "E" @@ -363,7 +378,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_daemonset_labels{namespace=~\"$namespace\"})", + "expr": "sum(kube_daemonset_labels{namespace=~\"$namespace\", 
cluster=\"$cluster\"})", "interval": "", "legendFormat": "Daemonsets", "refId": "F" @@ -373,7 +388,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_persistentvolumeclaim_info{namespace=~\"$namespace\"})", + "expr": "sum(kube_persistentvolumeclaim_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Persistent Volume Claims", "refId": "G" @@ -383,7 +398,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_hpa_labels{namespace=~\"$namespace\"})", + "expr": "sum(kube_hpa_labels{namespace=~\"$namespace\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Horizontal Pod Autoscalers", "refId": "H" @@ -393,7 +408,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_configmap_info{namespace=~\"$namespace\"})", + "expr": "sum(kube_configmap_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Configmaps", "refId": "I" @@ -403,7 +418,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_secret_info{namespace=~\"$namespace\"})", + "expr": "sum(kube_secret_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Secrets", "refId": "J" @@ -413,7 +428,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_networkpolicy_labels{namespace=~\"$namespace\"})", + "expr": "sum(kube_networkpolicy_labels{namespace=~\"$namespace\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Network Policies", "refId": "K" @@ -456,17 +471,20 @@ "graphMode": "none", "justifyMode": "center", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ - "mean" + "lastNotNull" ], "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "10.0.3", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { @@ -475,7 +493,7 @@ }, "editorMode": "code", 
"exemplar": true, - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", image!=\"\"}[$__rate_interval]))", + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval]))", "interval": "", "legendFormat": "Real", "range": true, @@ -487,7 +505,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_container_resource_requests{namespace=~\"$namespace\", resource=\"cpu\"})", + "expr": "sum(kube_pod_container_resource_requests{namespace=~\"$namespace\", resource=\"cpu\", cluster=\"$cluster\"})", "hide": false, "legendFormat": "Requests", "range": true, @@ -499,7 +517,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_container_resource_limits{namespace=~\"$namespace\", resource=\"cpu\"})", + "expr": "sum(kube_pod_container_resource_limits{namespace=~\"$namespace\", resource=\"cpu\", cluster=\"$cluster\"})", "hide": false, "legendFormat": "Limits", "range": true, @@ -511,7 +529,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(machine_cpu_cores)", + "expr": "sum(machine_cpu_cores{cluster=\"$cluster\"})", "hide": false, "legendFormat": "Cluster Total", "range": true, @@ -555,17 +573,20 @@ "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ - "mean" + "lastNotNull" ], "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "10.0.3", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { @@ -574,7 +595,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\", image!=\"\"})", + "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\", image!=\"\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Real", "range": true, @@ -586,7 
+607,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_container_resource_requests{namespace=~\"$namespace\", resource=\"memory\"})", + "expr": "sum(kube_pod_container_resource_requests{namespace=~\"$namespace\", resource=\"memory\", cluster=\"$cluster\"})", "hide": false, "legendFormat": "Requests", "range": true, @@ -598,7 +619,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_container_resource_limits{namespace=~\"$namespace\", resource=\"memory\"})", + "expr": "sum(kube_pod_container_resource_limits{namespace=~\"$namespace\", resource=\"memory\", cluster=\"$cluster\"})", "hide": false, "legendFormat": "Limits", "range": true, @@ -610,7 +631,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(machine_memory_bytes)", + "expr": "sum(machine_memory_bytes{cluster=\"$cluster\"})", "hide": false, "legendFormat": "Cluster Total", "range": true, @@ -648,11 +669,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "CPU CORES", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -661,6 +684,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -723,7 +747,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", image!=\"\", pod=~\"${created_by}.*\"}[$__rate_interval])) by (pod)", + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=~\"$namespace\", image!=\"\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}[$__rate_interval])) by (pod)", "interval": "$resolution", "legendFormat": "{{ pod }}", "range": true, @@ -744,11 +768,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", 
"axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -757,6 +783,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -819,7 +846,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\", image!=\"\", pod=~\"${created_by}.*\"}) by (pod)", + "expr": "sum(container_memory_working_set_bytes{namespace=~\"$namespace\", image!=\"\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}) by (pod)", "interval": "$resolution", "legendFormat": "{{ pod }}", "range": true, @@ -841,11 +868,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "SECONDS", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -854,6 +883,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineStyle": { "fill": "solid" @@ -926,7 +956,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~\"$namespace\", image!=\"\", pod=~\"${created_by}.*\"}[$__rate_interval])) by (pod) > 0", + "expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~\"$namespace\", image!=\"\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}[$__rate_interval])) by (pod) > 0", "interval": "$resolution", "legendFormat": "{{ pod }}", "range": true, @@ -960,11 +990,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -973,6 +1005,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, 
"lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1041,7 +1074,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(kube_pod_status_qos_class{namespace=~\"$namespace\"}) by (qos_class)", + "expr": "sum(kube_pod_status_qos_class{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (qos_class)", "interval": "", "legendFormat": "{{ qos_class }} pods", "range": true, @@ -1053,7 +1086,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_info{namespace=~\"$namespace\"})", + "expr": "sum(kube_pod_info{namespace=~\"$namespace\", cluster=\"$cluster\"})", "hide": false, "legendFormat": "Total pods", "range": true, @@ -1074,11 +1107,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1087,6 +1122,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1155,7 +1191,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(kube_pod_status_reason) by (reason)", + "expr": "sum(kube_pod_status_reason{cluster=\"$cluster\"}) by (reason)", "interval": "", "legendFormat": "{{ reason }}", "range": true, @@ -1177,11 +1213,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "points", "fillOpacity": 25, "gradientMode": "opacity", @@ -1190,6 +1228,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1258,7 +1297,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(increase(container_oom_events_total{namespace=~\"${namespace}\"}[$__rate_interval])) by (namespace, pod) > 0", + 
"expr": "sum(increase(container_oom_events_total{namespace=~\"${namespace}\", cluster=\"$cluster\"}[$__rate_interval])) by (namespace, pod) > 0", "interval": "", "legendFormat": "namespace: {{ namespace }} - pod: {{ pod }}", "range": true, @@ -1280,11 +1319,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "points", "fillOpacity": 25, "gradientMode": "opacity", @@ -1293,6 +1334,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1361,7 +1403,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(increase(kube_pod_container_status_restarts_total{namespace=~\"${namespace}\"}[$__rate_interval])) by (namespace, pod) > 0", + "expr": "sum(increase(kube_pod_container_status_restarts_total{namespace=~\"${namespace}\", cluster=\"$cluster\"}[$__rate_interval])) by (namespace, pod) > 0", "interval": "", "legendFormat": "namespace: {{ namespace }} - pod: {{ pod }}", "range": true, @@ -1382,11 +1424,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1395,6 +1439,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1457,7 +1502,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_container_status_ready{namespace=~\"$namespace\", pod=~\"${created_by}.*\"})", + "expr": "sum(kube_pod_container_status_ready{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Ready", "range": true, @@ -1469,7 +1514,7 @@ "uid": "${datasource}" }, "editorMode": 
"code", - "expr": "sum(kube_pod_container_status_running{namespace=~\"$namespace\", pod=~\"${created_by}.*\"})", + "expr": "sum(kube_pod_container_status_running{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Running", "range": true, @@ -1480,7 +1525,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_pod_container_status_waiting{namespace=~\"$namespace\"})", + "expr": "sum(kube_pod_container_status_waiting{namespace=~\"$namespace\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Waiting", "refId": "C" @@ -1490,7 +1535,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_pod_container_status_restarts_total{namespace=~\"$namespace\"})", + "expr": "sum(kube_pod_container_status_restarts_total{namespace=~\"$namespace\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Restarts Total", "refId": "D" @@ -1500,7 +1545,7 @@ "type": "prometheus", "uid": "${datasource}" }, - "expr": "sum(kube_pod_container_status_terminated{namespace=~\"$namespace\"})", + "expr": "sum(kube_pod_container_status_terminated{namespace=~\"$namespace\", cluster=\"$cluster\"})", "interval": "", "legendFormat": "Terminated", "refId": "E" @@ -1520,11 +1565,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1533,6 +1580,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1595,7 +1643,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_pod_container_info{namespace=~\"$namespace\", pod=~\"${created_by}.*\"}) by (pod)", + "expr": "sum(kube_pod_container_info{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}) by (pod)", "interval": 
"", "legendFormat": "{{ pod }}", "range": true, @@ -1616,11 +1664,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1629,6 +1679,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1691,7 +1742,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(kube_deployment_status_replicas_available{namespace=~\"$namespace\"}) by (deployment)", + "expr": "sum(kube_deployment_status_replicas_available{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (deployment)", "interval": "", "legendFormat": "{{ deployment }}", "range": true, @@ -1712,11 +1763,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1725,6 +1778,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1786,7 +1840,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(kube_deployment_status_replicas_unavailable{namespace=~\"$namespace\", pod=~\"${created_by}.*\"}) by (deployment)", + "expr": "sum(kube_deployment_status_replicas_unavailable{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}) by (deployment)", "interval": "", "legendFormat": "{{ deployment }}", "range": true, @@ -1824,11 +1878,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": 
"opacity", @@ -1837,6 +1893,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1899,7 +1956,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "sum(kubelet_volume_stats_used_bytes{namespace=~\"$namespace\"}) by (persistentvolumeclaim) / sum(kubelet_volume_stats_capacity_bytes{namespace=~\"$namespace\"}) by (persistentvolumeclaim)", + "expr": "sum(kubelet_volume_stats_used_bytes{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim) / sum(kubelet_volume_stats_capacity_bytes{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim)", "interval": "", "legendFormat": "{{ persistentvolumeclaim }}", "refId": "A" @@ -1919,11 +1976,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1932,6 +1991,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -1994,7 +2054,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "sum(kubelet_volume_stats_used_bytes{namespace=~\"$namespace\"}) by (persistentvolumeclaim)", + "expr": "sum(kubelet_volume_stats_used_bytes{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim)", "interval": "", "legendFormat": "{{ persistentvolumeclaim }} - Used", "refId": "A" @@ -2005,7 +2065,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "sum(kubelet_volume_stats_capacity_bytes{namespace=~\"$namespace\"}) by (persistentvolumeclaim)", + "expr": "sum(kubelet_volume_stats_capacity_bytes{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim)", "hide": false, "interval": "", "legendFormat": "{{ persistentvolumeclaim }} - Capacity", @@ -2026,11 +2086,13 @@ "mode": 
"palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -2039,6 +2101,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, @@ -2101,7 +2164,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "1 - sum(kubelet_volume_stats_inodes_used{namespace=~\"$namespace\"}) by (persistentvolumeclaim) / sum(kubelet_volume_stats_inodes{namespace=~\"$namespace\"}) by (persistentvolumeclaim)", + "expr": "1 - sum(kubelet_volume_stats_inodes_used{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim) / sum(kubelet_volume_stats_inodes{namespace=~\"$namespace\", cluster=\"$cluster\"}) by (persistentvolumeclaim)", "interval": "", "legendFormat": "{{ persistentvolumeclaim }}", "refId": "A" @@ -2109,11 +2172,475 @@ ], "title": "Persistent Volumes - Inodes", "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 81 + }, + "id": 76, + "panels": [], + "title": "Network", + "type": "row" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, 
+ "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 82 + }, + "id": 78, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}[$__rate_interval])) by (pod)", + "interval": "$resolution", + "legendFormat": "Received - {{ pod }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "- sum(rate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}[$__rate_interval])) by (pod)", + "interval": "$resolution", + "legendFormat": "Transmitted - {{ pod }}", + "range": true, + "refId": "B" + } + ], + "title": "Network - Bandwidth by pod", + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + 
"lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 82 + }, + "id": 79, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}[$__rate_interval])) by (pod)", + "interval": "$resolution", + "legendFormat": "Received - {{ pod }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "- sum(rate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}[$__rate_interval])) by (pod)", + "interval": "$resolution", + "legendFormat": "Transmitted - {{ pod }}", + "range": true, + "refId": "B" + } + ], + "title": "Network - Packets Rate by pod", + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + 
"fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 90 + }, + "id": 80, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}[$__rate_interval])) by (pod)", + "interval": "$resolution", + "legendFormat": "Received - {{ pod }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "- sum(rate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}[$__rate_interval])) by (pod)", + "interval": "$resolution", + "legendFormat": "Transmitted - {{ pod }}", + "range": true, + "refId": "B" + } + ], + "title": "Network - Packets Dropped by pod", + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { 
+ "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 90 + }, + "id": 81, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(container_network_receive_errors_total{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}[$__rate_interval])) by (pod)", + "interval": "$resolution", + "legendFormat": "Received - {{ pod }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "- sum(rate(container_network_transmit_errors_total{namespace=~\"$namespace\", pod=~\"${created_by}.*\", cluster=\"$cluster\"}[$__rate_interval])) by (pod)", + "interval": "$resolution", + "legendFormat": "Transmitted - {{ pod }}", + "range": true, + "refId": "B" + } + ], + "title": "Network - Errors by pod", + "type": "timeseries" } ], "refresh": "30s", 
- "schemaVersion": 38, - "style": "dark", + "schemaVersion": 39, "tags": [ "Kubernetes", "Prometheus" @@ -2121,11 +2648,7 @@ "templating": { "list": [ { - "current": { - "selected": false, - "text": "Prometheus", - "value": "Prometheus" - }, + "current": {}, "hide": 0, "includeAll": false, "multi": false, @@ -2139,27 +2662,42 @@ "type": "datasource" }, { - "current": { - "selected": true, - "text": [ - "monitoring" - ], - "value": [ - "monitoring" - ] - }, + "current": {}, "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(kube_pod_info, namespace)", + "definition": "label_values(kube_node_info,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_node_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", "hide": 0, "includeAll": true, "multi": true, "name": "namespace", "options": [], "query": { - "query": "label_values(kube_pod_info, namespace)", + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", "refId": "StandardVariableQuery" }, "refresh": 1, @@ -2173,7 +2711,7 @@ }, { "current": { - "selected": true, + "selected": false, "text": "30s", "value": "30s" }, @@ -2219,16 +2757,12 @@ "type": "custom" }, { - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, + "current": {}, "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(kube_pod_info{namespace=~\"$namespace\", container!=\"\"},created_by_name)", + "definition": "label_values(kube_pod_info{namespace=~\"$namespace\", cluster=\"$cluster\"},created_by_name)", "description": "Can be used to 
filter on a specific deployment, statefulset or deamonset (only relevant panels).", "hide": 0, "includeAll": true, @@ -2236,10 +2770,10 @@ "name": "created_by", "options": [], "query": { - "query": "label_values(kube_pod_info{namespace=~\"$namespace\", container!=\"\"},created_by_name)", + "query": "label_values(kube_pod_info{namespace=~\"$namespace\", cluster=\"$cluster\"},created_by_name)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, - "refresh": 1, + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, @@ -2255,6 +2789,6 @@ "timezone": "", "title": "Kubernetes / Views / Namespaces", "uid": "k8s_views_ns", - "version": 27, + "version": 36, "weekStart": "" } diff --git a/dashboards/dotdc/k8s-views-pods.json b/dashboards/dotdc/k8s-views-pods.json index c441f49d..e3bf3f3f 100644 --- a/dashboards/dotdc/k8s-views-pods.json +++ b/dashboards/dotdc/k8s-views-pods.json @@ -108,6 +108,7 @@ "type": "prometheus", "uid": "${datasource}" }, + "description": "Panel only works when a single pod is selected.", "fieldConfig": { "defaults": { "mappings": [], @@ -136,6 +137,7 @@ "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "mean" @@ -143,17 +145,20 @@ "fields": "", "values": false }, - "textMode": "name" + "showPercentChange": false, + "textMode": "name", + "wideLayout": true }, - "pluginVersion": "10.1.0", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": false, - "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\"}", + "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}", "instant": true, "interval": "", "legendFormat": "{{ created_by_kind }}: {{ created_by_name }}", @@ -168,12 +173,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "description": "Panel only works when a single pod is selected.", "fieldConfig": { "defaults": { 
"links": [ { "title": "", - "url": "/d/k8s_views_nodes/kubernetes-views-nodes?var-datasource=${datasource}&var-node=${__data.fields.node}" + "url": "/d/k8s_views_nodes/kubernetes-views-nodes?var-datasource=${datasource}&var-node=${__field.labels.node}" } ], "mappings": [], @@ -202,6 +208,7 @@ "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "mean" @@ -209,17 +216,20 @@ "fields": "", "values": false }, - "textMode": "name" + "showPercentChange": false, + "textMode": "name", + "wideLayout": true }, - "pluginVersion": "10.1.0", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": false, - "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\"}", + "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}", "instant": true, "interval": "", "legendFormat": "{{ node }}", @@ -234,6 +244,7 @@ "type": "prometheus", "uid": "${datasource}" }, + "description": "Panel only works when a single pod is selected.", "fieldConfig": { "defaults": { "mappings": [], @@ -262,6 +273,7 @@ "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "mean" @@ -269,17 +281,20 @@ "fields": "", "values": false }, - "textMode": "name" + "showPercentChange": false, + "textMode": "name", + "wideLayout": true }, - "pluginVersion": "10.1.0", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": false, - "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\"}", + "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}", "instant": true, "interval": "", "legendFormat": "{{ pod_ip }}", @@ -294,6 +309,7 @@ "type": "prometheus", "uid": "${datasource}" }, + "description": "Panel only works when a 
single pod is selected.", "fieldConfig": { "defaults": { "mappings": [], @@ -322,6 +338,7 @@ "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "mean" @@ -329,9 +346,11 @@ "fields": "", "values": false }, - "textMode": "name" + "showPercentChange": false, + "textMode": "name", + "wideLayout": true }, - "pluginVersion": "10.1.0", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { @@ -340,7 +359,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\", priority_class!=\"\"}", + "expr": "kube_pod_info{namespace=\"$namespace\", pod=\"$pod\", priority_class!=\"\", cluster=\"$cluster\"}", "format": "time_series", "instant": true, "interval": "", @@ -357,6 +376,7 @@ "type": "prometheus", "uid": "${datasource}" }, + "description": "Panel only works when a single pod is selected.", "fieldConfig": { "defaults": { "color": { @@ -419,14 +439,17 @@ "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [], "fields": "", "values": false }, - "textMode": "name" + "showPercentChange": false, + "textMode": "name", + "wideLayout": true }, - "pluginVersion": "10.1.0", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { @@ -435,7 +458,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "kube_pod_status_qos_class{namespace=\"$namespace\", pod=\"$pod\"} > 0", + "expr": "kube_pod_status_qos_class{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"} > 0", "instant": true, "interval": "", "legendFormat": "{{ qos_class }}", @@ -450,6 +473,7 @@ "type": "prometheus", "uid": "${datasource}" }, + "description": "Panel only works when a single pod is selected.", "fieldConfig": { "defaults": { "mappings": [], @@ -482,14 +506,17 @@ "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", 
"reduceOptions": { "calcs": [], "fields": "", "values": false }, - "textMode": "name" + "showPercentChange": false, + "textMode": "name", + "wideLayout": true }, - "pluginVersion": "10.1.0", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { @@ -498,7 +525,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "kube_pod_container_status_last_terminated_reason{namespace=\"$namespace\", pod=\"$pod\"}", + "expr": "kube_pod_container_status_last_terminated_reason{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}", "instant": true, "interval": "", "legendFormat": "{{ reason }}", @@ -513,6 +540,7 @@ "type": "prometheus", "uid": "${datasource}" }, + "description": "Panel only works when a single pod is selected.", "fieldConfig": { "defaults": { "mappings": [], @@ -549,14 +577,17 @@ "graphMode": "none", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [], "fields": "", "values": true }, - "textMode": "value" + "showPercentChange": false, + "textMode": "value", + "wideLayout": true }, - "pluginVersion": "10.1.0", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { @@ -565,7 +596,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "kube_pod_container_status_last_terminated_exitcode{namespace=\"$namespace\", pod=\"$pod\"}", + "expr": "kube_pod_container_status_last_terminated_exitcode{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}", "instant": true, "interval": "", "legendFormat": "__auto", @@ -646,6 +677,8 @@ }, "id": 39, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -655,9 +688,10 @@ "values": false }, "showThresholdLabels": false, - "showThresholdMarkers": true + "showThresholdMarkers": true, + "sizing": "auto" }, - "pluginVersion": "10.1.0", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { @@ -666,7 +700,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": 
"sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}[$__rate_interval])) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"})", + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"})", "instant": true, "interval": "$resolution", "legendFormat": "Requests", @@ -716,6 +750,8 @@ }, "id": 48, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -725,9 +761,10 @@ "values": false }, "showThresholdLabels": false, - "showThresholdMarkers": true + "showThresholdMarkers": true, + "sizing": "auto" }, - "pluginVersion": "10.1.0", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { @@ -736,7 +773,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}[$__rate_interval])) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"})", + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"})", "instant": true, "interval": "$resolution", "legendFormat": "Limits", @@ -790,6 +827,8 @@ }, "id": 40, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -799,17 +838,19 @@ "values": false }, "showThresholdLabels": false, - "showThresholdMarkers": true + "showThresholdMarkers": true, + "sizing": "auto" }, - "pluginVersion": "10.1.0", + "pluginVersion": "11.2.0", "targets": [ { 
"datasource": { "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": false, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"})", + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"})", "instant": true, "interval": "$resolution", "legendFormat": "Requests", @@ -859,6 +900,8 @@ }, "id": 49, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -868,17 +911,19 @@ "values": false }, "showThresholdLabels": false, - "showThresholdMarkers": true + "showThresholdMarkers": true, + "sizing": "auto" }, - "pluginVersion": "10.1.0", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": false, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) ", + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"}) ", "instant": true, "interval": "$resolution", "legendFormat": "Limits", @@ -988,7 +1033,7 @@ "showHeader": true, "sortBy": [] }, - "pluginVersion": "10.1.0", + "pluginVersion": "11.2.0", "targets": [ { "datasource": { @@ -997,7 +1042,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=\"$pod\", 
resource=\"cpu\"}) by (container)", + "expr": "sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", "format": "table", "instant": true, "interval": "", @@ -1012,7 +1057,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "expr": "sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", "format": "table", "instant": true, "interval": "", @@ -1027,7 +1072,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", + "expr": "sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", "format": "table", "instant": true, "interval": "", @@ -1041,7 +1086,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", + "expr": "sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", "format": "table", "instant": true, "interval": "", @@ -1055,7 +1100,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", image!=\"\", container!=\"\"}[$__rate_interval])) by (container)", + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", "format": "table", "hide": false, "instant": true, @@ -1070,7 +1115,7 @@ }, "editorMode": 
"code", "exemplar": false, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=\"$pod\", image!=\"\", container!=\"\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", container!=\"\", cluster=\"$cluster\"}) by (container)", "format": "table", "hide": false, "instant": true, @@ -1181,11 +1226,13 @@ "mode": "thresholds" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "Percent", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1271,7 +1318,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}[$__rate_interval])) by (container) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", "interval": "$resolution", "legendFormat": "{{ container }} REQUESTS", "range": true, @@ -1283,7 +1330,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}[$__rate_interval])) by (container) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}) by (container)", + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", 
resource=\"cpu\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", "hide": false, "legendFormat": "{{ container }} LIMITS", "range": true, @@ -1305,11 +1352,13 @@ "mode": "thresholds" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "Percent", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1398,7 +1447,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}) by (container) / sum(kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", "interval": "", "legendFormat": "{{ container }} REQUESTS", "range": true, @@ -1410,7 +1459,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", cluster=\"$cluster\"}) by (container) / sum(kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$pod\", resource=\"memory\", job=~\"$job\", cluster=\"$cluster\"}) by (container)", "hide": false, "legendFormat": "{{ container }} LIMITS", "range": true, @@ -1431,11 +1480,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "CPU Cores", "axisPlacement": 
"auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1532,7 +1583,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", image!=\"\", container!=\"\"}[$__rate_interval])) by (container)", + "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", "interval": "$resolution", "legendFormat": "{{ container }}", "range": true, @@ -1553,11 +1604,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "Bytes", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1637,7 +1690,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=\"$pod\", image!=\"\", container!=\"\"}) by (container)", + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$pod\", image!=\"\", container!=\"\", cluster=\"$cluster\"}) by (container)", "interval": "", "legendFormat": "{{ container }}", "range": true, @@ -1659,11 +1712,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "SECONDS", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1745,7 +1800,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~\"$namespace\", pod=\"$pod\", image!=\"\", container!=\"\"}[$__rate_interval])) by (container)", + "expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{namespace=~\"$namespace\", pod=~\"$pod\", image!=\"\", 
container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", "interval": "$resolution", "legendFormat": "{{ container }}", "range": true, @@ -1780,11 +1835,13 @@ "mode": "thresholds" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "Percent", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1873,7 +1930,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(increase(container_oom_events_total{namespace=\"${namespace}\", pod=\"${pod}\", container!=\"\"}[$__rate_interval])) by (container)", + "expr": "sum(increase(container_oom_events_total{namespace=\"${namespace}\", pod=\"${pod}\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", "interval": "", "legendFormat": "{{ container }}", "range": true, @@ -1895,11 +1952,13 @@ "mode": "thresholds" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "Percent", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -1988,7 +2047,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(increase(kube_pod_container_status_restarts_total{namespace=~\"${namespace}\", pod=\"${pod}\", container!=\"\"}[$__rate_interval])) by (container)", + "expr": "sum(increase(kube_pod_container_status_restarts_total{namespace=~\"${namespace}\", pod=\"${pod}\", container!=\"\", job=~\"$job\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", "interval": "", "legendFormat": "{{ container }}", "range": true, @@ -2035,11 +2094,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ 
-2079,7 +2140,7 @@ } ] }, - "unit": "bytes" + "unit": "binBps" }, "overrides": [] }, @@ -2110,7 +2171,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "sum(rate(container_network_receive_bytes_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "expr": "sum(rate(container_network_receive_bytes_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", "interval": "$resolution", "legendFormat": "Received", "refId": "A" @@ -2121,7 +2182,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "- sum(rate(container_network_transmit_bytes_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "expr": "- sum(rate(container_network_transmit_bytes_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", "interval": "$resolution", "legendFormat": "Transmitted", "refId": "B" @@ -2141,11 +2202,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -2216,7 +2279,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "sum(rate(container_network_receive_packets_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "expr": "sum(rate(container_network_receive_packets_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", "interval": "$resolution", "legendFormat": "Received", "refId": "A" @@ -2227,7 +2290,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "- sum(rate(container_network_transmit_packets_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "expr": "- sum(rate(container_network_transmit_packets_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", "interval": "$resolution", "legendFormat": "Transmitted", "refId": "B" @@ 
-2247,11 +2310,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -2282,7 +2347,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -2321,7 +2387,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "sum(rate(container_network_receive_packets_dropped_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "expr": "sum(rate(container_network_receive_packets_dropped_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", "interval": "$resolution", "legendFormat": "Received", "refId": "A" @@ -2332,7 +2398,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "- sum(rate(container_network_transmit_packets_dropped_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "expr": "- sum(rate(container_network_transmit_packets_dropped_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", "interval": "$resolution", "legendFormat": "Transmitted", "refId": "B" @@ -2352,11 +2418,13 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 25, "gradientMode": "opacity", @@ -2387,7 +2455,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -2426,7 +2495,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "sum(rate(container_network_receive_errors_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "expr": "sum(rate(container_network_receive_errors_total{namespace=\"$namespace\", pod=~\"$pod\", 
cluster=\"$cluster\"}[$__rate_interval]))", "interval": "$resolution", "legendFormat": "Received", "refId": "A" @@ -2437,7 +2506,7 @@ "uid": "${datasource}" }, "exemplar": true, - "expr": "- sum(rate(container_network_transmit_errors_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "expr": "- sum(rate(container_network_transmit_errors_total{namespace=\"$namespace\", pod=~\"$pod\", cluster=\"$cluster\"}[$__rate_interval]))", "interval": "$resolution", "legendFormat": "Transmitted", "refId": "B" @@ -2448,8 +2517,7 @@ } ], "refresh": "30s", - "schemaVersion": 38, - "style": "dark", + "schemaVersion": 39, "tags": [ "Kubernetes", "Prometheus" @@ -2459,8 +2527,8 @@ { "current": { "selected": false, - "text": "Prometheus", - "value": "Prometheus" + "text": "", + "value": "" }, "hide": 0, "includeAll": false, @@ -2474,6 +2542,34 @@ "skipUrlSync": false, "type": "datasource" }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(kube_node_info,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_node_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, { "current": { "selected": false, @@ -2484,14 +2580,14 @@ "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(kube_pod_info, namespace)", + "definition": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", "hide": 0, "includeAll": false, "multi": false, "name": "namespace", "options": [], "query": { - "query": "label_values(kube_pod_info, namespace)", + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", "refId": "Prometheus-namespace-Variable-Query" }, "refresh": 1, @@ -2513,14 
+2609,14 @@ "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(kube_pod_info{namespace=\"$namespace\"}, pod)", + "definition": "label_values(kube_pod_info{namespace=\"$namespace\", cluster=\"$cluster\"}, pod)", "hide": 0, - "includeAll": false, - "multi": false, + "includeAll": true, + "multi": true, "name": "pod", "options": [], "query": { - "query": "label_values(kube_pod_info{namespace=\"$namespace\"}, pod)", + "query": "label_values(kube_pod_info{namespace=\"$namespace\", cluster=\"$cluster\"}, pod)", "refId": "Prometheus-pod-Variable-Query" }, "refresh": 2, @@ -2534,7 +2630,7 @@ }, { "current": { - "selected": true, + "selected": false, "text": "30s", "value": "30s" }, @@ -2578,6 +2674,33 @@ "queryValue": "", "skipUrlSync": false, "type": "custom" + }, + { + "current": { + "selected": false, + "text": "kube-state-metrics", + "value": "kube-state-metrics" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(kube_pod_info{namespace=\"$namespace\", cluster=\"$cluster\"},job)", + "hide": 0, + "includeAll": false, + "multi": true, + "name": "job", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_pod_info{namespace=\"$namespace\", cluster=\"$cluster\"},job)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" } ] }, @@ -2589,6 +2712,6 @@ "timezone": "", "title": "Kubernetes / Views / Pods", "uid": "k8s_views_pods", - "version": 22, + "version": 30, "weekStart": "" } diff --git a/dashboards/ingress/namespace-detail.json b/dashboards/ingress/namespace-detail.json index 328fed23..9f569d3c 100644 --- a/dashboards/ingress/namespace-detail.json +++ b/dashboards/ingress/namespace-detail.json @@ -1339,11 +1339,7 @@ }, { "id": "unit", - "value": "short" - }, - { - "id": "decimals", - "value": 2 + "value": "none" }, { "id": "custom.align", diff --git 
a/dashboards/main/controller.json b/dashboards/main/controller.json index 8972c204..f6eaea06 100644 --- a/dashboards/main/controller.json +++ b/dashboards/main/controller.json @@ -3,7 +3,10 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", @@ -20,15 +23,16 @@ }, "editable": false, "fiscalYearStartMonth": 0, - "gnetId": null, "graphTooltip": 1, - "id": 23, - "iteration": 1640791408237, + "id": 106, "links": [], "liveNow": false, "panels": [ { - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "Note that this table shows the average values for the entire period selected in the dashboard. Consequently, it may contain information about Pods or namespaces that were changed or deleted during the selected period.", "fieldConfig": { "defaults": { @@ -36,9 +40,12 @@ "mode": "thresholds" }, "custom": { - "align": null, - "displayMode": "auto", + "align": "auto", + "cellOptions": { + "type": "auto" + }, "filterable": false, + "inspect": false, "minWidth": 70 }, "decimals": 2, @@ -89,8 +96,7 @@ ] }, { - "id": "custom.align", - "value": null + "id": "custom.align" }, { "id": "custom.minWidth", @@ -113,8 +119,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" }, { "id": "custom.width" @@ -140,8 +145,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -164,8 +168,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" }, { "id": "mappings", @@ -213,8 +216,7 @@ "value": 3 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -237,8 +239,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -261,8 +262,7 @@ "value": 3 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -285,8 +285,7 @@ 
"value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -309,8 +308,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -333,8 +331,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -357,8 +354,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -381,8 +377,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -405,8 +400,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -429,8 +423,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -453,8 +446,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -477,20 +469,19 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" }, { "id": "mappings", "value": [ { - "type": "value", "options": { "-1": { - "text": "hostNet", - "index": 0 + "index": 0, + "text": "hostNet" } - } + }, + "type": "value" } ] } @@ -515,20 +506,19 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" }, { "id": "mappings", "value": [ { - "type": "value", "options": { "-1": { - "text": "hostNet", - "index": 0 + "index": 0, + "text": "hostNet" } - } + }, + "type": "value" } ] } @@ -553,8 +543,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -577,8 +566,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -597,8 +585,7 @@ "value": "short" }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -613,6 +600,18 @@ "value": 225 } ] + }, + { + "matcher": { + "id": "byName", + "options": "Node" + }, + "properties": [ + { + "id": "custom.width", + "value": 212 + } + ] } ] }, @@ -625,12 +624,26 @@ "id": 2, "links": [], "options": { + "cellHeight": "sm", + "footer": { + 
"countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, "showHeader": true, "sortBy": [] }, - "pluginVersion": "8.2.6", + "pluginVersion": "10.2.2", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", "expr": "sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) )", "format": "table", "hide": false, @@ -641,6 +654,10 @@ "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (pod, node) (\n sum by (pod, node) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) * on (pod, node) group_right() avg_over_time(kube_pod_info{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range])\n)", "format": "table", "hide": false, @@ -650,6 +667,10 @@ "refId": "B" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * on (controller_type, controller_name) group_left() sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Off\"}[$__range]))) * 0\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * on (controller_type, controller_name) group_left() sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Initial\"}[$__range]))) * 0 + 1\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * on (controller_type, controller_name) group_left() sum by 
(controller_type, controller_name) (avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Auto\"}[$__range]))) * 0 + 2\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * on (controller_type, controller_name) group_left() sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Recreate\"}[$__range]))) * 0 + 3\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) * 0 + 5", "format": "table", "hide": false, @@ -659,6 +680,10 @@ "refId": "C" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) \n * on (pod)\n sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n)\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * 0)", "format": "table", "hide": false, @@ -668,6 +693,10 @@ "refId": "D" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) \n * on (pod)\n sum by (pod) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n)\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * 0)", "format": "table", "hide": false, @@ -677,6 +706,10 @@ 
"refId": "E" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (pod)\n(\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])\n * on (controller_type, controller_name) group_left()\n sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{container!=\"POD\", namespace=\"$namespace\", resource=\"cpu\"}[$__range]))\n)\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * 0)", "format": "table", "hide": false, @@ -686,6 +719,10 @@ "refId": "F" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) \n * on (pod)\n sum by (pod)\n (\n sum by (namespace, pod) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n -\n sum by (namespace, pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__range]))\n ) > 0\n)\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * 0)", "format": "table", "hide": false, @@ -695,6 +732,10 @@ "refId": "G" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) \n * on (pod)\n sum by (pod) \n (\n (\n (\n sum by (namespace, pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n -\n sum by (namespace, pod) 
(avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__range]))\n ) or sum by (namespace, pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__range]))\n ) > 0\n )\n)\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * 0)", "format": "table", "hide": false, @@ -704,6 +745,10 @@ "refId": "H" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) \n * on (pod)\n sum by (pod) (\n sum by (pod) (rate(container_cpu_cfs_throttled_periods_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n /\n sum by (pod) (rate(container_cpu_cfs_periods_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n * on (pod)\n sum by (pod) (avg_over_time(kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n )\n)\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * 0)", "format": "table", "hide": false, @@ -713,6 +758,10 @@ "refId": "I" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) \n * on (pod)\n sum by (pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n)\nor\nsum by (pod) 
(avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -721,6 +770,10 @@ "refId": "J" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) \n * on (pod)\n sum by (pod) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n)\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -729,6 +782,10 @@ "refId": "K" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (pod)\n(\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])\n * on (controller_type, controller_name) group_left()\n sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{container!=\"POD\", namespace=\"$namespace\", resource=\"memory\"}[$__range]))\n)\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -737,6 +794,10 @@ "refId": "L" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) \n * on (pod)\n sum by (pod)\n (\n (\n (\n sum by (namespace, pod) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=\"$namespace\", 
pod=~\"$pod\"}[$__range]))\n -\n sum by (namespace, pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__range]))\n ) > 0\n )\n )\n)\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -745,6 +806,10 @@ "refId": "M" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) \n * on (pod)\n sum by (pod)\n (\n (\n (\n sum by (namespace, pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n -\n sum by (namespace, pod) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__range]))\n ) or sum by (namespace, pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__range]))\n ) > 0\n )\n)\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -753,7 +818,12 @@ "refId": "N" }, { - "expr": "(\n # Select data rates if Pod had 'hostNetwork: false' during the selected period.\n max_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])\n * on(pod)\n max_over_time(kube_pod_info{host_network=\"false\",namespace=\"$namespace\", pod=~\"$pod\"}[$__range])\n * on(pod)\n # Sum data rates for all interfaces of the Pod.\n sum by (pod) 
(rate(container_network_receive_bytes_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n) or (\n # Return -1 if the Pod had 'hostNetwork: false' during the selected period.\n max_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])\n * on(pod)\n max_over_time(kube_pod_info{host_network=\"true\",namespace=\"$namespace\", pod=~\"$pod\"}[$__range])\n * -1\n)", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "(\n # Select data rates if Pod had 'hostNetwork: false' during the selected period.\n max(max_over_time(kube_pod_info{host_network=\"false\",namespace=\"$namespace\", pod=~\"$pod\"}[$__range])) by(pod)\n * on(pod)\n max(max_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) by(pod)\n * on(pod)\n # Sum data rates for all interfaces of the Pod.\n sum by (pod) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n) or (\n # Return -1 if the Pod had 'hostNetwork: false' during the selected period.\n max_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])\n * on(pod)\n max_over_time(kube_pod_info{host_network=\"true\",namespace=\"$namespace\", pod=~\"$pod\"}[$__range])\n * -1\n)", "format": "table", "instant": true, "intervalFactor": 1, @@ -761,7 +831,12 @@ "refId": "O" }, { - "expr": "(\n # Select data rates if Pod had 'hostNetwork: false' during the selected period.\n max_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])\n * on(pod)\n max_over_time(kube_pod_info{host_network=\"false\",namespace=\"$namespace\", pod=~\"$pod\"}[$__range])\n * on(pod)\n # Sum data rates for all interfaces of the Pod.\n sum by (pod) 
(rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n) or (\n # Return -1 if the Pod had 'hostNetwork: false' during the selected period.\n max_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])\n * on(pod)\n max_over_time(kube_pod_info{host_network=\"true\",namespace=\"$namespace\", pod=~\"$pod\"}[$__range])\n * -1\n)", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "(\n # Select data rates if Pod had 'hostNetwork: false' during the selected period.\n max(max_over_time(kube_pod_info{host_network=\"false\",namespace=\"$namespace\", pod=~\"$pod\"}[$__range])) by(pod)\n * on(pod)\n max(max_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) by(pod)\n * on(pod)\n # Sum data rates for all interfaces of the Pod.\n sum by (pod) (rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n) or (\n # Return -1 if the Pod had 'hostNetwork: false' during the selected period.\n max_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])\n * on(pod)\n max_over_time(kube_pod_info{host_network=\"true\",namespace=\"$namespace\", pod=~\"$pod\"}[$__range])\n * -1\n)", "format": "table", "instant": true, "intervalFactor": 1, @@ -769,6 +844,10 @@ "refId": "P" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) \n * on (pod)\n sum by (pod) (rate(container_fs_reads_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n)\nor\nsum by (pod) 
(avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -777,6 +856,10 @@ "refId": "Q" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) \n * on (pod)\n sum by (pod) (rate(container_fs_writes_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n)\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -785,6 +868,10 @@ "refId": "R" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range])) \n * on (pod)\n sum by (pod) (increase(kube_pod_container_status_restarts_total{namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))\n)\nor\nsum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__range]) * 0)", "format": "table", "hide": false, @@ -845,270 +932,257 @@ "type": "table" }, { - "cards": { - "cardHSpacing": 2, - "cardMinWidth": 5, - "cardRound": null, - "cardVSpacing": 2 + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateGnYlRd", - "defaultColor": "#757575", - "exponent": 0.5, - "mode": "discrete", - "thresholds": [ - { - "color": "#99440a", - "tooltip": "Some container is not running", - "value": "5" + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" }, - { - "color": "#e5ac0e", - "tooltip": 
"Pending", - "value": "1" + "custom": { + "fillOpacity": 70, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineWidth": 0, + "spanNulls": false }, - { - "color": "rgb(215, 215, 215)", - "tooltip": "Unknown", - "value": "2" - }, - { - "color": "#bf1b00", - "tooltip": "Failed", - "value": "3" - }, - { - "color": "#e0f9d7", - "tooltip": "Succeeded", - "value": "4" - }, - { - "color": "#508642", - "tooltip": "Running", - "value": "0" + "mappings": [ + { + "options": { + "0": { + "color": "green", + "index": 0, + "text": "Ready" + }, + "1": { + "color": "#99440a", + "index": 1, + "text": "Some containers not running or ready" + }, + "2": { + "color": "#e5ac0e", + "index": 2, + "text": "Pending" + }, + "3": { + "color": "#d7d7d7", + "index": 3, + "text": "Unknown" + }, + "4": { + "color": "#bf1b00", + "index": 4, + "text": "Failed" + }, + "5": { + "color": "#e0f9d7", + "index": 5, + "text": "Succeeded" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] } - ] + }, + "overrides": [] }, - "datasource": "$ds_prometheus", "gridPos": { "h": 8, "w": 24, "x": 0, "y": 7 }, - "highlightCards": true, "id": 53, - "legend": { - "show": true - }, "links": [], - "nullPointMode": "as empty", - "pageSize": 15, - "seriesFilterIndex": -1, - "statusmap": { - "ConfigVersion": "v1" + "options": { + "alignValue": "left", + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "mergeValues": false, + "rowHeight": 0.9, + "showValue": "never", + "tooltip": { + "mode": "single", + "sort": "none" + } }, "targets": [ { - "expr": "(\n min by (pod) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod) group_right()\n min_over_time(kube_pod_status_phase{namespace=\"$namespace\", pod=~\"$pod\", phase=\"Running\"}[$__rate_interval])\n ) == 1\n) * 0", + "datasource": { + "type": 
"prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "min by (pod) (\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}) by(pod)\n * on (pod) group_right()\n (\n (\n (min by (pod, phase) (label_replace(min_over_time(kube_pod_container_status_ready{namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]), \"phase\", \"Ready\", \"\", \"\")) == 1) * 0 + 0\n )\n or on(pod)\n (\n (min by (pod, phase) (min_over_time(kube_pod_status_phase{namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) == 1) * 0\n + on(phase) group_left()\n (\n label_replace(vector(1), \"phase\", \"Running\", \"\", \"\") or\n label_replace(vector(2), \"phase\", \"Pending\", \"\", \"\") or\n label_replace(vector(3), \"phase\", \"Unknown\", \"\", \"\") or\n label_replace(vector(4), \"phase\", \"Failed\", \"\", \"\") or\n label_replace(vector(5), \"phase\", \"Succeeded\", \"\", \"\")\n )\n )\n ) \n)", "format": "time_series", "hide": false, "intervalFactor": 1, "legendFormat": "{{ pod }}", + "range": true, "refId": "A" - }, - { - "expr": "(\n min by (pod, phase) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod) group_right()\n min_over_time(kube_pod_status_phase{namespace=\"$namespace\", pod=~\"$pod\", phase=\"Pending\"}[$__rate_interval])\n ) == 1\n) * 1", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{ pod }}", - "refId": "B" - }, - { - "expr": "(\n min by (pod, phase) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod) group_right()\n min_over_time(kube_pod_status_phase{namespace=\"$namespace\", pod=~\"$pod\", phase=\"Unknown\"}[$__rate_interval])\n ) == 1\n) * 2", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{ pod }}", - "refId": "C" - }, - { - "expr": "(\n min by (pod, phase) (\n kube_controller_pod{node=~\"$node\", 
namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod) group_right()\n min_over_time(kube_pod_status_phase{namespace=\"$namespace\", pod=~\"$pod\", phase=\"Failed\"}[$__rate_interval])\n ) == 1\n) * 3", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{ pod }}", - "refId": "D" - }, - { - "expr": "(\n min by (pod, phase) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod) group_right()\n min_over_time(kube_pod_status_phase{namespace=\"$namespace\", pod=~\"$pod\", phase=\"Succeeded\"}[$__rate_interval])\n ) == 1\n) * 4", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{ pod }}", - "refId": "E" - }, - { - "expr": "(\n min by (pod) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod) group_right()\n min by (pod) (min_over_time(kube_pod_container_status_ready{namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n ) == 0\n) + 5", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{ pod }}", - "refId": "F" } ], "title": "Status", - "tooltip": { - "extraInfo": "", - "freezeOnClick": true, - "items": [], - "show": true, - "showExtraInfo": false, - "showItems": false - }, - "type": "flant-statusmap-panel", - "useMax": true, - "usingPagination": false, - "xAxis": { - "show": true - }, - "yAxis": { - "maxWidth": -1, - "minWidth": -1, - "show": true - }, - "yAxisSort": "metrics", - "yLabel": { - "delimiter": "", - "labelTemplate": "", - "usingSplitLabel": false - } + "type": "state-timeline" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", - "decimals": 0, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "The number of Pod restarts", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": 
{ + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 3, "w": 24, "x": 0, "y": 15 }, - "hiddenSeries": false, "id": 107, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + 
"tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "(\n sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__rate_interval])) \n * on (pod)\n sum by (pod) (increase(kube_pod_container_status_restarts_total{namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n) > 0", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", "refId": "A" - }, - { - "expr": "sum (\n sum by (pod) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}[$__rate_interval])) \n * on (pod)\n sum by (pod) (increase(kube_pod_container_status_restarts_total{namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Pods restarts", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "decimals": 0, - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -1121,608 +1195,752 @@ "type": "row" }, { - "aliasColors": {}, - "bars": true, - 
"dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 19 }, - "hiddenSeries": false, "id": 6, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - 
"points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "sum by(pod) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "sum by(pod) (\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n * on (pod)\n sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", "format": "time_series", "instant": false, "intervalFactor": 1, "legendFormat": "{{ pod }}", "refId": "A" - }, - { - "expr": "sum (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Usage by pod", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - 
"yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "The total value may not equal the sum of system and user times because of the kernel's cgroup accounting peculiarities. Read more here: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + 
"id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "System" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#e24d42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "User" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#1f78c1", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 19 }, - "hiddenSeries": false, "id": 7, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, - { - "alias": "System", - "color": "#e24d42" - }, - { - "alias": "User", - "color": "#1f78c1" + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "sum (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (rate(container_cpu_system_seconds_total{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "sum (\n max(kube_controller_pod{node=~\"$node\", 
namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n * on (pod)\n sum by (pod) (rate(container_cpu_system_seconds_total{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", "format": "time_series", "interval": "", "intervalFactor": 1, "legendFormat": "System", + "range": true, "refId": "A" }, { - "expr": "sum (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (rate(container_cpu_user_seconds_total{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "sum (\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n * on (pod)\n sum by (pod) (rate(container_cpu_user_seconds_total{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", "format": "time_series", "interval": "", "intervalFactor": 1, "legendFormat": "User", + "range": true, "refId": "B" - }, - { - "expr": "sum (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "D" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Usage by state", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - 
"max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph shows the requested CPU resources higher than the actual CPU consumption. In other words, it shows CPU resources that can be \"freed\" without affecting the service.", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 8, "w": 12, 
"x": 0, "y": 28 }, - "hiddenSeries": false, "id": 59, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "sum by (pod)\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n ) > 0\n )", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "sum by (pod)\n (\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n * on (pod)\n sum by (pod) (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) 
(rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n ) > 0\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", + "range": true, "refId": "A" - }, - { - "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__rate_interval]))\n ) > 0\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Over-requested by pod", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph shows the requested CPU resources lower than the actual CPU consumption. 
In other words, it shows CPU resources that need to be \"reserved\" for the service to run smoothly.", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 28 }, - "hiddenSeries": false, "id": 64, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": 
"Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": " (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (\n (\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__rate_interval]))\n )\n or\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n )\n) > 0", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": " (\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n * on (pod)\n sum by (pod) (\n (\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__rate_interval]))\n )\n or\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n )\n) > 0", "format": "time_series", "hide": false, "intervalFactor": 1, "legendFormat": "{{ pod }}", + "range": true, "refId": "A" - }, - { - "expr": "sum (\n 
(\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (\n (\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__rate_interval]))\n )\n or\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n )\n ) > 0\n )", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Under-requested by pod", - "tooltip": { - "shared": true, - "sort": 1, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "The absence of data on the graph means that container resources are not set", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", 
+ "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 5, "w": 24, "x": 0, "y": 36 }, - "hiddenSeries": false, "id": 31, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - 
"steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "(\n sum by (pod) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (rate(container_cpu_cfs_throttled_periods_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))) \n / \n sum by (pod) (rate(container_cpu_cfs_periods_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n) \n* \nsum by (pod) (kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"})", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "(\n sum by (pod) (\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n * on (pod)\n sum by (pod) (rate(container_cpu_cfs_throttled_periods_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))) \n / \n sum by (pod) (rate(container_cpu_cfs_periods_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n) \n* \nsum by (pod) (kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", + "range": true, "refId": "A" - }, - { - "expr": "sum (\n (\n sum by (pod) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (rate(container_cpu_cfs_throttled_periods_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))) \n / \n sum by (pod) (rate(container_cpu_cfs_periods_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n ) \n * \n sum by (pod) 
(kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"})\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Throttling", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "decimals": null, - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -1732,62 +1950,149 @@ "id": 61, "panels": [ { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "The total value may not equal the sum of system and user times because of the kernel's cgroup accounting peculiarities. 
Read more here: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt", - "fill": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Usage" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#629e51", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#f4d598", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#c15c17", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VPA Target" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#447ebc", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 9, - "w": 6, + "w": 24, "x": 0, - "y": 43 + "y": 42 }, "id": 62, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - 
"linewidth": 2, "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.13", "repeat": "pod", "repeatDirection": "h", - "seriesOverrides": [ - { - "alias": "Usage", - "color": "#629e51" - }, - { - "alias": "Requests", - "color": "#f4d598" - }, - { - "alias": "Limits", - "color": "#c15c17" - }, - { - "alias": "VPA Target", - "color": "#447ebc" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": true, "targets": [ { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))", @@ -1818,46 +2123,8 @@ "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "$pod", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" } ], "title": "Pods CPU", @@ -1865,7 +2132,10 @@ }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -1875,61 +2145,156 @@ "id": 9, "panels": [ { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "The total value may not equal the 
sum of system and user times because of the kernel's cgroup accounting peculiarities. Read more here: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt", - "fill": 1, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "System" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#e24d42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "User" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#1f78c1", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 9, - "w": 6, + "w": 24, "x": 0, - "y": 44 + "y": 43 }, "id": 11, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - 
"rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", "repeat": "pod", "repeatDirection": "h", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false - }, - { - "alias": "System", - "color": "#e24d42" - }, - { - "alias": "User", - "color": "#1f78c1" - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by(pod) (rate(container_cpu_system_seconds_total{node=~\"$node\", container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, @@ -1937,60 +2302,25 @@ "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by(pod) (rate(container_cpu_user_seconds_total{node=~\"$node\", container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "User", "refId": "B" - }, - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "D" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "$pod", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": 
[ { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" } ], "title": "Pods CPU by state", @@ -1998,7 +2328,10 @@ }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -2011,503 +2344,658 @@ "type": "row" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": 
"custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 44 }, - "hiddenSeries": false, "id": 15, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "lines": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "sum by(pod) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "sum by(pod) (\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n * on (pod)\n sum by (pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", + "range": true, "refId": "A" - }, - { - 
"expr": "sum(\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Usage by pod", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "The Working set bytes metric is the actual memory used by the container, as it includes active file memory. When its value approaches the limit, the container can be killed by the OOMKiller. 
This value can be higher than the sum RSS and Cache since not all active file memory is Cache.", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Working set bytes without kmem" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "rgb(0, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Kmem" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "rgb(255, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + } + ] }, - "fill": 1, - 
"fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 44 }, - "hiddenSeries": false, "id": 16, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Working set bytes without kmem", - "color": "rgb(0, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false }, - { - "alias": "Kmem", - "color": "rgb(255, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "sum (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (avg_over_time(container_memory_rss{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__rate_interval]))\n)", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum (\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n * on (pod)\n sum by (pod) (avg_over_time(container_memory_rss{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__rate_interval]))\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "RSS", + "range": true, "refId": "A" }, { - "expr": "sum (\n kube_controller_pod{node=~\"$node\", 
namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (avg_over_time(container_memory_cache{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__rate_interval]))\n)", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum (\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n * on (pod)\n sum by (pod) (avg_over_time(container_memory_cache{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__rate_interval]))\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Cache", + "range": true, "refId": "B" }, { - "expr": "sum (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (avg_over_time(container_memory_swap{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__rate_interval]))\n)", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum (\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n * on (pod)\n sum by (pod) (avg_over_time(container_memory_swap{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__rate_interval]))\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Swap", + "range": true, "refId": "C" }, { - "expr": "sum (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__rate_interval]))\n)", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum (\n 
max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n * on (pod)\n sum by (pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__rate_interval]))\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Working set bytes without kmem", + "range": true, "refId": "D" }, { - "expr": "sum (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod)\n sum by (pod) (avg_over_time(container_memory:kmem{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__rate_interval]))\n)", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum (\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n * on (pod)\n sum by (pod) (avg_over_time(container_memory:kmem{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__rate_interval]))\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Kmem", + "range": true, "refId": "E" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Usage by state", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + 
"datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph shows the requested Memory resources higher than the actual Memory consumption. In other words, it shows Memory resources that can be \"freed\" without affecting the service.", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 53 }, - "hiddenSeries": false, "id": 63, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": 
"null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "lines": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "(\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod) group_left()\n sum by (pod)\n (\n (\n sum by (namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by (namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n ) > 0\n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", "refId": "A" - }, - { - "expr": "sum\n(\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod) group_left()\n sum by (pod)\n (\n (\n sum by (namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by (namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n ) > 0\n )\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, 
"title": "Over-requested by pod", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "decimals": null, - "format": "bytes", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph shows the requested Memory resources higher than the actual Memory consumption. In other words, it shows Memory resources that need to be \"reserved\" for the service to run smoothly.", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + 
"properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 53 }, - "hiddenSeries": false, "id": 75, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "lines": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "(\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod) group_left()\n sum by (pod)\n (\n (\n (\n sum by (namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by (namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n ) or sum by (namespace, pod, container) 
(avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n ) > 0\n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", "refId": "A" - }, - { - "expr": "sum\n(\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod) group_left()\n sum by (pod)\n (\n (\n (\n sum by (namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by (namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n ) or sum by (namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n ) > 0\n )\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Under-requested by pod", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "decimals": null, - "format": "bytes", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -2517,91 +3005,289 @@ "id": 
18, "panels": [ { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "The Working set bytes metric is the actual memory used by the container, as it includes active file memory. When its value approaches the limit, the container can be killed by the OOMKiller. This value can be higher than the sum RSS and Cache since not all active file memory is Cache.", - "fill": 1, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Working set bytes without kmem" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "rgb(0, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Kmem" + }, + "properties": [ + { + "id": "color", + "value": { + 
"fixedColor": "rgb(255, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#c15c17", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#f4d598", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VPA Target" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#447ebc", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + } + ] + }, "gridPos": { "h": 9, - "w": 6, + "w": 24, "x": 0, - "y": 64 + "y": 63 }, "id": 19, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": 
false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", "repeat": "pod", "repeatDirection": "h", - "seriesOverrides": [ - { - "alias": "Working set bytes without kmem", - "color": "rgb(0, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false - }, - { - "alias": "Kmem", - "color": "rgb(255, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false - }, - { - "alias": "Limits", - "color": "#c15c17", - "dashes": true, - "fill": 0, - "lines": false, - "linewidth": 2, - "stack": false - }, - { - "alias": "Requests", - "color": "#f4d598", - "dashes": true, - "fill": 0, - "lines": false, - "linewidth": 2, - "stack": false - }, - { - "alias": "VPA Target", - "color": "#447ebc", - "dashes": true, - "fill": 0, - "lines": false, - "linewidth": 2, - "stack": false - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, "targets": [ { "expr": "sum by (pod) (avg_over_time(container_memory_rss{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\", container!=\"POD\"}[$__rate_interval]))", @@ -2660,46 +3346,8 @@ "refId": "H" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "$pod", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - 
"show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" } ], "title": "Pods Memory", @@ -2707,7 +3355,10 @@ }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -2720,25 +3371,34 @@ "type": "row" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "description": "This graph shows Network Receive (except for the hostNetwork Pods)", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisPlacement": "auto", + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", + "axisPlacement": "auto", "barAlignment": 0, - "drawStyle": "bars", - "fillOpacity": 100, + "drawStyle": "line", + "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "lineInterpolation": "linear", - "lineWidth": 0, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" @@ -2779,38 +3439,36 @@ "overrides": [ { "matcher": { - "id": "byFrameRefID", - "options": "B" + "id": "byName", + "options": "Total" }, "properties": [ { - "id": "min", + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineWidth", "value": 0 }, { - "id": "max", + "id": "custom.fillOpacity", "value": 0 }, - { - "id": "custom.axisPlacement", - "value": "hidden" - }, { "id": "color", "value": { - "mode": "fixed", - "fixedColor": "transparent" + "fixedColor": "yellow", + "mode": "fixed" } } ] } ] }, - "datasource": { - "uid": "${ds_prometheus}", - "type": "prometheus" - }, - "description": "This graph shows Network Receive (except for the hostNetwork Pods)", "gridPos": { "h": 9, "w": 12, @@ -2825,63 +3483,83 @@ "mean" ], "displayMode": "table", - "placement": "right" + "placement": "right", + "showLegend": true, + 
"sortBy": "Mean", + "sortDesc": true }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, "pluginVersion": "8.5.13", "targets": [ { - "expr": "kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"} \n* on(pod)\nkube_pod_info{host_network=\"false\", namespace=\"$namespace\", pod=~\"$pod\"}\n* on(pod)\nsum by (pod) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n* on(pod)\nmax(kube_pod_info{host_network=\"false\", namespace=\"$namespace\", pod=~\"$pod\"}) by(pod)\n* on(pod)\nsum by (pod) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", + "range": true, "refId": "A" }, { - "expr": "sum (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"} \n * on(pod)\n kube_pod_info{host_network=\"false\", namespace=\"$namespace\", pod=~\"$pod\"}\n * on(pod)\n sum by (pod) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" - }, - { - "expr": "# Return -1 for Pods with 'hostNetwork: true' to use in value mappings.\nkube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"} \n* on (pod)\nkube_pod_info{host_network=\"true\", namespace=\"$namespace\", pod=~\"$pod\"} \n*\n-1", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "# Return -1 for Pods with 
'hostNetwork: true' to use in value mappings.\nmax(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n* on (pod)\nmax(kube_pod_info{host_network=\"true\", namespace=\"$namespace\", pod=~\"$pod\"}) by(pod)\n*\n-1", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", + "range": true, "refId": "C" } ], - "timeFrom": null, - "timeShift": null, "title": "Receive", - "transformations": [], + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], "type": "timeseries" }, { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "description": "This graph shows Network Transmit (except for the hostNetwork Pods)", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisPlacement": "auto", + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", + "axisPlacement": "auto", "barAlignment": 0, - "drawStyle": "bars", - "fillOpacity": 100, + "drawStyle": "line", + "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "lineInterpolation": "linear", - "lineWidth": 0, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" @@ -2922,38 +3600,36 @@ "overrides": [ { "matcher": { - "id": "byFrameRefID", - "options": "B" + "id": "byName", + "options": "Total" }, "properties": [ { - "id": "min", + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineWidth", "value": 0 }, { - "id": "max", + "id": "custom.fillOpacity", "value": 0 }, - { - "id": "custom.axisPlacement", - "value": "hidden" - }, { "id": "color", "value": { - "mode": "fixed", - "fixedColor": "transparent" + "fixedColor": "yellow", + "mode": "fixed" } } ] } ] }, - "datasource": { - "uid": "${ds_prometheus}", - "type": "prometheus" - }, - 
"description": "This graph shows Network Transmit (except for the hostNetwork Pods)", "gridPos": { "h": 9, "w": 12, @@ -2968,46 +3644,60 @@ "mean" ], "displayMode": "table", - "placement": "right" + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, "pluginVersion": "8.5.13", "targets": [ { - "expr": "kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"} \n* on(pod)\nkube_pod_info{host_network=\"false\", namespace=\"$namespace\", pod=~\"$pod\"}\n* on(pod)\nsum by (pod) (rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n* on(pod)\nmax(kube_pod_info{host_network=\"false\", namespace=\"$namespace\", pod=~\"$pod\"}) by(pod)\n* on(pod)\nsum by (pod) (rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", + "range": true, "refId": "A" }, { - "expr": "sum (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"} \n * on(pod)\n kube_pod_info{host_network=\"false\", namespace=\"$namespace\", pod=~\"$pod\"}\n * on(pod)\n sum by (pod) (rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" - }, - { - "expr": "# Return -1 for Pods with 'hostNetwork: true' to use in value mappings.\nkube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", 
pod=~\"$pod\"} \n* on (pod)\nkube_pod_info{host_network=\"true\", namespace=\"$namespace\", pod=~\"$pod\"} \n*\n-1", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "# Return -1 for Pods with 'hostNetwork: true' to use in value mappings.\nmax(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n* on (pod)\nmax(kube_pod_info{host_network=\"true\", namespace=\"$namespace\", pod=~\"$pod\"}) by(pod)\n*\n-1", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", + "range": true, "refId": "C" } ], - "timeFrom": null, - "timeShift": null, "title": "Transmit", - "transformations": [], + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], "type": "timeseries" }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -3020,236 +3710,291 @@ "type": "row" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { 
+ "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "iops" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 74 }, - "hiddenSeries": false, "id": 43, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "sum by(pod) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod)\n sum by (pod) (rate(container_fs_reads_total{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "sum by(pod) (\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}) 
by(pod)\n * on (pod)\n sum by (pod) (rate(container_fs_reads_total{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", + "range": true, "refId": "A" - }, - { - "expr": "sum (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod)\n sum by (pod) (rate(container_fs_reads_total{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Read", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "iops", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "iops" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 74 }, - "hiddenSeries": false, "id": 58, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "sum by(pod) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod)\n sum by (pod) (rate(container_fs_writes_total{node=~\"$node\", container!=\"POD\", 
pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "sum by(pod) (\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}) by(pod)\n * on (pod)\n sum by (pod) (rate(container_fs_writes_total{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ pod }}", + "range": true, "refId": "A" - }, - { - "expr": "sum (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod)\n sum by (pod) (rate(container_fs_writes_total{node=~\"$node\", container!=\"POD\", pod=~\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Write", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "iops", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -3262,333 +4007,418 @@ "type": "row" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, 
"description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 83 }, - "hiddenSeries": false, "id": 82, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, 
- "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "fill": 0, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "sum by (namespace, controller, persistentvolumeclaim) (\n max by (namespace, controller, persistentvolumeclaim)\n (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n\n * on (namespace, persistentvolumeclaim) group_left(pod) \n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"} \n )\n)", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum by (namespace, controller, persistentvolumeclaim) (\n max by (namespace, controller, persistentvolumeclaim)\n (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", 
\"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n\n * on (namespace, persistentvolumeclaim) group_left(pod) \n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * on (namespace, pod) group_left(controller)\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"}) by(pod)\n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Provisioned {{ persistentvolumeclaim }}", + "range": true, "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "PVC Provisioned (except local storage classes)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + 
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Total/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 83 }, - "hiddenSeries": false, "id": 84, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Total/", - "fill": 0, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "sum by (pod, 
persistentvolumeclaim)\n(\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod) group_right(persistentvolumeclaim)\n (\n (avg_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]) - avg_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n * \n on (namespace, persistentvolumeclaim) group_left (pod) \n (\n max by (namespace, persistentvolumeclaim, pod) (\n avg_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])) \n * on (namespace, persistentvolumeclaim) group_right(pod) \n max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0))\n * on (namespace, persistentvolumeclaim) group_right (pod)\n avg_over_time(kube_persistentvolumeclaim_status_phase{namespace=\"$namespace\", phase=\"Bound\"}[$__rate_interval])\n )\n )\n)", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum by (pod, persistentvolumeclaim)\n(\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n * on (pod) group_right(persistentvolumeclaim)\n (\n (avg_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]) - avg_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n * \n on (namespace, persistentvolumeclaim) group_left (pod) \n (\n max by (namespace, persistentvolumeclaim, pod) (\n 
avg_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])) \n * on (namespace, persistentvolumeclaim) group_right(pod) \n max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0))\n * on (namespace, persistentvolumeclaim) group_right (pod)\n avg_over_time(kube_persistentvolumeclaim_status_phase{namespace=\"$namespace\", phase=\"Bound\"}[$__rate_interval])\n )\n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Usage {{ persistentvolumeclaim }}", + "range": true, "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "PVC Usage (except local storage classes)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Total/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "color", + "value": { + "mode": "palette-classic" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 8, "w": 24, "x": 0, "y": 90 }, - "hiddenSeries": false, "id": 83, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Total .*/", - "fill": 0, - "linewidth": 2, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": 
"desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "sum by (pod, persistentvolumeclaim)\n(\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}\n * on (pod) group_right(persistentvolumeclaim) (\n (\n avg_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]) - avg_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) * \n on (namespace, persistentvolumeclaim) group_left (pod) \n (\n max by (namespace, persistentvolumeclaim, pod) (\n avg_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])) \n * on (namespace, persistentvolumeclaim) group_right(pod) \n max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0))\n * on (namespace, persistentvolumeclaim) group_right (pod)\n avg_over_time(kube_persistentvolumeclaim_status_phase{namespace=\"$namespace\", phase=\"Bound\"}[$__rate_interval])\n )\n )\n)\n/\n(\nsum by (pod, persistentvolumeclaim)\n(\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod) group_right(persistentvolumeclaim)\n (\n avg_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n * \n on (namespace, persistentvolumeclaim) group_left (pod) \n (\n avg_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n * on (namespace, persistentvolumeclaim) group_left (pod) \n (\n max by (namespace, persistentvolumeclaim, pod) (\n 
avg_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])) \n * on (namespace, persistentvolumeclaim) group_right(pod) \n max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0))\n * on (namespace, persistentvolumeclaim) group_right (pod)\n avg_over_time(kube_persistentvolumeclaim_status_phase{namespace=\"$namespace\", phase=\"Bound\"}[$__rate_interval])\n )\n )\n)))", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum by (pod, persistentvolumeclaim)\n(\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\", pod=~\"$pod\"}) by(pod)\n * on (pod) group_right(persistentvolumeclaim) (\n (\n avg_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]) - avg_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) * \n on (namespace, persistentvolumeclaim) group_left (pod) \n (\n max by (namespace, persistentvolumeclaim, pod) (\n avg_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])) \n * on (namespace, persistentvolumeclaim) group_right(pod) \n max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0))\n * on (namespace, persistentvolumeclaim) group_right (pod)\n avg_over_time(kube_persistentvolumeclaim_status_phase{namespace=\"$namespace\", 
phase=\"Bound\"}[$__rate_interval])\n )\n )\n)\n/\n(\nsum by (pod, persistentvolumeclaim)\n(\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}) by(pod)\n * on (pod) group_right(persistentvolumeclaim)\n (\n avg_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n * \n on (namespace, persistentvolumeclaim) group_left (pod) \n (\n avg_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n * on (namespace, persistentvolumeclaim) group_left (pod) \n (\n max by (namespace, persistentvolumeclaim, pod) (\n avg_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])) \n * on (namespace, persistentvolumeclaim) group_right(pod) \n max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0))\n * on (namespace, persistentvolumeclaim) group_right (pod)\n avg_over_time(kube_persistentvolumeclaim_status_phase{namespace=\"$namespace\", phase=\"Bound\"}[$__rate_interval])\n )\n )\n)))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Usage {{ persistentvolumeclaim }}", + "range": true, "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "PVC Usage in % (except local storage classes)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 
1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "collapsed": true, - "datasource": null, + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -3596,538 +4426,552 @@ "y": 98 }, "id": 88, - "panels": [ - { - "datasource": "$ds_prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": null, - "displayMode": "auto" - }, - "decimals": 2, - "displayName": "", - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "panels": [], + "title": "PVC Detailed", + "type": "row" + }, + { + "datasource": { + "uid": "$ds_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" }, - "overrides": [ + "inspect": false + }, + "decimals": 2, + "displayName": "", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ { - "matcher": { - "id": "byName", - "options": "Time" - }, - "properties": [ - { - "id": "displayName", - "value": "Time" - }, - { - "id": "custom.align", - "value": null - } - ] + "color": "green", + "value": null }, { - "matcher": { - "id": "byName", - "options": "persistentvolumeclaim" - }, - "properties": [ - { - "id": "displayName", - "value": "Name" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": null - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "storageclass" - }, - "properties": [ - { - "id": "displayName", - "value": "StorageClass" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": null - } - ] - }, - { - "matcher": { - 
"id": "byName", - "options": "Value #B" - }, - "properties": [ - { - "id": "displayName", - "value": "Requested" - }, - { - "id": "unit", - "value": "bytes" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": null - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #C" - }, - "properties": [ - { - "id": "displayName", - "value": "Provisioned" - }, - { - "id": "unit", - "value": "bytes" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": null - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #D" - }, - "properties": [ - { - "id": "displayName", - "value": "Capacity" - }, - { - "id": "unit", - "value": "bytes" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": null - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #E" - }, - "properties": [ - { - "id": "displayName", - "value": "Used bytes" - }, - { - "id": "unit", - "value": "bytes" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": null - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #F" - }, - "properties": [ - { - "id": "displayName", - "value": "Used inodes" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "custom.align", - "value": null - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #G" - }, - "properties": [ - { - "id": "displayName", - "value": "Used bytes (%)" - }, - { - "id": "unit", - "value": "percentunit" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": null - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #H" - }, - "properties": [ - { - "id": "displayName", - "value": "Used inodes (%)" - }, - { - "id": "unit", - "value": "percentunit" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": null - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "pod" - }, 
- "properties": [ - { - "id": "displayName", - "value": "Used by pod" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": null - } - ] + "color": "red", + "value": 80 } ] }, - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 99 - }, - "id": 86, - "links": [], - "options": { - "showHeader": true - }, - "pluginVersion": "8.2.6", - "targets": [ - { - "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n kube_persistentvolumeclaim_info{namespace=\"$namespace\"}\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"} \n)", - "format": "table", - "instant": true, - "intervalFactor": 1, - "legendFormat": "Name, storageclass", - "refId": "A" + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" }, - { - "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{namespace=\"$namespace\"}[$__range])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"}\n)", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 1, - "legendFormat": "Requested (pvc)", - "refId": "B" - }, - { - "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n label_replace(\n kube_persistentvolumeclaim_info{namespace=\"$namespace\"},\n \"persistentvolume\", \"$1\", 
\"volumename\", \"(.*)\") \n * on (persistentvolume) group_left() \n kube_persistentvolume_capacity_bytes\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"} \n)", - "format": "table", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "Provisioned (pv)", - "refId": "C" - }, - { - "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n avg_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"} \n)", - "format": "table", - "instant": true, - "intervalFactor": 1, - "legendFormat": "Capacity (real)", - "refId": "D" - }, - { - "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n avg_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range]) - avg_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"} \n)", - "format": "table", - "instant": true, - "intervalFactor": 1, - "legendFormat": "Used bytes", - "refId": "E" - }, - { - 
"expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n avg_over_time(kubelet_volume_stats_inodes_used{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"} \n)", - "format": "table", - "instant": true, - "intervalFactor": 1, - "legendFormat": "Used inodes", - "refId": "F" - }, - { - "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n (avg_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range]) - avg_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range]))\n /\n avg_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"} \n)", - "format": "table", - "instant": true, - "intervalFactor": 1, - "legendFormat": "Used bytes %", - "refId": "G" - }, - { - "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n avg_over_time(kubelet_volume_stats_inodes_used{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n /\n avg_over_time(kubelet_volume_stats_inodes{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) 
group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"} \n)", - "format": "table", - "instant": true, - "intervalFactor": 1, - "legendFormat": "Used inodes %", - "refId": "H" - }, - { - "expr": "max by (namespace, persistentvolumeclaim, pod) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"} \n)", - "format": "table", - "instant": true, - "intervalFactor": 1, - "legendFormat": "pod", - "refId": "I" - } - ], - "title": "Overview", - "transformations": [ - { - "id": "merge", - "options": { - "reducers": [] + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.align" } + ] + }, + { + "matcher": { + "id": "byName", + "options": "persistentvolumeclaim" }, - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "persistentvolumeclaim", - "storageclass", - "Value #B", - "Value #C", - "Value #D", - "Value #E", - "Value #F", - "Value #G", - "Value #H", - "pod" - ] - } + "properties": [ + { + "id": "displayName", + "value": "Name" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" } - } + ] + }, + { + "matcher": { + "id": "byName", + "options": "storageclass" + }, + "properties": [ + { + "id": "displayName", + "value": "StorageClass" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "displayName", + "value": "Requested" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #C" 
+ }, + "properties": [ + { + "id": "displayName", + "value": "Provisioned" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #D" + }, + "properties": [ + { + "id": "displayName", + "value": "Capacity" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #E" + }, + "properties": [ + { + "id": "displayName", + "value": "Used bytes" + }, + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #F" + }, + "properties": [ + { + "id": "displayName", + "value": "Used inodes" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #G" + }, + "properties": [ + { + "id": "displayName", + "value": "Used bytes (%)" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #H" + }, + "properties": [ + { + "id": "displayName", + "value": "Used inodes (%)" + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "pod" + }, + "properties": [ + { + "id": "displayName", + "value": "Used by pod" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 99 + }, + "id": 86, + "links": [], + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" ], - "type": 
"table" + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n kube_persistentvolumeclaim_info{namespace=\"$namespace\"}\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"} \n)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Name, storageclass", + "refId": "A" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", - "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 108 - }, - "hiddenSeries": false, - "id": 89, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", + "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{namespace=\"$namespace\"}[$__range])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"}\n)", + "format": "table", + "hide": false, + "instant": true, + 
"interval": "", + "intervalFactor": 1, + "legendFormat": "Requested (pvc)", + "refId": "B" + }, + { + "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n label_replace(\n kube_persistentvolumeclaim_info{namespace=\"$namespace\"},\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\") \n * on (persistentvolume) group_left() \n kube_persistentvolume_capacity_bytes\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"} \n)", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "Provisioned (pv)", + "refId": "C" + }, + { + "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n avg_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"} \n)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Capacity (real)", + "refId": "D" + }, + { + "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n avg_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range]) - avg_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on 
(namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"} \n)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Used bytes", + "refId": "E" + }, + { + "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n avg_over_time(kubelet_volume_stats_inodes_used{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"} \n)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Used inodes", + "refId": "F" + }, + { + "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n (avg_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range]) - avg_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range]))\n /\n avg_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"} \n)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Used bytes %", + "refId": "G" + }, + { + "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n avg_over_time(kubelet_volume_stats_inodes_used{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n /\n 
avg_over_time(kubelet_volume_stats_inodes{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"} \n)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Used inodes %", + "refId": "H" + }, + { + "expr": "max by (namespace, persistentvolumeclaim, pod) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\", pod=~\"$pod\"} \n)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "pod", + "refId": "I" + } + ], + "title": "Overview", + "transformations": [ + { + "id": "merge", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "persistentvolumeclaim", - "repeatDirection": "h", - "seriesOverrides": [ - { - "alias": "Total", - "fill": 0, - "stack": false + "reducers": [] + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "persistentvolumeclaim", + "storageclass", + "Value #B", + "Value #C", + "Value #D", + "Value #E", + "Value #F", + "Value #G", + "Value #H", + "pod" + ] } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n label_replace(\n kube_persistentvolumeclaim_info{namespace=\"$namespace\"},\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\") \n * on (persistentvolume) group_left() \n 
avg_over_time(kube_persistentvolume_capacity_bytes[$__rate_interval])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", persistentvolumeclaim=\"$persistentvolumeclaim\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"} \n)", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Provisioned", - "refId": "B" - }, - { - "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n avg_over_time(kubelet_volume_stats_capacity_bytes{namespace=\"$namespace\"}[$__rate_interval])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"} \n)", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Capacity", - "refId": "A" - }, - { - "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n avg_over_time(kubelet_volume_stats_capacity_bytes{namespace=\"$namespace\"}[$__rate_interval]) - avg_over_time(kubelet_volume_stats_available_bytes{namespace=\"$namespace\"}[$__rate_interval])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"} \n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Used bytes", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": 
"$persistentvolumeclaim", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null } } ], - "title": "PVC Detailed", - "type": "row" + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 108 + }, + "id": 89, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + 
}, + "pluginVersion": "8.5.13", + "repeat": "persistentvolumeclaim", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n label_replace(\n kube_persistentvolumeclaim_info{namespace=\"$namespace\"},\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\") \n * on (persistentvolume) group_left() \n avg_over_time(kube_persistentvolume_capacity_bytes[$__rate_interval])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", persistentvolumeclaim=\"$persistentvolumeclaim\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"}\n)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Provisioned", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n avg_over_time(kubelet_volume_stats_capacity_bytes{namespace=\"$namespace\"}[$__rate_interval])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"}\n)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Capacity", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n 
avg_over_time(kubelet_volume_stats_capacity_bytes{namespace=\"$namespace\"}[$__rate_interval]) - avg_over_time(kubelet_volume_stats_available_bytes{namespace=\"$namespace\"}[$__rate_interval])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"}\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used bytes", + "range": true, + "refId": "C" + } + ], + "title": "$persistentvolumeclaim", + "type": "timeseries" } ], "refresh": "30s", - "schemaVersion": 32, - "style": "dark", + "schemaVersion": 38, "tags": [ "main" ], @@ -4139,8 +4983,6 @@ "text": "default", "value": "default" }, - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Prometheus", @@ -4164,10 +5006,11 @@ "$__all" ] }, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "definition": "label_values(kubernetes_build_info, node)", - "description": null, - "error": null, "hide": 0, "includeAll": true, "label": "Node", @@ -4188,16 +5031,16 @@ "useTags": false }, { - "allValue": null, "current": { "selected": false, - "text": "candi-dashboard-stage", - "value": "candi-dashboard-stage" + "text": "d8-monitoring", + "value": "d8-monitoring" + }, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" }, - "datasource": "$ds_prometheus", "definition": "label_values(kube_pod_info{node=~\"$node\"}, namespace)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Namespace", @@ -4218,16 +5061,16 @@ "useTags": false }, { - "allValue": null, "current": { "selected": false, - "text": "sts/postgres", - "value": "sts/postgres" + "text": "sts/prometheus-main", + "value": "sts/prometheus-main" }, - "datasource": 
"$ds_prometheus", - "definition": "label_values(kube_controller_pod{node=~\"$node\", namespace=~\"$namespace\"}, controller)", - "description": null, - "error": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "definition": "label_values(kube_controller_pod{node=~\"$node\", namespace=~\"$namespace\"},controller)", "hide": 0, "includeAll": false, "label": "Controller", @@ -4235,8 +5078,9 @@ "name": "controller", "options": [], "query": { - "query": "label_values(kube_controller_pod{node=~\"$node\", namespace=~\"$namespace\"}, controller)", - "refId": "main-controller-Variable-Query" + "qryType": 1, + "query": "label_values(kube_controller_pod{node=~\"$node\", namespace=~\"$namespace\"},controller)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, "regex": "", @@ -4258,10 +5102,11 @@ "$__all" ] }, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "definition": "label_values(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}, pod)", - "description": null, - "error": null, "hide": 0, "includeAll": true, "label": "Pod", @@ -4292,10 +5137,11 @@ "$__all" ] }, - "datasource": "$ds_prometheus", - "definition": "query_result(max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))", - "description": null, - "error": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "definition": "query_result((kube_controller_pod{namespace=\"$namespace\",controller=\"$controller\"}) * on(pod) group_right(controller_name,persistentvolumeclaims) max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__range]))", "hide": 2, "includeAll": true, "label": "PersistentVolumeClaim", @@ -4303,8 +5149,9 @@ "name": "persistentvolumeclaim", "options": [], "query": { - "query": 
"query_result(max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{node=~\"$node\", namespace=\"$namespace\", pod=~\"$pod\"}[$__range]))", - "refId": "main-persistentvolumeclaim-Variable-Query" + "qryType": 3, + "query": "query_result((kube_controller_pod{namespace=\"$namespace\",controller=\"$controller\"}) * on(pod) group_right(controller_name,persistentvolumeclaims) max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__range]))", + "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 2, "regex": "/.*persistentvolumeclaim=\"([^\"]+)\".*/", @@ -4349,5 +5196,6 @@ "timezone": "", "title": "Namespace / Controller", "uid": "IRPuf4ymk1", - "version": 2 + "version": 6, + "weekStart": "" } diff --git a/dashboards/main/namespace.json b/dashboards/main/namespace.json index eea95317..9a53033c 100644 --- a/dashboards/main/namespace.json +++ b/dashboards/main/namespace.json @@ -24,7 +24,6 @@ "editable": false, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "iteration": 1684513475244, "links": [], "liveNow": false, "panels": [ @@ -40,7 +39,10 @@ "mode": "thresholds" }, "custom": { - "displayMode": "auto", + "align": "auto", + "cellOptions": { + "type": "auto" + }, "filterable": false, "inspect": false, "minWidth": 70 @@ -607,7 +609,9 @@ "links": [], "maxPerRow": 6, "options": { + "cellHeight": "sm", "footer": { + "countRows": false, "fields": "", "reducer": [ "sum" @@ -617,10 +621,14 @@ "showHeader": true, "sortBy": [] }, - "pluginVersion": "8.5.13", + "pluginVersion": "10.2.2", "repeatDirection": "h", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "max by (job, namespace, controller, controller_name) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]))", "format": "table", "hide": false, @@ -631,6 +639,10 @@ "refId": "A" }, { + "datasource": { + "type": 
"prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (controller_type, controller_name) group_left() sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Off\"}[$__range]))) * 0\nor\nsum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (controller_type, controller_name) group_left() sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Initial\"}[$__range]))) * 0 + 1\nor\nsum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (controller_type, controller_name) group_left() sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Auto\"}[$__range]))) * 0 + 2\nor\nsum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (controller_type, controller_name) group_left() sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Recreate\"}[$__range]))) * 0 + 3\nor\nsum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) * 0 + 5", "format": "table", "hide": false, @@ -640,6 +652,10 @@ "refId": "B" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller) 
(avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (pod) group_left() sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__range])))\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, @@ -648,6 +664,10 @@ "refId": "C" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (pod) group_left()\n sum by (pod) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\"}[$__range]))\n )\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, @@ -656,6 +676,10 @@ "refId": "D" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (controller_type, controller_name) group_left()\n sum by(controller_type, controller_name) (avg_over_time(vpa_target_recommendation{container!=\"POD\",namespace=\"$namespace\", resource=\"cpu\"}[$__range]))\n ) \nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": 
true, @@ -664,6 +688,10 @@ "refId": "E" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\"}[$__range]))\n -\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range]))\n ) > 0\n )\n )\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, @@ -672,6 +700,10 @@ "refId": "F" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\"}[$__range]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range]))\n ) or sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range]))\n ) > 0\n )\n )\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", 
controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, @@ -680,6 +712,10 @@ "refId": "G" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (pod) group_left() ((sum by (pod) (rate(container_cpu_cfs_throttled_periods_total{node=~\"$node\", namespace=\"$namespace\"}[$__range])) / sum by (pod) (rate(container_cpu_cfs_periods_total{node=~\"$node\", namespace=\"$namespace\"}[$__range]))) * sum by (pod) (rate(kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\"}[$__range]))))\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, @@ -688,6 +724,10 @@ "refId": "H" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (pod) group_left() sum by (pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__range])))\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, @@ -696,6 +736,10 @@ "refId": "I" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", 
controller=~\"$controller\"}[$__range])\n * on (pod) group_left()\n sum by (namespace, pod)\n (\n avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__range])\n )\n )\n or\n count (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, @@ -704,6 +748,10 @@ "refId": "J" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (controller_type, controller_name) group_left()\n sum by(controller_type, controller_name) (avg_over_time(vpa_target_recommendation{container!=\"POD\",namespace=\"$namespace\", resource=\"memory\"}[$__range]))\n ) \n or \ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, @@ -712,6 +760,10 @@ "refId": "K" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=\"$namespace\"}[$__range]))\n -\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range]))\n ) > 0\n )\n 
)\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, @@ -720,6 +772,10 @@ "refId": "L" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller)\n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\"}[$__range]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range]))\n ) or sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range]))\n ) > 0\n )\n )\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, @@ -728,7 +784,12 @@ "refId": "M" }, { - "expr": "sum by(controller) ( # Data rate of the controller is a sum of data rates from its Pods.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} \n * on(pod)\n group_left() # Do not drop controller label from kube_controller_pod.\n sum by(pod) # Use sum if there are multiple interaces in the Pod.\n (\n rate(container_network_receive_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n * # Select only Pods with 'hostNetwork: false', because receive_bytes values are 
meaningful only for Pods with hostNetwork: false.\n on(pod)\n kube_pod_info{host_network=\"false\", namespace=\"$namespace\"}\n )\n)\nor # Return -1 value for Pods with 'hostNetwork: true' to rewrite by value mapping.\n# Use max to get one '-1' per controller\n(max by(controller) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on(pod)\n group_left() # Do not drop controller label from kube_controller_pod.\n (kube_pod_info{host_network=\"true\", namespace=\"$namespace\"})\n) * -1)", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum by(controller) ( # Data rate of the controller is a sum of data rates from its Pods.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} \n * on(pod)\n group_left() # Do not drop controller label from kube_controller_pod.\n sum by(pod) # Use sum if there are multiple interaces in the Pod.\n (\n max(rate(container_network_receive_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__range])) by(pod)\n * # Select only Pods with 'hostNetwork: false', because receive_bytes values are meaningful only for Pods with hostNetwork: false.\n on(pod)\n kube_pod_info{host_network=\"false\", namespace=\"$namespace\"}\n )\n)\nor # Return -1 value for Pods with 'hostNetwork: true' to rewrite by value mapping.\n# Use max to get one '-1' per controller\n(max by(controller) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on(pod)\n group_left() # Do not drop controller label from kube_controller_pod.\n (kube_pod_info{host_network=\"true\", namespace=\"$namespace\"})\n) * -1)", "format": "table", "instant": true, "intervalFactor": 1, @@ -736,7 +797,12 @@ "refId": "N" }, { - "expr": "sum by(controller) ( # Data rate of the controller 
is a sum of data rates from its Pods.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} \n * on(pod)\n group_left() # Do not drop controller label from kube_controller_pod.\n sum by(pod) # Use sum if there are multiple interaces in the Pod.\n (\n rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__range])\n * # Select only Pods with 'hostNetwork: false', because receive_bytes values are meaningful only for Pods with hostNetwork: false.\n on(pod)\n kube_pod_info{host_network=\"false\", namespace=\"$namespace\"}\n )\n)\nor # Return -1 value for Pods with 'hostNetwork: true' to rewrite by value mapping.\n# Use max to get one '-1' per controller\n(max by(controller) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on(pod)\n group_left() # Do not drop controller label from kube_controller_pod.\n (kube_pod_info{host_network=\"true\", namespace=\"$namespace\"})\n) * -1)", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum by(controller) ( # Data rate of the controller is a sum of data rates from its Pods.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} \n * on(pod)\n group_left() # Do not drop controller label from kube_controller_pod.\n sum by(pod) # Use sum if there are multiple interaces in the Pod.\n (\n max(rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__range])) by(pod)\n * # Select only Pods with 'hostNetwork: false', because receive_bytes values are meaningful only for Pods with hostNetwork: false.\n on(pod)\n kube_pod_info{host_network=\"false\", namespace=\"$namespace\"}\n )\n)\nor # Return -1 value for Pods with 'hostNetwork: true' to rewrite by value mapping.\n# Use 
max to get one '-1' per controller\n(max by(controller) (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on(pod)\n group_left() # Do not drop controller label from kube_controller_pod.\n (kube_pod_info{host_network=\"true\", namespace=\"$namespace\"})\n) * -1)", "format": "table", "instant": true, "intervalFactor": 1, @@ -744,6 +810,10 @@ "refId": "O" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (pod) group_left() sum by (pod) (rate(container_fs_reads_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range])))\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, @@ -752,6 +822,10 @@ "refId": "P" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (pod) group_left() sum by (pod) (rate(container_fs_writes_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__range])))\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, @@ -760,6 +834,10 @@ "refId": "Q" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "max by (controller) (max by (namespace, controller_type, controller_name, controller) 
(avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) * on (namespace, controller_type, controller_name) group_left() max by (namespace, controller_type, controller_name) (avg_over_time(kube_controller_replicas{node=~\"$node\", namespace=\"$namespace\"}[$__range])))\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "hide": false, @@ -769,6 +847,10 @@ "refId": "R" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "max by (job, namespace, controller_type, controller_name, controller) ((max by (job, namespace, controller_type, controller_name, controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]))) * on (job, namespace, controller_type, controller_name) group_right(controller)\n(avg_over_time(kube_controller_replicas[$__range]) - avg_over_time(kube_controller_replicas_ready[$__range])))\nor\nmax by (job, namespace, controller_type, controller_name, controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) * 0", "format": "table", "hide": false, @@ -778,6 +860,10 @@ "refId": "S" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller) \n (\n avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])\n * on (pod) group_left()\n sum by (pod) (increase(kube_pod_container_status_restarts_total{node=~\"$node\", namespace=\"$namespace\"}[$__range]))\n )\nor\ncount 
(avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "hide": false, @@ -787,6 +873,10 @@ "refId": "T" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (controller) (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range]) * on (pod) group_left() sum by (pod) (avg_over_time(container_memory:kmem{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__range])))\nor\ncount (avg_over_time(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}[$__range])) by (controller) * 0", "format": "table", "instant": true, @@ -845,355 +935,404 @@ "type": "table" }, { - "cards": { - "cardHSpacing": 2, - "cardMinWidth": 5, - "cardVSpacing": 2 - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateGnYlRd", - "defaultColor": "#757575", - "exponent": 0.5, - "mode": "discrete", - "thresholds": [ - { - "color": "#bf1b00", - "tooltip": "100% not ready", - "value": "0" - }, - { - "color": "#508642", - "tooltip": "100% ready", - "value": "1" - }, - { - "color": "#ea6460", - "tooltip": "60-99% not ready", - "value": "2" - }, - { - "color": "#e5ac0e", - "tooltip": "30-59% not ready", - "value": "3" - }, - { - "color": "#f4d598", - "tooltip": "1-29% not ready", - "value": "4" - } - ] - }, "datasource": { "uid": "$ds_prometheus" }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "fillOpacity": 70, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineWidth": 0, + "spanNulls": false + }, + "decimals": 0, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "percentage", 
+ "steps": [ + { + "color": "transparent", + "value": null + }, + { + "color": "dark-red", + "value": 0 + }, + { + "color": "light-red", + "value": 30 + }, + { + "color": "semi-dark-yellow", + "value": 60 + }, + { + "color": "green", + "value": 100 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, "gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 }, - "hideBranding": false, - "highlightCards": true, "id": 223, - "legend": { - "show": true - }, "links": [], - "nullPointMode": "as empty", - "pageSize": 15, - "seriesFilterIndex": -1, - "statusmap": { - "ConfigVersion": "v1" + "options": { + "alignValue": "left", + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "mergeValues": false, + "rowHeight": 0.9, + "showValue": "never", + "tooltip": { + "mode": "single", + "sort": "none" + } }, "targets": [ { - "expr": "((max by (job, namespace, controller_type, controller_name, controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"})) * on (job, namespace, controller_type, controller_name) group_right(controller) \n(kube_controller_replicas_ready/kube_controller_replicas == 1)) * 1", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "kube_controller_replicas_ready{namespace=~\"$namespace\", controller_type=~\"$controller_type\", controller_type != \"ReplicaSet\", controller_name=~\"$controller\"}/kube_controller_replicas{namespace=~\"$namespace\", controller_type=~\"$controller_type\", controller_type != \"ReplicaSet\", controller_name=~\"$controller\"}*100", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{ controller }}", + "legendFormat": "{{controller_type}}/{{ controller_name }}", + "range": true, "refId": "A" - }, - { - "expr": "((max by (job, namespace, controller_type, controller_name, controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", 
controller_type=~\"$controller_type\", controller=~\"$controller\"})) * on (job, namespace, controller_type, controller_name) group_right(controller) \n((kube_controller_replicas_ready/kube_controller_replicas > 0) and (kube_controller_replicas_ready/kube_controller_replicas < 0.3)) > bool 0) * 2", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ controller }}", - "refId": "B" - }, - { - "expr": "((max by (job, namespace, controller_type, controller_name, controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"})) * on (job, namespace, controller_type, controller_name) group_right(controller) \n((kube_controller_replicas_ready/kube_controller_replicas >= 0.3) and (kube_controller_replicas_ready/kube_controller_replicas < 0.6)) > bool 0) * 3", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ controller }}", - "refId": "C" - }, - { - "expr": "((max by (job, namespace, controller_type, controller_name, controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"})) * on (job, namespace, controller_type, controller_name) group_right(controller) \n((kube_controller_replicas_ready/kube_controller_replicas >= 0.6) and (kube_controller_replicas_ready/kube_controller_replicas < 1)) > bool 0) * 4", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ controller }}", - "refId": "D" - }, - { - "expr": "((max by (job, namespace, controller_type, controller_name, controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"})) * on (job, namespace, controller_type, controller_name) group_right(controller)\n(kube_controller_replicas_ready/kube_controller_replicas == 0)) * 5", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ controller }}", - 
"refId": "E" } ], "title": "Status", - "tooltip": { - "extraInfo": "", - "freezeOnClick": true, - "items": [], - "show": true, - "showExtraInfo": false, - "showItems": false - }, - "type": "flant-statusmap-panel", - "useMax": true, - "usingPagination": false, - "xAxis": { - "show": true - }, - "yAxis": { - "maxWidth": -1, - "minWidth": -1, - "show": true - }, - "yAxisSort": "metrics", - "yLabel": { - "delimiter": "", - "labelTemplate": "", - "usingSplitLabel": false - } + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "StatefulSet", + "renamePattern": "sts" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "DaemonSet", + "renamePattern": "ds" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "Deployment", + "renamePattern": "deploy" + } + } + ], + "type": "state-timeline" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, - "decimals": 0, "description": "The number of Pods controlled by the Controller", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 
+ } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 4, "w": 12, "x": 0, "y": 16 }, - "hiddenSeries": false, "id": 794, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.13", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (controller) \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (max_over_time(kube_pod_info{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" - }, - { - "expr": "sum \n (\n 
kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (max_over_time(kube_pod_info{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeRegions": [], "title": "Pods count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "decimals": 0, - "format": "short", - "label": "", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, - "decimals": 0, "description": "The number of Pod restarts", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - 
"overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 4, "w": 12, "x": 12, "y": 16 }, - "hiddenSeries": false, "id": 661, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.13", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (controller) \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (increase(kube_pod_container_status_restarts_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" - }, - { - "expr": "sum \n (\n 
kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (increase(kube_pod_container_status_restarts_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeRegions": [], "title": "Pods restarts", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "decimals": 0, - "format": "short", - "label": "", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { "collapsed": false, @@ -1213,182 +1352,300 @@ "type": "row" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": 
"short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 21 }, - "hiddenSeries": false, "id": 6, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.13", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" - }, - { - "expr": "sum (sum by (controller) 
(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeRegions": [], "title": "Usage by controller", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, "description": "The total value may not equal the sum of system and user times because of the kernel's cgroup accounting peculiarities. 
Read more here: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "System" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#e24d42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "User" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#1f78c1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 21 }, - "hiddenSeries": false, "id": 10, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - 
"rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.13", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, - { - "alias": "System", - "color": "#e24d42" - }, - { - "alias": "User", - "color": "#1f78c1" + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum (sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_system_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))))", "format": "time_series", "intervalFactor": 1, @@ -1396,378 +1653,448 @@ "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum (sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_user_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))))", "format": "time_series", "intervalFactor": 1, "legendFormat": "User", "refId": "B" - }, - { - "expr": "sum (sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", 
controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "D" } ], - "thresholds": [], - "timeRegions": [], "title": "Usage by state", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, "description": "This graph shows the requested CPU resources higher than the actual CPU consumption. 
In other words, it shows CPU resources that can be \"freed\" without affecting the service.", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 30 }, - "hiddenSeries": false, "id": 404, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.13", - "pointradius": 5, - "points": false, - "renderer": 
"flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (controller)\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" - }, - { - "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeRegions": [], "title": "Over-requested by controller", - "tooltip": { - "shared": true, - "sort": 
0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "$$hashKey": "object:149", - "format": "short", - "label": "cores", - "logBase": 1, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:150", - "format": "short", - "logBase": 1, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, "description": "This graph shows the requested CPU resources lower than the actual CPU consumption. In other words, it shows CPU resources that need to be \"reserved\" for the service to run smoothly.", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + 
{ + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 30 }, - "hiddenSeries": false, "id": 538, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.13", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (controller)\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", "format": 
"time_series", "instant": false, "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" - }, - { - "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeRegions": [], "title": "Under-requested by controller", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, "description": "The absence of data on the graph means that container resources are not set", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 
0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 7, "w": 24, "x": 0, "y": 39 }, - "hiddenSeries": false, "id": 262, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.13", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + 
"pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() ((sum by (pod) (rate(container_cpu_cfs_throttled_periods_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])) / sum by (pod) (rate(container_cpu_cfs_periods_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))) * sum by (pod) (kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\"})))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" - }, - { - "expr": "sum (sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() ((sum by (pod) (rate(container_cpu_cfs_throttled_periods_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])) / sum by (pod) (rate(container_cpu_cfs_periods_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))) * sum by (pod) (kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=\"$namespace\"}))))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeRegions": [], "title": "Throttling", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { "collapsed": true, @@ -1784,15 
+2111,127 @@ "id": 343, "panels": [ { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "", - "fill": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Usage" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#629e51", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#f4d598", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#c15c17", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VPA Target" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#447ebc", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 9, "w": 6, @@ -1800,48 +2239,22 @@ "y": 47 }, "id": 341, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, 
- "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.13", "repeat": "controller", "repeatDirection": "h", - "seriesOverrides": [ - { - "alias": "Usage", - "color": "#629e51" - }, - { - "alias": "Requests", - "color": "#f4d598" - }, - { - "alias": "Limits", - "color": "#c15c17" - }, - { - "alias": "VPA Target", - "color": "#447ebc" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": true, "targets": [ { "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval])))", @@ -1872,37 +2285,8 @@ "refId": "F" } ], - "thresholds": [], - "timeRegions": [], "title": "$controller", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "cores", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } + "type": "timeseries" } ], "title": "Controllers CPU", @@ -1923,15 +2307,128 @@ "id": 12, "panels": [ { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, "description": "The total value may not equal the sum of system and user times because of the kernel's cgroup accounting peculiarities. 
Read more here: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt", - "fill": 1, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "line" + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "System" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#e24d42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "User" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#1f78c1", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 9, "w": 6, @@ -1939,47 +2436,28 @@ "y": 48 }, "id": 14, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, 
- "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", "repeat": "controller", "repeatDirection": "h", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false - }, - { - "alias": "System", - "color": "#e24d42" - }, - { - "alias": "User", - "color": "#1f78c1" - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_system_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval])))", "format": "time_series", "interval": "", @@ -1988,51 +2466,25 @@ "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_user_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "User", "refId": "B" - }, - { - "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval])))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "D" } ], - "thresholds": [], - "timeRegions": 
[], "title": "$controller", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" } ], "title": "Controllers CPU by state", @@ -2056,59 +2508,114 @@ "type": "row" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 9, 
"w": 12, "x": 0, "y": 49 }, - "hiddenSeries": false, "id": 40, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.13", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "lines": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { "datasource": { @@ -2120,118 +2627,176 @@ "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds_prometheus" - }, - "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n )", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeRegions": [], "title": "Usage by controller", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "$$hashKey": "object:205", - "format": "bytes", - "logBase": 1, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:206", - "format": "short", - "logBase": 1, - "show": false 
+ "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "The Working set bytes metric is the actual memory used by the container, as it includes active file memory. When its value approaches the limit, the container can be killed by the OOM killer. This value can be higher than the sum RSS and Cache since not all active file memory is Cache.", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Working set bytes without kmem" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "rgb(0, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Kmem" + }, + 
"properties": [ + { + "id": "color", + "value": { + "fixedColor": "rgb(255, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + } + ] + }, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 49 }, - "hiddenSeries": false, "id": 41, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.13", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Working set bytes without kmem", - "color": "rgb(0, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false }, - { - "alias": "Kmem", - "color": "rgb(255, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod) group_left()\n sum by (pod) (avg_over_time(container_memory_rss{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n )", @@ -2271,92 +2836,119 @@ "refId": "E" } ], - "thresholds": [], - "timeRegions": [], "title": "Usage by state", - "tooltip": { - "shared": true, - "sort": 0, - 
"value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": false - } - ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds_prometheus" }, "description": "This graph shows the requested Memory resources higher than the actual Memory consumption. In other words, it shows Memory resources that can be \"freed\" without affecting the service.", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 58 }, - 
"hiddenSeries": false, "id": 489, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.13", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "lines": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { "datasource": { @@ -2368,166 +2960,152 @@ "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds_prometheus" - }, - "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeRegions": [], "title": "Over-requested by controller", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] 
- }, - "yaxes": [ + "transformations": [ { - "$$hashKey": "object:95", - "format": "bytes", - "logBase": 1, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:96", - "format": "short", - "logBase": 1, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, "description": "This graph shows the requested Memory resources higher than the actual Memory consumption. In other words, it shows Memory resources that need to be \"reserved\" for the service to run smoothly.", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, 
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 58 }, - "hiddenSeries": false, "id": 575, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.13", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "lines": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "sum by (controller)\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "expr": "sum by (controller)\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) 
group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n )\n or\n (\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n +\n sum by(namespace, pod, container) (avg_over_time(container_memory:kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n )\n ) > 0\n )\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" - }, - { - "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" - }, - { - "expr": "sum by (controller)\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) 
group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (avg_over_time(container_memory:kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ controller }}", - "refId": "C" - }, - { - "expr": "sum\n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (namespace, pod) group_left()\n sum by (namespace, pod)\n (\n (\n (\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (avg_over_time(container_memory:kmem{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "D" } ], - "thresholds": [], - "timeRegions": [], "title": "Under-requested by controller", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "bytes", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": 
false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { "collapsed": true, @@ -2544,15 +3122,267 @@ "id": 43, "panels": [ { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "The Working set bytes metric is the actual memory used by the container, as it includes active file memory. When its value approaches the limit, the container can be killed by the OOM killer. This value can be higher than the sum RSS and Cache since not all active file memory is Cache.", - "fill": 1, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Working set bytes without kmem" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "rgb(0, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" 
+ } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Kmem" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "rgb(255, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VPA Target" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#447ebc", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#f4d598", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#c15c17", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + } + ] + }, "gridPos": { "h": 9, "w": 6, @@ -2560,75 +3390,22 @@ "y": 67 }, "id": 44, - "legend": { - 
"alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.13", "repeat": "controller", "repeatDirection": "h", - "seriesOverrides": [ - { - "alias": "Working set bytes without kmem", - "color": "rgb(0, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false - }, - { - "alias": "Kmem", - "color": "rgb(255, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false - }, - { - "alias": "VPA Target", - "color": "#447ebc", - "dashes": true, - "fill": 0, - "lines": false, - "linewidth": 2, - "stack": false - }, - { - "alias": "Requests", - "color": "#f4d598", - "dashes": true, - "fill": 0, - "lines": false, - "linewidth": 2, - "stack": false - }, - { - "alias": "Limits", - "color": "#c15c17", - "dashes": true, - "fill": 0, - "lines": false, - "linewidth": 2, - "stack": false - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, "targets": [ { "expr": "sum \n (\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=\"$controller\"}\n * on (pod) group_left() \n sum by (pod) (avg_over_time(container_memory_rss{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))\n )", @@ -2689,36 +3466,8 @@ "refId": "H" } ], - "thresholds": [], - "timeRegions": [], "title": "$controller", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "min": 
"0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": false - } - ], - "yaxis": { - "align": false - } + "type": "timeseries" } ], "title": "Controllers Memory", @@ -2742,206 +3491,288 @@ "type": "row" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, "description": "This graph shows Network Receive (except for the hostNetwork Pods)", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 68 }, - "hiddenSeries": false, "id": 79, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": 
"avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 2, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.13", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "# Data rate for the controller is a sum of data rates of its Pods.\nsum by (controller) (\n # Select Pods by controller_type and controller.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} \n * on (pod)\n group_left() # Preserve controller label from the 'kube_controller_pod' metric.\n (\n # Select Pods with hostNetwork: false.\n kube_pod_info{host_network=\"false\",namespace=\"$namespace\"}\n * on(pod)\n # Sum data rate for all interfaces in the Pod. 
\n sum by (pod) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n )\n)", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "# Data rate for the controller is a sum of data rates of its Pods.\nsum by (controller) (\n # Select Pods by controller_type and controller.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"}\n * on (pod)\n group_left() # Preserve controller label from the 'kube_controller_pod' metric.\n (\n # Select Pods with hostNetwork: false.\n max(kube_pod_info{host_network=\"false\",namespace=\"$namespace\"}) by(pod)\n * on(pod)\n # Sum data rate for all interfaces in the Pod. \n sum by (pod) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", + "range": true, "refId": "A" - }, - { - "expr": "# Total is a sum of data rates of all Pods in selected containers.\nsum (\n # Select Pods by controller_type and controller.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} \n * on (pod)\n # Select Pods with hostNetwork: false.\n kube_pod_info{host_network=\"false\",namespace=\"$namespace\"}\n * on(pod)\n # Sum data rate for all interfaces in the Pod. 
\n sum by (pod) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeRegions": [], "title": "Receive", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "Bps", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, "description": "This graph shows Network Transmit (except for the hostNetwork Pods)", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + 
"value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 68 }, - "hiddenSeries": false, "id": 240, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 2, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.13", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "# Data rate for the controller is a sum of data rates of its Pods.\nsum by (controller) (\n # Select Pods by controller_type and controller.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} \n * on (pod)\n group_left() # Preserve controller label from the 'kube_controller_pod' metric.\n (\n # Select Pods with hostNetwork: false.\n kube_pod_info{host_network=\"false\",namespace=\"$namespace\"}\n * on(pod)\n # Sum data rate for all interfaces in the Pod. 
\n sum by (pod) (rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n )\n)", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "# Data rate for the controller is a sum of data rates of its Pods.\nsum by (controller) (\n # Select Pods by controller_type and controller.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} \n * on (pod)\n group_left() # Preserve controller label from the 'kube_controller_pod' metric.\n (\n # Select Pods with hostNetwork: false.\n max(kube_pod_info{host_network=\"false\",namespace=\"$namespace\"}) by(pod)\n * on(pod)\n # Sum data rate for all interfaces in the Pod. \n sum by (pod) (rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", + "range": true, "refId": "A" - }, - { - "expr": "# Total is a sum of data rates of all Pods in selected containers.\nsum (\n # Select Pods by controller_type and controller.\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} \n * on (pod)\n # Select Pods with hostNetwork: false.\n kube_pod_info{host_network=\"false\",namespace=\"$namespace\"}\n * on(pod)\n # Sum data rate for all interfaces in the Pod. 
\n sum by (pod) (rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]))\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeRegions": [], "title": "Transmit", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "Bps", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { "collapsed": false, @@ -2961,204 +3792,278 @@ "type": "row" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "iops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + 
"value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 77 }, - "hiddenSeries": false, "id": 156, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.13", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_fs_reads_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" - }, - { - "expr": "sum (sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_fs_reads_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))))", - "format": "time_series", - 
"intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeRegions": [], "title": "Read", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "iops", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "iops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + 
] + } + ] + }, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 77 }, - "hiddenSeries": false, "id": 241, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.5.13", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_fs_writes_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ controller }}", "refId": "A" - }, - { - "expr": "sum (sum by (controller) (kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller_type=~\"$controller_type\", controller=~\"$controller\"} * on (pod) group_left() sum by (pod) (rate(container_fs_writes_total{node=~\"$node\", container!=\"POD\", namespace=\"$namespace\"}[$__rate_interval]))))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeRegions": [], "title": "Write", - "tooltip": { - "shared": true, - "sort": 0, - 
"value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "iops", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { "collapsed": false, @@ -3178,62 +4083,125 @@ "type": "row" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "controller" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" 
+ } + } + ] + } + ] + }, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 85 }, - "hiddenSeries": false, "id": 598, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Total/", - "fill": 0, - "linewidth": 2, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace, controller) (\n max by (namespace, controller, persistentvolumeclaim)\n (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n\n * on (namespace, persistentvolumeclaim) group_left(pod) \n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", 
controller=~\"$controller\"} \n )\n)", "format": "time_series", "intervalFactor": 1, @@ -3241,207 +4209,244 @@ "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{namespace=\"$namespace\"}[$__rate_interval])\n ) \n * \n sum by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n\n )\n unless\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n) \nand (count (kube_node_info) == count (kube_node_info{node=~\"$node\"}))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Unused PVC", "refId": "B" - }, - { - "expr": "sum (\n sum by (namespace, controller) (\n max by (namespace, controller, persistentvolumeclaim)\n (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n\n )\n\n * on (namespace, persistentvolumeclaim) group_left(pod) \n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", 
namespace=\"$namespace\", controller=~\"$controller\"} \n )\n )\n)\n+\n(sum (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{namespace=\"$namespace\"}[$__rate_interval])\n ) \n * \n sum by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n\n )\n unless\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n) * (count (kube_node_info) == bool count (kube_node_info{node=~\"$node\"})))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "C" } ], - "thresholds": [], - "timeRegions": [], "title": "PVC Provisioned (except local storage classes)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "bytes", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "controller" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 85 }, - "hiddenSeries": false, "id": 596, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Total/", - "fill": 0, - "linewidth": 2, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", 
"targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace, controller) (\n max by (namespace, controller, persistentvolumeclaim)\n (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]) - max_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n\n * on (namespace, persistentvolumeclaim) group_left(pod) \n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"} \n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Usage {{ controller }}", "refId": "A" - }, - { - "expr": "sum(\n sum by (namespace, controller) (\n max by (namespace, controller, persistentvolumeclaim)\n (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]) - max_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 
0)\n )\n\n * on (namespace, persistentvolumeclaim) group_left(pod) \n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"} \n )\n )\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeRegions": [], "title": "PVC Usage (except local storage classes)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "bytes", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "uid": "$ds_prometheus" }, "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": 
"off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, "gridPos": { "h": 7, "w": 24, "x": 0, "y": 92 }, - "hiddenSeries": false, "id": 597, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Total .*/", - "fill": 0, - "linewidth": 2 + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { "expr": "sum by (namespace, controller) (\n max by (namespace, controller, persistentvolumeclaim)\n (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval]) - max_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n\n * on (namespace, persistentvolumeclaim) group_left(pod) \n max by (namespace, persistentvolumeclaim, pod) (\n 
max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"} \n )\n)\n/\nsum by (namespace, controller) (\n max by (namespace, controller, persistentvolumeclaim)\n (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n\n * on (namespace, persistentvolumeclaim) group_left(pod) \n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__rate_interval])\n ) \n\n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"} \n )\n)", @@ -3451,36 +4456,8 @@ "refId": "A" } ], - "thresholds": [], - "timeRegions": [], "title": "PVC Usage in % (except local storage classes)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": false - } - ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { "collapsed": true, @@ -3506,7 +4483,11 @@ "mode": "thresholds" }, "custom": { - "displayMode": "auto" + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false }, "decimals": 2, "displayName": "", @@ -3515,7 +4496,8 
@@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3778,9 +4760,18 @@ "id": 600, "links": [], "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, "showHeader": true }, - "pluginVersion": "8.2.6", + "pluginVersion": "10.2.2", "targets": [ { "expr": "max by (persistentvolumeclaim, storageclass) (\n max by (namespace, persistentvolumeclaim) (\n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__range])\n ) \n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"}\n )\n* on (namespace, persistentvolumeclaim) group_right() kube_persistentvolumeclaim_info{namespace=\"$namespace\"})", @@ -3891,118 +4882,140 @@ "type": "table" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { + "type": "prometheus", "uid": "$ds_prometheus" }, "description": "", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": 
"bytes" + }, + "overrides": [] + }, "gridPos": { "h": 7, "w": 24, "x": 0, "y": 107 }, - "hiddenSeries": false, "id": 603, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", + "pluginVersion": "8.5.13", "repeat": "persistentvolumeclaim", "repeatDirection": "h", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { - "expr": "\n\nmax by (persistentvolumeclaim) (\n max by (namespace, persistentvolumeclaim) (\n max by (namespace, persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__range])\n ) \n * on (namespace, pod) group_left(controller)\n kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"}\n )\n)\n* on (persistentvolumeclaim) group_right()\nmax by (persistentvolumeclaim) (\n max_over_time(kube_persistentvolume_capacity_bytes[$__rate_interval]) \n * on (persistentvolume) group_right() \n (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\", persistentvolumeclaim=~\"$persistentvolumeclaim\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n )\n)", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "sum(max by (persistentvolumeclaim) (\n max by (namespace, persistentvolumeclaim) (\n max by (namespace, 
persistentvolumeclaim, pod) (\n max_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\"}[$__range])\n ) \n * on (namespace, pod) group_left(controller)\n max(kube_controller_pod{node=~\"$node\", namespace=\"$namespace\", controller=~\"$controller\"}) by(namespace, pod)\n )\n)\n* on (persistentvolumeclaim) group_right()\nmax by (persistentvolumeclaim) (\n max_over_time(kube_persistentvolume_capacity_bytes[$__rate_interval]) \n * on (persistentvolume) group_right() \n (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\", persistentvolumeclaim=~\"$persistentvolumeclaim\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n )\n))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Provisioned", + "range": true, "refId": "A" }, { - "expr": "max by (persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\", persistentvolumeclaim=\"$persistentvolumeclaim\"}[$__rate_interval])\n)", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "sum(max by (persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\", persistentvolumeclaim=\"$persistentvolumeclaim\"}[$__rate_interval])\n))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Capacity", + "range": true, "refId": "B" }, { - "expr": "max by (persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\", persistentvolumeclaim=\"$persistentvolumeclaim\"}[$__rate_interval]) - max_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\", persistentvolumeclaim=\"$persistentvolumeclaim\"}[$__rate_interval])\n)", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "sum(max by 
(persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=\"$namespace\", persistentvolumeclaim=\"$persistentvolumeclaim\"}[$__rate_interval]) - max_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=\"$namespace\", persistentvolumeclaim=\"$persistentvolumeclaim\"}[$__rate_interval])\n))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Used", + "range": true, "refId": "C" } ], - "thresholds": [], - "timeRegions": [], "title": "$persistentvolumeclaim", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 2, - "format": "bytes", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": false - } - ], - "yaxis": { - "align": false - } + "type": "timeseries" } ], "title": "PVC Detailed", "type": "row" } ], - "refresh": "", - "schemaVersion": 36, - "style": "dark", + "refresh": false, + "schemaVersion": 38, "tags": [ "main" ], @@ -4094,13 +5107,9 @@ { "allValue": ".*", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - ".*" - ] + "selected": false, + "text": "All", + "value": "$__all" }, "datasource": { "type": "prometheus", @@ -4226,6 +5235,17 @@ "tagsQuery": "", "type": "query", "useTags": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "filters": [], + "hide": 0, + "name": "Filters", + "skipUrlSync": false, + "type": "adhoc" } ] }, diff --git a/dashboards/main/namespaces.json b/dashboards/main/namespaces.json index 19292d3b..bfbaf781 100644 --- a/dashboards/main/namespaces.json +++ b/dashboards/main/namespaces.json @@ -3,7 +3,10 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", @@ -20,22 +23,26 
@@ }, "editable": false, "fiscalYearStartMonth": 0, - "gnetId": null, "graphTooltip": 1, - "iteration": 1638543860824, + "id": 61, "links": [], "liveNow": false, "panels": [ { "columns": [], - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "Note that this table shows the average values for the entire period selected in the dashboard. Consequently, it may contain information about Pods or namespaces that were changed or deleted during the selected period.", "fieldConfig": { "defaults": { "custom": { - "align": null, - "displayMode": "auto", + "align": "auto", + "cellOptions": { + "type": "auto" + }, "filterable": false, + "inspect": false, "minWidth": 50 }, "mappings": [], @@ -69,8 +76,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" }, { "id": "displayName", @@ -114,8 +120,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" }, { "id": "mappings", @@ -142,8 +147,7 @@ "value": 3 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -166,8 +170,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -190,8 +193,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -214,8 +216,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -238,8 +239,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -262,8 +262,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -286,8 +285,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -310,8 +308,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -334,8 +331,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -358,8 +354,7 @@ "value": 2 }, { - "id": "custom.align", - "value": 
null + "id": "custom.align" } ] }, @@ -382,8 +377,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -406,8 +400,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -430,8 +423,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -454,8 +446,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -478,8 +469,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -498,8 +488,7 @@ "value": "short" }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -518,8 +507,7 @@ "value": "short" }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -538,8 +526,7 @@ "value": "short" }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -567,10 +554,19 @@ "id": 4, "links": [], "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, "showHeader": true }, "pageSize": 50, - "pluginVersion": "8.2.3", + "pluginVersion": "10.2.2", "scroll": true, "showHeader": true, "sort": { @@ -588,7 +584,6 @@ { "alias": "Namespace", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -609,7 +604,6 @@ { "alias": "VPA %", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -626,7 +620,6 @@ { "alias": "CPU", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -643,7 +636,6 @@ { "alias": "Req CPU", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -660,7 +652,6 @@ { "alias": "Over-req CPU", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -677,7 +668,6 @@ { "alias": "Under-req 
CPU", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -694,7 +684,6 @@ { "alias": "Throttling cores", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -711,7 +700,6 @@ { "alias": "Memory", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -728,7 +716,6 @@ { "alias": "Req Memory", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -745,7 +732,6 @@ { "alias": "Over-req Memory", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -762,7 +748,6 @@ { "alias": "Under-req Memory", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -779,7 +764,6 @@ { "alias": "RX Network", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -796,7 +780,6 @@ { "alias": "TX Network", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -813,7 +796,6 @@ { "alias": "Read IOPS", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -830,7 +812,6 @@ { "alias": "Write IOPS", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -847,7 +828,6 @@ { "alias": "Pod Restarts", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -864,7 +844,6 @@ { "alias": "", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -1062,246 +1041,279 @@ "type": "table" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", - "decimals": 0, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, 
"description": "The number of Pods in each namespace", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 4, "w": 12, "x": 0, "y": 8 }, - "hiddenSeries": false, "id": 332, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "current", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - 
"alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "count by (namespace) (max_over_time(kube_pod_info{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "B" - }, - { - "expr": "count(kube_pod_info{node=~\"$node\", namespace=~\"$namespace\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Pods count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "decimals": 0, - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", - "decimals": 0, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "The number of Pod restarts", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, 
+ "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 4, "w": 12, "x": 12, "y": 8 }, - "hiddenSeries": false, "id": 564, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", 
"targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace) (increase(kube_pod_container_status_restarts_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "B" - }, - { - "expr": "sum (increase(kube_pod_container_status_restarts_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Pods restarts", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "decimals": 0, - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -1314,187 +1326,296 @@ "type": "row" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": 
false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 13 }, - "hiddenSeries": false, "id": 6, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": 
"$ds_prometheus" + }, "expr": "sum by (namespace) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=~\"$namespace\", container!=\"POD\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "A" - }, - { - "expr": "sum (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=~\"$namespace\", container!=\"POD\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Usage by namespace", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "The total value may not equal the sum of system and user times because of the kernel's cgroup accounting peculiarities. 
Read more here: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "System" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#e24d42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "User" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#1f78c1", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 13 }, - "hiddenSeries": false, "id": 10, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - 
"sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, - { - "alias": "System", - "color": "#e24d42" - }, - { - "alias": "User", - "color": "#1f78c1" + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum (rate(container_cpu_system_seconds_total{node=~\"$node\", namespace=~\"$namespace\", container!=\"POD\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, @@ -1502,413 +1623,443 @@ "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum (rate(container_cpu_user_seconds_total{node=~\"$node\", namespace=~\"$namespace\", container!=\"POD\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "User", "refId": "B" - }, - { - "expr": "sum (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=~\"$namespace\", container!=\"POD\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "D" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Usage by state", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": 
"short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph shows the requested CPU resources higher than the actual CPU consumption. In other words, it shows CPU resources that can be \"freed\" without affecting the service.", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - 
"fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 22 }, - "hiddenSeries": false, "id": 333, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace)\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=~\"$namespace\"}[$__rate_interval]))\n ) > 0\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "B" - }, - { - "expr": "sum\n(\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=~\"$namespace\"}[$__rate_interval]))\n ) > 0\n )\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": 
"Total", - "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Over-requested by namespace", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph shows the requested CPU resources lower than the actual CPU consumption. 
In other words, it shows CPU resources that need to be \"reserved\" for the service to run smoothly.", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 22 }, - "hiddenSeries": false, "id": 386, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": 
"Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace)\n (\n (\n (\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=~\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=~\"$namespace\"}[$__rate_interval]))\n )\n > 0\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "B" - }, - { - "expr": "sum\n (\n (\n (\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=~\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{node=~\"$node\", container!=\"POD\", namespace=~\"$namespace\"}[$__rate_interval]))\n )\n > 0\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Under-requested by namespace", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { 
- "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "The absence of data on the graph means that container resources are not set", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + 
} + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 5, "w": 24, "x": 0, "y": 29 }, - "hiddenSeries": false, "id": 315, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace) ((sum by (namespace, pod) (rate(container_cpu_cfs_throttled_periods_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by (namespace, pod) (rate(container_cpu_cfs_periods_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))) * sum by (namespace, pod) (kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=~\"$namespace\"}))", "format": "time_series", "interval": "", "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "A" - }, - { - "expr": "sum(sum by (namespace) ((sum by (namespace, pod) (rate(container_cpu_cfs_throttled_periods_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by (namespace, pod) (rate(container_cpu_cfs_periods_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))) * sum by (namespace, pod) 
(kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",node=~\"$node\", namespace=~\"$namespace\"})))", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Throttling", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -1918,62 +2069,145 @@ "id": 343, "panels": [ { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "", - "fill": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + 
"color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Usage" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#629e51", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#f4d598", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#c15c17", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VPA Target" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#447ebc", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 9, "w": 6, "x": 0, - "y": 40 + "y": 35 }, "id": 352, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.13", "repeat": "namespace", "repeatDirection": "h", - "seriesOverrides": [ - { - "alias": "Usage", - "color": "#629e51" - }, - { - "alias": "Requests", - "color": "#f4d598" - }, - { - "alias": "Limits", - "color": "#c15c17" - }, - { - "alias": "VPA Target", - "color": "#447ebc" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": true, "targets": [ { "expr": "sum by (namespace) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))", @@ -2005,46 +2239,8 @@ "refId": "D" } ], - 
"thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "$namespace", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" } ], "title": "Namespaces CPU", @@ -2052,7 +2248,10 @@ }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -2062,61 +2261,152 @@ "id": 12, "panels": [ { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "The total value may not equal the sum of system and user times because of the kernel's cgroup accounting peculiarities. 
Read more here: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt", - "fill": 1, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "System" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#e24d42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "User" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#1f78c1", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 9, "w": 6, "x": 0, - "y": 41 + "y": 36 }, "id": 14, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": 
false, - "renderer": "flot", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", "repeat": "namespace", "repeatDirection": "h", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false - }, - { - "alias": "System", - "color": "#e24d42" - }, - { - "alias": "User", - "color": "#1f78c1" - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace) (rate(container_cpu_system_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))", "format": "time_series", "interval": "", @@ -2125,69 +2415,36 @@ "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace) (rate(container_cpu_user_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "User", "refId": "B" - }, - { - "expr": "sum by (namespace) (rate(container_cpu_usage_seconds_total{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "D" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "$namespace", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": 
"calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" } ], - "repeat": null, "title": "Namespaces CPU by state", "type": "row" }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -2200,189 +2457,301 @@ "type": "row" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 37 }, - "hiddenSeries": false, "id": 40, - "legend": { - "alignAsTable": false, 
- "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "lines": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=~\"$namespace\", container!=\"POD\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "A" - }, - { - "expr": "sum (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", namespace=~\"$namespace\", container!=\"POD\"}[$__rate_interval]))", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Usage by namespace", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + 
"options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "The Working set bytes metric is the actual memory used by the container, as it includes active file memory. When its value approaches the limit, the container can be killed by the OOMKiller. This value can be higher than the sum RSS and Cache since not all active file memory is Cache.", "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Working set bytes without kmem" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "rgb(0, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + 
] + }, + { + "matcher": { + "id": "byName", + "options": "Kmem" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "rgb(255, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 37 }, - "hiddenSeries": false, "id": 41, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Working set bytes without kmem", - "color": "rgb(0, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false }, - { - "alias": "Kmem", - "color": "rgb(255, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { "expr": "sum (avg_over_time(container_memory_rss{node=~\"$node\", namespace=~\"$namespace\", container!=\"POD\"}[$__rate_interval]))", @@ -2422,268 +2791,283 @@ "refId": "E" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Usage by state", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - 
"type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph shows the requested Memory resources higher than the actual Memory consumption. In other words, it shows Memory resources that can be \"freed\" without affecting the service.", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": 
"color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 46 }, - "hiddenSeries": false, "id": 361, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "lines": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace)\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=~\"$namespace\"}[$__rate_interval]))\n ) > 0\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "A" - }, - { - "expr": "sum\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=~\"$namespace\"}[$__rate_interval]))\n ) > 0\n 
)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Over-requested by namespace", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph shows the requested Memory resources higher than the actual Memory consumption. 
In other words, it shows Memory resources that need to be \"reserved\" for the service to run smoothly.", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 46 }, - "hiddenSeries": false, "id": 387, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": 
"Total", - "lines": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace)\n (\n (\n (\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=~\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=~\"$namespace\"}[$__rate_interval]))\n )\n > 0\n )", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "A" - }, - { - "expr": "sum\n (\n (\n (\n sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=~\"$namespace\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{node=~\"$node\", container!=\"POD\", namespace=~\"$namespace\"}[$__rate_interval]))\n )\n > 0\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Under-requested by namespace", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": 
"individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -2693,89 +3077,297 @@ "id": 43, "panels": [ { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "The Working set bytes metric is the actual memory used by the container, as it includes active file memory. When its value approaches the limit, the container can be killed by the OOMKiller. 
This value can be higher than the sum RSS and Cache since not all active file memory is Cache.", - "fill": 1, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Working set bytes without kmem" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "rgb(0, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Kmem" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "rgb(255, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VPA Target" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": 
"#447ebc", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#f4d598", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#c15c17", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + } + ] + }, "gridPos": { "h": 9, "w": 6, "x": 0, - "y": 59 + "y": 54 }, "id": 44, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": 
"multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.13", "repeat": "namespace", "repeatDirection": "h", - "seriesOverrides": [ - { - "alias": "Working set bytes without kmem", - "color": "rgb(0, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false - }, - { - "alias": "Kmem", - "color": "rgb(255, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false - }, - { - "alias": "VPA Target", - "color": "#447ebc", - "dashes": true, - "fill": 0, - "lines": false, - "linewidth": 2, - "stack": false - }, - { - "alias": "Requests", - "color": "#f4d598", - "dashes": true, - "fill": 0, - "lines": false, - "linewidth": 2, - "stack": false - }, - { - "alias": "Limits", - "color": "#c15c17", - "dashes": true, - "fill": 0, - "lines": false, - "linewidth": 2, - "stack": false - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, "targets": [ { "expr": "sum by (namespace) (avg_over_time(container_memory_rss{node=~\"$node\", namespace=\"$namespace\", container!=\"POD\"}[$__rate_interval]))", @@ -2836,46 +3428,8 @@ "refId": "H" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "$namespace", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" } ], "title": "Namespaces Memory", @@ -2883,7 +3437,10 @@ }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -2896,226 +3453,296 @@ "type": "row" }, { - "aliasColors": {}, - "bars": true, - 
"dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph shows Network Receive (except for the hostNetwork Pods)", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 55 }, - "hiddenSeries": false, "id": 79, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 2, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - 
"pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "sum by (namespace) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval])) \n- \n(\n sum by (namespace) (\n sum by (namespace, pod) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))\n * \n sum by (namespace, pod) (kube_pod_info{host_network=\"true\",namespace=~\"$namespace\"})\n )\n)", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "(sum by (namespace) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval])) \n-\n(\n sum by (namespace) (\n sum by (namespace, pod) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))\n * \n sum by (namespace, pod) (kube_pod_info{host_network=\"true\",namespace=~\"$namespace\"})\n )\n))\nor sum by (namespace) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval])) ", "format": "time_series", + "hide": false, "intervalFactor": 1, "legendFormat": "{{ namespace }}", + "range": true, "refId": "A" - }, - { - "expr": "sum (\n sum by (namespace) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval])) \n - \n (\n sum by (namespace) (\n sum by (namespace, pod) (rate(container_network_receive_bytes_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))\n * \n 
sum by (namespace, pod) (kube_pod_info{host_network=\"true\",namespace=~\"$namespace\"})\n )\n )\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Receive", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph shows Network Transmit (except for the hostNetwork Pods)", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + 
"color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 55 }, - "hiddenSeries": false, "id": 276, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 2, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "sum by (namespace) (rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval])) \n- \n(\n sum by (namespace) (\n sum by (namespace, pod) (rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))\n * \n sum by (namespace, pod) (kube_pod_info{host_network=\"true\",namespace=~\"$namespace\"})\n )\n)", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "(sum by (namespace) 
(rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval])) \n- \n(\n sum by (namespace) (\n sum by (namespace, pod) (rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))\n * \n sum by (namespace, pod) (kube_pod_info{host_network=\"true\",namespace=~\"$namespace\"})\n )\n))\nor\nsum by (namespace) (rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval])) ", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ namespace }}", + "range": true, "refId": "A" - }, - { - "expr": "sum (\n sum by (namespace) (rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval])) \n - \n (\n sum by (namespace) (\n sum by (namespace, pod) (rate(container_network_transmit_bytes_total{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]))\n * \n sum by (namespace, pod) (kube_pod_info{host_network=\"true\",namespace=~\"$namespace\"})\n )\n )\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Transmit", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -3128,224 
+3755,281 @@ "type": "row" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", - "fill": 1, - "fillGradient": 0, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "iops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 64 }, - "hiddenSeries": false, "id": 156, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": 
false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace) (rate(container_fs_reads_total{node=~\"$node\", namespace=~\"$namespace\", container!=\"POD\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "A" - }, - { - "expr": "sum (rate(container_fs_reads_total{node=~\"$node\", namespace=~\"$namespace\", container!=\"POD\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Read", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "iops", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", - "fill": 1, - "fillGradient": 0, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, 
+ "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "iops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 64 }, - "hiddenSeries": false, "id": 277, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - 
"steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace) (rate(container_fs_writes_total{node=~\"$node\", namespace=~\"$namespace\", container!=\"POD\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ namespace }}", "refId": "A" - }, - { - "expr": "sum (rate(container_fs_writes_total{node=~\"$node\", namespace=~\"$namespace\", container!=\"POD\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Write", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "iops", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -3358,235 +4042,334 @@ "type": "row" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 73 }, - "hiddenSeries": false, "id": 519, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Total/", - "fill": 0, - "linewidth": 2, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": 
true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "(sum by (namespace) (\n (\n sum by (namespace, persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{node=~\"$node\"})\n * on (namespace, persistentvolumeclaim)\n sum by (namespace, persistentvolume, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=~\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n )\n or on(namespace, persistentvolumeclaim) (\n sum by (persistentvolume) (\n max_over_time(kube_persistentvolume_capacity_bytes[$__rate_interval])\n )\n * on(persistentvolume) group_left(namespace, persistentvolumeclaim)\n sum by (namespace, persistentvolume, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=~\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n )\n) and on (namespace) (kube_namespace_created * scalar(count (kube_node_info) == bool count (kube_node_info{node=~\"$node\"})) > 0))\nor sum by (namespace) (\n sum by (namespace, persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{node=~\"$node\"})\n * on (namespace, persistentvolumeclaim)\n sum by (namespace, persistentvolume, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=~\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Provisioned {{ namespace }}", "refId": "A" - }, - { - 
"expr": "sum(\n(sum by (namespace) (\n (\n sum by (namespace, persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{node=~\"$node\"})\n * on (namespace, persistentvolumeclaim)\n sum by (namespace, persistentvolume, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=~\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n )\n or on(namespace, persistentvolumeclaim) (\n sum by (persistentvolume) (\n max_over_time(kube_persistentvolume_capacity_bytes[$__rate_interval])\n )\n * on(persistentvolume) group_left(namespace, persistentvolumeclaim)\n sum by (namespace, persistentvolume, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=~\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n )\n) and on (namespace) (kube_namespace_created * scalar(count (kube_node_info) == bool count (kube_node_info{node=~\"$node\"})) > 0))\nor sum by (namespace) (\n sum by (namespace, persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes{node=~\"$node\"})\n * on (namespace, persistentvolumeclaim)\n sum by (namespace, persistentvolume, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=~\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n )\n))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "PVC Provisioned (except local storage classes)", - 
"tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": 
"custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 73 }, - "hiddenSeries": false, "id": 520, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Total/", - "fill": 0, - "linewidth": 2, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace) (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]) - max_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval])\n )\n * on (namespace, persistentvolumeclaim) group_left(persistentvolume)\n\tsum by (namespace, persistentvolume, persistentvolumeclaim) (\n\t\tlabel_replace(\n\t\t\tmax_over_time(kube_persistentvolumeclaim_info{namespace=~\"$namespace\"}[$__rate_interval]),\n\t\t\t\"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n\t\t\t+ on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n\t)\n)", "format": "time_series", 
"intervalFactor": 1, "legendFormat": "Usage {{ namespace }}", "refId": "A" - }, - { - "expr": "sum(\n sum by (namespace) (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]) - max_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval])\n )\n \t* on (namespace, persistentvolumeclaim) group_left(persistentvolume)\n\t\tsum by (namespace, persistentvolume, persistentvolumeclaim) (\n\t\t\tlabel_replace(\n\t\t\t\tmax_over_time(kube_persistentvolumeclaim_info{namespace=~\"$namespace\"}[$__rate_interval]), \n\t\t\t\t\"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n\t\t\t\t+ on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n\t\t)\n )\n)", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "PVC Used (except local storage classes)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", - 
"fill": 1, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, "gridPos": { "h": 8, "w": 24, @@ -3594,39 +4377,24 @@ "y": 80 }, "id": 521, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "8.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Total/", - "fill": 0, - "linewidth": 2, - "stack": false + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { "expr": "sum by (namespace) (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval]) - 
max_over_time(kubelet_volume_stats_available_bytes{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval])\n )\n * on (namespace, persistentvolumeclaim)\n\tsum by (namespace, persistentvolumeclaim) (\n\t\tlabel_replace(\n\t\t\tmax_over_time(\n\t\t\t\tkube_persistentvolumeclaim_info{namespace=~\"$namespace\"}[$__rate_interval]),\n\t\t\t\t\"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n\t\t\t\t+ on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n\t)\n)\n/\nsum by (namespace) (\n sum by (namespace, persistentvolumeclaim) (\n max_over_time(kubelet_volume_stats_capacity_bytes{node=~\"$node\", namespace=~\"$namespace\"}[$__rate_interval])\n )\n * on (namespace, persistentvolumeclaim)\n\tsum by (namespace, persistentvolumeclaim) (\n\t\tlabel_replace(\n\t\t\tmax_over_time(\n\t\t\t\tkube_persistentvolumeclaim_info{namespace=~\"$namespace\"}[$__rate_interval]),\n\t\t\t\t\"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n\t\t\t\t+ on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0)\n\t)\n)", @@ -3636,51 +4404,12 @@ "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "PVC Used in % (except local storage classes)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" } ], "refresh": "30s", - "schemaVersion": 32, - "style": "dark", + "schemaVersion": 38, "tags": [ "main" ], @@ -3688,12 +4417,10 @@ "list": [ { "current": { - "selected": false, + 
"selected": true, "text": "default", "value": "default" }, - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Prometheus", @@ -3701,6 +4428,7 @@ "name": "ds_prometheus", "options": [], "query": "prometheus", + "queryValue": "", "refresh": 1, "regex": "", "skipUrlSync": false, @@ -3717,10 +4445,11 @@ "$__all" ] }, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "definition": "label_values(kubernetes_build_info, node)", - "description": null, - "error": null, "hide": 0, "includeAll": true, "label": "Node", @@ -3751,10 +4480,11 @@ "$__all" ] }, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "definition": "label_values(kube_pod_info{node=~\"$node\"}, namespace)", - "description": null, - "error": null, "hide": 0, "includeAll": true, "label": "Namespace", @@ -3808,5 +4538,6 @@ "timezone": "", "title": "Namespaces", "uid": "V_WjJ-Pmk", - "version": 2 + "version": 1, + "weekStart": "" } diff --git a/dashboards/main/node.json b/dashboards/main/node.json index 747e961f..53b3f67a 100644 --- a/dashboards/main/node.json +++ b/dashboards/main/node.json @@ -4,23 +4,38 @@ { "$$hashKey": "object:1058", "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] }, "editable": false, + "fiscalYearStartMonth": 0, "gnetId": 1860, "graphTooltip": 0, - "iteration": 1630508489644, + "id": 45, + "iteration": 1712832386957, + "links": [], + "liveNow": false, "panels": [ { "collapsed": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -29,27 +44,28 @@ }, "id": 261, "panels": [], - "repeat": null, "title": 
"Quick CPU / Mem / Disk", "type": "row" }, { - "cacheTimeout": null, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "Busy state of all CPU cores together", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, - "custom": {}, "mappings": [ { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } ], "max": 100, @@ -96,7 +112,7 @@ "showThresholdMarkers": true, "text": {} }, - "pluginVersion": "7.4.2", + "pluginVersion": "8.5.13", "targets": [ { "expr": "(((count(count(node_cpu_seconds_total{node=~\"$node\",job=\"node-exporter\"}) by (cpu))) - avg(sum by (mode)(rate(node_cpu_seconds_total{mode='idle',node=~\"$node\",job=\"node-exporter\"}[$__rate_interval])))) * 100) / count(count(node_cpu_seconds_total{node=~\"$node\",job=\"node-exporter\"}) by (cpu))", @@ -111,22 +127,24 @@ "type": "gauge" }, { - "cacheTimeout": null, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "Busy state of all CPU cores together (5 min average)", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, - "custom": {}, "mappings": [ { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } ], "max": 100, @@ -173,7 +191,7 @@ "showThresholdMarkers": true, "text": {} }, - "pluginVersion": "7.4.2", + "pluginVersion": "8.5.13", "targets": [ { "expr": "avg(node_load5{node=~\"$node\",job=\"node-exporter\"}) / count(count(node_cpu_seconds_total{node=~\"$node\",job=\"node-exporter\"}) by (cpu)) * 100", @@ -188,22 +206,24 @@ "type": "gauge" }, { - "cacheTimeout": null, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "Busy state of all CPU cores together (15 min average)", "fieldConfig": { "defaults": { "color": { 
"mode": "thresholds" }, - "custom": {}, "mappings": [ { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } ], "max": 100, @@ -250,7 +270,7 @@ "showThresholdMarkers": true, "text": {} }, - "pluginVersion": "7.4.2", + "pluginVersion": "8.5.13", "targets": [ { "expr": "avg(node_load15{node=~\"$node\",job=\"node-exporter\"}) / count(count(node_cpu_seconds_total{node=~\"$node\",job=\"node-exporter\"}) by (cpu)) * 100", @@ -264,15 +284,15 @@ "type": "gauge" }, { - "cacheTimeout": null, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "Non available RAM memory", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, - "custom": {}, "decimals": 0, "mappings": [], "max": 100, @@ -320,7 +340,7 @@ "showThresholdMarkers": true, "text": {} }, - "pluginVersion": "7.4.2", + "pluginVersion": "8.5.13", "targets": [ { "expr": "((node_memory_MemTotal_bytes{node=~\"$node\",job=\"node-exporter\"} - node_memory_MemFree_bytes{node=~\"$node\",job=\"node-exporter\"}) / (node_memory_MemTotal_bytes{node=~\"$node\",job=\"node-exporter\"} )) * 100", @@ -343,22 +363,24 @@ "type": "gauge" }, { - "cacheTimeout": null, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "Used Swap", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, - "custom": {}, "mappings": [ { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } ], "max": 100, @@ -405,7 +427,7 @@ "showThresholdMarkers": true, "text": {} }, - "pluginVersion": "7.4.2", + "pluginVersion": "8.5.13", "targets": [ { "expr": "((node_memory_SwapTotal_bytes{node=~\"$node\",job=\"node-exporter\"} - node_memory_SwapFree_bytes{node=~\"$node\",job=\"node-exporter\"}) / 
(node_memory_SwapTotal_bytes{node=~\"$node\",job=\"node-exporter\"} )) * 100", @@ -418,22 +440,24 @@ "type": "gauge" }, { - "cacheTimeout": null, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "Used Root FS", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, - "custom": {}, "mappings": [ { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } ], "max": 100, @@ -480,7 +504,7 @@ "showThresholdMarkers": true, "text": {} }, - "pluginVersion": "7.4.2", + "pluginVersion": "8.5.13", "targets": [ { "expr": "100 - ((node_filesystem_avail_bytes{node=~\"$node\",job=\"node-exporter\",mountpoint=\"/\",fstype!=\"rootfs\"} * 100) / node_filesystem_size_bytes{node=~\"$node\",job=\"node-exporter\",mountpoint=\"/\",fstype!=\"rootfs\"})", @@ -494,30 +518,43 @@ "type": "gauge" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "Total number of CPU cores", "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, "overrides": [] }, - "format": "short", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, "gridPos": { "h": 2, "w": 2, @@ -525,41 +562,23 @@ "y": 1 }, "id": 14, - "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { 
- "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "maxPerRow": 6, - "nullPointMode": "null", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "tableColumn": "", + "pluginVersion": "8.5.13", "targets": [ { "expr": "count(count(node_cpu_seconds_total{node=~\"$node\",job=\"node-exporter\"}) by (cpu))", @@ -569,45 +588,48 @@ "step": 240 } ], - "thresholds": "", "title": "CPU Cores", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "type": "stat" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$ds_prometheus", - "decimals": 1, + "datasource": { + "uid": "$ds_prometheus" + }, "description": "System uptime", "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" }, "overrides": [] }, - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, "gridPos": { "h": 2, "w": 4, @@ -616,42 +638,23 @@ 
}, "hideTimeOverride": true, "id": 15, - "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "$$hashKey": "object:1094", - "name": "value to text", - "value": 1 - }, - { - "$$hashKey": "object:1095", - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "null", - "nullText": null, - "postfix": "s", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "tableColumn": "", + "pluginVersion": "8.5.13", "targets": [ { "expr": "node_time_seconds{node=~\"$node\",job=\"node-exporter\"} - node_boot_time_seconds{node=~\"$node\",job=\"node-exporter\"}", @@ -660,46 +663,52 @@ "step": 240 } ], - "thresholds": "", "title": "Uptime", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "$$hashKey": "object:1097", - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "type": "stat" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$ds_prometheus", - "decimals": 0, + "datasource": { + "uid": "$ds_prometheus" + }, "description": "Total RootFS", "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 
null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 70 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "bytes" }, "overrides": [] }, - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, "gridPos": { "h": 2, "w": 2, @@ -707,41 +716,23 @@ "y": 3 }, "id": 23, - "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "maxPerRow": 6, - "nullPointMode": "null", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "tableColumn": "", + "pluginVersion": "8.5.13", "targets": [ { "expr": "node_filesystem_size_bytes{node=~\"$node\",job=\"node-exporter\",mountpoint=\"/\",fstype!=\"rootfs\"}", @@ -752,45 +743,48 @@ "step": 240 } ], - "thresholds": "70,90", "title": "RootFS Total", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "type": "stat" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$ds_prometheus", - "decimals": 0, + "datasource": { + "uid": "$ds_prometheus" + }, "description": "Total RAM", "fieldConfig": { "defaults": { - "custom": {} + "color": 
{ + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, "overrides": [] }, - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, "gridPos": { "h": 2, "w": 2, @@ -798,41 +792,23 @@ "y": 3 }, "id": 75, - "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "maxPerRow": 6, - "nullPointMode": "null", - "nullText": null, - "postfix": "", - "postfixFontSize": "70%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "tableColumn": "", + "pluginVersion": "8.5.13", "targets": [ { "expr": "node_memory_MemTotal_bytes{node=~\"$node\",job=\"node-exporter\"}", @@ -841,45 +817,48 @@ "step": 240 } ], - "thresholds": "", "title": "RAM Total", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "type": "stat" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$ds_prometheus", - "decimals": 0, + 
"datasource": { + "uid": "$ds_prometheus" + }, "description": "Total SWAP", "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, "overrides": [] }, - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, "gridPos": { "h": 2, "w": 2, @@ -887,41 +866,23 @@ "y": 3 }, "id": 18, - "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "maxPerRow": 6, - "nullPointMode": "null", - "nullText": null, - "postfix": "", - "postfixFontSize": "70%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "tableColumn": "", + "pluginVersion": "8.5.13", "targets": [ { "expr": "node_memory_SwapTotal_bytes{node=~\"$node\",job=\"node-exporter\"}", @@ -930,22 +891,14 @@ "step": 240 } ], - "thresholds": "", "title": "SWAP Total", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "type": "stat" }, { "collapsed": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" 
+ }, "gridPos": { "h": 1, "w": 24, @@ -954,7 +907,6 @@ }, "id": 263, "panels": [], - "repeat": null, "title": "Basic CPU / Mem / Net / Disk", "type": "row" }, @@ -978,12 +930,13 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "description": "Basic CPU info", "fieldConfig": { "defaults": { - "custom": {}, "links": [] }, "overrides": [] @@ -1007,8 +960,6 @@ "rightSide": false, "show": true, "sideWidth": 250, - "sort": null, - "sortDesc": null, "total": false, "values": false }, @@ -1021,7 +972,7 @@ "alertThreshold": true }, "percentage": true, - "pluginVersion": "7.4.2", + "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", @@ -1103,9 +1054,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "CPU Basic", "tooltip": { "shared": true, @@ -1114,9 +1063,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -1133,16 +1080,12 @@ { "$$hashKey": "object:124", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1170,12 +1113,13 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "description": "Basic memory usage", "fieldConfig": { "defaults": { - "custom": {}, "links": [] }, "overrides": [] @@ -1211,7 +1155,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.2", + "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", @@ -1285,9 +1229,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Basic", "tooltip": { "shared": true, @@ -1296,9 +1238,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - 
"name": null, "show": true, "values": [] }, @@ -1307,22 +1247,17 @@ "format": "bytes", "label": "", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1354,11 +1289,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "Basic network info per interface", "fieldConfig": { "defaults": { - "custom": {}, "links": [] }, "overrides": [] @@ -1396,7 +1332,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.2", + "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", @@ -1428,9 +1364,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network Traffic Basic", "tooltip": { "shared": true, @@ -1439,33 +1373,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "pps", "label": "", "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1473,12 +1399,13 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 3, "description": "Disk space used of all filesystems mounted", "fieldConfig": { "defaults": { - "custom": {}, "links": [] }, "overrides": [] @@ -1516,7 +1443,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.2", + "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", @@ -1535,9 +1462,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Disk Space 
Used Basic", "tooltip": { "shared": true, @@ -1546,16 +1471,13 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percent", - "label": null, "logBase": 1, "max": "100", "min": "0", @@ -1563,21 +1485,19 @@ }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { "collapsed": true, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -1602,7 +1522,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "description": "", "fieldConfig": { @@ -1631,8 +1553,6 @@ "rightSide": false, "show": true, "sideWidth": 250, - "sort": null, - "sortDesc": null, "total": false, "values": true }, @@ -1649,7 +1569,6 @@ "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [], "spaceLength": 10, "stack": true, @@ -1729,9 +1648,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "CPU", "tooltip": { "shared": true, @@ -1740,9 +1657,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -1757,16 +1672,12 @@ }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1794,7 +1705,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "description": "", "fieldConfig": { @@ -1823,8 +1736,6 @@ "rightSide": false, "show": true, "sideWidth": 350, - "sort": null, - "sortDesc": null, "total": false, "values": true }, @@ -1933,9 +1844,7 @@ } ], "thresholds": [], - "timeFrom": 
null, "timeRegions": [], - "timeShift": null, "title": "Memory Stack", "tooltip": { "shared": true, @@ -1944,9 +1853,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -1955,22 +1862,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1983,7 +1885,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -2053,9 +1957,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network Traffic", "tooltip": { "shared": true, @@ -2064,9 +1966,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -2076,23 +1976,17 @@ "format": "bps", "label": "bits out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:5885", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2100,7 +1994,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 3, "description": "", "fieldConfig": { @@ -2162,9 +2058,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Disk Space Used", "tooltip": { "shared": true, @@ -2173,9 +2067,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -2184,22 +2076,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": 
"short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2207,7 +2094,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "", "fieldConfig": { "defaults": { @@ -2361,9 +2250,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Disk IOps", "tooltip": { "shared": false, @@ -2372,9 +2259,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -2383,22 +2268,16 @@ "format": "iops", "label": "IO read (-) / write (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2408,7 +2287,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 3, "description": "", "fieldConfig": { @@ -2436,8 +2317,6 @@ "min": true, "rightSide": false, "show": true, - "sort": null, - "sortDesc": null, "total": false, "values": true }, @@ -2504,9 +2383,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "I/O Usage Read / Write", "tooltip": { "shared": true, @@ -2515,9 +2392,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": false, "values": [] }, @@ -2527,8 +2402,6 @@ "format": "Bps", "label": "bytes read (-) / write (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { @@ -2536,14 +2409,11 @@ "format": "ms", "label": "", "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2553,7 +2423,9 @@ "bars": false, 
"dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 3, "description": "", "fieldConfig": { @@ -2581,8 +2453,6 @@ "min": true, "rightSide": false, "show": true, - "sort": null, - "sortDesc": null, "total": false, "values": true }, @@ -2615,9 +2485,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "I/O Utilization", "tooltip": { "shared": true, @@ -2626,9 +2494,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": false, "values": [] }, @@ -2638,7 +2504,6 @@ "format": "percentunit", "label": "%util", "logBase": 1, - "max": null, "min": "0", "show": true }, @@ -2647,24 +2512,22 @@ "format": "s", "label": "", "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "repeat": null, "title": "CPU / Memory / Net / Disk", "type": "row" }, { "collapsed": true, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -2696,7 +2559,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -2763,9 +2628,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Active / Inactive", "tooltip": { "shared": true, @@ -2774,9 +2637,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -2785,22 +2646,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2826,7 +2682,9 @@ "bars": false, "dashLength": 10, "dashes": false, - 
"datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -2902,9 +2760,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Commited", "tooltip": { "shared": true, @@ -2913,9 +2769,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -2924,22 +2778,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2965,7 +2814,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -3052,9 +2903,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Active / Inactive Detail", "tooltip": { "shared": true, @@ -3063,9 +2912,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -3074,22 +2921,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3117,7 +2959,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -3144,7 +2988,6 @@ "min": true, "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": true }, @@ -3192,9 +3035,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Writeback and Dirty", 
"tooltip": { "shared": true, @@ -3203,9 +3044,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -3214,22 +3053,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3255,7 +3089,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -3349,9 +3185,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Shared and Mapped", "tooltip": { "shared": true, @@ -3360,9 +3194,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -3372,23 +3204,18 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:4107", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3416,7 +3243,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -3443,7 +3272,6 @@ "min": true, "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": true }, @@ -3483,9 +3311,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Slab", "tooltip": { "shared": true, @@ -3494,9 +3320,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -3505,22 +3329,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": 
"0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3547,7 +3366,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -3574,7 +3395,6 @@ "min": true, "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": true }, @@ -3625,9 +3445,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Vmalloc", "tooltip": { "shared": true, @@ -3636,9 +3454,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -3647,22 +3463,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3688,7 +3499,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -3747,9 +3560,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Bounce", "tooltip": { "shared": true, @@ -3758,9 +3569,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -3769,22 +3578,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3811,7 +3615,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": 
"$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -3838,7 +3644,6 @@ "min": true, "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": true }, @@ -3883,9 +3688,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Anonymous", "tooltip": { "shared": true, @@ -3894,9 +3697,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -3905,22 +3706,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3946,7 +3742,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -4013,9 +3811,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Kernel / CPU", "tooltip": { "shared": true, @@ -4024,9 +3820,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -4035,22 +3829,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4077,7 +3866,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -4104,7 +3895,6 @@ "min": true, "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": true }, @@ -4152,9 +3942,7 @@ } ], "thresholds": [], - 
"timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory HugePages Counter", "tooltip": { "shared": true, @@ -4163,9 +3951,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -4174,7 +3960,6 @@ "format": "short", "label": "pages", "logBase": 1, - "max": null, "min": "0", "show": true }, @@ -4182,14 +3967,11 @@ "format": "short", "label": "", "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4216,7 +3998,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -4243,7 +4027,6 @@ "min": true, "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": true }, @@ -4283,9 +4066,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory HugePages Size", "tooltip": { "shared": true, @@ -4294,9 +4075,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -4305,7 +4084,6 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, @@ -4313,14 +4091,11 @@ "format": "short", "label": "", "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4347,7 +4122,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -4376,7 +4153,6 @@ "min": true, "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": true }, @@ -4424,9 +4200,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory DirectMap", "tooltip": { "shared": true, @@ 
-4435,9 +4209,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -4446,22 +4218,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4487,7 +4254,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -4554,9 +4323,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Unevictable and MLocked", "tooltip": { "shared": true, @@ -4565,9 +4332,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -4576,22 +4341,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4619,7 +4379,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -4646,7 +4408,6 @@ "min": true, "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": true }, @@ -4678,9 +4439,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory NFS", "tooltip": { "shared": true, @@ -4689,9 +4448,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -4700,32 +4457,28 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": 
null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "repeat": null, "title": "Memory Meminfo", "type": "row" }, { "collapsed": true, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -4739,7 +4492,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -4809,9 +4564,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Pages In / Out", "tooltip": { "shared": true, @@ -4820,9 +4573,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -4831,22 +4582,16 @@ "format": "short", "label": "pages out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4854,7 +4599,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -4924,9 +4671,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Pages Swap In / Out", "tooltip": { "shared": true, @@ -4935,9 +4680,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -4946,22 +4689,16 @@ "format": "short", "label": "pages out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4987,7 +4724,9 @@ 
"bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -5069,9 +4808,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Page Faults", "tooltip": { "shared": true, @@ -5080,9 +4817,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -5092,23 +4827,18 @@ "format": "short", "label": "faults", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:6134", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -5136,7 +4866,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 2, "fieldConfig": { "defaults": { @@ -5163,7 +4895,6 @@ "min": true, "rightSide": false, "show": true, - "sideWidth": null, "total": false, "values": true }, @@ -5195,9 +4926,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "OOM Killer", "tooltip": { "shared": true, @@ -5206,9 +4935,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -5218,33 +4945,29 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:5374", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "repeat": null, "title": "Memory Vmstat", "type": "row" }, { "collapsed": true, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -5258,7 +4981,9 @@ "bars": false, "dashLength": 10, "dashes": 
false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "", "fieldConfig": { "defaults": { @@ -5338,9 +5063,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Time Syncronized Drift", "tooltip": { "shared": true, @@ -5349,9 +5072,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -5360,22 +5081,17 @@ "format": "s", "label": "seconds", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", "label": "counter", "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -5383,7 +5099,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "", "fieldConfig": { "defaults": { @@ -5439,9 +5157,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Time PLL Adjust", "tooltip": { "shared": true, @@ -5450,9 +5166,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -5461,22 +5175,16 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -5484,7 +5192,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "", "fieldConfig": { "defaults": { @@ -5553,9 +5263,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Time Syncronized Status", "tooltip": { "shared": true, @@ -5564,9 +5272,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": 
"time", - "name": null, "show": true, "values": [] }, @@ -5575,22 +5281,16 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -5598,7 +5298,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "", "fieldConfig": { "defaults": { @@ -5662,9 +5364,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Time Misc", "tooltip": { "shared": true, @@ -5673,9 +5373,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -5684,22 +5382,16 @@ "format": "s", "label": "seconds", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], @@ -5708,7 +5400,9 @@ }, { "collapsed": true, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -5722,7 +5416,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -5786,9 +5482,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Processes Status", "tooltip": { "shared": true, @@ -5797,9 +5491,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -5809,23 +5501,18 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:6501", "format": "short", - "label": null, "logBase": 1, - 
"max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -5833,7 +5520,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -5889,9 +5578,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Processes State", "tooltip": { "shared": true, @@ -5900,9 +5587,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -5912,23 +5597,18 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:6501", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -5936,7 +5616,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -5993,9 +5675,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Processes Forks", "tooltip": { "shared": true, @@ -6004,9 +5684,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -6016,23 +5694,18 @@ "format": "short", "label": "forks / sec", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:6641", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -6040,7 +5713,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -6124,9 +5799,7 @@ } ], 
"thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Processes Memory", "tooltip": { "shared": true, @@ -6135,9 +5808,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -6146,22 +5817,17 @@ "format": "decbytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -6169,7 +5835,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -6240,9 +5908,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "PIDs Number and Limit", "tooltip": { "shared": true, @@ -6251,9 +5917,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -6263,23 +5927,18 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:6501", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -6287,7 +5946,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -6358,9 +6019,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Threads Number and Limit", "tooltip": { "shared": true, @@ -6369,9 +6028,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -6381,23 +6038,18 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, "min": "0", 
"show": true }, { "$$hashKey": "object:6501", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], @@ -6406,7 +6058,9 @@ }, { "collapsed": true, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -6420,7 +6074,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -6461,7 +6117,6 @@ "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [], "spaceLength": 10, "stack": false, @@ -6486,9 +6141,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Context Switches / Interrupts", "tooltip": { "shared": true, @@ -6497,9 +6150,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -6508,22 +6159,17 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -6531,7 +6177,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -6572,7 +6220,6 @@ "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [], "spaceLength": 10, "stack": false, @@ -6604,9 +6251,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "System Load", "tooltip": { "shared": true, @@ -6615,9 +6260,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -6627,23 
+6270,18 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:6262", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -6651,7 +6289,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -6707,9 +6347,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "CPU time spent in user and system contexts", "tooltip": { "shared": true, @@ -6718,9 +6356,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -6730,23 +6366,17 @@ "format": "s", "label": "seconds", "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:4861", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -6754,7 +6384,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -6810,9 +6442,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Entropy", "tooltip": { "shared": true, @@ -6821,9 +6451,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -6833,23 +6461,18 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:6569", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -6857,7 +6480,9 @@ "bars": 
false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -6925,9 +6550,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "File Descriptors", "tooltip": { "shared": true, @@ -6936,9 +6559,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -6948,33 +6569,29 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:6339", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "repeat": null, "title": "System Misc", "type": "row" }, { "collapsed": true, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -6988,7 +6605,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "The number (after merges) of I/O requests completed per second for the device", "fieldConfig": { "defaults": { @@ -7032,7 +6651,6 @@ "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ { "$$hashKey": "object:2033", @@ -7160,9 +6778,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Disk IOps Completed", "tooltip": { "shared": false, @@ -7171,9 +6787,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -7183,23 +6797,17 @@ "format": "iops", "label": "IO read (-) / write (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:2187", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - 
"align": false, - "alignLevel": null + "align": false } }, { @@ -7207,7 +6815,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "The number of bytes read from or written to the device per second", "fieldConfig": { "defaults": { @@ -7359,9 +6969,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Disk R/W Data", "tooltip": { "shared": false, @@ -7370,9 +6978,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -7382,23 +6988,17 @@ "format": "Bps", "label": "bytes read (-) / write (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:370", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -7406,7 +7006,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "The average time for requests issued to the device to be served. This includes the time spent by the requests in queue and the time spent servicing them.", "fieldConfig": { "defaults": { @@ -7560,9 +7162,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Disk Average Wait Time", "tooltip": { "shared": false, @@ -7571,9 +7171,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -7583,23 +7181,17 @@ "format": "s", "label": "time. 
read (-) / write (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:442", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -7607,7 +7199,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "The average queue length of the requests that were issued to the device", "fieldConfig": { "defaults": { @@ -7748,9 +7342,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Average Queue Size", "tooltip": { "shared": false, @@ -7759,9 +7351,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -7771,23 +7361,18 @@ "format": "none", "label": "aqu-sz", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:514", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -7795,7 +7380,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "The number of read and write requests merged per second that were queued to the device", "fieldConfig": { "defaults": { @@ -7947,9 +7534,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Disk R/W Merged", "tooltip": { "shared": false, @@ -7958,9 +7543,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -7970,23 +7553,17 @@ "format": "iops", "label": "I/Os", "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:586", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, 
"show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -7994,7 +7571,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "Percentage of elapsed time during which I/O requests were issued to the device (bandwidth utilization for the device). Device saturation occurs when this value is close to 100% for devices serving requests serially. But for devices serving requests in parallel, such as RAID arrays and modern SSDs, this number does not reflect their performance limits.", "fieldConfig": { "defaults": { @@ -8142,9 +7721,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Time Spent Doing I/Os", "tooltip": { "shared": false, @@ -8153,9 +7730,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -8165,23 +7740,18 @@ "format": "percentunit", "label": "%util", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:658", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -8189,7 +7759,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "The number of outstanding requests at the instant the sample was taken. 
Incremented as requests are given to appropriate struct request_queue and decremented as they finish.", "fieldConfig": { "defaults": { @@ -8330,9 +7902,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Instantaneous Queue Size", "tooltip": { "shared": false, @@ -8341,9 +7911,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -8353,23 +7921,18 @@ "format": "iops", "label": "I/Os", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:730", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -8377,7 +7940,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "", "fieldConfig": { "defaults": { @@ -8543,9 +8108,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Disk IOps Discards completed / merged", "tooltip": { "shared": false, @@ -8554,9 +8117,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -8566,33 +8127,28 @@ "format": "iops", "label": "IOs", "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:2187", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "repeat": null, "title": "Storage Disk", "type": "row" }, { "collapsed": true, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -8606,12 +8162,13 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "decimals": 3, "description": "", 
"fieldConfig": { "defaults": { - "custom": {}, "links": [] }, "overrides": [] @@ -8622,7 +8179,7 @@ "h": 10, "w": 12, "x": 0, - "y": 14 + "y": 28 }, "hiddenSeries": false, "id": 43, @@ -8646,7 +8203,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.2", + "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", @@ -8685,9 +8242,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Filesystem space available", "tooltip": { "shared": true, @@ -8696,9 +8251,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -8708,141 +8261,132 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:3827", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "description": "", "fieldConfig": { "defaults": { - "custom": {}, - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" 
}, "overrides": [] }, - "fill": 2, - "fillGradient": 0, "gridPos": { "h": 10, "w": 12, "x": 12, - "y": 14 + "y": 28 }, - "hiddenSeries": false, - "id": 41, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, + "id": 316, "links": [], - "nullPointMode": "null", "options": { - "alertThreshold": true + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "7.4.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": true, + "pluginVersion": "8.5.13", "targets": [ { - "expr": "node_filesystem_files_free{node=~\"$node\",job=\"node-exporter\",device!~'rootfs'}", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "expr": "node_filesystem_size_bytes{node=~\"$node\",job=\"node-exporter\",device!~'rootfs'}", "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "{{mountpoint}} - Free file nodes", + "legendFormat": "{{mountpoint}} - Size", "refId": "A", "step": 240 } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "File Nodes Free", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:3894", - "format": "short", - "label": "file nodes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:3895", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - 
"align": false, - "alignLevel": null - } + "title": "Filesystem size", + "type": "timeseries" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "", "fieldConfig": { "defaults": { - "custom": {}, "links": [] }, "overrides": [] @@ -8853,7 +8397,7 @@ "h": 10, "w": 12, "x": 0, - "y": 24 + "y": 38 }, "hiddenSeries": false, "id": 28, @@ -8876,7 +8420,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.2", + "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", @@ -8903,9 +8447,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "File Descriptor", "tooltip": { "shared": false, @@ -8914,9 +8456,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -8925,22 +8465,17 @@ "format": "short", "label": "files", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -8948,11 +8483,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "", "fieldConfig": { "defaults": { - "custom": {}, "links": [] }, "overrides": [] @@ -8963,10 +8499,10 @@ "h": 10, "w": 12, "x": 12, - "y": 24 + "y": 38 }, "hiddenSeries": false, - "id": 219, + "id": 41, "legend": { "alignAsTable": true, "avg": true, @@ -8987,7 +8523,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.2", + "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", @@ -8997,20 +8533,18 @@ "steppedLine": true, "targets": [ { - "expr": "node_filesystem_files{node=~\"$node\",job=\"node-exporter\",device!~'rootfs'}", + "expr": 
"node_filesystem_files_free{node=~\"$node\",job=\"node-exporter\",device!~'rootfs'}", "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "{{mountpoint}} - File nodes total", + "legendFormat": "{{mountpoint}} - Free file nodes", "refId": "A", "step": 240 } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, - "title": "File Nodes Size", + "title": "File Nodes Free", "tooltip": { "shared": true, "sort": 0, @@ -9018,33 +8552,28 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { + "$$hashKey": "object:3894", "format": "short", - "label": "file Nodes", + "label": "file nodes", "logBase": 1, - "max": null, "min": "0", "show": true }, { + "$$hashKey": "object:3895", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -9054,12 +8583,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", - "decimals": null, + "datasource": { + "uid": "$ds_prometheus" + }, "description": "", "fieldConfig": { "defaults": { - "custom": {}, "links": [] }, "overrides": [] @@ -9070,7 +8599,7 @@ "h": 10, "w": 12, "x": 0, - "y": 34 + "y": 48 }, "hiddenSeries": false, "id": 44, @@ -9096,7 +8625,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.2", + "pluginVersion": "8.5.13", "pointradius": 5, "points": false, "renderer": "flot", @@ -9123,9 +8652,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Filesystem in ReadOnly / Error", "tooltip": { "shared": true, @@ -9134,9 +8661,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -9153,26 +8678,119 @@ { "$$hashKey": "object:3671", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], 
"yaxis": { - "align": false, - "alignLevel": null + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$ds_prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 48 + }, + "hiddenSeries": false, + "id": 219, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.5.13", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "node_filesystem_files{node=~\"$node\",job=\"node-exporter\",device!~'rootfs'}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{mountpoint}} - File nodes total", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "File Nodes Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "file Nodes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false } } ], - "repeat": null, "title": "Storage Filesystem", "type": "row" }, { "collapsed": true, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -9191,7 +8809,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + 
"datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -9261,9 +8881,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network Traffic by Packets", "tooltip": { "shared": true, @@ -9272,9 +8890,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -9283,22 +8899,16 @@ "format": "pps", "label": "packets out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -9306,7 +8916,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -9380,9 +8992,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network Traffic Errors", "tooltip": { "shared": true, @@ -9391,9 +9001,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -9402,22 +9010,16 @@ "format": "pps", "label": "packets out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -9425,7 +9027,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -9499,9 +9103,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network Traffic Drop", "tooltip": { "shared": true, @@ -9510,9 +9112,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, 
"show": true, "values": [] }, @@ -9521,22 +9121,16 @@ "format": "pps", "label": "packets out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -9544,7 +9138,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -9618,9 +9214,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network Traffic Compressed", "tooltip": { "shared": true, @@ -9629,9 +9223,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -9640,22 +9232,16 @@ "format": "pps", "label": "packets out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -9663,7 +9249,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -9729,9 +9317,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network Traffic Multicast", "tooltip": { "shared": true, @@ -9740,9 +9326,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -9751,22 +9335,16 @@ "format": "pps", "label": "packets out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -9774,7 
+9352,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -9848,9 +9428,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network Traffic Fifo", "tooltip": { "shared": true, @@ -9859,9 +9437,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -9870,22 +9446,16 @@ "format": "pps", "label": "packets out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -9893,7 +9463,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -9961,9 +9533,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network Traffic Frame", "tooltip": { "shared": true, @@ -9972,9 +9542,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -9984,23 +9552,17 @@ "format": "pps", "label": "packets out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:590", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -10008,7 +9570,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -10069,9 +9633,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network Traffic Carrier", "tooltip": 
{ "shared": true, @@ -10080,9 +9642,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -10091,22 +9651,16 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -10114,7 +9668,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -10180,9 +9736,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network Traffic Colls", "tooltip": { "shared": true, @@ -10191,9 +9745,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -10202,22 +9754,16 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -10225,7 +9771,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -10296,9 +9844,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "NF Contrack", "tooltip": { "shared": true, @@ -10307,9 +9853,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -10319,23 +9863,18 @@ "format": "short", "label": "entries", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:679", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], 
"yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -10343,7 +9882,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -10399,9 +9940,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "ARP Entries", "tooltip": { "shared": true, @@ -10410,9 +9949,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -10421,22 +9958,17 @@ "format": "short", "label": "Entries", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -10444,7 +9976,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -10500,9 +10034,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "MTU", "tooltip": { "shared": true, @@ -10511,9 +10043,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -10523,22 +10053,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -10546,7 +10071,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -10602,9 +10129,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Speed", "tooltip": { "shared": 
true, @@ -10613,9 +10138,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -10625,22 +10148,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -10648,7 +10166,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -10704,9 +10224,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Queue Length", "tooltip": { "shared": true, @@ -10715,9 +10233,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -10727,22 +10243,17 @@ "format": "none", "label": "packets", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -10750,7 +10261,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -10818,9 +10331,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network Operational Status", "tooltip": { "shared": true, @@ -10829,9 +10340,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -10840,32 +10349,27 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, 
- "alignLevel": null + "align": false } } ], - "repeat": null, "title": "Network Traffic", "type": "row" }, { "collapsed": true, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -10879,7 +10383,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -10971,9 +10477,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Sockstat TCP", "tooltip": { "shared": true, @@ -10982,9 +10486,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -10993,22 +10495,17 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -11016,7 +10513,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -11091,9 +10590,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Sockstat UDP", "tooltip": { "shared": true, @@ -11102,9 +10599,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -11113,22 +10608,17 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -11136,7 +10626,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, 
"fieldConfig": { "defaults": { "custom": {}, @@ -11195,9 +10687,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Sockstat Used", "tooltip": { "shared": true, @@ -11206,9 +10696,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -11217,22 +10705,17 @@ "format": "short", "label": "sockets", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -11240,7 +10723,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -11307,9 +10792,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Sockstat Memory Size", "tooltip": { "shared": true, @@ -11318,9 +10801,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -11329,22 +10810,17 @@ "format": "bytes", "label": "bytes", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -11352,7 +10828,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -11427,9 +10905,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Sockstat FRAG / RAW", "tooltip": { "shared": true, @@ -11438,9 +10914,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -11450,33 +10924,29 @@ "format": "short", 
"label": "counter", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:1573", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "repeat": null, "title": "Network Sockstat", "type": "row" }, { "collapsed": true, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -11490,7 +10960,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -11567,9 +11039,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Netstat IP In / Out Octets", "tooltip": { "shared": true, @@ -11578,9 +11048,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -11590,23 +11058,17 @@ "format": "short", "label": "octects out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:1890", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -11614,7 +11076,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -11676,9 +11140,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Netstat IP Forwarding", "tooltip": { "shared": true, @@ -11687,9 +11149,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -11699,23 +11159,18 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:1958", 
"format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -11723,8 +11178,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", - "decimals": null, + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -11798,9 +11254,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "ICMP In / Out", "tooltip": { "shared": true, @@ -11809,9 +11263,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -11820,22 +11272,16 @@ "format": "short", "label": "messages out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -11843,8 +11289,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", - "decimals": null, + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -11910,9 +11357,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "ICMP Errors", "tooltip": { "shared": true, @@ -11921,9 +11366,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -11932,22 +11375,16 @@ "format": "short", "label": "messages out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -11955,8 +11392,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", - "decimals": null, + "datasource": { + 
"uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -12034,9 +11472,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "UDP In / Out", "tooltip": { "shared": true, @@ -12045,9 +11481,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -12056,22 +11490,16 @@ "format": "short", "label": "datagrams out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -12079,7 +11507,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -12169,9 +11599,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "UDP Errors", "tooltip": { "shared": true, @@ -12180,9 +11608,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -12192,23 +11618,17 @@ "format": "short", "label": "datagrams", "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:4233", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -12216,8 +11636,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", - "decimals": null, + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -12296,9 +11717,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "TCP In / Out", "tooltip": { "shared": true, @@ -12307,9 +11726,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": 
null, "show": true, "values": [] }, @@ -12318,22 +11735,16 @@ "format": "short", "label": "datagrams out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -12341,7 +11752,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "", "fieldConfig": { "defaults": { @@ -12432,9 +11845,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "TCP Errors", "tooltip": { "shared": true, @@ -12443,9 +11854,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -12454,22 +11863,17 @@ "format": "short", "label": "counter", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -12477,7 +11881,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -12554,9 +11960,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "TCP Connections", "tooltip": { "shared": true, @@ -12565,9 +11969,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -12577,23 +11979,18 @@ "format": "short", "label": "connections", "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:470", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -12601,7 
+11998,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "", "fieldConfig": { "defaults": { @@ -12688,9 +12087,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "TCP SynCookie", "tooltip": { "shared": true, @@ -12699,9 +12096,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -12710,22 +12105,16 @@ "format": "short", "label": "counter out (-) / in (+)", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -12733,7 +12122,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {}, @@ -12801,9 +12192,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "TCP Direct Transition", "tooltip": { "shared": true, @@ -12812,9 +12201,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -12823,22 +12210,17 @@ "format": "short", "label": "connections", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -12846,7 +12228,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "custom": {} @@ -12933,9 +12317,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "TCP TimeWait", "tooltip": { "shared": true, @@ -12944,9 
+12326,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -12955,32 +12335,28 @@ "format": "short", "label": "Counter", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "repeat": null, "title": "Network Netstat", "type": "row" }, { "collapsed": true, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -12994,7 +12370,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "", "fieldConfig": { "defaults": { @@ -13053,9 +12431,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Node Exporter Scrape Time", "tooltip": { "shared": true, @@ -13064,9 +12440,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -13075,22 +12449,16 @@ "format": "s", "label": "seconds", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -13098,7 +12466,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "", "fieldConfig": { "defaults": { @@ -13171,9 +12541,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Node Exporter Scrape", "tooltip": { "shared": true, @@ -13182,9 +12550,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -13194,33 +12560,26 @@ "format": "short", "label": 
"counter", "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:1485", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "repeat": null, "title": "Node Exporter", "type": "row" } ], "refresh": "30s", - "schemaVersion": 27, + "schemaVersion": 36, "style": "dark", "tags": [ "nodes" @@ -13233,8 +12592,6 @@ "text": "default", "value": "default" }, - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "datasource", @@ -13248,16 +12605,16 @@ "type": "datasource" }, { - "allValue": null, "current": { "selected": false, "text": "dev-master-0", "value": "dev-master-0" }, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "definition": "", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Node:", @@ -13273,23 +12630,18 @@ "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { - "allValue": null, "current": { "selected": false, "text": "[a-z]+|nvme[0-9]+n[0-9]+", "value": "[a-z]+|nvme[0-9]+n[0-9]+" }, - "description": null, - "error": null, "hide": 2, "includeAll": false, - "label": null, "multi": false, "name": "diskdevices", "options": [ @@ -13337,5 +12689,6 @@ "timezone": "browser", "title": "Node", "uid": "rYdddlPWk", - "version": 4 + "version": 4, + "weekStart": "" } diff --git a/dashboards/main/pod.json b/dashboards/main/pod.json index 36c172b8..fdca7f65 100644 --- a/dashboards/main/pod.json +++ b/dashboards/main/pod.json @@ -3,7 +3,10 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", @@ -20,14 +23,16 @@ }, "editable": false, "fiscalYearStartMonth": 0, - "gnetId": null, "graphTooltip": 1, - 
"iteration": 1640791001978, + "id": 117, "links": [], "liveNow": false, "panels": [ { - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "Note that this table shows the average values for the entire period selected in the dashboard. Consequently, it may contain information about Pods or namespaces that were changed or deleted during the selected period.", "fieldConfig": { "defaults": { @@ -35,9 +40,12 @@ "mode": "thresholds" }, "custom": { - "align": null, - "displayMode": "auto", + "align": "auto", + "cellOptions": { + "type": "auto" + }, "filterable": false, + "inspect": false, "minWidth": 75 }, "decimals": 2, @@ -47,8 +55,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -70,8 +77,7 @@ "value": "Time" }, { - "id": "custom.align", - "value": null + "id": "custom.align" }, { "id": "custom.minWidth", @@ -98,8 +104,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" }, { "id": "custom.minWidth", @@ -122,8 +127,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -146,8 +150,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" }, { "id": "mappings", @@ -195,8 +198,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -219,8 +221,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -243,8 +244,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -267,8 +267,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -291,8 +290,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -315,8 +313,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -339,8 +336,7 @@ "value": 2 }, { - "id": "custom.align", - 
"value": null + "id": "custom.align" } ] }, @@ -363,8 +359,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -387,8 +382,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -411,8 +405,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -435,8 +428,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -459,20 +451,19 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" }, { "id": "mappings", "value": [ { - "type": "value", "options": { "-1": { - "text": "hostNet", - "index": 0 + "index": 0, + "text": "hostNet" } - } + }, + "type": "value" } ] } @@ -497,20 +488,19 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" }, { "id": "mappings", "value": [ { - "type": "value", "options": { "-1": { - "text": "hostNet", - "index": 0 + "index": 0, + "text": "hostNet" } - } + }, + "type": "value" } ] } @@ -535,8 +525,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -559,8 +548,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -579,8 +567,7 @@ "value": "short" }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -603,8 +590,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" }, { "id": "custom.minWidth", @@ -627,8 +613,7 @@ "value": "short" }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] } @@ -643,11 +628,25 @@ "id": 141, "links": [], "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, "showHeader": true }, - "pluginVersion": "8.2.6", + "pluginVersion": "10.2.2", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", "expr": 
"kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}", "format": "table", "hide": false, @@ -658,6 +657,10 @@ "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "max by (pod, node) (avg_over_time(kube_pod_info{namespace=\"$namespace\", pod=\"$pod\"}[$__range]))", "format": "table", "instant": true, @@ -666,6 +669,10 @@ "refId": "S" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (pod) (avg_over_time(kube_controller_pod{namespace=\"$namespace\", pod=\"$pod\"}[$__range]) * on (controller_type, controller_name) group_left() sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Off\"}[$__range]))) * 0\nor\nsum by (pod) (avg_over_time(kube_controller_pod{namespace=\"$namespace\", pod=\"$pod\"}[$__range]) * on (controller_type, controller_name) group_left() sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Initial\"}[$__range]))) * 0 + 1\nor\nsum by (pod) (avg_over_time(kube_controller_pod{namespace=\"$namespace\", pod=\"$pod\"}[$__range]) * on (controller_type, controller_name) group_left() sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Auto\"}[$__range]))) * 0 + 2\nor\nsum by (pod) (avg_over_time(kube_controller_pod{namespace=\"$namespace\", pod=\"$pod\"}[$__range]) * on (controller_type, controller_name) group_left() sum by (controller_type, controller_name) (avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", update_mode=\"Recreate\"}[$__range]))) * 0 + 3\nor\nsum by (pod) (avg_over_time(kube_controller_pod{namespace=\"$namespace\", pod=\"$pod\"}[$__range])) * 0 + 5", "format": "table", "hide": false, @@ -675,6 +682,10 @@ "refId": "B" }, { + "datasource": { + "type": "prometheus", + "uid": 
"${ds_prometheus}" + }, "expr": "sum by (container) (rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container=~\"$container\"}[$__range]))\nor\nsum by (container) (avg_over_time(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]) * 0)", "format": "table", "hide": false, @@ -684,6 +695,10 @@ "refId": "C" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]))\nor\nsum by (container) (avg_over_time(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -692,6 +707,10 @@ "refId": "D" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (container) (\n sum by (controller_type, controller_name, pod) (avg_over_time(kube_controller_pod{namespace=\"$namespace\", pod=\"$pod\"}[$__range]))\n * on (controller_type, controller_name) group_right() \n sum by (controller_type, controller_name, container) (\n avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", container=~\"$container\", resource=\"cpu\"}[$__range])))\nor\nsum by (container) (avg_over_time(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -700,6 +719,10 @@ "refId": "E" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n sum by (container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]))\n -\n sum by (container) (rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", 
container=~\"$container\"}[$__range]))\n) > 0\nor\nsum by (container) (avg_over_time(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -708,6 +731,10 @@ "refId": "F" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n (\n sum by (container) (rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]))\n -\n sum by (container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]))\n ) or sum by (container) (rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]))\n) > 0\nor\nsum by (container) (avg_over_time(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -716,6 +743,10 @@ "refId": "G" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n (\n sum by (container) (rate(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range])) \n / \n sum by (container) (rate(container_cpu_cfs_periods_total{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]))\n )\n * sum by (container) (avg_over_time(kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]))\n)\nor\nsum by (container) (avg_over_time(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -724,6 +755,10 @@ "refId": "H" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (container) 
(avg_over_time(container_memory_working_set_bytes:without_kmem{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container=~\"$container\"}[$__range]))\nor\nsum by (container) (avg_over_time(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -732,6 +767,10 @@ "refId": "I" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]))\nor\nsum by (container) (avg_over_time(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -740,6 +779,10 @@ "refId": "J" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (container) (\n sum by (controller_type, controller_name, pod) (avg_over_time(kube_controller_pod{namespace=\"$namespace\", pod=\"$pod\"}[$__range]))\n * on (controller_type, controller_name) group_right() \n sum by (controller_type, controller_name, container) (\n avg_over_time(vpa_target_recommendation{namespace=\"$namespace\", container=~\"$container\", resource=\"memory\"}[$__range])))\nor\nsum by (container) (avg_over_time(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -748,6 +791,10 @@ "refId": "K" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n sum by (container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]))\n -\n sum by (container) (avg_over_time(container_memory_working_set_bytes:without_kmem{namespace=\"$namespace\", pod=\"$pod\", 
container=~\"$container\"}[$__range]))\n) > 0\nor\nsum by (container) (avg_over_time(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -756,6 +803,10 @@ "refId": "L" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "(\n (\n sum by (container) (avg_over_time(container_memory_working_set_bytes:without_kmem{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]))\n -\n sum by (container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]))\n ) or sum by (container) (avg_over_time(container_memory_working_set_bytes:without_kmem{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]))\n) > 0\nor\nsum by (container) (avg_over_time(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -764,7 +815,12 @@ "refId": "M" }, { - "expr": "(\n # Show data rate for the Pod if it had hostNetwork: false during the selected period.\n max_over_time(kube_pod_info{host_network=\"false\", namespace=\"$namespace\", pod=\"$pod\"}[$__range])\n * on(pod)\n sum by (pod) (rate(container_network_receive_bytes_total{namespace=\"$namespace\", pod=\"$pod\"}[$__range]))\n) or (\n # Else return -1 if the Pod had hostNetwork: true during the selecte period.\n max_over_time(kube_pod_info{host_network=\"true\", namespace=\"$namespace\", pod=\"$pod\"}[$__range]) * -1\n)", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "(\n # Show data rate for the Pod if it had hostNetwork: false during the selected period.\n sum by (pod) (rate(container_network_receive_bytes_total{namespace=\"$namespace\", pod=\"$pod\"}[$__range]))\n * on(pod)\n 
max(max_over_time(kube_pod_info{host_network=\"false\", namespace=\"$namespace\", pod=\"$pod\"}[$__range])) by(pod)\n) or (\n # Else return -1 if the Pod had hostNetwork: true during the selecte period.\n max_over_time(kube_pod_info{host_network=\"true\", namespace=\"$namespace\", pod=\"$pod\"}[$__range]) * -1\n)", "format": "table", "hide": false, "instant": true, @@ -773,7 +829,12 @@ "refId": "N" }, { - "expr": "(\n # Show data rate for the Pod if it had hostNetwork: false during the selected period.\n max_over_time(kube_pod_info{host_network=\"false\", namespace=\"$namespace\", pod=\"$pod\"}[$__range])\n * on(pod)\n sum by (pod) (rate(container_network_transmit_bytes_total{namespace=\"$namespace\", pod=\"$pod\"}[$__range]))\n) or (\n # Else return -1 if the Pod had hostNetwork: true during the selecte period.\n max_over_time(kube_pod_info{host_network=\"true\", namespace=\"$namespace\", pod=\"$pod\"}[$__range]) * -1\n)", + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "(\n # Show data rate for the Pod if it had hostNetwork: false during the selected period.\n max(max_over_time(kube_pod_info{host_network=\"false\", namespace=\"$namespace\", pod=\"$pod\"}[$__range])) by(pod)\n * on(pod)\n sum by (pod) (rate(container_network_transmit_bytes_total{namespace=\"$namespace\", pod=\"$pod\"}[$__range]))\n) or (\n # Else return -1 if the Pod had hostNetwork: true during the selecte period.\n max_over_time(kube_pod_info{host_network=\"true\", namespace=\"$namespace\", pod=\"$pod\"}[$__range]) * -1\n)", "format": "table", "hide": false, "instant": true, @@ -782,6 +843,10 @@ "refId": "O" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by(container) (rate(container_fs_reads_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\"}[$__range]))", "format": "table", "hide": false, @@ -791,6 +856,10 @@ "refId": "P" }, { + "datasource": { + "type": "prometheus", + 
"uid": "${ds_prometheus}" + }, "expr": "sum by(container) (rate(container_fs_writes_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\"}[$__range]))", "format": "table", "hide": false, @@ -800,6 +869,10 @@ "refId": "Q" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (container) (increase(kube_pod_container_status_restarts_total{namespace=\"$namespace\", pod=\"$pod\"}[$__range]))", "format": "table", "hide": false, @@ -809,6 +882,10 @@ "refId": "R" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (container) (label_replace(increase(klog_pod_oomkill{namespace=\"$namespace\", pod_name=\"$pod\"}[$__range]), \"container\", \"$1\", \"container_name\", \"(.+)\"))", "format": "table", "hide": false, @@ -818,6 +895,10 @@ "refId": "T" }, { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, "expr": "sum by (container) (avg_over_time(container_memory:kmem{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container=~\"$container\"}[$__range]))\nor\nsum by (container) (avg_over_time(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__range]) * 0)", "format": "table", "instant": true, @@ -877,431 +958,417 @@ "type": "table" }, { - "cards": { - "cardHSpacing": 2, - "cardMinWidth": 5, - "cardRound": null, - "cardVSpacing": 2 + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateGnYlRd", - "defaultColor": "#757575", - "exponent": 0.5, - "mode": "discrete", - "thresholds": [ - { - "color": "#9ac48a", - "tooltip": "ready", - "value": "0" + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" }, - { - "color": "#f4d598", - "tooltip": "running", - "value": "1" + "custom": { + "fillOpacity": 70, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"insertNulls": false, + "lineWidth": 0, + "spanNulls": false }, - { - "color": "#890f02", - "tooltip": "terminated/Error", - "value": "2" - }, - { - "color": "#e24d42", - "tooltip": "terminated/Completed", - "value": "3" - }, - { - "color": "#f29191", - "tooltip": "terminated/ContainerCannotRun", - "value": "4" - }, - { - "color": "#fce2de", - "tooltip": "terminated/OOMKilled", - "value": "5" - }, - { - "color": "#1f78c1", - "tooltip": "waiting/ContainerCreating", - "value": "6" - }, - { - "color": "#64b0c8", - "tooltip": "waiting/CrashLoopBackOff", - "value": "7" - }, - { - "color": "#65c5db", - "tooltip": "waiting/CreateContainerConfigError", - "value": "8" - }, - { - "color": "#badff4", - "tooltip": "waiting/ErrImagePull", - "value": "9" - }, - { - "color": "#cffaff", - "tooltip": "waiting/ImagePullBackOff", - "value": "10" + "mappings": [ + { + "options": { + "0": { + "color": "#9ac48a", + "index": 0, + "text": "ready" + }, + "1": { + "color": "#f4d598", + "index": 1, + "text": "running" + }, + "2": { + "color": "#890f02", + "index": 2, + "text": "terminated/Error" + }, + "3": { + "color": "#e24d42", + "index": 3, + "text": "terminated/Completed" + }, + "4": { + "color": "#f29191", + "index": 4, + "text": "terminated/ContainerCannotRun" + }, + "5": { + "color": "#fce2de", + "index": 5, + "text": "terminated/OOMKilled" + }, + "6": { + "color": "#1f78c1", + "index": 6, + "text": "waiting/ContainerCreating" + }, + "7": { + "color": "#64b0c8", + "index": 7, + "text": "waiting/CrashLoopBackoff" + }, + "8": { + "color": "#65c5db", + "index": 8, + "text": "waiting/CreateContainerConfigError" + }, + "9": { + "color": "#badff4", + "index": 9, + "text": "waiting/ErrImagePull" + }, + "10": { + "color": "#cffaff", + "index": 10, + "text": "waiting/ImagePullBackOff" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] } - ] + }, + "overrides": [] }, - "datasource": "$ds_prometheus", "gridPos": { "h": 8, "w": 
24, "x": 0, "y": 9 }, - "highlightCards": true, "id": 47, - "legend": { - "show": true - }, "links": [], - "nullPointMode": "as empty", - "pageSize": 15, - "seriesFilterIndex": -1, - "statusmap": { - "ConfigVersion": "v1" + "options": { + "alignValue": "left", + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "mergeValues": false, + "rowHeight": 0.9, + "showValue": "never", + "tooltip": { + "mode": "single", + "sort": "none" + } }, "targets": [ { - "expr": "(min_over_time(kube_pod_container_status_ready{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) == 1) * 0", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "expr": "min by (container) (\n ((min_over_time(kube_pod_container_status_ready{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) == 1) * 0) or\n ((min_over_time(kube_pod_container_status_running{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) == 1) * 1) or\n (\n (max_over_time(kube_pod_container_status_terminated_reason{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) == 1)\n * on(reason) group_left()\n (\n label_replace(vector(2), \"reason\", \"Error\", \"\", \"\") or\n label_replace(vector(3), \"reason\", \"Completed\", \"\", \"\") or\n label_replace(vector(4), \"reason\", \"ContainerCannotRun\", \"\", \"\") or\n label_replace(vector(5), \"reason\", \"OOMKilled\", \"\", \"\")\n )\n ) or\n (\n (max_over_time(kube_pod_container_status_waiting_reason{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) == 1)\n * on(reason) group_left()\n (\n label_replace(vector(6), \"reason\", \"ContainerCreating\", \"\", \"\") or\n label_replace(vector(7), \"reason\", \"CrashLoopBackOff\", \"\", \"\") or\n label_replace(vector(8), \"reason\", \"CreateContainerConfigError\", \"\", \"\") or\n label_replace(vector(9), \"reason\", \"ErrImagePull\", \"\", \"\") or\n label_replace(vector(10), \"reason\", \"ImagePullBackOff\", \"\", \"\")\n )\n )\n)", "format": 
"time_series", "interval": "", "intervalFactor": 1, "legendFormat": "{{ container }}", "refId": "A" - }, - { - "expr": "(min_over_time(kube_pod_container_status_running{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) == 1) * 1", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ container }}", - "refId": "B" - }, - { - "expr": "(max_over_time(kube_pod_container_status_terminated_reason{namespace=\"$namespace\", pod=\"$pod\", reason=\"Error\"}[$__rate_interval]) == 1) * 2", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ container }}", - "refId": "C" - }, - { - "expr": "(max_over_time(kube_pod_container_status_terminated_reason{namespace=\"$namespace\", pod=\"$pod\", reason=\"Completed\"}[$__rate_interval]) == 1) * 3", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ container }}", - "refId": "D" - }, - { - "expr": "(max_over_time(kube_pod_container_status_terminated_reason{namespace=\"$namespace\", pod=\"$pod\", reason=\"ContainerCannotRun\"}[$__rate_interval]) == 1) * 4", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ container }}", - "refId": "E" - }, - { - "expr": "(max_over_time(kube_pod_container_status_terminated_reason{namespace=\"$namespace\", pod=\"$pod\", reason=\"OOMKilled\"}[$__rate_interval]) == 1) * 5", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ container }}", - "refId": "F" - }, - { - "expr": "(max_over_time(kube_pod_container_status_waiting_reason{namespace=\"$namespace\", pod=\"$pod\", reason=\"ContainerCreating\"}[$__rate_interval]) == 1) * 6", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ container }}", - "refId": "G" - }, - { - "expr": "(max_over_time(kube_pod_container_status_waiting_reason{namespace=\"$namespace\", pod=\"$pod\", reason=\"CrashLoopBackOff\"}[$__rate_interval]) == 1) * 7", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "legendFormat": "{{ container 
}}", - "refId": "H" - }, - { - "expr": "(max_over_time(kube_pod_container_status_waiting_reason{namespace=\"$namespace\", pod=\"$pod\", reason=\"CreateContainerConfigError\"}[$__rate_interval]) == 1) * 8", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ container }}", - "refId": "I" - }, - { - "expr": "(max_over_time(kube_pod_container_status_waiting_reason{namespace=\"$namespace\", pod=\"$pod\", reason=\"ErrImagePull\"}[$__rate_interval]) == 1) * 9", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ container }}", - "refId": "J" - }, - { - "expr": "(max_over_time(kube_pod_container_status_waiting_reason{namespace=\"$namespace\", pod=\"$pod\", reason=\"ImagePullBackOff\"}[$__rate_interval]) == 1) * 10", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ container }}", - "refId": "K" } ], "title": "Status", - "tooltip": { - "extraInfo": "", - "freezeOnClick": true, - "items": [], - "show": true, - "showExtraInfo": false, - "showItems": false - }, - "type": "flant-statusmap-panel", - "useMax": true, - "usingPagination": false, - "xAxis": { - "show": true - }, - "yAxis": { - "maxWidth": -1, - "minWidth": -1, - "show": true - }, - "yAxisSort": "metrics", - "yLabel": { - "delimiter": "", - "labelTemplate": "", - "usingSplitLabel": false - } + "type": "state-timeline" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "The number of container restarts", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + 
"viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 4, "w": 12, "x": 0, "y": 17 }, - "hiddenSeries": false, "id": 166, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (container) 
(increase(kube_pod_container_status_restarts_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ container }}", "refId": "A" - }, - { - "expr": "sum (increase(kube_pod_container_status_restarts_total{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Containers restarts", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "decimals": 0, - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "The number of container OOM kills", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + 
"stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 4, "w": 12, "x": 12, "y": 17 }, - "hiddenSeries": false, "id": 167, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (container) (increase(oom_kills:normalized{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ container }}", "refId": "A" - }, - { - "expr": "sum 
(increase(oom_kills:normalized{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "OOM Killed Processes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "decimals": 0, - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -1314,176 +1381,294 @@ "type": "row" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", - "fill": 1, - "fillGradient": 0, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + 
"mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 22 }, - "hiddenSeries": false, "id": 2, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by(container) (rate(container_cpu_usage_seconds_total{container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "time_series", "instant": false, "intervalFactor": 1, "legendFormat": "{{ container }}", "refId": "A" - }, - { - "expr": "sum (rate(container_cpu_usage_seconds_total{container!=\"POD\", pod=\"$pod\", 
namespace=\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Usage by container", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "The total value may not equal the sum of system and user times because of the kernel's cgroup accounting peculiarities. 
Read more here: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "System" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#e24d42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "User" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#1f78c1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 22 }, - "hiddenSeries": false, "id": 3, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - 
"lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Total.*/", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, - { - "alias": "System", - "color": "#e24d42" - }, - { - "alias": "User", - "color": "#1f78c1" + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by(pod) (rate(container_cpu_system_seconds_total{container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "time_series", "instant": false, @@ -1492,398 +1677,448 @@ "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by(pod) (rate(container_cpu_user_seconds_total{container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "User", "refId": "B" - }, - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "D" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Usage by state", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", 
- "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph shows the requested CPU resources higher than the actual CPU consumption. In other words, it shows CPU resources that can be \"freed\" without affecting the service.", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 
12, "x": 0, "y": 31 }, - "hiddenSeries": false, "id": 76, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace, pod, container)\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{container!=\"POD\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__rate_interval]))\n ) > 0\n )", "format": "time_series", "hide": false, "intervalFactor": 1, "legendFormat": "{{ container }}", "refId": "C" - }, - { - "expr": "sum\n (\n (\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{container!=\"POD\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__rate_interval]))\n ) > 0\n 
)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Over-requested by container", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph shows the requested CPU resources lower than the actual CPU consumption. 
In other words, it shows CPU resources that need to be \"reserved\" for the service to run smoothly.", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 31 }, - "hiddenSeries": false, "id": 123, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": 
false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (namespace, pod, container)\n (\n (\n (\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__rate_interval]))\n ) > 0\n )", "format": "time_series", "hide": false, "intervalFactor": 1, "legendFormat": "{{ container }}", "refId": "C" - }, - { - "expr": "sum\n (\n (\n (\n sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__rate_interval]))\n -\n sum by(namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__rate_interval]))\n ) or sum by(namespace, pod, container) (rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__rate_interval]))\n ) > 0\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Under-requested by container", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - 
"type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "The absence of data on the graph means that container resources are not set", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": 
"color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 6, "w": 24, "x": 0, "y": 39 }, - "hiddenSeries": false, "id": 36, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "(sum by (pod, container) (rate(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", container=~\"$container\", pod=\"$pod\"}[$__rate_interval])) / sum by (container, pod) (rate(container_cpu_cfs_periods_total{namespace=\"$namespace\", container=~\"$container\", pod=\"$pod\"}[$__rate_interval]))) * sum by (pod, container) (kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ container }}", "refId": "A" - }, - { - "expr": "sum by (pod) ((sum by (pod, container) (rate(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", container=~\"$container\", pod=\"$pod\"}[$__rate_interval])) / sum by (container, pod) (rate(container_cpu_cfs_periods_total{namespace=\"$namespace\", 
container=~\"$container\", pod=\"$pod\"}[$__rate_interval]))) * sum by (pod, container) (kube_pod_container_resource_limits{resource=\"cpu\",unit=\"core\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Throttling", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -1893,62 +2128,145 @@ "id": 95, "panels": [ { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "The total value may not equal the sum of system and user times because of the kernel's cgroup accounting peculiarities. 
Read more here: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt", - "fill": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Usage" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#629e51", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#f4d598", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limits" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#c15c17", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VPA Target" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#447ebc", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 9, - "w": 8, + "w": 12, "x": 0, "y": 46 }, "id": 102, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - 
"renderer": "flot", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", "repeat": "container", "repeatDirection": "h", - "seriesOverrides": [ - { - "alias": "Usage", - "color": "#629e51" - }, - { - "alias": "Requests", - "color": "#f4d598" - }, - { - "alias": "Limits", - "color": "#c15c17" - }, - { - "alias": "VPA Target", - "color": "#447ebc" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": true, "targets": [ { "expr": "sum by(container) (rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container=\"$container\"}[$__rate_interval]))", @@ -1979,54 +2297,19 @@ "refId": "F" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "$container", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" } ], "title": "Containers CPU", "type": "row" }, { - "collapsed": true, - "datasource": null, + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -2034,139 +2317,204 @@ "y": 46 }, "id": 5, - "panels": [ - { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", - "description": "The total value may not equal the sum of system and user times because of the kernel's cgroup accounting peculiarities. 
Read more here: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt", - "fill": 1, - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 47 - }, - "id": 7, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "container", - "repeatDirection": "h", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false - }, - { - "alias": "System", - "color": "#e24d42" - }, - { - "alias": "User", - "color": "#1f78c1" - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(container) (rate(container_cpu_system_seconds_total{container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\", container=\"$container\"}[$__rate_interval]))", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "legendFormat": "System", - "refId": "A" - }, - { - "expr": "sum by(container) (rate(container_cpu_user_seconds_total{container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\", container=\"$container\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "User", - "refId": "B" - }, - { - "expr": "sum by(container) (rate(container_cpu_usage_seconds_total{container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\", container=\"$container\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "D" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "$container", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - 
}, - "yaxes": [ - { - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "repeat": null, + "panels": [], "title": "Containers CPU by state", "type": "row" }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "description": "The total value may not equal the sum of system and user times because of the kernel's cgroup accounting peculiarities. Read more here: https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "System" 
+ }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#e24d42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "User" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#1f78c1", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 47 + }, + "id": 7, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", + "repeat": "container", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "expr": "sum by(container) (rate(container_cpu_system_seconds_total{container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\", container=\"$container\"}[$__rate_interval]))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "System", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "expr": "sum by(container) (rate(container_cpu_user_seconds_total{container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\", container=\"$container\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "User", + "refId": "B" + } + ], + "title": "$container", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 47 + "y": 65 }, "id": 12, "panels": [], @@ -2174,177 +2522,298 @@ "type": "row" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", - "fill": 1, - "fillGradient": 0, + "datasource": { + "type": 
"prometheus", + "uid": "$ds_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 9, "w": 12, "x": 0, - "y": 48 + "y": 66 }, - "hiddenSeries": false, "id": 6, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "lines": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + 
"placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by(container) (avg_over_time(container_memory_working_set_bytes:without_kmem{container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "time_series", "instant": false, "intervalFactor": 1, "legendFormat": "{{ container }}", "refId": "A" - }, - { - "expr": "sum (avg_over_time(container_memory_working_set_bytes:without_kmem{container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Usage by container", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "bytes", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "The Working set bytes metric is the actual memory used by the container, as it includes active file memory. When its value approaches the limit, the container can be killed by the OOMKiller. 
This value can be higher than the sum RSS and Cache since not all active file memory is Cache.", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Working set bytes without kmem" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "rgb(0, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Kmem" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "rgb(255, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + } + ] + }, "gridPos": { "h": 9, "w": 12, "x": 12, - "y": 48 + 
"y": 66 }, - "hiddenSeries": false, "id": 8, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Working set bytes without kmem", - "color": "rgb(0, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false }, - { - "alias": "Kmem", - "color": "rgb(255, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false + "tooltip": { + "mode": "multi", + "sort": "none" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { "expr": "sum by(pod) (avg_over_time(container_memory_rss{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\"}[$__rate_interval]))", @@ -2383,368 +2852,583 @@ "refId": "E" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Usage by state", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": 
{ + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph shows the requested Memory resources higher than the actual Memory consumption. In other words, it shows Memory resources that can be \"freed\" without affecting the service.", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 57 + "y": 75 }, - "hiddenSeries": false, "id": 106, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, 
- "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "lines": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (container)\n (\n (\n sum by (namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__rate_interval]))\n -\n sum by (namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"}[$__rate_interval]))\n ) > 0\n )", "format": "time_series", "hide": false, "intervalFactor": 1, "legendFormat": "{{ container }}", "refId": "C" - }, - { - "expr": "sum\n (\n (\n sum by (namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__rate_interval]))\n -\n sum by (namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"}[$__rate_interval]))\n ) > 0\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Over-requested by container", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - 
"values": [] - }, - "yaxes": [ + "transformations": [ { - "decimals": null, - "format": "bytes", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "decimals": null, - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph shows the requested Memory resources higher than the actual Memory consumption. In other words, it shows Memory resources that need to be \"reserved\" for the service to run smoothly.", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", 
+ "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 57 + "y": 75 }, - "hiddenSeries": false, "id": 136, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "lines": false, - "stack": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by (container)\n (\n (\n (\n sum by (namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__rate_interval]))\n -\n sum by (namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"}[$__rate_interval]))\n ) or sum by (namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"}[$__rate_interval]))\n ) > 0\n )", "format": "time_series", "hide": false, "intervalFactor": 1, "legendFormat": "{{ container }}", "refId": "C" - }, - { - "expr": "sum\n (\n (\n (\n sum by (namespace, pod, container) 
(avg_over_time(container_memory_working_set_bytes:without_kmem{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"}[$__rate_interval]))\n -\n sum by (namespace, pod, container) (avg_over_time(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"}[$__rate_interval]))\n ) or sum by (namespace, pod, container) (avg_over_time(container_memory_working_set_bytes:without_kmem{namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"}[$__rate_interval]))\n ) > 0\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "A" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Under-requested by container", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "decimals": null, - "format": "bytes", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "decimals": null, - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 65 + "y": 83 }, "id": 16, "panels": [ { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "The Working set bytes metric is the actual memory used by the container, as it includes active file memory. 
When its value approaches the limit, the container can be killed by the OOMKiller. This value can be higher than the sum RSS and Cache since not all active file memory is Cache.", - "fill": 1, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Working set bytes without kmem" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "rgb(0, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Kmem" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "rgb(255, 0, 0)", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Limits" + }, + "properties": [ + { + "id": 
"color", + "value": { + "fixedColor": "#c15c17", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Requests" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#f4d598", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VPA Target" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#447ebc", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + } + ] + }, "gridPos": { "h": 9, - "w": 8, + "w": 12, "x": 0, - "y": 66 + "y": 75 }, "id": 19, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", + "options": { + "legend": { + "calcs": [], + 
"displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.13", "repeat": "container", "repeatDirection": "h", - "seriesOverrides": [ - { - "alias": "Working set bytes without kmem", - "color": "rgb(0, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false - }, - { - "alias": "Kmem", - "color": "rgb(255, 0, 0)", - "dashes": true, - "fill": 0, - "linewidth": 2, - "stack": false - }, - { - "alias": "Limits", - "color": "#c15c17", - "dashes": true, - "fill": 0, - "lines": false, - "linewidth": 2, - "stack": false - }, - { - "alias": "Requests", - "color": "#f4d598", - "dashes": true, - "fill": 0, - "lines": false, - "linewidth": 2, - "stack": false - }, - { - "alias": "VPA Target", - "color": "#447ebc", - "dashes": true, - "fill": 0, - "lines": false, - "linewidth": 2, - "stack": false - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, "targets": [ { "expr": "sum by(container) (avg_over_time(container_memory_rss{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container=\"$container\"}[$__rate_interval]))", @@ -2805,46 +3489,8 @@ "refId": "H" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "$container", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" } ], "title": "Containers Memory", @@ -2852,12 +3498,15 @@ }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": 
"$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 66 + "y": 84 }, "id": 25, "panels": [], @@ -2865,25 +3514,34 @@ "type": "row" }, { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "description": "This graph shows Network Receive (except for the hostNetwork Pods)", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisPlacement": "auto", + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", + "axisPlacement": "auto", "barAlignment": 0, - "drawStyle": "bars", - "fillOpacity": 100, + "drawStyle": "line", + "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "lineInterpolation": "linear", - "lineWidth": 0, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" @@ -2914,8 +3572,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -2943,24 +3600,19 @@ { "id": "color", "value": { - "mode": "fixed", - "fixedColor": "transparent" + "fixedColor": "transparent", + "mode": "fixed" } } ] } ] }, - "datasource": { - "uid": "${ds_prometheus}", - "type": "prometheus" - }, - "description": "This graph shows Network Receive (except for the hostNetwork Pods)", "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 67 + "y": 85 }, "id": 27, "links": [], @@ -2970,7 +3622,8 @@ "mean" ], "displayMode": "table", - "placement": "right" + "placement": "right", + "showLegend": true }, "tooltip": { "mode": "multi", @@ -2994,32 +3647,39 @@ "refId": "B" } ], - "timeFrom": null, - "timeShift": null, "title": "Receive", "transformations": [], "type": "timeseries" }, { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "description": "This graph shows Network Transmit (except for the hostNetwork Pods)", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, 
"custom": { - "axisPlacement": "auto", + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", + "axisPlacement": "auto", "barAlignment": 0, - "drawStyle": "bars", - "fillOpacity": 100, + "drawStyle": "line", + "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "lineInterpolation": "linear", - "lineWidth": 0, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" @@ -3050,8 +3710,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -3079,24 +3738,19 @@ { "id": "color", "value": { - "mode": "fixed", - "fixedColor": "transparent" + "fixedColor": "transparent", + "mode": "fixed" } } ] } ] }, - "datasource": { - "uid": "${ds_prometheus}", - "type": "prometheus" - }, - "description": "This graph shows Network Transmit (except for the hostNetwork Pods)", "gridPos": { "h": 7, "w": 12, "x": 12, - "y": 67 + "y": 85 }, "id": 62, "links": [], @@ -3106,7 +3760,10 @@ "mean" ], "displayMode": "table", - "placement": "right" + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { "mode": "multi", @@ -3130,20 +3787,21 @@ "refId": "B" } ], - "timeFrom": null, - "timeShift": null, "title": "Transmit", "transformations": [], "type": "timeseries" }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 74 + "y": 92 }, "id": 30, "panels": [], @@ -3151,229 +3809,292 @@ "type": "row" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", - "fill": 1, - "fillGradient": 0, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "iops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 75 + "y": 93 }, - "hiddenSeries": false, "id": 32, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": 
"desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by(container) (rate(container_fs_reads_total{container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ container }}", "refId": "A" - }, - { - "expr": "sum (rate(container_fs_reads_total{container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Read", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "iops", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", - "fill": 1, - "fillGradient": 0, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": 
"linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "iops" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 75 + "y": 93 }, - "hiddenSeries": false, "id": 63, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Total", - "bars": false, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "sum by(container) (rate(container_fs_writes_total{container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))", 
"format": "time_series", "intervalFactor": 1, "legendFormat": "{{ container }}", "refId": "A" - }, - { - "expr": "sum (rate(container_fs_writes_total{container!=\"POD\", pod=\"$pod\", namespace=\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Write", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "iops", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 83 + "y": 101 }, "id": 150, "panels": [], @@ -3381,271 +4102,356 @@ "type": "row" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": 
false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Total/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 84 + "y": 102 }, - "hiddenSeries": false, "id": 148, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Total/", - "fill": 0, - "linewidth": 2, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "avg_over_time(kubelet_volume_stats_capacity_bytes{namespace=\"$namespace\"}[$__rate_interval])\n* 
\non (namespace, persistentvolumeclaim) group_left (pod) \n(\n avg_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])\n * on (namespace, persistentvolumeclaim) group_right(pod) \n max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0))\n * on (namespace, persistentvolumeclaim) group_right (pod)\n avg_over_time(kube_persistentvolumeclaim_status_phase{namespace=\"$namespace\", phase=\"Bound\"}[$__rate_interval])\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Provisioned {{ persistentvolumeclaim }}", "refId": "A" - }, - { - "expr": "sum (\n avg_over_time(kubelet_volume_stats_capacity_bytes{namespace=\"$namespace\"}[$__rate_interval])\n * \n on (namespace, persistentvolumeclaim) group_left (pod) \n (\n avg_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])\n * on (namespace, persistentvolumeclaim) group_right(pod) \n max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0))\n * on (namespace, persistentvolumeclaim) group_right (pod)\n avg_over_time(kube_persistentvolumeclaim_status_phase{namespace=\"$namespace\", phase=\"Bound\"}[$__rate_interval])\n )\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "PVC Provisioned (except local storage classes)", - "tooltip": 
{ - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ + "transformations": [ { - "format": "bytes", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "id": "calculateField", + "options": {} } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Total/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + 
}, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 84 + "y": 102 }, - "hiddenSeries": false, "id": 151, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Total/", - "fill": 0, - "linewidth": 2, - "stack": false + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, + }, + "pluginVersion": "8.5.13", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "expr": "(avg_over_time(kubelet_volume_stats_capacity_bytes{namespace=\"$namespace\"}[$__rate_interval]) - avg_over_time(kubelet_volume_stats_available_bytes{namespace=\"$namespace\"}[$__rate_interval]))\n* \non (namespace, persistentvolumeclaim) group_left (pod) \n(\n avg_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])\n * on (namespace, persistentvolumeclaim) group_right(pod) \n max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0))\n * on (namespace, 
persistentvolumeclaim) group_right (pod)\n avg_over_time(kube_persistentvolumeclaim_status_phase{namespace=\"$namespace\", phase=\"Bound\"}[$__rate_interval])\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Usage {{ persistentvolumeclaim }}", "refId": "A" - }, - { - "expr": "sum (\n (avg_over_time(kubelet_volume_stats_capacity_bytes{namespace=\"$namespace\"}[$__rate_interval]) - avg_over_time(kubelet_volume_stats_available_bytes{namespace=\"$namespace\"}[$__rate_interval]))\n * \n on (namespace, persistentvolumeclaim) group_left (pod) \n (\n avg_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])\n * on (namespace, persistentvolumeclaim) group_right(pod) \n max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0))\n * on (namespace, persistentvolumeclaim) group_right (pod)\n avg_over_time(kube_persistentvolumeclaim_status_phase{namespace=\"$namespace\", phase=\"Bound\"}[$__rate_interval])\n )\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Total", - "refId": "B" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "PVC Usage (except local storage classes)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" 
}, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 92 + "y": 110 }, - "hiddenSeries": false, "id": 152, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null as zero", "options": { - "alertThreshold": true + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - 
"spaceLength": 10, - "stack": true, - "steppedLine": false, + "pluginVersion": "8.5.13", "targets": [ { "expr": "(avg_over_time(kubelet_volume_stats_capacity_bytes{namespace=\"$namespace\"}[$__rate_interval]) - avg_over_time(kubelet_volume_stats_available_bytes{namespace=\"$namespace\"}[$__rate_interval]))\n* \non (namespace, persistentvolumeclaim) group_left (pod) \n(\n avg_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])\n * on (namespace, persistentvolumeclaim) group_right(pod) \n max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0))\n * on (namespace, persistentvolumeclaim) group_right (pod)\n avg_over_time(kube_persistentvolumeclaim_status_phase{namespace=\"$namespace\", phase=\"Bound\"}[$__rate_interval])\n)\n/\n(\n avg_over_time(kubelet_volume_stats_capacity_bytes{namespace=\"$namespace\"}[$__rate_interval])\n * \n on (namespace, persistentvolumeclaim) group_left (pod) \n (\n avg_over_time(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])\n * on (namespace, persistentvolumeclaim) group_right(pod) \n max by (namespace, persistentvolumeclaim) (\n label_replace(\n max_over_time(kube_persistentvolumeclaim_info{namespace=\"$namespace\"}[$__rate_interval]),\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\")\n + on (persistentvolume) group_left() max by (persistentvolume) (kube_persistentvolume_is_local == 0))\n * on (namespace, persistentvolumeclaim) group_right (pod)\n avg_over_time(kube_persistentvolumeclaim_status_phase{namespace=\"$namespace\", phase=\"Bound\"}[$__rate_interval])\n )\n)", @@ -3655,68 +4461,38 @@ "refId": "B" } ], - "thresholds": [], - "timeFrom": null, 
- "timeRegions": [], - "timeShift": null, "title": "PVC Usage in % (except local storage classes)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 100 + "y": 118 }, "id": 156, "panels": [ { - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": { - "align": null, - "displayMode": "auto" + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false }, "decimals": 2, "displayName": "", @@ -3725,8 +4501,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3748,8 +4523,7 @@ "value": "Time" }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -3772,8 +4546,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -3796,8 +4569,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -3820,8 +4592,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -3844,8 +4615,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -3868,8 +4638,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -3892,8 +4661,7 @@ "value": 2 }, { - "id": "custom.align", - "value": 
null + "id": "custom.align" } ] }, @@ -3912,8 +4680,7 @@ "value": "short" }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -3936,8 +4703,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] }, @@ -3960,8 +4726,7 @@ "value": 2 }, { - "id": "custom.align", - "value": null + "id": "custom.align" } ] } @@ -3971,14 +4736,23 @@ "h": 9, "w": 24, "x": 0, - "y": 101 + "y": 110 }, "id": 154, "links": [], "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, "showHeader": true }, - "pluginVersion": "8.2.6", + "pluginVersion": "10.2.2", "targets": [ { "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n kube_persistentvolumeclaim_info{namespace=\"$namespace\"}\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=~\"$pod\"}\n)", @@ -4084,60 +4858,90 @@ "type": "table" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds_prometheus", + "datasource": { + "uid": "$ds_prometheus" + }, "description": "This graph does not show any localstorage-related information due to incorrectly calculating the occupied space on localstorage disks", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 110 + "y": 119 }, - "hiddenSeries": false, "id": 157, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "null", "options": { - "alertThreshold": true + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, - "percentage": false, - "pluginVersion": "8.2.6", - "pointradius": 5, - "points": false, - "renderer": "flot", + "pluginVersion": "8.5.13", "repeat": "persistentvolumeclaim", "repeatDirection": "h", - "seriesOverrides": [ - { - "alias": "/Total/", - "fill": 0, - "linewidth": 2, - "stack": false - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { "expr": "max by (namespace, persistentvolumeclaim, storageclass) (\n label_replace(\n kube_persistentvolumeclaim_info{namespace=\"$namespace\"},\n \"persistentvolume\", \"$1\", \"volumename\", \"(.*)\") \n * on (persistentvolume) group_left() \n avg_over_time(kube_persistentvolume_capacity_bytes[$__rate_interval])\n) \nand on (namespace, persistentvolumeclaim) \nmax by (namespace, persistentvolumeclaim) (\n kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", persistentvolumeclaim=\"$persistentvolumeclaim\", pod=~\"$pod\"}\n)", @@ -4161,46 +4965,8 @@ "refId": "C" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "$persistentvolumeclaim", - 
"tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" } ], "title": "PVC Detailed", @@ -4208,8 +4974,7 @@ } ], "refresh": "30s", - "schemaVersion": 32, - "style": "dark", + "schemaVersion": 38, "tags": [ "main" ], @@ -4221,8 +4986,6 @@ "text": "default", "value": "default" }, - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Prometheus", @@ -4237,16 +5000,16 @@ "type": "datasource" }, { - "allValue": null, "current": { "selected": false, - "text": "candi-dashboard-stage", - "value": "candi-dashboard-stage" + "text": "d8-monitoring", + "value": "d8-monitoring" + }, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" }, - "datasource": "$ds_prometheus", "definition": "", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Namespace", @@ -4267,16 +5030,16 @@ "useTags": false }, { - "allValue": null, "current": { "selected": false, - "text": "postgres-0", - "value": "postgres-0" + "text": "prometheus-main-0", + "value": "prometheus-main-0" + }, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" }, - "datasource": "$ds_prometheus", "definition": "", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Pod", @@ -4299,7 +5062,7 @@ { "allValue": "", "current": { - "selected": true, + "selected": false, "text": [ "All" ], @@ -4307,10 +5070,11 @@ "$__all" ] }, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "definition": "", - "description": null, - 
"error": null, "hide": 2, "includeAll": true, "label": "Container", @@ -4333,7 +5097,7 @@ { "allValue": ".*", "current": { - "selected": true, + "selected": false, "text": [ "All" ], @@ -4341,10 +5105,11 @@ "$__all" ] }, - "datasource": "$ds_prometheus", + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "definition": "label_values(kube_pod_spec_volumes_persistentvolumeclaims_info{namespace=\"$namespace\", pod=\"$pod\"}, persistentvolumeclaim)", - "description": null, - "error": null, "hide": 2, "includeAll": true, "label": "PersistentVolumeClaim", @@ -4365,16 +5130,16 @@ "useTags": false }, { - "allValue": null, "current": { "selected": false, - "text": "No controller", - "value": "No controller" + "text": "Deployment", + "value": "Deployment" + }, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" }, - "datasource": "$ds_prometheus", "definition": "label_values(kube_controller_pod{namespace=~\"$namespace\"}, controller_type)", - "description": null, - "error": null, "hide": 2, "includeAll": false, "label": "Controller Type", @@ -4395,16 +5160,16 @@ "useTags": false }, { - "allValue": null, "current": { "selected": false, - "text": "", - "value": "" + "text": "aggregating-proxy", + "value": "aggregating-proxy" + }, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" }, - "datasource": "$ds_prometheus", "definition": "label_values(kube_controller_pod{namespace=~\"$namespace\"}, controller_name)", - "description": null, - "error": null, "hide": 2, "includeAll": false, "label": "Controller Name", @@ -4425,12 +5190,12 @@ "useTags": false }, { - "datasource": "$ds_prometheus", - "description": null, - "error": null, + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, "filters": [], "hide": 0, - "label": null, "name": "Filters", "skipUrlSync": false, "type": "adhoc" @@ -4469,5 +5234,6 @@ "timezone": "", "title": "Namespace / Controller / Pod", "uid": "m41rB4yiz1", - "version": 2 + "version": 2, 
+ "weekStart": "" } diff --git a/dashboards/victoria-metrics/backupmanager.json b/dashboards/victoria-metrics/backupmanager.json index 71e44416..74c3c9d2 100644 --- a/dashboards/victoria-metrics/backupmanager.json +++ b/dashboards/victoria-metrics/backupmanager.json @@ -12,7 +12,7 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "9.0.4" + "version": "10.4.0" }, { "type": "datasource", @@ -124,9 +124,11 @@ "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.0.4", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -198,9 +200,11 @@ "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.0.4", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -260,9 +264,11 @@ "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.0.4", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -323,9 +329,11 @@ "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.0.4", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -399,9 +407,11 @@ "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.0.4", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -471,9 +481,11 @@ "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.0.4", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -546,9 +558,11 @@ "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - 
"pluginVersion": "9.0.4", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -577,7 +591,9 @@ }, "custom": { "align": "auto", - "displayMode": "auto", + "cellOptions": { + "type": "auto" + }, "inspect": false }, "mappings": [], @@ -630,7 +646,9 @@ }, "id": 22, "options": { + "cellHeight": "sm", "footer": { + "countRows": false, "fields": "", "reducer": [ "sum" @@ -645,7 +663,7 @@ } ] }, - "pluginVersion": "9.0.4", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -700,7 +718,9 @@ }, "custom": { "align": "auto", - "displayMode": "auto", + "cellOptions": { + "type": "auto" + }, "inspect": false }, "mappings": [], @@ -753,7 +773,9 @@ }, "id": 21, "options": { + "cellHeight": "sm", "footer": { + "countRows": false, "fields": "", "reducer": [ "sum" @@ -768,7 +790,7 @@ } ] }, - "pluginVersion": "9.0.4", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -885,7 +907,7 @@ "min", "mean" ], - "displayMode": "table", + "displayMode": "list", "placement": "bottom", "showLegend": false }, @@ -1106,7 +1128,9 @@ }, "custom": { "align": "auto", - "displayMode": "auto", + "cellOptions": { + "type": "auto" + }, "inspect": false }, "mappings": [], @@ -1251,7 +1275,8 @@ "legend": { "calcs": [], "displayMode": "list", - "placement": "bottom" + "placement": "bottom", + "showLegend": true }, "orientation": "auto", "showValue": "auto", @@ -1343,7 +1368,7 @@ "max", "mean" ], - "displayMode": "table", + "displayMode": "list", "placement": "right", "showLegend": false }, @@ -1436,7 +1461,8 @@ "legend": { "calcs": [], "displayMode": "list", - "placement": "bottom" + "placement": "bottom", + "showLegend": true }, "orientation": "auto", "showValue": "auto", @@ -1657,8 +1683,7 @@ } ], "refresh": "1m", - "schemaVersion": 36, - "style": "dark", + "schemaVersion": 39, "tags": [], "templating": { "list": [ diff --git a/dashboards/victoria-metrics/operator.json b/dashboards/victoria-metrics/operator.json index 4f9cc6c7..6f171613 100644 --- 
a/dashboards/victoria-metrics/operator.json +++ b/dashboards/victoria-metrics/operator.json @@ -6,13 +6,7 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "9.2.2" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph (old)", - "version": "" + "version": "10.4.0" }, { "type": "datasource", @@ -31,6 +25,12 @@ "id": "text", "name": "Text", "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" } ], "annotations": { @@ -67,7 +67,7 @@ "collapsed": false, "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "PB894574A363DF0AF" }, "gridPos": { "h": 1, @@ -81,7 +81,7 @@ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "PB894574A363DF0AF" }, "refId": "A" } @@ -110,7 +110,7 @@ "content": "
$version
", "mode": "markdown" }, - "pluginVersion": "9.2.2", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -170,10 +170,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.2", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -205,12 +207,12 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "red", "value": null }, { - "color": "red", - "value": 80 + "color": "green", + "value": 1800 } ] }, @@ -237,9 +239,11 @@ "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.2", + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -261,49 +265,88 @@ "type": "stat" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, "gridPos": { "h": 13, "w": 12, "x": 0, "y": 8 }, - "hiddenSeries": false, "id": 12, - "legend": { - 
"alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", "options": { - "alertThreshold": true + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, - "percentage": false, - "pluginVersion": "9.2.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -311,87 +354,99 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(controller_runtime_reconcile_total{job=~\"$job\",instance=~\"$instance\",result=~\"requeue_after|requeue|success\"}[$__rate_interval])) by(controller)", + "expr": "sum(rate(controller_runtime_reconcile_total{job=~\"$job\",instance=~\"$instance\",result=~\"requeue_after|requeue|success\"}[$__rate_interval])) by(controller) > 0", "legendFormat": "{{controller}}", "range": true, "refId": "A" } ], - "thresholds": [], - "timeRegions": [], "title": "Reconciliation rate by controller", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "", - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": 
"", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, "gridPos": { "h": 13, "w": 12, "x": 12, "y": 8 }, - "hiddenSeries": false, "id": 16, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", "options": { - "alertThreshold": true + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, - "percentage": false, - "pluginVersion": "9.2.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -405,41 +460,14 @@ "refId": "A" } ], - "thresholds": [], - "timeRegions": [], "title": "Log message rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "collapsed": false, + 
"collapsed": true, "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "PB894574A363DF0AF" }, "gridPos": { "h": 1, @@ -448,402 +476,442 @@ "y": 21 }, "id": 6, - "panels": [], - "targets": [ + "panels": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, + "description": "Non zero metrics indicates about error with CR object definition (typos or incorrect values) or errors with kubernetes API connection.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 2 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(rate(controller_runtime_reconcile_errors_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by(controller) > 0 ", + "instant": false, + "legendFormat": "{{controller}}", + "range": true, + "refId": "A" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(controller_runtime_reconcile_total{job=~\"$job\",instance=~\"$instance\",result=\"error\"}[$__rate_interval])) by(controller) > 0", + "hide": false, + "legendFormat": "{{label_name}}", + "range": true, + "refId": "B" + } + ], + "title": "reconcile errors by controller", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Operator limits number of reconcilation events to 5 events per 2 seconds.\n For now, this limit is applied only for vmalert and vmagent controllers.\n It should reduce load at kubernetes cluster and increase operator performance.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 2 + }, + "id": 18, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + 
"expr": "sum(rate(operator_reconcile_throttled_events_total[$__rate_interval])) by(controller)", + "legendFormat": "{{controller}}", + "range": true, + "refId": "A" + } + ], + "title": "throttled reconcilation events", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Number of objects waiting in the queue for reconciliation. Non-zero values indicate that operator cannot process CR objects changes with the given resources.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(workqueue_depth{job=~\"$job\",instance=~\"$instance\"}) by (name)", + "legendFormat": "{{label_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Working queue depth", + 
"type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": " For controllers with StatefulSet it's ok to see latency greater then 3 seconds. It could be vmalertmanager,vmcluster or vmagent in statefulMode.\n\n For other controllers, latency greater then 1 second may indicate issues with kubernetes cluster or operator's performance.\n ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(controller_runtime_reconcile_time_seconds_bucket[$__rate_interval])) by(le,controller))", + "legendFormat": "q.99 {{controller}}", + "range": true, + "refId": "A" + } + ], + "title": "Reconcilation latency by controller", + "type": "timeseries" + } + ], + "targets": 
[ + { + "datasource": { + "type": "prometheus", + "uid": "PB894574A363DF0AF" + }, "refId": "A" } ], "title": "Troubleshooting", "type": "row" }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "description": "Non zero metrics indicates about error with CR object definition (typos or incorrect values) or errors with kubernetes API connection.", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 22 - }, - "hiddenSeries": false, - "id": 10, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.3.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(rate(controller_runtime_reconcile_errors_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) by(controller) > 0 ", - "instant": false, - "legendFormat": "{{controller}}", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "sum(rate(controller_runtime_reconcile_total{job=~\"$job\",instance=~\"$instance\",result=\"error\"}[$__rate_interval])) by(controller) > 0", - "hide": false, - "legendFormat": "{{label_name}}", - "range": true, - "refId": "B" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "reconcile errors by controller", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - 
"yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "description": "Operator limits number of reconcilation events to 5 events per 2 seconds.\n For now, this limit is applied only for vmalert and vmagent controllers.\n It should reduce load at kubernetes cluster and increase operator performance.", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 22 - }, - "hiddenSeries": false, - "id": 18, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.3.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "sum(rate(operator_reconcile_throttled_events_total[$__rate_interval])) by(controller)", - "legendFormat": "{{controller}}", - "range": true, - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "throttled reconcilation events", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "$ds" - 
}, - "description": "Number of objects waiting in the queue for reconciliation. Non-zero values indicate that operator cannot process CR objects changes with the given resources.", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 11, - "w": 12, - "x": 0, - "y": 30 - }, - "hiddenSeries": false, - "id": 20, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.3.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "max(workqueue_depth{job=~\"$job\",instance=~\"$instance\"}) by (name)", - "legendFormat": "{{label_name}}", - "range": true, - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Wokring queue depth", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "description": " For controllers with StatefulSet it's ok to see latency greater then 3 seconds. 
It could be vmalertmanager,vmcluster or vmagent in statefulMode.\n\n For other controllers, latency greater then 1 second may indicate issues with kubernetes cluster or operator's performance.\n ", - "fieldConfig": { - "defaults": { - "unit": "s" - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 11, - "w": 12, - "x": 12, - "y": 30 - }, - "hiddenSeries": false, - "id": 26, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.3.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99,sum(rate(controller_runtime_reconcile_time_seconds_bucket[$__rate_interval])) by(le,controller) )", - "legendFormat": "q.99 {{controller}}", - "range": true, - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Reconcilation latency by controller", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, { "collapsed": false, "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "PB894574A363DF0AF" }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 41 + "y": 22 }, "id": 4, "panels": [], @@ -851,7 +919,7 @@ { "datasource": { "type": "prometheus", - "uid": "$ds" + "uid": "PB894574A363DF0AF" }, "refId": "A" } @@ -860,55 +928,89 @@ "type": "row" 
}, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, "unit": "bytes" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 42 + "y": 23 }, - "hiddenSeries": false, "id": 28, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", "options": { - "alertThreshold": true + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, - "percentage": false, - "pluginVersion": "8.3.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -958,79 +1060,90 @@ "refId": "D" } ], - "thresholds": [], - "timeRegions": [], "title": "Memory usage ($instance)", - "tooltip": 
{ - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 42 + "y": 23 }, - "hiddenSeries": false, "id": 30, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "8.3.2", - "pointradius": 
2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -1044,79 +1157,89 @@ "refId": "A" } ], - "thresholds": [], - "timeRegions": [], "title": "CPU ($instance)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, - "fill": 1, - "fillGradient": 0, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 50 + "y": 31 }, - "hiddenSeries": false, "id": 32, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - 
"nullPointMode": "null", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "8.3.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -1130,86 +1253,92 @@ "refId": "A" } ], - "thresholds": [], - "timeRegions": [], "title": "Goroutines ($instance)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, "unit": "s" }, "overrides": [] }, - "fill": 1, - 
"fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 50 + "y": 31 }, - "hiddenSeries": false, "id": 34, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", "options": { - "alertThreshold": true + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "8.3.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "10.4.0", "targets": [ { "datasource": { @@ -1223,40 +1352,12 @@ "refId": "A" } ], - "thresholds": [], - "timeRegions": [], "title": "GC duration ($instance)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } + "type": "timeseries" } ], "refresh": "", - "schemaVersion": 37, - "style": "dark", + "schemaVersion": 39, "tags": [ "operator", "VictoriaMetrics" @@ -1265,9 +1366,9 @@ "list": [ { "current": { - "selected": false, - "text": "cloud-c15", - "value": "cloud-c15" + "selected": true, + "text": "VictoriaMetrics", + "value": "PF64AB64142051B50" }, "hide": 0, "includeAll": false, @@ -1331,15 +1432,16 @@ "type": "prometheus", "uid": "$ds" }, - "definition": "label_values(vm_app_version{job=\"$job\", instance=\"$instance\"}, version)", + "definition": "label_values(vm_app_version{job=\"$job\", instance=~\"$instance\"},version)", "hide": 2, "includeAll": false, "multi": false, "name": "version", "options": [], "query": { - 
"query": "label_values(vm_app_version{job=\"$job\", instance=\"$instance\"}, version)", - "refId": "StandardVariableQuery" + "qryType": 1, + "query": "label_values(vm_app_version{job=\"$job\", instance=~\"$instance\"},version)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, "regex": "", diff --git a/dashboards/victoria-metrics/victoriametrics-cluster.json b/dashboards/victoria-metrics/victoriametrics-cluster.json index 6abd17a6..1c57d7d5 100644 --- a/dashboards/victoria-metrics/victoriametrics-cluster.json +++ b/dashboards/victoria-metrics/victoriametrics-cluster.json @@ -6,7 +6,7 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "9.2.7" + "version": "10.4.2" }, { "type": "datasource", @@ -76,7 +76,7 @@ "uid": "$ds" }, "enable": true, - "expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(version) unless (sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"} offset 20m) by(version))", + "expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(version) unless (sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"} offset $__interval) by(version))", "hide": true, "iconColor": "dark-blue", "name": "version change", @@ -89,8 +89,7 @@ "uid": "$ds" }, "enable": true, - "expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"})) by(job)", - "hide": true, + "expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(job)", "iconColor": "dark-yellow", "name": "restarts", "textFormat": "{{job}} restarted" @@ -109,7 +108,7 @@ "targetBlank": true, "title": "Cluster Wiki", "type": "link", - "url": "https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/Cluster-VictoriaMetrics" + "url": "https://docs.victoriametrics.com/cluster-victoriametrics" }, { "icon": "external link", @@ -152,7 +151,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "How many datapoints are in storage", + "description": "How many [data 
samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) are in storage", "fieldConfig": { "defaults": { "color": { @@ -179,7 +178,6 @@ "y": 1 }, "id": 131, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -193,10 +191,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -248,7 +248,6 @@ "y": 1 }, "id": 124, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -262,10 +261,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -319,7 +320,6 @@ "y": 1 }, "id": 130, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -333,10 +333,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -389,7 +391,6 @@ "y": 1 }, "id": 126, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -403,10 +404,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -432,7 +435,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the number of active time series with new data points inserted during the last hour. High value may result in ingestion slowdown. 
\n\nSee more details here https://docs.victoriametrics.com/FAQ.html#what-is-an-active-time-series", + "description": "Shows the number of [active time series](https://docs.victoriametrics.com/faq/#what-is-an-active-time-series) with new data points inserted during the last hour. High value may result in ingestion slowdown.", "fieldConfig": { "defaults": { "color": { @@ -459,7 +462,6 @@ "y": 4 }, "id": 34, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -473,10 +475,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -529,7 +533,6 @@ "y": 4 }, "id": 35, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -543,10 +546,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -599,7 +604,6 @@ "y": 4 }, "id": 112, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -613,10 +617,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -668,7 +674,6 @@ "y": 4 }, "id": 128, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -682,10 +687,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -718,7 +725,9 @@ }, "custom": { "align": "auto", - "displayMode": "auto", + "cellOptions": { + "type": "auto" + }, "inspect": false, "minWidth": 50 }, @@ -772,7 +781,9 
@@ }, "id": 149, "options": { + "cellHeight": "sm", "footer": { + "countRows": false, "fields": "", "reducer": [ "sum" @@ -787,7 +798,7 @@ } ] }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -816,6 +827,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -830,6 +842,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "stepAfter", "lineWidth": 1, "pointSize": 5, @@ -929,13 +942,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "How many datapoints are inserted into cluster per second by protocol before the replication.", + "description": "How many [data samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) are inserted into cluster per second by protocol before the replication. Check vminsert metrics if there are any issues with ingestion.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -949,6 +963,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" @@ -1044,6 +1059,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1057,6 +1073,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1100,7 +1117,6 @@ "y": 13 }, "id": 6, - "links": [], "options": { "legend": { "calcs": [ @@ -1143,13 +1159,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the number of active time series with new data points inserted during the last hour across all storage nodes. High value may result in ingestion slowdown. 
\n\nSee following link for details:", + "description": "Shows the number of [active time series](https://docs.victoriametrics.com/faq/#what-is-an-active-time-series) with new data points inserted during the last hour across all storage nodes. High value may result in ingestion slowdown and high memory usage.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1163,6 +1180,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1206,13 +1224,6 @@ "y": 21 }, "id": 12, - "links": [ - { - "targetBlank": true, - "title": "troubleshooting", - "url": "https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#troubleshooting" - } - ], "options": { "legend": { "calcs": [ @@ -1260,6 +1271,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1273,6 +1285,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1316,7 +1329,6 @@ "y": 21 }, "id": 8, - "links": [], "options": { "legend": { "calcs": [ @@ -1366,6 +1378,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1379,6 +1392,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1422,7 +1436,6 @@ "y": 29 }, "id": 52, - "links": [], "options": { "legend": { "calcs": [ @@ -1472,6 +1485,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1485,6 +1499,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1534,7 
+1549,6 @@ "y": 29 }, "id": 104, - "links": [], "options": { "legend": { "calcs": [ @@ -1601,6 +1615,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1614,6 +1629,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1643,8 +1659,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1652,7 +1667,8 @@ } ] }, - "unit": "percentunit" + "unit": "percentunit", + "unitScale": true }, "overrides": [] }, @@ -1660,7 +1676,7 @@ "h": 8, "w": 12, "x": 0, - "y": 38 + "y": 3 }, "id": 66, "links": [], @@ -1713,6 +1729,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1726,6 +1743,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1755,8 +1773,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1764,7 +1781,8 @@ } ] }, - "unit": "percentunit" + "unit": "percentunit", + "unitScale": true }, "overrides": [] }, @@ -1772,7 +1790,7 @@ "h": 8, "w": 12, "x": 12, - "y": 38 + "y": 3 }, "id": 138, "links": [], @@ -1824,6 +1842,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1837,6 +1856,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1866,8 +1886,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1875,7 +1894,8 @@ } ] }, - "unit": "percentunit" + "unit": "percentunit", + "unitScale": true }, "overrides": [] }, @@ -1883,7 +1903,7 @@ "h": 8, "w": 12, "x": 0, - "y": 46 + "y": 11 }, 
"id": 64, "links": [], @@ -1938,6 +1958,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1951,6 +1972,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1973,8 +1995,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1982,7 +2003,8 @@ } ] }, - "unit": "Bps" + "unit": "Bps", + "unitScale": true }, "overrides": [ { @@ -2003,7 +2025,7 @@ "h": 8, "w": 12, "x": 12, - "y": 46 + "y": 11 }, "id": 122, "links": [], @@ -2074,6 +2096,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2087,6 +2110,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2111,8 +2135,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2120,7 +2143,8 @@ } ] }, - "unit": "percentunit" + "unit": "percentunit", + "unitScale": true }, "overrides": [ { @@ -2144,7 +2168,7 @@ "h": 8, "w": 12, "x": 0, - "y": 54 + "y": 19 }, "id": 117, "links": [], @@ -2199,6 +2223,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2212,6 +2237,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2234,8 +2260,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2243,7 +2268,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [ { @@ -2264,7 +2290,7 @@ "h": 8, "w": 12, "x": 12, - "y": 54 + "y": 19 }, "id": 204, "links": [], @@ -2334,6 +2360,7 @@ "mode": "palette-classic" }, "custom": { + 
"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2347,6 +2374,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2371,8 +2399,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2380,7 +2407,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -2388,7 +2416,7 @@ "h": 8, "w": 12, "x": 0, - "y": 62 + "y": 27 }, "id": 68, "links": [], @@ -2442,6 +2470,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2455,6 +2484,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2476,8 +2506,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2485,7 +2514,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -2493,7 +2523,7 @@ "h": 8, "w": 12, "x": 12, - "y": 62 + "y": 27 }, "id": 119, "options": { @@ -2544,6 +2574,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2557,6 +2588,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2581,8 +2613,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2590,7 +2621,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -2598,7 +2630,7 @@ "h": 8, "w": 12, "x": 0, - "y": 70 + "y": 35 }, "id": 70, "links": [], @@ -2652,6 +2684,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", 
@@ -2665,6 +2698,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2686,8 +2720,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2695,7 +2728,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -2703,7 +2737,7 @@ "h": 8, "w": 12, "x": 12, - "y": 70 + "y": 35 }, "id": 120, "options": { @@ -2742,6 +2776,114 @@ ], "title": "TCP connections rate ($instance)", "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows the percent of CPU spent on garbage collection.\n\nIf % is high, then CPU usage can be decreased by changing GOGC to higher values. Increasing GOGC value will increase memory usage, and decrease CPU usage.\n\nTry searching for keyword `GOGC` at https://docs.victoriametrics.com/troubleshooting/ ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 43 + }, + "id": 210, + "links": [], + 
"options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.2.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(\n rate(go_gc_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) \n / rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n ) by(job)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "CPU spent on GC ($instance)", + "type": "timeseries" } ], "title": "Resource usage ($job)", @@ -2766,7 +2908,34 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the rate and total number of new series created over last 24h.\n\nHigh churn rate tightly connected with database performance and may result in unexpected OOM's or slow queries. It is recommended to always keep an eye on this metric to avoid unexpected cardinality \"explosions\".\n\nThe higher churn rate is, the more resources required to handle it. Consider to keep the churn rate as low as possible.\n\nTo investigate stats about most expensive series use `api/v1/status/tsdb` handler. 
More details here https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format\n\nGood references to read:\n* https://www.robustperception.io/cardinality-is-key\n* https://valyala.medium.com/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b", + "description": "", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 211, + "links": [], + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "See [Troubleshooting](https://docs.victoriametrics.com/troubleshooting/) docs.", + "mode": "markdown" + }, + "pluginVersion": "10.3.1", + "transparent": true, + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows the rate and total number of new series created over last 24h.\n\nHigh [churn rate](https://docs.victoriametrics.com/faq/#what-is-high-churn-rate) tightly connected with database performance and may result in unexpected OOM's or slow queries. It is recommended to always keep an eye on this metric to avoid unexpected [cardinality](https://docs.victoriametrics.com/keyconcepts/#cardinality) \"explosions\".\n\nThe higher churn rate is, the more resources required to handle it. Consider to keep the churn rate as low as possible.\n\nTo investigate stats about most expensive series use `api/v1/status/tsdb` handler. 
More details here https://docs.victoriametrics.com/cluster-victoriametrics/#url-format\n\nGood references to read:\n* https://www.robustperception.io/cardinality-is-key\n* https://valyala.medium.com/high-cardinality-tsdb-benchmarks-victoriametrics-vs-timescaledb-vs-influxdb-13e6ee64dd6b", "fieldConfig": { "defaults": { "color": { @@ -2842,7 +3011,7 @@ "h": 8, "w": 12, "x": 0, - "y": 31 + "y": 23 }, "id": 102, "options": { @@ -2897,7 +3066,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "The percentage of slow inserts comparing to total insertion rate during the last 5 minutes. \n\nThe less value is better. If percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of active time series. \n\nIn general, VictoriaMetrics requires ~1KB or RAM per active time series, so it should be easy calculating the required amounts of RAM for the current workload according to capacity planning docs. But the resulting number may be far from the real number because the required amounts of memory depends on many other factors such as the number of labels per time series and the length of label values. See also https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183", + "description": "The percentage of slow inserts comparing to total insertion rate during the last 5 minutes. \n\nThe less value is better. If percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of [active time series](https://docs.victoriametrics.com/faq/#what-is-an-active-time-series). \n\nIn general, VictoriaMetrics requires ~1KB or RAM per active time series, so it should be easy calculating the required amounts of RAM for the current workload according to capacity planning docs. 
But the resulting number may be far from the real number because the required amounts of memory depends on many other factors such as the number of labels per time series and the length of label values. See also [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183) for details.", "fieldConfig": { "defaults": { "color": { @@ -2956,7 +3125,7 @@ "h": 8, "w": 12, "x": 12, - "y": 31 + "y": 23 }, "id": 108, "options": { @@ -3057,14 +3226,14 @@ "h": 8, "w": 12, "x": 0, - "y": 39 + "y": 31 }, "id": 142, "links": [ { "targetBlank": true, "title": "Readonly mode", - "url": "https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#readonly-mode" + "url": "https://docs.victoriametrics.com/cluster-victoriametrics/#readonly-mode" } ], "options": { @@ -3109,7 +3278,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Slow queries according to `search.logSlowQueryDuration` flag, which is `5s` by default.", + "description": "Shows % of slow queries according to `search.logSlowQueryDuration` flag, which is `5s` by default.\n\nThe less value is better.", "fieldConfig": { "defaults": { "color": { @@ -3120,6 +3289,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, @@ -3160,7 +3330,7 @@ } ] }, - "unit": "short" + "unit": "percentunit" }, "overrides": [] }, @@ -3168,7 +3338,7 @@ "h": 8, "w": 12, "x": 12, - "y": 39 + "y": 31 }, "id": 107, "options": { @@ -3194,13 +3364,15 @@ "type": "prometheus", "uid": "$ds" }, - "expr": "sum(rate(vm_slow_queries_total{job=~\"$job_select\", instance=~\"$instance\"}[$__rate_interval]))", + "editorMode": "code", + "expr": "sum(rate(vm_slow_queries_total{job=~\"$job_select\", instance=~\"$instance\"}[$__rate_interval]))\n/\nsum(rate(vm_http_requests_total{job=~\"$job_select\", instance=~\"$instance\", path=~\"/select/.*\"}[$__rate_interval]))", "interval": "", - "legendFormat": "slow queries rate", + 
"legendFormat": "slow queries %", + "range": true, "refId": "A" } ], - "title": "Slow queries rate ($instance)", + "title": "Slow queries % ($instance)", "type": "timeseries" }, { @@ -3267,7 +3439,7 @@ "h": 8, "w": 12, "x": 0, - "y": 47 + "y": 39 }, "id": 170, "links": [], @@ -3313,7 +3485,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.\n\nThis prevents from ingesting metrics with too many labels. The value of `maxLabelsPerTimeseries` must be adjusted for your workload.\n\nWhen limit is exceeded (graph is > 0) - extra labels are dropped, which could result in unexpected identical time series.", + "description": "VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.\n\nThis prevents from ingesting metrics with too many labels. The value of `maxLabelsPerTimeseries` must be adjusted for your workload.\n\nWhen limit is exceeded (graph is > 0) - extra labels are dropped, which could result in unexpected identical time series. 
See more details about dropped labels in vminsert logs.", "fieldConfig": { "defaults": { "color": { @@ -3373,7 +3545,7 @@ "h": 8, "w": 12, "x": 12, - "y": 47 + "y": 39 }, "id": 116, "links": [], @@ -3475,7 +3647,7 @@ "h": 9, "w": 12, "x": 0, - "y": 55 + "y": 47 }, "id": 144, "options": { @@ -3578,7 +3750,7 @@ "h": 9, "w": 12, "x": 12, - "y": 55 + "y": 47 }, "id": 58, "links": [], @@ -3634,7 +3806,9 @@ }, "custom": { "align": "auto", - "displayMode": "auto", + "cellOptions": { + "type": "auto" + }, "inspect": false }, "mappings": [], @@ -3679,10 +3853,10 @@ ] }, "gridPos": { - "h": 7, + "h": 6, "w": 24, "x": 0, - "y": 64 + "y": 56 }, "id": 183, "options": { @@ -3701,7 +3875,7 @@ } ] }, - "pluginVersion": "9.1.0", + "pluginVersion": "9.2.7", "targets": [ { "datasource": { @@ -3747,6 +3921,108 @@ } ], "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows how many rows were ignored on insertion due to corrupted or out of retention timestamps.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 62 + }, + "id": 135, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + 
"displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(vm_rows_ignored_total{job=~\"$job_storage\", instance=~\"$instance\"}[1h])) by (reason)", + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Rows ignored for last 1h ($instance)", + "type": "timeseries" } ], "title": "Troubleshooting", @@ -3830,7 +4106,7 @@ "h": 9, "w": 12, "x": 0, - "y": 21 + "y": 37 }, "id": 76, "links": [], @@ -3946,7 +4222,7 @@ "h": 9, "w": 12, "x": 12, - "y": 21 + "y": 37 }, "id": 86, "links": [], @@ -4071,7 +4347,7 @@ "h": 8, "w": 12, "x": 0, - "y": 30 + "y": 46 }, "id": 80, "links": [], @@ -4176,7 +4452,7 @@ "h": 8, "w": 12, "x": 12, - "y": 30 + "y": 46 }, "id": 78, "links": [], @@ -4220,7 +4496,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "The number of rows or bytes that vminesrt internal buffer contains at the moment.", + "description": "The number of rows or bytes that vminsert internal buffer contains at the moment.", "fieldConfig": { "defaults": { "color": { @@ -4292,7 +4568,7 @@ "h": 8, "w": 12, "x": 0, - "y": 38 + "y": 54 }, "id": 82, "options": { @@ -4399,7 +4675,7 @@ "h": 8, "w": 12, "x": 12, - "y": 38 + "y": 54 }, "id": 74, "options": { @@ -4465,6 +4741,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4478,6 +4755,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4501,7 +4779,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -4520,7 +4799,6 @@ "y": 6 }, "id": 100, - "links": [], 
"options": { "legend": { "calcs": [ @@ -4563,13 +4841,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the approx time needed to reach 100% of disk capacity for at least one vmstorage node based on the following params:\n* free disk space;\n* row ingestion rate;\n* dedup rate;\n* compression.\n\nUse this panel for capacity planning in order to estimate the time remaining for running out of the disk space.", + "description": "Shows the approx time needed to reach 100% of disk capacity for at least one vmstorage node based on the following params:\n* free disk space;\n* row ingestion rate;\n* compression.\n\nNote: this panel doesn't account for deduplication process.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4583,6 +4862,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4612,7 +4892,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -4631,7 +4912,6 @@ "y": 6 }, "id": 113, - "links": [], "options": { "legend": { "calcs": [ @@ -4658,7 +4938,7 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "min(vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"} \n/ \nignoring(path) (\n (\n rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n - \n ignoring(type) rate(vm_deduplicated_samples_total{job=~\"$job_storage\", instance=~\"$instance\", type=\"merge\"}[1d])\n ) * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n))", + "expr": "min(vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"} \n/ \nignoring(path) (\n 
rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n))", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -4682,6 +4962,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4695,6 +4976,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4724,7 +5006,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -4743,7 +5026,6 @@ "y": 14 }, "id": 151, - "links": [], "options": { "legend": { "calcs": [ @@ -4827,6 +5109,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4840,6 +5123,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4869,7 +5153,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -4888,7 +5173,6 @@ "y": 14 }, "id": 167, - "links": [], "options": { "legend": { "calcs": [ @@ -4965,13 +5249,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Value > 0 means vmstorage is in readonly mode.", + "description": "Shows how many ongoing insertions (not API /write calls) on disk are taking place, where:\n* `max` - equal to number of CPUs;\n* `current` - current number of goroutines busy with inserting rows into underlying storage.\n\nEvery successful API /write call results into flush on disk. The `max` is an internal limit and can't be changed. It is always equal to the number of CPUs. 
\n\nWhen `current` hits `max` constantly, it means storage is overloaded and requires more CPU (see CPU usage) or disks with more IOPS (see disk writes and reads panels in Resource Usage row).", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4985,6 +5270,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5006,110 +5292,8 @@ "mode": "absolute", "steps": [ { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 21 - }, - "id": 141, - "links": [ - { - "targetBlank": true, - "title": "Readonly mode", - "url": "https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#readonly-mode" - } - ], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "exemplar": true, - "expr": "vm_storage_is_read_only{job=~\"$job_storage\", instance=~\"$instance\"} > 0", - "interval": "", - "legendFormat": "{{ instance }}", - "range": true, - "refId": "A" - } - ], - "title": "Readonly mode", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "description": "Shows how many ongoing insertions (not API /write calls) on disk are taking place, where:\n* `max` - equal to number of CPUs;\n* `current` - current number of goroutines busy with inserting rows into underlying storage.\n\nEvery successful API /write call results into flush on disk. The `max` is an internal limit and can't be changed. It is always equal to the number of CPUs. 
\n\nWhen `current` hits `max` constantly, it means storage is overloaded and requires more CPU or faster disk.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -5140,10 +5324,10 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, + "x": 0, "y": 21 }, - "id": 133, + "id": 212, "links": [ { "targetBlank": true, @@ -5207,13 +5391,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "The number of on-going merges in storage nodes. It is expected to have high numbers for `storage/small` metric.", + "description": "Shows the number of concurrently executed read requests, where:\n* `max` - equal to `-search.maxConcurrentRequest` cmd-line flag;\n* `current` - current number of concurrent select requests executed by storage.\n\nEach concurrent select request requires RAM, CPU and disk IO resources during processing. 
Too many concurrent requests could result into OOM exceptions.\n\nWhen `current` hits `max` constantly, it means storage is overloaded and requires more CPU (see CPU usage) or disks with more IOPS (see disk writes and reads panels in Resource Usage row).\n\nSee more at https://docs.victoriametrics.com/cluster-victoriametrics/#resource-usage-limits", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5227,6 +5412,142 @@ "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "max" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 133, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(\n max_over_time(vm_vmselect_concurrent_requests_current{job=~\"$job_storage\", \n instance=~\"$instance\"}[$__rate_interval])\n)", + "interval": "", + "legendFormat": "current", + "range": true, + "refId": "A" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": true, + "expr": "min(vm_vmselect_concurrent_requests_capacity{job=~\"$job_storage\", instance=~\"$instance\"})", + "hide": false, + "interval": "", + "legendFormat": "max", + "range": true, + "refId": "B" + } + ], + "title": "Concurrent selects ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "The max number of on-going merges across storage nodes.\n The drastic change in number of merges could be a sign of on-going deduplication/downsampling activity.\n It is expected to have high numbers for `storage/small` metric.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5251,7 +5572,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -5296,7 +5618,7 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(max_over_time(vm_active_merges{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval])) by(type)", + "expr": "max(max_over_time(vm_active_merges{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval])) by(type)", "legendFormat": "__auto", "range": true, "refId": "A" @@ -5310,13 +5632,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "The number of rows merged per second by storage nodes.", + "description": "The number of rows merged per second by storage nodes. 
Merge speed depends on available CPU and disk IO bandwidth.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5330,6 +5653,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5354,7 +5678,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -5420,6 +5745,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5433,6 +5759,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5462,7 +5789,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -5481,7 +5809,6 @@ "y": 37 }, "id": 20, - "links": [], "options": { "legend": { "calcs": [ @@ -5559,6 +5886,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5572,6 +5900,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5595,7 +5924,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -5614,7 +5944,6 @@ "y": 37 }, "id": 22, - "links": [], "options": { "legend": { "calcs": [ @@ -5658,13 +5987,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the percentage of used disk space by type: datapoints or indexdb. Normally, indexdb takes much less space comparing to datapoints. But with high churn rate the size of the indexdb could grow significantly.\n\nThe sum of the % can be > 100% since panel shows max % per-job and per-instance. 
It means different instance can have different ratio between datapoints and indexdb size.", + "description": "Shows the percentage of used disk space by type: datapoints or indexdb. Normally, indexdb takes much less space comparing to datapoints. But with high [churn rate](https://docs.victoriametrics.com/faq/#what-is-high-churn-rate) the size of the indexdb could grow significantly.\n\nThe sum of the % can be > 100% since panel shows max % per-job and per-instance. It means different instance can have different ratio between datapoints and indexdb size.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5678,6 +6008,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5707,7 +6038,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null } ] }, @@ -5722,7 +6054,6 @@ "y": 45 }, "id": 202, - "links": [], "options": { "legend": { "calcs": [ @@ -5786,6 +6117,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5799,6 +6131,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5822,7 +6155,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -5858,7 +6192,6 @@ "y": 45 }, "id": 14, - "links": [], "options": { "legend": { "calcs": [ @@ -5916,13 +6249,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows how many rows were ignored on insertion due to corrupted or out of retention timestamps.", + "description": "Shows network usage by vmstorage services.\n* Writes show traffic sent to clients.\n* Reads show traffic received from clients.", "fieldConfig": { "defaults": { "color": { "mode": 
"palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5936,6 +6270,139 @@ "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/read.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 53 + }, + "id": 206, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vm_tcplistener_read_bytes_total{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval])) by(name) * 8 > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "read from {{name}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vm_tcplistener_written_bytes_total{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval])) by(name) * 8 > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "write to {{name}}", + "range": true, + "refId": "B" + } + ], + "title": "Network usage 
($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Value > 0 means vmstorage is in readonly mode.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5957,7 +6424,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -5972,26 +6440,27 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, + "x": 12, "y": 53 }, - "id": 135, + "id": 141, + "links": [ + { + "targetBlank": true, + "title": "Readonly mode", + "url": "https://docs.victoriametrics.com/cluster-victoriametrics/#readonly-mode" + } + ], "options": { "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", - "showLegend": true, - "sortBy": "Last *", - "sortDesc": true + "showLegend": false }, "tooltip": { "mode": "multi", - "sort": "none" + "sort": "desc" } }, "pluginVersion": "9.1.0", @@ -6003,14 +6472,14 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(increase(vm_rows_ignored_total{job=~\"$job_storage\", instance=~\"$instance\"}[1h])) by (reason)", + "expr": "vm_storage_is_read_only{job=~\"$job_storage\", instance=~\"$instance\"} > 0", "interval": "", - "legendFormat": "__auto", + "legendFormat": "{{ instance }}", "range": true, "refId": "A" } ], - "title": "Rows ignored for last 1h ($instance)", + "title": "Readonly mode", "type": "timeseries" } ], @@ -6095,7 +6564,7 @@ "h": 8, "w": 12, "x": 0, - "y": 98 + "y": 50 }, "id": 92, "links": [], @@ -6221,7 
+6690,7 @@ "h": 8, "w": 12, "x": 12, - "y": 98 + "y": 50 }, "id": 95, "links": [], @@ -6343,7 +6812,7 @@ "h": 8, "w": 12, "x": 0, - "y": 106 + "y": 58 }, "id": 163, "links": [], @@ -6487,7 +6956,7 @@ "h": 8, "w": 12, "x": 12, - "y": 106 + "y": 58 }, "id": 165, "links": [], @@ -6627,7 +7096,7 @@ "h": 8, "w": 12, "x": 0, - "y": 114 + "y": 66 }, "id": 178, "links": [], @@ -6674,7 +7143,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "99th percentile of number of raw datapoints read per queried time series.", + "description": "99th percentile of number of [data samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) read per queried time series.", "fieldConfig": { "defaults": { "color": { @@ -6734,7 +7203,7 @@ "h": 8, "w": 12, "x": 12, - "y": 114 + "y": 66 }, "id": 180, "links": [], @@ -6781,7 +7250,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "99th percentile of number of raw datapoints read per query.", + "description": "99th percentile of number of [data samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) read per query.", "fieldConfig": { "defaults": { "color": { @@ -6841,7 +7310,7 @@ "h": 8, "w": 12, "x": 0, - "y": 122 + "y": 74 }, "id": 179, "links": [], @@ -6888,7 +7357,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "99th percentile of number of raw datapoints scanner per query.\n\nThis number can exceed number of DatapointsReadPerQuery if `step` query arg passed to [/api/v1/query_range](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries) is smaller than the lookbehind window set in square brackets of [rollup function](https://docs.victoriametrics.com/MetricsQL.html#rollup-functions). For example, if `increase(some_metric[1h])` is executed with the `step=5m`, then the same raw samples on a hour time range are scanned `1h/5m=12` times. 
See [this article](https://valyala.medium.com/how-to-optimize-promql-and-metricsql-queries-85a1b75bf986) for details.", + "description": "99th percentile of number of [data samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) scanner per query.\n\nThis number can exceed number of DatapointsReadPerQuery if `step` query arg passed to [/api/v1/query_range](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries) is smaller than the lookbehind window set in square brackets of [rollup function](https://docs.victoriametrics.com/metricsql/#rollup-functions). For example, if `increase(some_metric[1h])` is executed with the `step=5m`, then the same [data samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) on a hour time range are scanned `1h/5m=12` times. See [this article](https://valyala.medium.com/how-to-optimize-promql-and-metricsql-queries-85a1b75bf986) for details.", "fieldConfig": { "defaults": { "color": { @@ -6948,7 +7417,7 @@ "h": 8, "w": 12, "x": 12, - "y": 122 + "y": 74 }, "id": 181, "links": [], @@ -6995,7 +7464,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "", + "description": "Shows network usage between vmselects and clients, such as vmalert, Grafana, vmui, etc.", "fieldConfig": { "defaults": { "color": { @@ -7064,9 +7533,9 @@ }, "gridPos": { "h": 8, - "w": 24, + "w": 12, "x": 0, - "y": 130 + "y": 82 }, "id": 93, "links": [], @@ -7098,7 +7567,7 @@ "expr": "sum(rate(vm_tcplistener_read_bytes_total{job=~\"$job_select\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0", "format": "time_series", "intervalFactor": 1, - "legendFormat": "read", + "legendFormat": "read from client", "range": true, "refId": "A" }, @@ -7112,12 +7581,142 @@ "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "write ", + "legendFormat": "write to client", "range": true, "refId": "B" } ], - "title": "Network usage ($instance)", + "title": "Network usage: clients ($instance)", + "type": 
"timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows network usage between vmselects and vmstorages.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/read.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 82 + }, + "id": 207, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vm_tcpdialer_read_bytes_total{job=~\"$job_select\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "read from vmstorage", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": 
"sum(rate(vm_tcpdialer_written_bytes_total{job=~\"$job_select\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "write to vmstorage", + "range": true, + "refId": "B" + } + ], + "title": "Network usage: vmstorage ($instance)", "type": "timeseries" } ], @@ -7202,7 +7801,7 @@ "h": 8, "w": 12, "x": 0, - "y": 24 + "y": 51 }, "id": 97, "links": [], @@ -7328,7 +7927,7 @@ "h": 8, "w": 12, "x": 12, - "y": 24 + "y": 51 }, "id": 99, "links": [], @@ -7452,7 +8051,7 @@ "h": 8, "w": 12, "x": 0, - "y": 32 + "y": 59 }, "id": 185, "links": [], @@ -7596,7 +8195,7 @@ "h": 8, "w": 12, "x": 12, - "y": 32 + "y": 59 }, "id": 187, "links": [], @@ -7676,220 +8275,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 40 - }, - "id": 90, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "sortBy": "Last *", - "sortDesc": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - 
"pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(vm_tcplistener_read_bytes_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "read", - "range": true, - "refId": "A" - } - ], - "title": "Network usage ($instance)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 2, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 40 - }, - "id": 88, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "sortBy": "Last *", - "sortDesc": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "editorMode": "code", - "expr": "max(histogram_quantile(0.99, 
sum(increase(vm_rows_per_insert_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange)))", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "max", - "range": true, - "refId": "A" - } - ], - "title": "Rows per insert ($instance)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$ds" - }, - "description": "Shows the saturation level of connection between vminsert and vmstorage components. If the threshold of 0.9sec is reached, then the connection is saturated by more than 90% and vminsert won't be able to keep up. This usually means that more vminsert or vmstorage nodes must be added to the cluster in order to increase the total number of vminsert -> vmstorage links.\n", + "description": "Shows the saturation level of connection between vminsert and vmstorage components. \n\nIf the threshold of 0.9sec is reached, then the connection is saturated by more than 90% and vminsert won't be able to keep up. This usually means that either vminsert or vmstorage nodes are struggling with the load. 
Verify CPU/mem saturation of both components and network saturation between them.\nIf vminsert resources are saturated - consider adding more resources or scale vminserts horizontally.\n\nIf vminsert resources and network are fine, check vmstorage metrics for anomalies.", "fieldConfig": { "defaults": { "color": { @@ -7949,7 +8335,7 @@ "h": 8, "w": 12, "x": 0, - "y": 48 + "y": 67 }, "id": 139, "links": [], @@ -8056,7 +8442,7 @@ "h": 8, "w": 12, "x": 12, - "y": 48 + "y": 67 }, "id": 114, "links": [], @@ -8094,6 +8480,373 @@ ], "title": "Storage reachability ($instance)", "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows network usage between vminserts and clients, such as vmagent, Prometheus, or any other client pushing metrics to vminsert.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/read.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 75 + }, + "id": 208, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + 
"placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vm_tcplistener_read_bytes_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "read from client", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vm_tcplistener_written_bytes_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "write to client", + "range": true, + "refId": "B" + } + ], + "title": "Network usage: clients ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows network usage between vminserts and vmstorages.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + 
"options": "/read.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 75 + }, + "id": 209, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vm_tcpdialer_read_bytes_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "read from vmstorage", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vm_tcpdialer_written_bytes_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "write to vmstorage", + "range": true, + "refId": "B" + } + ], + "title": "Network usage: vmstorage ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": 
"off" + } + }, + "decimals": 2, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 83 + }, + "id": 88, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(histogram_quantile(0.99, sum(increase(vm_rows_per_insert_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange)))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "max", + "range": true, + "refId": "A" + } + ], + "title": "Rows per insert ($instance)", + "type": "timeseries" } ], "title": "vminsert ($instance)", @@ -8118,7 +8871,7 @@ "h": 2, "w": 24, "x": 0, - "y": 84 + "y": 100 }, "id": 198, "options": { @@ -8182,8 +8935,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -8195,7 +8947,7 @@ "h": 8, "w": 12, "x": 0, - "y": 86 + "y": 102 }, "id": 189, "links": [], @@ -8284,8 +9036,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -8297,7 +9048,7 @@ "h": 8, "w": 12, "x": 12, - "y": 86 + "y": 102 }, "id": 190, "links": [], @@ -8386,8 +9137,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -8399,7 +9149,7 @@ "h": 7, "w": 12, "x": 0, - "y": 94 + "y": 110 }, "id": 192, "links": [], @@ -8447,7 +9197,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the approx time needed to reach 100% of disk 
capacity based on the following params:\n* free disk space;\n* row ingestion rate;\n* dedup rate;\n* compression.\n\nUse this panel for capacity planning in order to estimate the time remaining for running out of the disk space.", + "description": "Shows the approx time needed to reach 100% of disk capacity based on the following params:\n* free disk space;\n* row ingestion rate;\n* compression.\n\nNote: this panel doesn't account for deduplication process.", "fieldConfig": { "defaults": { "color": { @@ -8490,8 +9240,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -8507,7 +9256,7 @@ "h": 7, "w": 12, "x": 12, - "y": 94 + "y": 110 }, "id": 196, "links": [], @@ -8537,7 +9286,7 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"} \n/ \nignoring(path) (\n (\n rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n - \n ignoring(type) rate(vm_deduplicated_samples_total{job=~\"$job_storage\", instance=~\"$instance\", type=\"merge\"}[1d])\n ) * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n)", + "expr": "vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"} \n/ \nignoring(path) (\n rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n)", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -8597,8 +9346,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -8610,7 +9358,7 @@ "h": 8, "w": 12, "x": 0, - "y": 101 + "y": 117 }, "id": 200, 
"links": [], @@ -8699,8 +9447,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -8712,7 +9459,7 @@ "h": 8, "w": 12, "x": 12, - "y": 101 + "y": 117 }, "id": 201, "links": [], @@ -8815,8 +9562,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -8832,7 +9578,7 @@ "h": 8, "w": 12, "x": 0, - "y": 109 + "y": 125 }, "id": 203, "links": [], @@ -8881,17 +9627,16 @@ "type": "row" } ], - "refresh": false, - "schemaVersion": 37, - "style": "dark", + "refresh": "", + "schemaVersion": 39, "tags": [], "templating": { "list": [ { "current": { - "selected": true, - "text": "VictoriaMetrics", - "value": "VictoriaMetrics" + "selected": false, + "text": "VictoriaMetrics - cluster", + "value": "PAF93674D0B4E9963" }, "hide": 0, "includeAll": false, @@ -9063,4 +9808,4 @@ "uid": "oS7Bi_0Wz", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/dashboards/victoria-metrics/victoriametrics.json b/dashboards/victoria-metrics/victoriametrics.json index 65cbd553..3703a555 100644 --- a/dashboards/victoria-metrics/victoriametrics.json +++ b/dashboards/victoria-metrics/victoriametrics.json @@ -6,7 +6,7 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "9.2.7" + "version": "10.4.2" }, { "type": "datasource", @@ -76,7 +76,7 @@ "uid": "$ds" }, "enable": true, - "expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(version) unless (sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"} offset 20m) by(short_versionversion))", + "expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(version) unless (sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"} offset $__interval) by(version))", "hide": true, "iconColor": "dark-blue", "name": "version", @@ -85,7 +85,7 @@ } ] }, - "description": "Overview for single node VictoriaMetrics v1.83.0 or higher", + "description": "Overview for single-node 
VictoriaMetrics v1.83.0 or higher", "editable": true, "fiscalYearStartMonth": 0, "gnetId": 10229, @@ -168,7 +168,7 @@ "content": "
$version
", "mode": "markdown" }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -213,7 +213,6 @@ "y": 1 }, "id": 26, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -227,16 +226,19 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, + "editorMode": "code", "exemplar": false, "expr": "sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"})", "format": "time_series", @@ -282,7 +284,6 @@ "y": 1 }, "id": 107, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -296,10 +297,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -352,7 +355,6 @@ "y": 1 }, "id": 108, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -366,10 +368,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -426,7 +430,6 @@ "y": 1 }, "id": 77, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -440,10 +443,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -510,10 +515,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { 
"datasource": { @@ -522,7 +529,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "vm_app_uptime_seconds{job=~\"$job\", instance=~\"$instance\"}", + "expr": "min(vm_app_uptime_seconds{job=~\"$job\", instance=~\"$instance\"})", "instant": true, "interval": "", "legendFormat": "", @@ -537,7 +544,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the number of active time series with new data points inserted during the last hour. High value may result in ingestion slowdown. \n\nSee more details here https://docs.victoriametrics.com/FAQ.html#what-is-an-active-time-series", + "description": "Shows the number of [active time series](https://docs.victoriametrics.com/faq/#what-is-an-active-time-series) with new data points inserted during the last hour. High value may result in ingestion slowdown.", "fieldConfig": { "defaults": { "color": { @@ -564,7 +571,6 @@ "y": 3 }, "id": 38, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -578,18 +584,21 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, + "editorMode": "code", "exemplar": false, - "expr": "vm_cache_entries{job=~\"$job\", instance=~\"$instance\", type=\"storage/hour_metric_ids\"}", + "expr": "sum(vm_cache_entries{job=~\"$job\", instance=~\"$instance\", type=\"storage/hour_metric_ids\"})", "format": "time_series", "instant": true, "interval": "", @@ -633,7 +642,6 @@ "y": 3 }, "id": 81, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -647,10 +655,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -703,7 +713,6 @@ "y": 3 }, "id": 82, - "links": 
[], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -717,10 +726,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -729,7 +740,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) / sum(vm_rows{job=~\"$job\", instance=~\"$instance\"})", + "expr": "max(sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) / sum(vm_rows{job=~\"$job\", instance=~\"$instance\"}))", "format": "time_series", "instant": true, "interval": "", @@ -773,7 +784,6 @@ "y": 3 }, "id": 78, - "links": [], "maxDataPoints": 100, "options": { "colorMode": "value", @@ -787,10 +797,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -847,6 +859,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -860,6 +873,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -903,7 +917,6 @@ "y": 6 }, "id": 106, - "links": [], "options": { "legend": { "calcs": [ @@ -928,16 +941,16 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(vm_rows_inserted_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (type) > 0", + "expr": "sum(rate(vm_rows_inserted_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (type, instance) > 0", "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "__auto", + "legendFormat": "{{instance}} - {{type}}", "range": true, "refId": "A" } ], - "title": "Datapoints ingestion rate ($instance)", + "title": 
"Datapoints ingestion rate", "type": "timeseries" }, { @@ -952,6 +965,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -965,6 +979,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1008,7 +1023,6 @@ "y": 6 }, "id": 12, - "links": [], "options": { "legend": { "calcs": [ @@ -1035,16 +1049,16 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(vm_http_requests_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (path) > 0", + "expr": "sum(rate(vm_http_requests_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (path, instance) > 0", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "{{path}}", + "legendFormat": "{{instance}} - {{path}}", "range": true, "refId": "A" } ], - "title": "Requests rate ($instance)", + "title": "Requests rate", "type": "timeseries" }, { @@ -1052,13 +1066,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the number of active time series with new data points inserted during the last hour. High value may result in ingestion slowdown. \n\nSee following link for details:", + "description": "Shows the number of [active time series](https://docs.victoriametrics.com/faq/#what-is-an-active-time-series) with new data points inserted during the last hour. High value may result in ingestion slowdown. 
\n\nSee following link for details:", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1072,6 +1087,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1119,7 +1135,7 @@ { "targetBlank": true, "title": "troubleshooting", - "url": "https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#troubleshooting" + "url": "https://docs.victoriametrics.com/troubleshooting" } ], "options": { @@ -1145,14 +1161,16 @@ "type": "prometheus", "uid": "$ds" }, + "editorMode": "code", "expr": "vm_cache_entries{job=~\"$job\", instance=~\"$instance\", type=\"storage/hour_metric_ids\"}", "format": "time_series", "intervalFactor": 1, - "legendFormat": "Active time series", + "legendFormat": "{{instance}}", + "range": true, "refId": "A" } ], - "title": "Active time series ($instance)", + "title": "Active time series", "type": "timeseries" }, { @@ -1167,6 +1185,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1180,6 +1199,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1223,7 +1243,6 @@ "y": 14 }, "id": 22, - "links": [], "options": { "legend": { "calcs": [ @@ -1250,15 +1269,15 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "max(vm_request_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\"}) by (path) > 0", + "expr": "max(vm_request_duration_seconds{job=~\"$job\", instance=~\"$instance\", quantile=\"0.99\"}) by (instance, path) > 0", "format": "time_series", "intervalFactor": 1, - "legendFormat": "__auto", + "legendFormat": "{{instance}} - {{path}}", "range": true, "refId": "A" } ], - "title": "Query duration 0.99 quantile ($instance)", + "title": "Query duration 
0.99 quantile", "type": "timeseries" }, { @@ -1273,6 +1292,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1286,6 +1306,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1329,7 +1350,6 @@ "y": 22 }, "id": 35, - "links": [], "options": { "legend": { "calcs": [ @@ -1354,16 +1374,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(rate(vm_http_request_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (path) > 0", + "expr": "sum(rate(vm_http_request_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, path) > 0", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "__auto", + "legendFormat": "{{instance}} - {{path}}", "range": true, "refId": "A" } ], - "title": "Requests error rate ($instance)", + "title": "Requests error rate", "type": "timeseries" }, { @@ -1378,6 +1398,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1391,6 +1412,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1456,9 +1478,9 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (level, location) > 0", + "expr": "sum(rate(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[$__rate_interval])) by (instance, level, location) > 0", "interval": "5m", - "legendFormat": "{{level}}: {{location}}", + "legendFormat": "{{instance}} - {{level}}: {{location}}", "range": true, "refId": "A" } @@ -1492,6 +1514,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", 
"axisLabel": "", @@ -1505,6 +1528,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1528,8 +1552,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1537,7 +1560,8 @@ } ] }, - "unit": "percentunit" + "unit": "percentunit", + "unitScale": true }, "overrides": [] }, @@ -1583,7 +1607,7 @@ "refId": "A" } ], - "title": "RSS memory % usage ($instance)", + "title": "RSS memory % usage", "type": "timeseries" }, { @@ -1598,6 +1622,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1611,6 +1636,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1634,8 +1660,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1643,7 +1668,8 @@ } ] }, - "unit": "bytes" + "unit": "bytes", + "unitScale": true }, "overrides": [] }, @@ -1680,11 +1706,13 @@ "type": "prometheus", "uid": "$ds" }, - "expr": "sum(go_memstats_sys_bytes{job=~\"$job\", instance=~\"$instance\"}) + sum(vm_cache_size_bytes{job=~\"$job\", instance=~\"$instance\"})", + "editorMode": "code", + "expr": "sum(go_memstats_sys_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance) + sum(vm_cache_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "requested from system", + "legendFormat": "{{instance}} - requested from system", + "range": true, "refId": "A" }, { @@ -1692,11 +1720,13 @@ "type": "prometheus", "uid": "$ds" }, - "expr": "sum(go_memstats_heap_inuse_bytes{job=~\"$job\", instance=~\"$instance\"}) + sum(vm_cache_size_bytes{job=~\"$job\", instance=~\"$instance\"})", + "editorMode": "code", + "expr": "sum(go_memstats_heap_inuse_bytes{job=~\"$job\", 
instance=~\"$instance\"}) by (instance) + sum(vm_cache_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "heap inuse", + "legendFormat": "{{instance}} - heap inuse", + "range": true, "refId": "B" }, { @@ -1704,11 +1734,13 @@ "type": "prometheus", "uid": "$ds" }, - "expr": "sum(go_memstats_stack_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})", + "editorMode": "code", + "expr": "sum(go_memstats_stack_inuse_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "stack inuse", + "legendFormat": "{{instance}} - stack inuse", + "range": true, "refId": "C" }, { @@ -1716,12 +1748,14 @@ "type": "prometheus", "uid": "$ds" }, - "expr": "sum(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"})", + "editorMode": "code", + "expr": "sum(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "resident", + "legendFormat": "{{instance}} - resident", + "range": true, "refId": "D" }, { @@ -1729,17 +1763,19 @@ "type": "prometheus", "uid": "$ds" }, + "editorMode": "code", "exemplar": false, - "expr": "sum(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"})", + "expr": "sum(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "resident anonymous", + "legendFormat": "{{instance}} - resident anonymous", + "range": true, "refId": "E" } ], - "title": "Memory usage ($instance)", + "title": "Memory usage", "type": "timeseries" }, { @@ -1754,6 +1790,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1767,6 +1804,7 @@ 
"tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1790,8 +1828,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1799,7 +1836,8 @@ } ] }, - "unit": "percentunit" + "unit": "percentunit", + "unitScale": true }, "overrides": [] }, @@ -1845,7 +1883,7 @@ "refId": "A" } ], - "title": "RSS anonymous memory % usage ($instance)", + "title": "RSS anonymous memory % usage", "type": "timeseries" }, { @@ -1859,6 +1897,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1872,6 +1911,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1895,8 +1935,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1904,7 +1943,8 @@ } ] }, - "unit": "percentunit" + "unit": "percentunit", + "unitScale": true }, "overrides": [] }, @@ -1952,7 +1992,7 @@ "refId": "A" } ], - "title": "CPU % usage ($instance)", + "title": "CPU % usage", "type": "timeseries" }, { @@ -1967,6 +2007,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1980,6 +2021,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2004,8 +2046,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2013,7 +2054,8 @@ } ] }, - "unit": "percentunit" + "unit": "percentunit", + "unitScale": true }, "overrides": [ { @@ -2077,7 +2119,7 @@ "refId": "A" } ], - "title": "Open FDs ($instance)", + "title": "Open FDs", "type": "timeseries" }, { @@ -2085,13 +2127,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "", + "description": "CPU 
cores used by instance", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2105,6 +2148,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2128,8 +2172,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2137,7 +2180,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [ { @@ -2190,11 +2234,13 @@ "type": "prometheus", "uid": "$ds" }, + "editorMode": "code", "expr": "rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "CPU cores used", + "legendFormat": "{{instance}}", + "range": true, "refId": "A" }, { @@ -2202,17 +2248,19 @@ "type": "prometheus", "uid": "$ds" }, + "editorMode": "code", "exemplar": false, - "expr": "process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}", + "expr": "min(process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"})", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, "legendFormat": "Limit", + "range": true, "refId": "B" } ], - "title": "CPU ($instance)", + "title": "CPU", "type": "timeseries" }, { @@ -2226,6 +2274,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2239,6 +2288,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2263,8 +2313,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2272,7 +2321,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -2318,7 +2368,7 @@ "refId": "A" } 
], - "title": "Goroutines ($instance)", + "title": "Goroutines", "type": "timeseries" }, { @@ -2333,6 +2383,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2346,6 +2397,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2368,8 +2420,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2377,13 +2428,14 @@ } ] }, - "unit": "bytes" + "unit": "bytes", + "unitScale": true }, "overrides": [ { "matcher": { - "id": "byName", - "options": "read" + "id": "byRegexp", + "options": "/.*read/" }, "properties": [ { @@ -2427,12 +2479,14 @@ "type": "prometheus", "uid": "$ds" }, - "expr": "sum(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "editorMode": "code", + "expr": "sum(rate(process_io_storage_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "read", + "legendFormat": "{{instance}} - read", + "range": true, "refId": "A" }, { @@ -2440,16 +2494,18 @@ "type": "prometheus", "uid": "$ds" }, - "expr": "sum(rate(process_io_storage_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "editorMode": "code", + "expr": "sum(rate(process_io_storage_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "write", + "legendFormat": "{{instance}} - write", + "range": true, "refId": "B" } ], - "title": "Disk writes/reads ($instance)", + "title": "Disk writes/reads", "type": "timeseries" }, { @@ -2463,6 +2519,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": 
false, "axisColorMode": "text", "axisLabel": "", @@ -2476,6 +2533,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2500,8 +2558,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2509,7 +2566,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -2555,7 +2613,7 @@ "refId": "A" } ], - "title": "Threads ($instance)", + "title": "Threads", "type": "timeseries" }, { @@ -2570,6 +2628,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2583,6 +2642,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2605,8 +2665,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2614,13 +2673,14 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [ { "matcher": { - "id": "byName", - "options": "read calls" + "id": "byRegexp", + "options": "/.*read.*/" }, "properties": [ { @@ -2665,12 +2725,12 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(process_io_read_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(rate(process_io_read_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "read calls", + "legendFormat": "{{instance}} - read calls", "range": true, "refId": "A" }, @@ -2680,17 +2740,17 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(process_io_write_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(rate(process_io_write_syscalls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by 
(instance)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "write calls", + "legendFormat": "{{instance}} - write calls", "range": true, "refId": "B" } ], - "title": "Disk write/read calls ($instance)", + "title": "Disk write/read calls", "type": "timeseries" }, { @@ -2705,6 +2765,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2718,6 +2779,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2741,8 +2803,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2750,7 +2811,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -2797,7 +2859,7 @@ "refId": "A" } ], - "title": "TCP connections rate ($instance)", + "title": "TCP connections rate", "type": "timeseries" }, { @@ -2812,6 +2874,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2825,6 +2888,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2848,8 +2912,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2857,7 +2920,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -2904,7 +2968,250 @@ "refId": "A" } ], - "title": "TCP connections ($instance)", + "title": "TCP connections", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows network usage by VM:\n* Writes show traffic sent to clients\n* Reads show traffic received from clients", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": 
false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbits", + "unitScale": true + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/read.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 79 + }, + "id": 127, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vm_tcplistener_read_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(name) * 8 > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "read via {{name}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vm_tcplistener_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(name) * 8 > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 
1, + "legendFormat": "write via {{name}}", + "range": true, + "refId": "B" + } + ], + "title": "Network usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows the percent of CPU spent on garbage collection.\n\nIf % is high, then CPU usage can be decreased by changing GOGC to higher values. Increasing GOGC value will increase memory usage, and decrease CPU usage.\n\nTry searching for keyword `GOGC` at https://docs.victoriametrics.com/troubleshooting/ ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 79 + }, + "id": 125, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.2.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(\n rate(go_gc_cpu_seconds_total{job=~\"$job\", 
instance=~\"$instance\"}[$__rate_interval]) \n / rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n ) by(instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "CPU spent on GC", "type": "timeseries" } ], @@ -2939,13 +3246,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the rate and total number of new series created over last 24h.\n\nHigh churn rate tightly connected with database performance and may result in unexpected OOM's or slow queries. It is recommended to always keep an eye on this metric to avoid unexpected cardinality \"explosions\".\n\nThe higher churn rate is, the more resources required to handle it. Consider to keep the churn rate as low as possible.\n\nGood references to read:\n* https://www.robustperception.io/cardinality-is-key\n* https://www.robustperception.io/using-tsdb-analyze-to-investigate-churn-and-cardinality", + "description": "Shows the rate and total number of new series created over last 24h.\n\nHigh [churn rate](https://docs.victoriametrics.com/faq/#what-is-high-churn-rate) tightly connected with database performance and may result in unexpected OOM's or slow queries. It is recommended to always keep an eye on this metric to avoid unexpected [cardinality](https://docs.victoriametrics.com/keyconcepts/#cardinality) \"explosions\".\n\nThe higher churn rate is, the more resources required to handle it. 
Consider to keep the churn rate as low as possible.\n\nGood references to read:\n* https://www.robustperception.io/cardinality-is-key\n* https://www.robustperception.io/using-tsdb-analyze-to-investigate-churn-and-cardinality", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2959,6 +3267,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2990,7 +3299,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -2998,7 +3308,7 @@ "h": 8, "w": 12, "x": 0, - "y": 8 + "y": 4 }, "id": 66, "options": { @@ -3026,9 +3336,11 @@ "type": "prometheus", "uid": "$ds" }, - "expr": "sum(rate(vm_new_timeseries_created_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "editorMode": "code", + "expr": "sum(rate(vm_new_timeseries_created_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", "interval": "", - "legendFormat": "churn rate", + "legendFormat": "{{instance}} - churn rate", + "range": true, "refId": "A" }, { @@ -3036,13 +3348,15 @@ "type": "prometheus", "uid": "$ds" }, - "expr": "sum(increase(vm_new_timeseries_created_total{job=~\"$job\", instance=~\"$instance\"}[24h]))", + "editorMode": "code", + "expr": "sum(increase(vm_new_timeseries_created_total{job=~\"$job\", instance=~\"$instance\"}[24h])) by (instance)", "interval": "", - "legendFormat": "new series over 24h", + "legendFormat": "{{instance}} - new series over 24h", + "range": true, "refId": "B" } ], - "title": "Churn rate ($instance)", + "title": "Churn rate", "type": "timeseries" }, { @@ -3050,13 +3364,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "The percentage of slow inserts comparing to total insertion rate during the last 5 minutes. \n\nThe less value is better. 
If percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of active time series. \n\nIn general, VictoriaMetrics requires ~1KB or RAM per active time series, so it should be easy calculating the required amounts of RAM for the current workload according to capacity planning docs. But the resulting number may be far from the real number because the required amounts of memory depends on many other factors such as the number of labels per time series and the length of label values. See also https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183", + "description": "The percentage of slow inserts comparing to total insertion rate during the last 5 minutes. \n\nThe less value is better. If percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of [active time series](https://docs.victoriametrics.com/faq/#what-is-an-active-time-series). \n\nIn general, VictoriaMetrics requires ~1KB or RAM per active time series, so it should be easy calculating the required amounts of RAM for the current workload according to capacity planning docs. But the resulting number may be far from the real number because the required amounts of memory depends on many other factors such as the number of labels per time series and the length of label values. 
See also [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183) for details.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3070,6 +3385,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3102,7 +3418,8 @@ } ] }, - "unit": "percentunit" + "unit": "percentunit", + "unitScale": true }, "overrides": [] }, @@ -3110,7 +3427,7 @@ "h": 8, "w": 12, "x": 12, - "y": 8 + "y": 4 }, "id": 68, "links": [], @@ -3140,17 +3457,17 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "max(\n rate(vm_slow_row_inserts_total{job=~\"$job\"}[$__rate_interval]) \n / rate(vm_rows_added_to_storage_total{job=~\"$job\"}[$__rate_interval])\n)", + "expr": "max(\n rate(vm_slow_row_inserts_total{job=~\"$job\"}[$__rate_interval]) \n / rate(vm_rows_added_to_storage_total{job=~\"$job\"}[$__rate_interval])\n) by (instance)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "slow inserts percentage", + "legendFormat": "{{instance}}", "range": true, "refId": "A" } ], - "title": "Slow inserts ($instance)", + "title": "Slow inserts %", "type": "timeseries" }, { @@ -3165,6 +3482,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3178,6 +3496,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3209,7 +3528,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -3217,7 +3537,7 @@ "h": 8, "w": 12, "x": 0, - "y": 16 + "y": 12 }, "id": 116, "links": [], @@ -3246,16 +3566,16 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(increase(vm_assisted_merges_total{job=~\"$job\", 
instance=~\"$instance\"}[$__rate_interval])) by(type) > 0", + "expr": "sum(increase(vm_assisted_merges_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance, type) > 0", "format": "time_series", "interval": "5m", "intervalFactor": 1, - "legendFormat": "__auto", + "legendFormat": "{{instance}} - {{type}}", "range": true, "refId": "A" } ], - "title": "Assisted merges ($instance)", + "title": "Assisted merges", "type": "timeseries" }, { @@ -3270,6 +3590,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3283,6 +3604,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3314,7 +3636,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -3322,7 +3645,7 @@ "h": 8, "w": 12, "x": 12, - "y": 16 + "y": 12 }, "id": 60, "links": [], @@ -3351,15 +3674,17 @@ "type": "prometheus", "uid": "$ds" }, - "expr": "sum(rate(vm_slow_queries_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "editorMode": "code", + "expr": "sum(rate(vm_slow_queries_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "slow queries rate", + "legendFormat": "{{instance}}", + "range": true, "refId": "A" } ], - "title": "Slow queries rate ($instance)", + "title": "Slow queries rate", "type": "timeseries" }, { @@ -3374,6 +3699,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3387,6 +3713,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3416,7 +3743,8 @@ } ] }, - "unit": "percentunit" + "unit": "percentunit", + "unitScale": true }, "overrides": [] }, @@ -3424,7 +3752,7 @@ "h": 9, 
"w": 12, "x": 0, - "y": 24 + "y": 20 }, "id": 90, "options": { @@ -3456,12 +3784,12 @@ "exemplar": false, "expr": "vm_cache_size_bytes{job=~\"$job\", instance=~\"$instance\"} / vm_cache_size_max_bytes{job=~\"$job\", instance=~\"$instance\"}", "interval": "", - "legendFormat": "{{type}}", + "legendFormat": "{{instance}} - {{type}}", "range": true, "refId": "A" } ], - "title": "Cache usage % by type ($instance)", + "title": "Cache usage % by type", "type": "timeseries" }, { @@ -3476,6 +3804,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3489,6 +3818,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3521,7 +3851,8 @@ } ] }, - "unit": "percentunit" + "unit": "percentunit", + "unitScale": true }, "overrides": [] }, @@ -3529,7 +3860,7 @@ "h": 9, "w": 12, "x": 12, - "y": 24 + "y": 20 }, "id": 118, "links": [], @@ -3565,12 +3896,12 @@ "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}} - {{type}}", "range": true, "refId": "A" } ], - "title": "Cache miss ratio ($instance)", + "title": "Cache miss ratio", "type": "timeseries" }, { @@ -3578,6 +3909,7 @@ "type": "prometheus", "uid": "$ds" }, + "description": "Flags explicitly set to non-default values", "fieldConfig": { "defaults": { "color": { @@ -3585,7 +3917,9 @@ }, "custom": { "align": "auto", - "displayMode": "auto", + "cellOptions": { + "type": "auto" + }, "inspect": false }, "mappings": [], @@ -3600,9 +3934,22 @@ "value": 80 } ] - } + }, + "unitScale": true }, "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, { "matcher": { "id": "byName", @@ -3618,7 +3965,7 @@ { "matcher": { "id": "byName", - "options": "Time" + "options": "job" }, "properties": [ { @@ -3633,26 +3980,22 @@ "h": 8, 
"w": 12, "x": 0, - "y": 33 + "y": 29 }, - "id": 120, + "id": 126, "options": { + "cellHeight": "sm", "footer": { + "countRows": false, "fields": "", "reducer": [ "sum" ], "show": false }, - "showHeader": true, - "sortBy": [ - { - "desc": true, - "displayName": "job" - } - ] + "showHeader": true }, - "pluginVersion": "9.1.0", + "pluginVersion": "10.3.1", "targets": [ { "datasource": { @@ -3670,29 +4013,6 @@ } ], "title": "Non-default flags", - "transformations": [ - { - "id": "groupBy", - "options": { - "fields": { - "instance": { - "aggregations": [] - }, - "job": { - "aggregations": [] - }, - "name": { - "aggregations": [], - "operation": "groupby" - }, - "value": { - "aggregations": [], - "operation": "groupby" - } - } - } - } - ], "type": "table" }, { @@ -3700,13 +4020,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.\n\nThis prevents from ingesting metrics with too many labels. The value of `maxLabelsPerTimeseries` must be adjusted for your workload.\n\nWhen limit is exceeded (graph is > 0) - extra labels are dropped, which could result in unexpected identical time series.", + "description": "VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.\n\nThis prevents from ingesting metrics with too many labels. The value of `maxLabelsPerTimeseries` must be adjusted for your workload.\n\nWhen limit is exceeded (graph is > 0) - extra labels are dropped, which could result in unexpected identical time series. 
See more details about dropped labels in logs.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3720,6 +4041,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3752,7 +4074,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -3760,7 +4083,7 @@ "h": 8, "w": 12, "x": 12, - "y": 33 + "y": 29 }, "id": 74, "links": [], @@ -3788,16 +4111,17 @@ "uid": "$ds" }, "exemplar": false, - "expr": "sum(increase(vm_metrics_with_dropped_labels_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(increase(vm_metrics_with_dropped_labels_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "limit exceeded", + "legendFormat": "{{instance}} - limit exceeded", + "range": true, "refId": "A" } ], - "title": "Labels limit exceeded ($instance)", + "title": "Labels limit exceeded", "type": "timeseries" } ], @@ -3839,6 +4163,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3852,6 +4177,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3875,8 +4201,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3884,7 +4209,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -3892,7 +4218,7 @@ "h": 8, "w": 12, "x": 0, - "y": 33 + "y": 5 }, "id": 10, "links": [], @@ -3922,16 +4248,16 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(rate(vm_rows_inserted_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) 
by (type) > 0", + "expr": "sum(rate(vm_rows_inserted_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, type) > 0", "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}} - {{type}}", "range": true, "refId": "A" } ], - "title": "Datapoints ingestion rate ($instance)", + "title": "Datapoints ingestion rate", "type": "timeseries" }, { @@ -3939,13 +4265,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the time needed to reach the 100% of disk capacity based on the following params:\n* free disk space;\n* row ingestion rate;\n* dedup rate;\n* compression.\n\nUse this panel for capacity planning in order to estimate the time remaining for running out of the disk space.\n\n", + "description": "Shows the approx time needed to reach 100% of disk capacity based on the following params:\n* free disk space;\n* row ingestion rate;\n* compression.\n\nNote: this panel doesn't account for deduplication process.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3959,6 +4286,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3982,8 +4310,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3991,7 +4318,8 @@ } ] }, - "unit": "s" + "unit": "s", + "unitScale": true }, "overrides": [] }, @@ -3999,7 +4327,7 @@ "h": 8, "w": 12, "x": 12, - "y": 33 + "y": 5 }, "id": 73, "links": [], @@ -4029,7 +4357,7 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"} \n/ ignoring(path) (\n (\n rate(vm_rows_added_to_storage_total{job=~\"$job\", instance=~\"$instance\"}[1d]) \n - ignoring(type) rate(vm_deduplicated_samples_total{job=~\"$job\", 
instance=~\"$instance\", type=\"merge\"}[1d])\n ) * scalar(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"}) \n / sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n )", + "expr": "vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"} \n/ ignoring(path) (\n rate(vm_rows_added_to_storage_total{job=~\"$job\", instance=~\"$instance\"}[1d]) \n * scalar(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"}) \n / sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n )", "format": "time_series", "hide": false, "interval": "", @@ -4039,7 +4367,7 @@ "refId": "A" } ], - "title": "Storage full ETA ($instance)", + "title": "Storage full ETA", "type": "timeseries" }, { @@ -4054,6 +4382,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4067,6 +4396,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4090,8 +4420,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4099,7 +4428,8 @@ } ] }, - "unit": "bytes" + "unit": "bytes", + "unitScale": true }, "overrides": [] }, @@ -4107,7 +4437,7 @@ "h": 8, "w": 12, "x": 0, - "y": 41 + "y": 13 }, "id": 53, "links": [], @@ -4137,11 +4467,11 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"})", + "expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"}) by (instance)", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "Used (datapoints)", + "legendFormat": "{{instance}} - Used (datapoints)", "range": true, "refId": "A" }, @@ -4154,7 +4484,8 @@ "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": 
"Free", + "legendFormat": "{{instance}} - Free", + "range": true, "refId": "B" }, { @@ -4163,17 +4494,17 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type=~\"indexdb.*\"})", + "expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type=~\"indexdb.*\"}) by (instance)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "Used (index)", + "legendFormat": "{{instance}} - Used (index)", "range": true, "refId": "C" } ], - "title": "Disk space usage - datapoints ($instance)", + "title": "Disk space usage - datapoints", "type": "timeseries" }, { @@ -4188,6 +4519,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4201,6 +4533,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4224,8 +4557,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4233,7 +4565,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [ { @@ -4258,7 +4591,7 @@ "h": 8, "w": 12, "x": 12, - "y": 41 + "y": 13 }, "id": 34, "links": [], @@ -4291,7 +4624,8 @@ "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "pending datapoints", + "legendFormat": "{{instance}} - pending datapoints", + "range": true, "refId": "A" }, { @@ -4303,11 +4637,12 @@ "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "pending index entries", + "legendFormat": "{{instance}} - pending index entries", + "range": true, "refId": "B" } ], - "title": "Pending datapoints ($instance)", + "title": "Pending datapoints", "type": "timeseries" }, { @@ -4322,6 +4657,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", 
"axisLabel": "", @@ -4335,6 +4671,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4358,8 +4695,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4367,7 +4703,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [ { @@ -4392,7 +4729,7 @@ "h": 8, "w": 12, "x": 0, - "y": 49 + "y": 21 }, "id": 30, "links": [], @@ -4421,11 +4758,13 @@ "type": "prometheus", "uid": "$ds" }, - "expr": "sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"})", + "editorMode": "code", + "expr": "sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"}) by (instance)", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "total datapoints", + "legendFormat": "{{instance}} - total datapoints", + "range": true, "refId": "A" }, { @@ -4434,16 +4773,16 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) \n/ sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"})", + "expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)\n/ sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"}) by (instance)", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "bytes-per-datapoint", + "legendFormat": "{{instance}} - bytes-per-datapoint", "range": true, "refId": "B" } ], - "title": "Datapoints ($instance)", + "title": "Datapoints", "type": "timeseries" }, { @@ -4457,6 +4796,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4470,6 +4810,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4493,8 +4834,7 @@ "mode": "absolute", "steps": [ { - "color": 
"green", - "value": null + "color": "green" }, { "color": "red", @@ -4502,7 +4842,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -4510,7 +4851,7 @@ "h": 8, "w": 12, "x": 12, - "y": 49 + "y": 21 }, "id": 36, "links": [], @@ -4539,14 +4880,16 @@ "type": "prometheus", "uid": "$ds" }, - "expr": "sum(vm_parts{job=~\"$job\", instance=~\"$instance\"}) by (type)", + "editorMode": "code", + "expr": "sum(vm_parts{job=~\"$job\", instance=~\"$instance\"}) by (instance, type)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}} - {{type}}", + "range": true, "refId": "A" } ], - "title": "LSM parts ($instance)", + "title": "LSM parts", "type": "timeseries" }, { @@ -4561,6 +4904,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4574,6 +4918,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4597,8 +4942,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4606,7 +4950,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -4614,7 +4959,7 @@ "h": 8, "w": 12, "x": 0, - "y": 57 + "y": 29 }, "id": 58, "links": [], @@ -4643,17 +4988,17 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(increase(vm_rows_ignored_total{job=~\"$job\", instance=~\"$instance\"}[1h])) by (reason)", + "expr": "sum(increase(vm_rows_ignored_total{job=~\"$job\", instance=~\"$instance\"}[1h])) by (instance, reason)", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{reason}}", + "legendFormat": "{{instance}} - {{reason}}", "range": true, "refId": "A" } ], - "title": "Rows ignored for last 1h ($instance)", + "title": "Rows ignored for last 1h", "type": "timeseries" }, { @@ -4668,6 
+5013,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4681,6 +5027,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4705,8 +5052,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4714,7 +5060,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -4722,7 +5069,7 @@ "h": 8, "w": 12, "x": 12, - "y": 57 + "y": 29 }, "id": 62, "options": { @@ -4748,12 +5095,14 @@ "type": "prometheus", "uid": "$ds" }, - "expr": "sum(vm_active_merges{job=~\"$job\", instance=~\"$instance\"}) by(type)", - "legendFormat": "{{type}}", + "editorMode": "code", + "expr": "sum(vm_active_merges{job=~\"$job\", instance=~\"$instance\"}) by(instance, type)", + "legendFormat": "{{instance}} - {{type}}", + "range": true, "refId": "A" } ], - "title": "Active merges ($instance)", + "title": "Active merges", "type": "timeseries" }, { @@ -4768,6 +5117,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4781,6 +5131,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4805,8 +5156,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4814,7 +5164,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [ { @@ -4838,7 +5189,7 @@ "h": 8, "w": 12, "x": 0, - "y": 65 + "y": 37 }, "id": 59, "links": [], @@ -4869,7 +5220,7 @@ "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "max", + "legendFormat": "{{instance}} - max", "range": true, "refId": "A" }, @@ -4878,14 +5229,16 @@ "type": "prometheus", "uid": "$ds" }, - "expr": 
"sum(vm_concurrent_insert_current{job=~\"$job\", instance=~\"$instance\"})", + "editorMode": "code", + "expr": "sum(vm_concurrent_insert_current{job=~\"$job\", instance=~\"$instance\"}) by (instance)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "current", + "legendFormat": "{{instance}} - current", + "range": true, "refId": "B" } ], - "title": "Concurrent flushes on disk ($instance)", + "title": "Concurrent flushes on disk", "type": "timeseries" }, { @@ -4900,6 +5253,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4913,6 +5267,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4937,8 +5292,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4946,7 +5300,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -4954,7 +5309,7 @@ "h": 8, "w": 12, "x": 12, - "y": 65 + "y": 37 }, "id": 64, "options": { @@ -4982,12 +5337,14 @@ "type": "prometheus", "uid": "$ds" }, - "expr": "sum(rate(vm_rows_merged_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(type)", - "legendFormat": "{{type}}", + "editorMode": "code", + "expr": "sum(rate(vm_rows_merged_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance, type)", + "legendFormat": "{{instance}} - {{type}}", + "range": true, "refId": "A" } ], - "title": "Merge speed ($instance)", + "title": "Merge speed", "type": "timeseries" }, { @@ -5002,6 +5359,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5015,6 +5373,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5039,8 +5398,7 @@ "mode": "absolute", "steps": [ { - "color": 
"green", - "value": null + "color": "green" }, { "color": "red", @@ -5048,7 +5406,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -5056,7 +5415,7 @@ "h": 8, "w": 12, "x": 0, - "y": 73 + "y": 45 }, "id": 99, "links": [], @@ -5095,7 +5454,7 @@ "refId": "A" } ], - "title": "Series read per query ($instance)", + "title": "Series read per query", "type": "timeseries" }, { @@ -5103,13 +5462,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "99th percentile of number of raw samples read per queried series.", + "description": "99th percentile of number of [data samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) read per queried series.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5123,6 +5483,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5147,8 +5508,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5156,7 +5516,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -5164,7 +5525,7 @@ "h": 8, "w": 12, "x": 12, - "y": 73 + "y": 45 }, "id": 103, "links": [], @@ -5203,7 +5564,7 @@ "refId": "A" } ], - "title": "Datapoints read per series ($instance)", + "title": "Datapoints read per series", "type": "timeseries" }, { @@ -5211,13 +5572,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "99th percentile of number of raw datapoints read per query.", + "description": "99th percentile of number of [data samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) read per query.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5231,6 
+5593,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5255,8 +5618,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5264,7 +5626,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -5272,7 +5635,7 @@ "h": 8, "w": 12, "x": 0, - "y": 81 + "y": 53 }, "id": 122, "links": [], @@ -5311,7 +5674,7 @@ "refId": "A" } ], - "title": "Datapoints read per query ($instance)", + "title": "Datapoints read per query", "type": "timeseries" }, { @@ -5319,13 +5682,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "99th percentile of number of raw samples scanner per query.\n\nThis number can exceed number of RowsReadPerQuery if `step` query arg passed to [/api/v1/query_range](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries) is smaller than the lookbehind window set in square brackets of [rollup function](https://docs.victoriametrics.com/MetricsQL.html#rollup-functions). For example, if `increase(some_metric[1h])` is executed with the `step=5m`, then the same raw samples on a hour time range are scanned `1h/5m=12` times. See [this article](https://valyala.medium.com/how-to-optimize-promql-and-metricsql-queries-85a1b75bf986) for details.", + "description": "99th percentile of number of [data samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) scanner per query.\n\nThis number can exceed number of RowsReadPerQuery if `step` query arg passed to [/api/v1/query_range](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries) is smaller than the lookbehind window set in square brackets of [rollup function](https://docs.victoriametrics.com/metricsql/#rollup-functions). 
For example, if `increase(some_metric[1h])` is executed with the `step=5m`, then the same [data samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) on a hour time range are scanned `1h/5m=12` times. See [this article](https://valyala.medium.com/how-to-optimize-promql-and-metricsql-queries-85a1b75bf986) for details.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5339,6 +5703,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5363,8 +5728,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -5372,7 +5736,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -5380,7 +5745,7 @@ "h": 8, "w": 12, "x": 12, - "y": 81 + "y": 53 }, "id": 105, "links": [], @@ -5419,7 +5784,7 @@ "refId": "A" } ], - "title": "Datapoints scanned per query ($instance)", + "title": "Datapoints scanned per query", "type": "timeseries" } ], @@ -5436,9 +5801,8 @@ "type": "row" } ], - "refresh": false, - "schemaVersion": 37, - "style": "dark", + "refresh": "", + "schemaVersion": 39, "tags": [ "victoriametrics", "vmsingle" @@ -5449,7 +5813,7 @@ "current": { "selected": false, "text": "VictoriaMetrics", - "value": "VictoriaMetrics" + "value": "P4169E866C3094E38" }, "hide": 0, "includeAll": false, @@ -5505,7 +5869,7 @@ "refId": "VictoriaMetrics-version-Variable-Query" }, "refresh": 1, - "regex": "/.*-tags-(v\\d+\\.\\d+\\.\\d+)/", + "regex": "/.*-(?:tags|heads)-(.*)-(?:0|dirty)-.*/", "skipUrlSync": false, "sort": 2, "tagValuesQuery": "", @@ -5514,6 +5878,7 @@ "useTags": false }, { + "allValue": ".*", "current": {}, "datasource": { "type": "prometheus", @@ -5521,8 +5886,8 @@ }, "definition": "label_values(vm_app_version{job=~\"$job\"}, instance)", "hide": 0, - "includeAll": 
false, - "multi": false, + "includeAll": true, + "multi": true, "name": "instance", "options": [], "query": { @@ -5552,7 +5917,7 @@ ] }, "time": { - "from": "now-30m", + "from": "now-3h", "to": "now" }, "timepicker": { @@ -5580,8 +5945,8 @@ ] }, "timezone": "", - "title": "VictoriaMetrics", + "title": "VictoriaMetrics - single-node", "uid": "wNf0q_kZk", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/dashboards/victoria-metrics/vmagent.json b/dashboards/victoria-metrics/vmagent.json index adc0da01..cd26420f 100644 --- a/dashboards/victoria-metrics/vmagent.json +++ b/dashboards/victoria-metrics/vmagent.json @@ -6,7 +6,7 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "9.2.7" + "version": "10.4.2" }, { "type": "datasource", @@ -65,7 +65,7 @@ "uid": "$ds" }, "enable": true, - "expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(short_version) unless (sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"} offset 20m) by(short_version))", + "expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(short_version) unless (sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"} offset $__interval) by(short_version))", "hide": true, "iconColor": "dark-blue", "name": "version", @@ -78,15 +78,14 @@ "uid": "$ds" }, "enable": true, - "expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"})) by(job, instance)", - "hide": true, + "expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(job, instance)", "iconColor": "dark-yellow", "name": "restarts", "textFormat": "{{job}}:{{instance}} restarted" } ] }, - "description": "Overview for VictoriaMetrics vmagent v1.80.0 or higher", + "description": "Overview for VictoriaMetrics vmagent v1.102.0 or higher", "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, @@ -99,7 +98,7 @@ "title": "vmagent wiki", "tooltip": "", "type": "link", - "url": 
"https://docs.victoriametrics.com/vmagent.html" + "url": "https://docs.victoriametrics.com/vmagent/" }, { "icon": "external link", @@ -138,7 +137,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the rate of samples scraped from configured targets.", + "description": "Shows the rate of [samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) scraped from configured targets.", "fieldConfig": { "defaults": { "mappings": [], @@ -164,7 +163,7 @@ "links": [ { "title": "Troubleshooting", - "url": "https://docs.victoriametrics.com/vmagent.html#troubleshooting" + "url": "https://docs.victoriametrics.com/vmagent/#troubleshooting" } ], "options": { @@ -179,10 +178,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -205,7 +206,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the rate of ingested samples", + "description": "Shows the rate of ingested [samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples)", "fieldConfig": { "defaults": { "mappings": [], @@ -231,7 +232,7 @@ "links": [ { "title": "Troubleshooting", - "url": "https://docs.victoriametrics.com/vmagent.html#troubleshooting" + "url": "https://docs.victoriametrics.com/vmagent/#troubleshooting" } ], "options": { @@ -246,10 +247,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -272,9 +275,10 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows total number of all configured scrape targets in state \"up\".\n\nSee `http://vmagent-host:8429/targets` to get list of all targets. 
\n", + "description": "Shows the number of targets scraped per second.", "fieldConfig": { "defaults": { + "decimals": 1, "mappings": [], "thresholds": { "mode": "absolute", @@ -294,10 +298,10 @@ "x": 8, "y": 1 }, - "id": 9, + "id": 134, "options": { "colorMode": "value", - "graphMode": "area", + "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { @@ -307,23 +311,27 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, - "expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"up\"})", + "editorMode": "code", + "expr": "sum(rate(vm_promscrape_scrapes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) ", "interval": "", - "legendFormat": "up", + "legendFormat": "__auto", + "range": true, "refId": "A" } ], - "title": "Scrape targets up", + "title": "Targets scraped/s", "type": "stat" }, { @@ -331,7 +339,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows total number of all configured scrape targets in state \"down\".\n\nSee `http://vmagent-host:8429/targets` to get list of all targets. \n", + "description": "Shows total number of all configured scrape targets in state `up` or `down`.\n\nSee `http://vmagent-host:8429/targets` to get list of all targets. 
\n", "fieldConfig": { "defaults": { "mappings": [], @@ -341,15 +349,36 @@ { "color": "green", "value": null - }, - { - "color": "red", - "value": 1 } ] } }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "down" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + } + ] }, "gridPos": { "h": 3, @@ -361,7 +390,7 @@ "links": [ { "title": "Troubleshooting", - "url": "https://docs.victoriametrics.com/vmagent.html#troubleshooting" + "url": "https://docs.victoriametrics.com/vmagent/#troubleshooting" } ], "options": { @@ -376,23 +405,40 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, - "expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"down\"})", + "editorMode": "code", + "expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"up\"})", "interval": "", "legendFormat": "up", + "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"down\"})", + "hide": false, + "interval": "", + "legendFormat": "down", + "range": true, + "refId": "B" } ], - "title": "Scrape targets down", + "title": "Scrape targets", "type": "stat" }, { @@ -433,7 +479,7 @@ { "targetBlank": true, "title": "Troubleshooting", - "url": "https://docs.victoriametrics.com/vmagent.html#troubleshooting" + "url": "https://docs.victoriametrics.com/vmagent/#troubleshooting" } ], "options": { @@ -448,10 +494,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + 
"textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -512,10 +560,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -543,7 +593,9 @@ }, "custom": { "align": "auto", - "displayMode": "auto", + "cellOptions": { + "type": "auto" + }, "inspect": false, "minWidth": 50 }, @@ -597,7 +649,9 @@ }, "id": 101, "options": { + "cellHeight": "sm", "footer": { + "countRows": false, "fields": "", "reducer": [ "sum" @@ -606,7 +660,7 @@ }, "showHeader": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -635,6 +689,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -648,6 +703,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "stepAfter", "lineWidth": 1, "pointSize": 5, @@ -758,13 +814,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows in/out samples rate including push and pull models. \n\nThe out-rate could be different to in-rate because of replication or additional timeseries added by vmagent for every scraped target.\n\nClick on the line and choose Drilldown to show CPU usage per instance\n", + "description": "Shows in/out [samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) rate including push and pull models. 
\n\nThe out-rate could be different to in-rate because of replication or additional timeseries added by vmagent for every scraped target.\n\nClick on the line and choose Drilldown to show CPU usage per instance\n", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -778,6 +835,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -899,6 +957,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -912,6 +971,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -964,7 +1024,7 @@ "links": [ { "title": "Troubleshooting", - "url": "https://docs.victoriametrics.com/vmagent.html#troubleshooting" + "url": "https://docs.victoriametrics.com/vmagent/#troubleshooting" } ], "options": { @@ -1016,6 +1076,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1029,6 +1090,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1072,7 +1134,6 @@ "y": 18 }, "id": 107, - "links": [], "options": { "legend": { "calcs": [ @@ -1118,13 +1179,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows rate of dropped samples from persistent queue. VMagent drops samples from queue if in-memory and on-disk queues are full and it is unable to flush them to remote storage.\nThe max size of on-disk queue is configured by `-remoteWrite.maxDiskUsagePerURL` flag.", + "description": "Shows rate of dropped [samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) from persistent queue. 
vmagent drops samples from queue if in-memory and on-disk queues are full and it is unable to flush them to remote storage.\nThe max size of on-disk queue is configured by `-remoteWrite.maxDiskUsagePerURL` flag.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1138,6 +1200,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1185,7 +1248,7 @@ { "targetBlank": true, "title": "Troubleshooting", - "url": "https://docs.victoriametrics.com/vmagent.html#troubleshooting" + "url": "https://docs.victoriametrics.com/vmagent/#troubleshooting" } ], "options": { @@ -1234,6 +1297,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1247,6 +1311,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1290,7 +1355,6 @@ "y": 26 }, "id": 15, - "links": [], "options": { "legend": { "calcs": [ @@ -1337,6 +1401,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1350,6 +1415,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1397,7 +1463,7 @@ { "targetBlank": true, "title": "Troubleshooting", - "url": "https://docs.victoriametrics.com/vmagent.html#troubleshooting" + "url": "https://docs.victoriametrics.com/vmagent/#troubleshooting" } ], "options": { @@ -1508,6 +1574,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1521,6 +1588,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 
5, @@ -1569,7 +1637,6 @@ "y": 11 }, "id": 109, - "links": [], "options": { "legend": { "calcs": [ @@ -1621,6 +1688,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1634,6 +1702,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1682,7 +1751,6 @@ "y": 11 }, "id": 111, - "links": [], "options": { "legend": { "calcs": [ @@ -1732,6 +1800,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1745,6 +1814,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1805,7 +1875,6 @@ "y": 19 }, "id": 81, - "links": [], "options": { "legend": { "calcs": [ @@ -1872,6 +1941,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1885,6 +1955,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1999,6 +2070,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2012,6 +2084,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2055,7 +2128,6 @@ "y": 27 }, "id": 83, - "links": [], "options": { "legend": { "calcs": [ @@ -2103,6 +2175,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2116,6 +2189,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2159,7 +2233,6 @@ "y": 27 }, "id": 39, - "links": [], "options": { "legend": { "calcs": [ @@ -2201,12 
+2274,14 @@ "type": "prometheus", "uid": "$ds" }, + "description": "Shows the percent of CPU spent on garbage collection.\n\nIf % is high, then CPU usage can be decreased by changing GOGC to higher values. Increasing GOGC value will increase memory usage, and decrease CPU usage.\n\nTry searching for keyword `GOGC` at https://docs.victoriametrics.com/troubleshooting/ ", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2220,6 +2295,112 @@ "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 135, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.2.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(\n rate(go_gc_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) \n / rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n ) by(job)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "CPU spent on GC ($instance)", + "type": "timeseries" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2263,7 +2444,6 @@ "y": 35 }, "id": 41, - "links": [], "options": { "legend": { "calcs": [ @@ -2331,13 +2511,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows top 5 job by the number of new series registered by vmagent over the 5min range. These jobs generate the most of the churn rate.", + "description": "Shows top 10 jobs by the number of new series registered by vmagent over the 5min range. These jobs generate the most of the churn rate.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2351,6 +2532,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2388,7 +2570,7 @@ "h": 8, "w": 12, "x": 0, - "y": 4 + "y": 12 }, "id": 92, "options": { @@ -2418,14 +2600,14 @@ }, "editorMode": "code", "exemplar": false, - "expr": "topk_max(5, sum(sum_over_time(scrape_series_added[5m])) by (job)) > 0", + "expr": "topk_max(10, sum(sum_over_time(scrape_series_added[5m])) by (job)) > 0", "interval": "", "legendFormat": "{{ job }}", "range": true, "refId": "A" } ], - "title": "Top 5 jobs by unique samples", + "title": "Top 10 jobs by unique samples", "type": "timeseries" }, { @@ -2433,13 +2615,14 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows top 5 instances by the number of new series registered by vmagent 
over the 5min range. These instances generate the most of the churn rate.", + "description": "Shows top 10 instances by the number of new series registered by vmagent over the 5min range. These instances generate the most of the churn rate.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2453,6 +2636,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2490,7 +2674,7 @@ "h": 8, "w": 12, "x": 12, - "y": 4 + "y": 12 }, "id": 95, "options": { @@ -2527,7 +2711,7 @@ "refId": "A" } ], - "title": "Top 5 instances by unique samples", + "title": "Top 10 instances by unique samples", "type": "timeseries" }, { @@ -2542,6 +2726,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2556,6 +2741,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2595,7 +2781,7 @@ "h": 8, "w": 12, "x": 0, - "y": 12 + "y": 20 }, "id": 98, "options": { @@ -2647,6 +2833,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2661,6 +2848,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2700,7 +2888,7 @@ "h": 8, "w": 12, "x": 12, - "y": 12 + "y": 20 }, "id": 99, "options": { @@ -2804,10 +2992,9 @@ "h": 8, "w": 12, "x": 0, - "y": 20 + "y": 28 }, "id": 79, - "links": [], "options": { "legend": { "calcs": [ @@ -2850,7 +3037,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the rate of dropped samples due to relabeling. 
\nMetric tracks drops for `-remoteWrite.relabelConfig` configuration only.", + "description": "Shows the rate of dropped [samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) due to relabeling. \nMetric tracks drops for `-remoteWrite.relabelConfig` configuration only.", "fieldConfig": { "defaults": { "color": { @@ -2909,14 +3096,14 @@ "h": 8, "w": 12, "x": 12, - "y": 20 + "y": 28 }, "id": 18, "links": [ { "targetBlank": true, "title": "Relabeling", - "url": "https://docs.victoriametrics.com/vmagent.html#relabeling" + "url": "https://docs.victoriametrics.com/vmagent/#relabeling" } ], "options": { @@ -3019,10 +3206,9 @@ "h": 8, "w": 12, "x": 0, - "y": 28 + "y": 36 }, "id": 127, - "links": [], "options": { "legend": { "calcs": [ @@ -3122,7 +3308,7 @@ "h": 8, "w": 12, "x": 12, - "y": 28 + "y": 36 }, "id": 50, "options": { @@ -3172,7 +3358,9 @@ }, "custom": { "align": "auto", - "displayMode": "auto", + "cellOptions": { + "type": "auto" + }, "inspect": false }, "mappings": [], @@ -3220,7 +3408,7 @@ "h": 7, "w": 24, "x": 0, - "y": 36 + "y": 44 }, "id": 129, "options": { @@ -3321,6 +3509,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3334,6 +3523,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3357,7 +3547,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3373,7 +3564,7 @@ "h": 7, "w": 12, "x": 0, - "y": 45 + "y": 37 }, "id": 48, "options": { @@ -3425,6 +3616,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3438,6 +3630,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3461,7 +3654,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + 
"color": "green", + "value": null }, { "color": "red", @@ -3477,7 +3671,7 @@ "h": 7, "w": 12, "x": 12, - "y": 45 + "y": 37 }, "id": 76, "options": { @@ -3527,6 +3721,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3540,6 +3735,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3563,7 +3759,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3579,7 +3776,7 @@ "h": 7, "w": 12, "x": 0, - "y": 45 + "y": 44 }, "id": 132, "options": { @@ -3631,6 +3828,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3644,6 +3842,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3667,7 +3866,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3683,7 +3883,7 @@ "h": 7, "w": 12, "x": 12, - "y": 45 + "y": 44 }, "id": 133, "options": { @@ -3734,6 +3934,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3747,6 +3948,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3770,7 +3972,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3786,7 +3989,7 @@ "h": 8, "w": 12, "x": 0, - "y": 52 + "y": 51 }, "id": 20, "options": { @@ -3836,6 +4039,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3849,6 +4053,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", 
"lineWidth": 1, "pointSize": 5, @@ -3872,7 +4077,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3888,7 +4094,7 @@ "h": 8, "w": 12, "x": 12, - "y": 52 + "y": 51 }, "id": 126, "options": { @@ -3937,6 +4143,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3950,6 +4157,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3973,7 +4181,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3989,7 +4198,7 @@ "h": 8, "w": 12, "x": 0, - "y": 60 + "y": 59 }, "id": 46, "options": { @@ -4038,19 +4247,21 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4074,7 +4285,112 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 59 + }, + "id": 148, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.2.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(histogram_quantile(0.99, sum(rate(vm_promscrape_scrape_duration_seconds_bucket{job=~\"$job\", 
instance=~\"$instance\"}[$__rate_interval])) by(job, vmrange))) by(job)", + "format": "time_series", + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Scrape duration 0.99 quantile ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, { "color": "red", @@ -4090,7 +4406,7 @@ "h": 8, "w": 12, "x": 12, - "y": 60 + "y": 67 }, "id": 31, "options": { @@ -4215,6 +4531,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4228,6 +4545,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4251,8 +4569,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4268,10 +4585,9 @@ "h": 8, "w": 12, "x": 0, - "y": 38 + "y": 14 }, "id": 73, - "links": [], "options": { "legend": { "calcs": [ @@ -4332,6 +4648,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ 
-4345,6 +4662,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4368,8 +4686,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4385,10 +4702,9 @@ "h": 8, "w": 12, "x": 12, - "y": 38 + "y": 6 }, "id": 131, - "links": [], "options": { "legend": { "calcs": [ @@ -4436,6 +4752,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4449,6 +4766,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4472,8 +4790,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4489,10 +4806,9 @@ "h": 8, "w": 12, "x": 0, - "y": 46 + "y": 14 }, "id": 130, - "links": [], "options": { "legend": { "calcs": [ @@ -4553,6 +4869,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4566,6 +4883,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4589,8 +4907,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4606,10 +4923,9 @@ "h": 8, "w": 12, "x": 12, - "y": 46 + "y": 14 }, "id": 77, - "links": [], "options": { "legend": { "calcs": [ @@ -4667,6 +4983,752 @@ "title": "Ingestion", "type": "row" }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 136, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows the number of matched samples by the aggregation rule. \n\nThe more samples is matched, the more work this aggregation rule does. 
The matching rule is specified via `match` param.\n\nSee more details in [stream aggregation config](https://docs.victoriametrics.com/stream-aggregation/#stream-aggregation-config). ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 146, + "options": { + "legend": { + "calcs": [ + "min", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vm_streamaggr_matched_samples_total{job=~\"$job\",instance=~\"$instance\", url=~\"$url\"}[$__rate_interval])) without (instance, pod) > 0", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Matched samples ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "The rate of ignored samples during aggregation. 
\nStream aggregation will drop samples with NaN values, or samples with too old timestamps. See https://docs.victoriametrics.com/stream-aggregation/#ignoring-old-samples ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 143, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vm_streamaggr_ignored_samples_total{job=~\"$job\",instance=~\"$instance\", url=~\"$url\"}[$__rate_interval]) > 0) without (instance, pod)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Ignored samples ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows the number of produced samples by the aggregation rule. 
\n\nNumber of produced samples depend on params like `by`, `without`, `interval`, etc.\n\nSee more details in [stream aggregation config](https://docs.victoriametrics.com/stream-aggregation/#stream-aggregation-config). ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 147, + "options": { + "legend": { + "calcs": [ + "min", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "sum(rate(vm_streamaggr_output_samples_total{job=~\"$job\",instance=~\"$instance\", url=~\"$url\"}[$__rate_interval])) without (instance, pod) > 0", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Produced samples ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows events when deduplication or aggregation couldn't be 
finished in the configured interval. Such events may result into bad accuracy of produced data.\n\nPossible solutions:\n* increase interval; \n* use match filter matching smaller number of series;\n* reduce samples ingestion rate to stream aggregation", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": -5, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 139, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "increase(vm_streamaggr_flush_timeouts_total{job=~\"$job\",instance=~\"$instance\", url=~\"$url\"}[$__rate_interval]) > 0", + "instant": false, + "legendFormat": "aggregation: {{url}} ({{job}}): {{path}}:{{position}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "increase(vm_streamaggr_dedup_flush_timeouts_total{job=~\"$job\",instance=~\"$instance\", url=~\"$url\"}[$__rate_interval]) > 0", + 
"hide": false, + "instant": false, + "legendFormat": "deduplication: {{url}} ({{job}}): {{path}}:{{position}}", + "range": true, + "refId": "B" + } + ], + "title": "Flush timeouts ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows the max lag between samples timestamps within one batch passed to the aggregation input.\n\nLower is better.\n\nToo high lag or lag exceeding the interval might be a sign that data was delayed before aggregation or resource insufficiency on aggregator. Samples with high lag may affect accuracy of aggregation.\n\nSee https://docs.victoriametrics.com/stream-aggregation/#ignoring-old-samples", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 23 + }, + "id": 142, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": 
"histogram_quantile(0.99, sum(rate(vm_streamaggr_samples_lag_seconds_bucket{job=~\"$job\",instance=~\"$instance\", url=~\"$url\"}[$__rate_interval])) without (instance, pod))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Samples lag 0.99 quantile ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "The 99th percentile of avg flush duration for the aggregated data. \n\nSmaller is better.\n\nAggregation can produce incorrect results ff flush duration exceeds configured deduplication interval. See \"Flush Timeouts\" panel.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 23 + }, + "id": 137, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, 
rate(vm_streamaggr_dedup_flush_duration_seconds_bucket{job=~\"$job\",instance=~\"$instance\", url=~\"$url\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Dedup flush duration 0.99 quantile ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows the size of Label Compressor in number of entries.\n\nLabels compressor encodes label-value pairs during aggregation to optimise memory usage. It is expected for its size to grow with time and to reset on vmagent restarts.\n\nRapid spikes in Label compressor size might be a sign of significant changes in labels of received samples.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/bytes.*/" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 31 + }, + "id": 141, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true 
+ }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vm_streamaggr_labels_compressor_items_count{job=~\"$job\",instance=~\"$instance\"}) by(job, instance)", + "hide": false, + "instant": false, + "legendFormat": "items: {{instance}} ({{job}})", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(vm_streamaggr_labels_compressor_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)", + "hide": false, + "instant": false, + "legendFormat": "bytes: {{instance}} ({{job}})", + "range": true, + "refId": "B" + } + ], + "title": "Labels compressor ($instance)", + "type": "timeseries" + } + ], + "title": "Streaming aggregation", + "type": "row" + }, { "collapsed": true, "datasource": { @@ -4677,7 +5739,7 @@ "h": 1, "w": 24, "x": 0, - "y": 38 + "y": 39 }, "id": 58, "panels": [ @@ -4693,6 +5755,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4706,6 +5769,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4746,7 +5810,7 @@ "h": 8, "w": 12, "x": 0, - "y": 55 + "y": 8 }, "id": 60, "options": { @@ -4796,6 +5860,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4809,6 +5874,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4848,7 +5914,7 @@ "h": 8, "w": 12, "x": 12, - "y": 55 + "y": 8 }, "id": 66, "options": { @@ -4898,6 +5964,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -4911,6 +5978,7 @@ "tooltip": 
false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -4950,7 +6018,7 @@ "h": 8, "w": 12, "x": 0, - "y": 63 + "y": 16 }, "id": 61, "options": { @@ -5000,6 +6068,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5013,6 +6082,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5052,7 +6122,7 @@ "h": 8, "w": 12, "x": 12, - "y": 63 + "y": 16 }, "id": 65, "options": { @@ -5102,6 +6172,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5115,6 +6186,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5153,7 +6225,7 @@ "h": 8, "w": 12, "x": 0, - "y": 71 + "y": 24 }, "id": 88, "options": { @@ -5199,6 +6271,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5251,7 +6324,7 @@ "h": 8, "w": 12, "x": 12, - "y": 71 + "y": 24 }, "id": 84, "options": { @@ -5301,6 +6374,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5314,6 +6388,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5352,7 +6427,7 @@ "h": 8, "w": 12, "x": 0, - "y": 79 + "y": 32 }, "id": 90, "options": { @@ -5405,7 +6480,7 @@ "h": 1, "w": 24, "x": 0, - "y": 39 + "y": 40 }, "id": 113, "panels": [ @@ -5418,7 +6493,7 @@ "h": 2, "w": 24, "x": 0, - "y": 87 + "y": 17 }, "id": 115, "options": { @@ -5430,7 +6505,7 @@ "content": "Drilldown row is used by other panels on the dashboard to show more detailed metrics per-instance.", "mode": "markdown" }, - 
"pluginVersion": "9.2.6", + "pluginVersion": "10.4.2", "transparent": true, "type": "text" }, @@ -5446,6 +6521,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5459,6 +6535,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5494,10 +6571,9 @@ "h": 8, "w": 12, "x": 0, - "y": 89 + "y": 19 }, "id": 119, - "links": [], "options": { "legend": { "calcs": [ @@ -5549,6 +6625,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5562,6 +6639,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5597,10 +6675,9 @@ "h": 8, "w": 12, "x": 12, - "y": 89 + "y": 19 }, "id": 117, - "links": [], "options": { "legend": { "calcs": [ @@ -5650,6 +6727,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5663,6 +6741,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5702,13 +6781,13 @@ "h": 8, "w": 12, "x": 0, - "y": 97 + "y": 27 }, "id": 125, "links": [ { "title": "Troubleshooting", - "url": "https://docs.victoriametrics.com/vmagent.html#troubleshooting" + "url": "https://docs.victoriametrics.com/vmagent/#troubleshooting" } ], "options": { @@ -5758,6 +6837,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5771,6 +6851,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5822,7 +6903,7 @@ "h": 8, "w": 12, "x": 12, - "y": 97 + "y": 27 }, "id": 123, "options": { @@ -5886,6 +6967,7 @@ "mode": "palette-classic" }, 
"custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -5899,6 +6981,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -5950,10 +7033,9 @@ "h": 8, "w": 12, "x": 0, - "y": 105 + "y": 35 }, "id": 121, - "links": [], "options": { "legend": { "calcs": [ @@ -6014,8 +7096,7 @@ } ], "refresh": "", - "schemaVersion": 37, - "style": "dark", + "schemaVersion": 39, "tags": [ "vmagent", "victoriametrics" @@ -6024,9 +7105,9 @@ "list": [ { "current": { - "selected": true, + "selected": false, "text": "VictoriaMetrics", - "value": "VictoriaMetrics" + "value": "P4169E866C3094E38" }, "hide": 0, "includeAll": false, diff --git a/dashboards/victoria-metrics/vmalert.json b/dashboards/victoria-metrics/vmalert.json index 5a9d53e8..d1d44ecf 100644 --- a/dashboards/victoria-metrics/vmalert.json +++ b/dashboards/victoria-metrics/vmalert.json @@ -6,7 +6,7 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "9.2.7" + "version": "10.4.2" }, { "type": "datasource", @@ -59,7 +59,7 @@ "uid": "$ds" }, "enable": true, - "expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(short_version) unless (sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"} offset 20m) by(short_version))", + "expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(short_version) unless (sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"} offset $__interval) by(short_version))", "hide": true, "iconColor": "dark-blue", "name": "version", @@ -72,15 +72,14 @@ "uid": "$ds" }, "enable": true, - "expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"})) by(job, instance)", - "hide": true, + "expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"}[$__interval])) by(job, instance)", "iconColor": "dark-yellow", "name": "restarts", "textFormat": "{{job}}:{{instance}} restarted" } ] }, 
- "description": "Overview for VictoriaMetrics vmalert v1.96.0 or higher", + "description": "Overview for VictoriaMetrics vmalert v1.102.0 or higher", "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, @@ -96,7 +95,7 @@ "title": "vmalert docs", "tooltip": "", "type": "link", - "url": "https://docs.victoriametrics.com/vmalert.html" + "url": "https://docs.victoriametrics.com/vmalert/" }, { "asDropdown": false, @@ -201,10 +200,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -261,10 +262,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -321,10 +324,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -385,10 +390,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -449,10 +456,12 @@ "fields": "", "values": false }, + "showPercentChange": false, "text": {}, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -483,7 +492,9 @@ }, "custom": { "align": "auto", - "displayMode": "auto", + "cellOptions": { + "type": "auto" + }, "inspect": false, "minWidth": 50 }, @@ -537,7 +548,9 @@ }, "id": 45, "options": { + "cellHeight": "sm", "footer": { + "countRows": false, "fields": "", "reducer": [ "sum" @@ -546,7 +559,7 @@ }, 
"showHeader": true }, - "pluginVersion": "9.2.7", + "pluginVersion": "10.4.2", "targets": [ { "datasource": { @@ -575,6 +588,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -588,6 +602,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "stepAfter", "lineWidth": 1, "pointSize": 5, @@ -706,6 +721,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -719,6 +735,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -809,6 +826,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -822,6 +840,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -912,6 +931,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -925,6 +945,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1013,6 +1034,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1026,6 +1048,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1114,7 +1137,9 @@ }, "custom": { "align": "auto", - "displayMode": "auto", + "cellOptions": { + "type": "auto" + }, "inspect": false }, "mappings": [], @@ -1122,8 +1147,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1234,7 +1258,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Missed evaluation means 
that group evaluation time takes longer than the configured evaluation interval. \nThis may result in missed alerting notifications or recording rules samples. Try increasing evaluation interval or concurrency for such groups. See https://docs.victoriametrics.com/vmalert.html#groups\n\nIf rule expressions are taking longer than expected, please see https://docs.victoriametrics.com/Troubleshooting.html#slow-queries.\"", + "description": "Missed evaluation means that group evaluation time takes longer than the configured evaluation interval. \nThis may result in missed alerting notifications or recording rules samples. Try increasing evaluation interval or concurrency for such groups. See https://docs.victoriametrics.com/vmalert/#groups\n\nIf rule expressions are taking longer than expected, please see https://docs.victoriametrics.com/troubleshooting/#slow-queries.\"", "fieldConfig": { "defaults": { "color": { @@ -1275,8 +1299,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1356,6 +1379,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1369,6 +1393,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1400,7 +1425,8 @@ } ] }, - "unit": "percentunit" + "unit": "percentunit", + "unitScale": true }, "overrides": [] }, @@ -1408,14 +1434,14 @@ "h": 8, "w": 12, "x": 0, - "y": 33 + "y": 3 }, "id": 37, "links": [ { "targetBlank": true, "title": "Profiling", - "url": "https://docs.victoriametrics.com/vmagent.html#profiling" + "url": "https://docs.victoriametrics.com/vmagent/#profiling" } ], "options": { @@ -1467,6 +1493,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1480,6 +1507,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, 
"lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1511,7 +1539,8 @@ } ] }, - "unit": "bytes" + "unit": "bytes", + "unitScale": true }, "overrides": [] }, @@ -1519,14 +1548,14 @@ "h": 8, "w": 12, "x": 12, - "y": 33 + "y": 3 }, "id": 57, "links": [ { "targetBlank": true, "title": "Profiling", - "url": "https://docs.victoriametrics.com/vmagent.html#profiling" + "url": "https://docs.victoriametrics.com/vmagent/#profiling" } ], "options": { @@ -1578,6 +1607,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1591,6 +1621,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1622,7 +1653,8 @@ } ] }, - "unit": "percentunit" + "unit": "percentunit", + "unitScale": true }, "overrides": [] }, @@ -1630,14 +1662,14 @@ "h": 8, "w": 12, "x": 0, - "y": 41 + "y": 11 }, "id": 35, "links": [ { "targetBlank": true, "title": "Profiling", - "url": "https://docs.victoriametrics.com/vmagent.html#profiling" + "url": "https://docs.victoriametrics.com/vmagent/#profiling" } ], "options": { @@ -1691,6 +1723,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1704,6 +1737,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1735,7 +1769,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -1743,14 +1778,14 @@ "h": 8, "w": 12, "x": 12, - "y": 41 + "y": 11 }, "id": 56, "links": [ { "targetBlank": true, "title": "Profiling", - "url": "https://docs.victoriametrics.com/vmagent.html#profiling" + "url": "https://docs.victoriametrics.com/vmagent/#profiling" } ], "options": { @@ -1820,6 +1855,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": 
"text", "axisLabel": "", @@ -1833,6 +1869,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1865,7 +1902,8 @@ } ] }, - "unit": "percentunit" + "unit": "percentunit", + "unitScale": true }, "overrides": [] }, @@ -1873,7 +1911,7 @@ "h": 8, "w": 12, "x": 0, - "y": 49 + "y": 19 }, "id": 39, "links": [], @@ -1925,6 +1963,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1938,6 +1977,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1970,7 +2010,8 @@ } ] }, - "unit": "short" + "unit": "short", + "unitScale": true }, "overrides": [] }, @@ -1978,7 +2019,7 @@ "h": 8, "w": 12, "x": 12, - "y": 49 + "y": 19 }, "id": 41, "links": [], @@ -2017,6 +2058,114 @@ ], "title": "Goroutines ($instance)", "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows the percent of CPU spent on garbage collection.\n\nIf % is high, then CPU usage can be decreased by changing GOGC to higher values. 
Increasing GOGC value will increase memory usage, and decrease CPU usage.\n\nTry searching for keyword `GOGC` at https://docs.victoriametrics.com/troubleshooting/ ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit", + "unitScale": true + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 59, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.2.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "expr": "max(\n rate(go_gc_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) \n / rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n ) by(job)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "CPU spent on GC ($instance)", + "type": "timeseries" } ], "targets": [ @@ -2107,7 
+2256,7 @@ "h": 8, "w": 12, "x": 0, - "y": 28 + "y": 36 }, "id": 14, "options": { @@ -2209,7 +2358,7 @@ "h": 8, "w": 12, "x": 12, - "y": 28 + "y": 36 }, "id": 13, "options": { @@ -2311,7 +2460,7 @@ "h": 8, "w": 12, "x": 0, - "y": 36 + "y": 44 }, "id": 20, "options": { @@ -2414,7 +2563,7 @@ "h": 8, "w": 12, "x": 12, - "y": 36 + "y": 44 }, "id": 32, "options": { @@ -2513,7 +2662,7 @@ "h": 8, "w": 12, "x": 0, - "y": 44 + "y": 52 }, "id": 26, "options": { @@ -2583,7 +2732,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the top $topk recording rules which generate the most of samples. Each generated sample is basically a time series which then ingested into configured remote storage. Rules with high numbers may cause the most pressure on the remote database and become a source of too high cardinality.\n\nThe panel uses MetricsQL functions and may not work with Prometheus.", + "description": "Shows the top $topk recording rules which generate the most of [samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples). Each generated sample is basically a time series which then ingested into configured remote storage. Rules with high numbers may cause the most pressure on the remote database and become a source of too high cardinality.\n\nThe panel uses MetricsQL functions and may not work with Prometheus.", "fieldConfig": { "defaults": { "color": { @@ -2640,7 +2789,7 @@ "h": 8, "w": 12, "x": 0, - "y": 43 + "y": 51 }, "id": 31, "options": { @@ -2685,7 +2834,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "Shows the rules which do not produce any samples during the evaluation. Usually it means that such rules are misconfigured, since they give no output during the evaluation.\nPlease check if rule's expression is correct and it is working as expected.", + "description": "Shows the rules which do not produce any [samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) during the evaluation. 
Usually it means that such rules are misconfigured, since they give no output during the evaluation.\nPlease check if rule's expression is correct and it is working as expected.", "fieldConfig": { "defaults": { "color": { @@ -2742,7 +2891,7 @@ "h": 8, "w": 12, "x": 12, - "y": 43 + "y": 51 }, "id": 33, "options": { @@ -2843,7 +2992,7 @@ "h": 8, "w": 12, "x": 0, - "y": 51 + "y": 59 }, "id": 30, "options": { @@ -2964,7 +3113,7 @@ "h": 8, "w": 12, "x": 0, - "y": 9 + "y": 17 }, "id": 52, "options": { @@ -3056,7 +3205,7 @@ "h": 8, "w": 12, "x": 12, - "y": 9 + "y": 17 }, "id": 53, "options": { @@ -3086,15 +3235,221 @@ ], "title": "Datapoints drop rate ($instance)", "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows current number of established connections to remote write endpoints.\n\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 44 + }, + "id": 54, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + 
"tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(max_over_time(vmalert_remotewrite_conns{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job)", + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Connections ($instance)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows the global rate for number of written bytes via remote write connections.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 44 + }, + "id": 55, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "editorMode": "code", + "exemplar": true, + "expr": 
"sum(rate(vmalert_remotewrite_conn_bytes_written_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) > 0", + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Bytes write rate ($instance)", + "type": "timeseries" } ], "title": "Remote write", "type": "row" } ], - "refresh": false, - "schemaVersion": 37, - "style": "dark", + "refresh": "", + "schemaVersion": 39, "tags": [ "victoriametrics", "vmalert" @@ -3104,8 +3459,8 @@ { "current": { "selected": false, - "text": "VictoriaMetrics - cluster", - "value": "VictoriaMetrics - cluster" + "text": "VictoriaMetrics", + "value": "P4169E866C3094E38" }, "hide": 0, "includeAll": false, @@ -3171,14 +3526,14 @@ "type": "prometheus", "uid": "$ds" }, - "definition": "label_values(vmalert_iteration_duration_seconds{job=~\"$job\", instance=~\"$instance\"}, group)", + "definition": "label_values(vmalert_iteration_total{job=~\"$job\", instance=~\"$instance\"}, group)", "hide": 0, "includeAll": true, "multi": true, "name": "group", "options": [], "query": { - "query": "label_values(vmalert_iteration_duration_seconds{job=~\"$job\", instance=~\"$instance\"}, group)", + "query": "label_values(vmalert_iteration_total{job=~\"$job\", instance=~\"$instance\"}, group)", "refId": "StandardVariableQuery" }, "refresh": 1, diff --git a/hack/download-dashboards.sh b/hack/download-dashboards.sh index a6c0f676..8a51bf31 100755 --- a/hack/download-dashboards.sh +++ b/hack/download-dashboards.sh @@ -1,6 +1,6 @@ #https://github.com/deckhouse/deckhouse/blob/main/modules/340-monitoring-kubernetes-control-plane/monitoring/grafana-dashboards/kubernetes-cluster/control-plane-status.json base=https://github.com/deckhouse/deckhouse/raw/main/ -dir="grafana-dashboards" +dir="dashboards" mkdir -p "$dir" From 7431e245b38e9e47515ca5838a9bf4cb50273500 Mon Sep 17 00:00:00 2001 From: chumkaska <38977376+chumkaska@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:41:06 +0500 Subject: [PATCH 04/41] 
add postgresql.parameters.max_connections (#359) ## Summary by CodeRabbit - **New Features** - Introduced a new configuration parameter `max_connections` for PostgreSQL, allowing users to specify the maximum number of concurrent connections. - Added a "Server Configuration" section in the settings for easier management of PostgreSQL parameters. --------- Signed-off-by: Andrei Kvapil Co-authored-by: Andrei Gumilev Co-authored-by: Andrei Kvapil --- packages/apps/postgres/Chart.yaml | 2 +- packages/apps/postgres/README.md | 17 +++++++++-------- packages/apps/postgres/templates/db.yaml | 1 + packages/apps/postgres/values.schema.json | 15 +++++++++++++++ packages/apps/postgres/values.yaml | 6 ++++++ 5 files changed, 32 insertions(+), 9 deletions(-) diff --git a/packages/apps/postgres/Chart.yaml b/packages/apps/postgres/Chart.yaml index 1821b9fc..02998343 100644 --- a/packages/apps/postgres/Chart.yaml +++ b/packages/apps/postgres/Chart.yaml @@ -16,7 +16,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.6.2 +version: 0.7.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
Versions are not expected to diff --git a/packages/apps/postgres/README.md b/packages/apps/postgres/README.md index f94d95ec..bf053ed4 100644 --- a/packages/apps/postgres/README.md +++ b/packages/apps/postgres/README.md @@ -35,14 +35,15 @@ more details: ### Common parameters -| Name | Description | Value | -| ------------------------ | ----------------------------------------------------------------------------------------------------------------------- | ------- | -| `external` | Enable external access from outside the cluster | `false` | -| `size` | Persistent Volume size | `10Gi` | -| `replicas` | Number of Postgres replicas | `2` | -| `storageClass` | StorageClass used to store the data | `""` | -| `quorum.minSyncReplicas` | Minimum number of synchronous replicas that must acknowledge a transaction before it is considered committed. | `0` | -| `quorum.maxSyncReplicas` | Maximum number of synchronous replicas that can acknowledge a transaction (must be lower than the number of instances). | `0` | +| Name | Description | Value | +| --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------ | ------- | +| `external` | Enable external access from outside the cluster | `false` | +| `size` | Persistent Volume size | `10Gi` | +| `replicas` | Number of Postgres replicas | `2` | +| `storageClass` | StorageClass used to store the data | `""` | +| `postgresql.parameters.max_connections` | Determines the maximum number of concurrent connections to the database server. The default is typically 100 connections | `100` | +| `quorum.minSyncReplicas` | Minimum number of synchronous replicas that must acknowledge a transaction before it is considered committed. | `0` | +| `quorum.maxSyncReplicas` | Maximum number of synchronous replicas that can acknowledge a transaction (must be lower than the number of instances). 
| `0` | ### Configuration parameters diff --git a/packages/apps/postgres/templates/db.yaml b/packages/apps/postgres/templates/db.yaml index cbc947e3..287c8b74 100644 --- a/packages/apps/postgres/templates/db.yaml +++ b/packages/apps/postgres/templates/db.yaml @@ -10,6 +10,7 @@ spec: postgresql: parameters: max_wal_senders: "30" + max_connections: "{{ .Values.postgresql.parameters.max_connections }}" minSyncReplicas: {{ .Values.quorum.minSyncReplicas }} maxSyncReplicas: {{ .Values.quorum.maxSyncReplicas }} diff --git a/packages/apps/postgres/values.schema.json b/packages/apps/postgres/values.schema.json index 15049426..006d5982 100644 --- a/packages/apps/postgres/values.schema.json +++ b/packages/apps/postgres/values.schema.json @@ -22,6 +22,21 @@ "description": "StorageClass used to store the data", "default": "" }, + "postgresql": { + "type": "object", + "properties": { + "parameters": { + "type": "object", + "properties": { + "max_connections": { + "type": "string", + "description": "Determines the maximum number of concurrent connections to the database server. The default is typically 100 connections", + "default": "100" + } + } + } + } + }, "quorum": { "type": "object", "properties": { diff --git a/packages/apps/postgres/values.yaml b/packages/apps/postgres/values.yaml index 2eaacff1..f5d6dada 100644 --- a/packages/apps/postgres/values.yaml +++ b/packages/apps/postgres/values.yaml @@ -10,6 +10,12 @@ size: 10Gi replicas: 2 storageClass: "" +## Server Configuration +## @param postgresql.parameters.max_connections Determines the maximum number of concurrent connections to the database server. The default is typically 100 connections +postgresql: + parameters: + max_connections: "100" + ## Configuration for the quorum-based synchronous replication ## @param quorum.minSyncReplicas Minimum number of synchronous replicas that must acknowledge a transaction before it is considered committed.
## @param quorum.maxSyncReplicas Maximum number of synchronous replicas that can acknowledge a transaction (must be lower than the number of instances). From 910a9e5378217d9e05f6010920d6e8da006aa922 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Thu, 26 Sep 2024 11:44:25 +0200 Subject: [PATCH 05/41] Grafana remove flant-statusmap-panel plugin (#360) Signed-off-by: Andrei Kvapil Signed-off-by: Andrei Kvapil --- packages/extra/monitoring/templates/grafana/grafana.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/extra/monitoring/templates/grafana/grafana.yaml b/packages/extra/monitoring/templates/grafana/grafana.yaml index e98573be..5cbff82f 100644 --- a/packages/extra/monitoring/templates/grafana/grafana.yaml +++ b/packages/extra/monitoring/templates/grafana/grafana.yaml @@ -64,7 +64,7 @@ spec: failureThreshold: 3 env: - name: GF_INSTALL_PLUGINS - value: grafana-worldmap-panel,flant-statusmap-panel,natel-discrete-panel + value: grafana-worldmap-panel,natel-discrete-panel - name: ONCALL_API_URL value: http://grafana-oncall-engine:8080 - name: GF_DATABASE_HOST From 0a89478b3d99c1328b0f7a33d86287b530943ac9 Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Thu, 26 Sep 2024 10:50:06 -0400 Subject: [PATCH 06/41] Upgrade flux-operator to 0.9.0 (#362) ## Summary by CodeRabbit - **New Features** - Updated to version 0.9.0 of the Flux Operator Helm chart. - Introduced a new `ServiceMonitor` resource for Prometheus metrics scraping. - Added configuration options for the `serviceMonitor`, including scrape interval and timeout settings. - **Bug Fixes** - Corrected the GitHub repository URL in the README. - **Documentation** - Updated README to reflect new version and added details for the `serviceMonitor` settings. - **Chores** - Updated maintainer information in the chart configuration. 
Signed-off-by: Kingdon Barrett --- .../charts/flux-operator/Chart.yaml | 10 ++- .../charts/flux-operator/README.md | 7 +- .../charts/flux-operator/templates/crds.yaml | 72 ++++++++++--------- .../flux-operator/templates/deployment.yaml | 7 +- .../templates/servicemonitor.yaml | 31 ++++++++ .../charts/flux-operator/values.schema.json | 23 ++++++ .../charts/flux-operator/values.yaml | 19 +++-- 7 files changed, 117 insertions(+), 52 deletions(-) create mode 100644 packages/system/fluxcd-operator/charts/flux-operator/templates/servicemonitor.yaml diff --git a/packages/system/fluxcd-operator/charts/flux-operator/Chart.yaml b/packages/system/fluxcd-operator/charts/flux-operator/Chart.yaml index 4d13ecdb..4cfa8643 100644 --- a/packages/system/fluxcd-operator/charts/flux-operator/Chart.yaml +++ b/packages/system/fluxcd-operator/charts/flux-operator/Chart.yaml @@ -8,7 +8,7 @@ annotations: - name: Upstream Project url: https://github.com/controlplaneio-fluxcd/flux-operator apiVersion: v2 -appVersion: v0.6.0 +appVersion: v0.9.0 description: 'A Helm chart for deploying the Flux Operator. 
' home: https://github.com/controlplaneio-fluxcd icon: https://raw.githubusercontent.com/cncf/artwork/main/projects/flux/icon/color/flux-icon-color.png @@ -18,13 +18,11 @@ keywords: - gitops kubeVersion: '>=1.22.0-0' maintainers: -- email: stefan.prodan@control-plane.io - name: Stefan Prodan -- name: Soule Ba - url: soule.ba@control-plane.io +- email: flux-enterprise@control-plane.io + name: ControlPlane Flux Team name: flux-operator sources: - https://github.com/controlplaneio-fluxcd/flux-operator - https://github.com/controlplaneio-fluxcd/charts type: application -version: 0.6.0 +version: 0.9.0 diff --git a/packages/system/fluxcd-operator/charts/flux-operator/README.md b/packages/system/fluxcd-operator/charts/flux-operator/README.md index 7fea50d7..c377eb54 100644 --- a/packages/system/fluxcd-operator/charts/flux-operator/README.md +++ b/packages/system/fluxcd-operator/charts/flux-operator/README.md @@ -1,9 +1,9 @@ # flux-operator -![Version: 0.6.0](https://img.shields.io/badge/Version-0.6.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v0.6.0](https://img.shields.io/badge/AppVersion-v0.6.0-informational?style=flat-square) +![Version: 0.9.0](https://img.shields.io/badge/Version-0.9.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v0.9.0](https://img.shields.io/badge/AppVersion-v0.9.0-informational?style=flat-square) -The [Flux Operator](https://github.com/controlplaneio-fluxcd) provides a declarative API -for the installation and upgrade of CNCF [Flux](https://fluxcd.io) and the +The [Flux Operator](https://github.com/controlplaneio-fluxcd/flux-operator) provides a +declarative API for the installation and upgrade of CNCF [Flux](https://fluxcd.io) and the ControlPlane [enterprise distribution](https://control-plane.io/enterprise-for-flux-cd/). 
The operator automates the patching for hotfixes and CVEs affecting the Flux controllers container images @@ -49,6 +49,7 @@ see the Flux Operator [documentation](https://fluxcd.control-plane.io/operator/) | resources | object | `{"limits":{"cpu":"1000m","memory":"1Gi"},"requests":{"cpu":"100m","memory":"64Mi"}}` | Container resources requests and limits settings. | | securityContext | object | `{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}}` | Container security context settings. The default is compliant with the pod security restricted profile. | | serviceAccount | object | `{"automount":true,"create":true,"name":""}` | Pod service account settings. The name of the service account defaults to the release name. | +| serviceMonitor | object | `{"create":false,"interval":"60s","labels":{},"scrapeTimeout":"30s"}` | Prometheus Operator scraping settings. | | tolerations | list | `[]` | Pod tolerations settings. | ## Source Code diff --git a/packages/system/fluxcd-operator/charts/flux-operator/templates/crds.yaml b/packages/system/fluxcd-operator/charts/flux-operator/templates/crds.yaml index ff6dfe83..17b74747 100644 --- a/packages/system/fluxcd-operator/charts/flux-operator/templates/crds.yaml +++ b/packages/system/fluxcd-operator/charts/flux-operator/templates/crds.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.15.0 + controller-gen.kubebuilder.io/version: v0.16.1 helm.sh/resource-policy: keep labels: app.kubernetes.io/instance: '{{ .Release.Name }}' @@ -77,6 +77,12 @@ spec: NetworkPolicy restricts network access to the current namespace. Defaults to true. type: boolean + tenantDefaultServiceAccount: + description: |- + TenantDefaultServiceAccount is the name of the service account + to use as default when the multitenant lockdown is enabled. 
+ Defaults to the 'default' service account from the tenant namespace. + type: string type: default: kubernetes description: |- @@ -202,6 +208,29 @@ spec: type: object type: array type: object + migrateResources: + default: true + description: |- + MigrateResources instructs the controller to migrate the Flux custom resources + from the previous version to the latest API version specified in the CRD. + Defaults to true. + type: boolean + sharding: + description: Sharding holds the specification of the sharding configuration. + properties: + key: + default: sharding.fluxcd.io/key + description: Key is the label key used to shard the resources. + type: string + shards: + description: Shards is the list of shard names. + items: + type: string + minItems: 1 + type: array + required: + - shards + type: object storage: description: |- Storage holds the specification of the source-controller @@ -274,7 +303,6 @@ spec: type: boolean required: - distribution - - wait type: object status: description: FluxInstanceStatus defines the observed state of FluxInstance @@ -307,16 +335,8 @@ spec: conditions: description: Conditions contains the readiness conditions of the object. items: - description: "Condition contains details for one aspect of the current - state of this API Resource.\n---\nThis struct is intended for - direct use as an array at the field path .status.conditions. For - example,\n\n\n\ttype FooStatus struct{\n\t // Represents the - observations of a foo's current state.\n\t // Known .status.conditions.type - are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // - +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t - \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" - patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t - \ // other fields\n\t}" + description: Condition contains details for one aspect of the current + state of this API Resource. 
properties: lastTransitionTime: description: |- @@ -357,12 +377,7 @@ spec: - Unknown type: string type: - description: |- - type of condition in CamelCase or in foo.example.com/CamelCase. - --- - Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be - useful (see .node.status.conditions), the ability to deconflict is important. - The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + description: type of condition in CamelCase or in foo.example.com/CamelCase. maxLength: 316 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ type: string @@ -429,7 +444,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.15.0 + controller-gen.kubebuilder.io/version: v0.16.1 helm.sh/resource-policy: keep labels: app.kubernetes.io/instance: '{{ .Release.Name }}' @@ -622,16 +637,8 @@ spec: conditions: description: Conditions contains the readiness conditions of the object. items: - description: "Condition contains details for one aspect of the current - state of this API Resource.\n---\nThis struct is intended for - direct use as an array at the field path .status.conditions. For - example,\n\n\n\ttype FooStatus struct{\n\t // Represents the - observations of a foo's current state.\n\t // Known .status.conditions.type - are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // - +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t - \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" - patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t - \ // other fields\n\t}" + description: Condition contains details for one aspect of the current + state of this API Resource. 
properties: lastTransitionTime: description: |- @@ -672,12 +679,7 @@ spec: - Unknown type: string type: - description: |- - type of condition in CamelCase or in foo.example.com/CamelCase. - --- - Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be - useful (see .node.status.conditions), the ability to deconflict is important. - The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + description: type of condition in CamelCase or in foo.example.com/CamelCase. maxLength: 316 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ type: string diff --git a/packages/system/fluxcd-operator/charts/flux-operator/templates/deployment.yaml b/packages/system/fluxcd-operator/charts/flux-operator/templates/deployment.yaml index 7de88af2..31c712d2 100644 --- a/packages/system/fluxcd-operator/charts/flux-operator/templates/deployment.yaml +++ b/packages/system/fluxcd-operator/charts/flux-operator/templates/deployment.yaml @@ -18,10 +18,13 @@ spec: {{- include "flux-operator.selectorLabels" . | nindent 6 }} template: metadata: - {{- with .Values.commonAnnotations }} annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" + prometheus.io/path: "/metrics" + {{- with .Values.commonAnnotations }} {{- toYaml . | nindent 8 }} - {{- end }} + {{- end }} labels: {{- include "flux-operator.labels" . 
| nindent 8 }} {{- with .Values.commonLabels }} diff --git a/packages/system/fluxcd-operator/charts/flux-operator/templates/servicemonitor.yaml b/packages/system/fluxcd-operator/charts/flux-operator/templates/servicemonitor.yaml new file mode 100644 index 00000000..5bdb4889 --- /dev/null +++ b/packages/system/fluxcd-operator/charts/flux-operator/templates/servicemonitor.yaml @@ -0,0 +1,31 @@ +{{- if .Values.serviceMonitor.create }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "flux-operator.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "flux-operator.labels" . | nindent 4 }} + {{- with .Values.commonLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.serviceMonitor.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.commonAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + namespaceSelector: + matchNames: + - {{ .Release.Namespace | quote }} + selector: + matchLabels: + {{- include "flux-operator.selectorLabels" . 
| nindent 6 }} + endpoints: + - targetPort: 8080 + path: /metrics + interval: {{ .Values.serviceMonitor.interval }} + scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }} +{{- end }} diff --git a/packages/system/fluxcd-operator/charts/flux-operator/values.schema.json b/packages/system/fluxcd-operator/charts/flux-operator/values.schema.json index 1324083f..59c16f24 100644 --- a/packages/system/fluxcd-operator/charts/flux-operator/values.schema.json +++ b/packages/system/fluxcd-operator/charts/flux-operator/values.schema.json @@ -293,6 +293,29 @@ }, "type": "object" }, + "serviceMonitor": { + "default": { + "create": false, + "interval": "60s", + "scrapeTimeout": "30s" + }, + "properties": { + "create": { + "type": "boolean" + }, + "interval": { + "type": "string" + }, + "labels": { + "properties": {}, + "type": "object" + }, + "scrapeTimeout": { + "type": "string" + } + }, + "type": "object" + }, "tolerations": { "items": { "type": "object" diff --git a/packages/system/fluxcd-operator/charts/flux-operator/values.yaml b/packages/system/fluxcd-operator/charts/flux-operator/values.yaml index feebdf18..2d28fec2 100644 --- a/packages/system/fluxcd-operator/charts/flux-operator/values.yaml +++ b/packages/system/fluxcd-operator/charts/flux-operator/values.yaml @@ -84,14 +84,21 @@ affinity: # @schema default: {"nodeAffinity":{"requiredDuringSchedulingIgnoredDu # -- Pod tolerations settings. tolerations: [ ] # @schema item: object ; uniqueItems: true -# -- Marketplace settings. -marketplace: - type: "" - license: "" - account: "" - # -- If `true`, the container ports (`8080` and `8081`) are exposed on the host network. hostNetwork: false # @schema default: false # -- Container extra environment variables. extraEnvs: [ ] # @schema item: object ; uniqueItems: true + +# -- Prometheus Operator scraping settings. 
+serviceMonitor: # @schema default: {"create":false,"interval":"60s","scrapeTimeout":"30s"} + create: false + interval: 60s + scrapeTimeout: 30s + labels: { } + +# -- Marketplace settings. +marketplace: + type: "" + license: "" + account: "" From ac5c38bacd9e46c6643a9b62ab833c80e14dd7a1 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Thu, 26 Sep 2024 16:50:20 +0200 Subject: [PATCH 07/41] Update kamaji to latest version (#361) ## Summary by CodeRabbit - **New Features** - Introduced a new version (2.0.0) for the Kamaji project with updated dependencies and configuration options. - Added support for a new `kamaji-etcd` dependency, enhancing datastore functionality. - Implemented comprehensive access control with a new Kubernetes ClusterRole. - Added webhook configurations for validating and mutating resources. - **Bug Fixes** - Streamlined configuration by removing outdated etcd settings and consolidating datastore configurations. - **Documentation** - Updated README.md to reflect new version and configuration options. - **Chores** - Simplified Dockerfile by using a pre-built image instead of a multi-stage build process. 
--- packages/system/kamaji/Makefile | 7 +- .../system/kamaji/charts/kamaji/Chart.lock | 6 + .../system/kamaji/charts/kamaji/Chart.yaml | 37 +- .../system/kamaji/charts/kamaji/README.md | 58 +- .../kamaji/controller-gen/clusterrole.yaml | 76 + .../kamaji/controller-gen/crd-conversion.yaml | 11 + .../controller-gen/mutating-webhook.yaml | 20 + .../controller-gen/validating-webhook.yaml | 81 + ...yaml => kamaji.clastix.io_datastores.yaml} | 44 +- ...amaji.clastix.io_tenantcontrolplanes.yaml} | 1540 +++++++---------- .../kamaji/templates/_helpers_datastore.tpl | 94 - .../charts/kamaji/templates/_helpers_etcd.tpl | 142 -- .../charts/kamaji/templates/controller.yaml | 5 +- .../charts/kamaji/templates/datastore.yaml | 33 - .../charts/kamaji/templates/etcd_cm.yaml | 98 -- .../kamaji/templates/etcd_job_postdelete.yaml | 35 - .../templates/etcd_job_postinstall.yaml | 74 - .../kamaji/templates/etcd_job_preinstall.yaml | 72 - .../charts/kamaji/templates/etcd_rbac.yaml | 56 - .../charts/kamaji/templates/etcd_sa.yaml | 12 - .../charts/kamaji/templates/etcd_service.yaml | 18 - .../charts/kamaji/templates/etcd_sts.yaml | 101 -- .../mutatingwebhookconfiguration.yaml | 21 +- .../kamaji/charts/kamaji/templates/rbac.yaml | 117 +- .../validatingwebhookconfiguration.yaml | 82 +- .../system/kamaji/charts/kamaji/values.yaml | 138 +- .../system/kamaji/images/kamaji/Dockerfile | 26 +- .../images/kamaji/patches/enable-gc.diff | 30 - packages/system/kamaji/values.yaml | 2 +- 29 files changed, 870 insertions(+), 2166 deletions(-) create mode 100644 packages/system/kamaji/charts/kamaji/Chart.lock create mode 100644 packages/system/kamaji/charts/kamaji/controller-gen/clusterrole.yaml create mode 100644 packages/system/kamaji/charts/kamaji/controller-gen/crd-conversion.yaml create mode 100644 packages/system/kamaji/charts/kamaji/controller-gen/mutating-webhook.yaml create mode 100644 packages/system/kamaji/charts/kamaji/controller-gen/validating-webhook.yaml rename 
packages/system/kamaji/charts/kamaji/crds/{datastore.yaml => kamaji.clastix.io_datastores.yaml} (91%) rename packages/system/kamaji/charts/kamaji/crds/{tenantcontrolplane.yaml => kamaji.clastix.io_tenantcontrolplanes.yaml} (91%) delete mode 100644 packages/system/kamaji/charts/kamaji/templates/_helpers_datastore.tpl delete mode 100644 packages/system/kamaji/charts/kamaji/templates/_helpers_etcd.tpl delete mode 100644 packages/system/kamaji/charts/kamaji/templates/datastore.yaml delete mode 100644 packages/system/kamaji/charts/kamaji/templates/etcd_cm.yaml delete mode 100644 packages/system/kamaji/charts/kamaji/templates/etcd_job_postdelete.yaml delete mode 100644 packages/system/kamaji/charts/kamaji/templates/etcd_job_postinstall.yaml delete mode 100644 packages/system/kamaji/charts/kamaji/templates/etcd_job_preinstall.yaml delete mode 100644 packages/system/kamaji/charts/kamaji/templates/etcd_rbac.yaml delete mode 100644 packages/system/kamaji/charts/kamaji/templates/etcd_sa.yaml delete mode 100644 packages/system/kamaji/charts/kamaji/templates/etcd_service.yaml delete mode 100644 packages/system/kamaji/charts/kamaji/templates/etcd_sts.yaml delete mode 100644 packages/system/kamaji/images/kamaji/patches/enable-gc.diff diff --git a/packages/system/kamaji/Makefile b/packages/system/kamaji/Makefile index 323ab704..18e80cf9 100644 --- a/packages/system/kamaji/Makefile +++ b/packages/system/kamaji/Makefile @@ -6,9 +6,10 @@ include ../../../scripts/package.mk update: rm -rf charts - helm repo add clastix https://clastix.github.io/charts - helm repo update clastix - helm pull clastix/kamaji --untar --untardir charts + tag=$$(git ls-remote --tags --sort="v:refname" https://github.com/clastix/kamaji | grep refs/tags/edge- | awk -F'[/^]' 'END{print $$3}') && \ + curl -sSL https://github.com/clastix/kamaji/archive/refs/tags/$${tag}.tar.gz | \ + tar -xzvf - --strip 1 kamaji-$${tag}/charts && \ + sed -i "/^FROM clastix/ s|:.*|:$${tag}|g" images/kamaji/Dockerfile image: docker 
buildx build images/kamaji \ diff --git a/packages/system/kamaji/charts/kamaji/Chart.lock b/packages/system/kamaji/charts/kamaji/Chart.lock new file mode 100644 index 00000000..de604948 --- /dev/null +++ b/packages/system/kamaji/charts/kamaji/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: kamaji-etcd + repository: https://clastix.github.io/charts + version: 0.8.0 +digest: sha256:525b0eb2b5bae709d62de9328312d42c54b5219c6df67061de0da79eeca04fb3 +generated: "2024-08-25T08:44:24.92211307+02:00" diff --git a/packages/system/kamaji/charts/kamaji/Chart.yaml b/packages/system/kamaji/charts/kamaji/Chart.yaml index 854242ba..5519b8f6 100644 --- a/packages/system/kamaji/charts/kamaji/Chart.yaml +++ b/packages/system/kamaji/charts/kamaji/Chart.yaml @@ -1,13 +1,9 @@ -annotations: - catalog.cattle.io/certified: partner - catalog.cattle.io/display-name: Kamaji - catalog.cattle.io/release-name: kamaji apiVersion: v2 appVersion: v1.0.0 description: Kamaji is the Hosted Control Plane Manager for Kubernetes. home: https://github.com/clastix/kamaji icon: https://github.com/clastix/kamaji/raw/master/assets/logo-colored.png -kubeVersion: '>=1.21.0-0' +kubeVersion: ">=1.21.0-0" maintainers: - email: dario@tranchitella.eu name: Dario Tranchitella @@ -21,4 +17,33 @@ name: kamaji sources: - https://github.com/clastix/kamaji type: application -version: 1.0.0 +version: 2.0.0 +dependencies: +- name: kamaji-etcd + repository: https://clastix.github.io/charts + version: ">=0.7.0" + condition: kamaji-etcd.deploy +annotations: + catalog.cattle.io/certified: partner + catalog.cattle.io/release-name: kamaji + catalog.cattle.io/display-name: Kamaji + artifacthub.io/crds: | + - kind: TenantControlPlane + version: v1alpha1 + name: tenantcontrolplanes.kamaji.clastix.io + displayName: TenantControlPlane + description: TenantControlPlane defines the desired state for a Control Plane backed by Kamaji. 
+ - kind: DataStore + version: v1alpha1 + name: datastores.kamaji.clastix.io + displayName: DataStore + description: DataStores is holding all the required details to communicate with a Datastore, such as etcd, MySQL, PostgreSQL, and NATS. + artifacthub.io/links: | + - name: CLASTIX + url: https://clastix.io + - name: support + url: https://clastix.io/support + artifacthub.io/operator: "true" + artifacthub.io/operatorCapabilities: "full lifecycle" + artifacthub.io/changes: | + - Using dependency chart `kamaji-etcd` as a default DataStore. diff --git a/packages/system/kamaji/charts/kamaji/README.md b/packages/system/kamaji/charts/kamaji/README.md index 89a7d078..a98bb0e3 100644 --- a/packages/system/kamaji/charts/kamaji/README.md +++ b/packages/system/kamaji/charts/kamaji/README.md @@ -1,6 +1,6 @@ # kamaji -![Version: 1.0.0](https://img.shields.io/badge/Version-1.0.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.0.0](https://img.shields.io/badge/AppVersion-v1.0.0-informational?style=flat-square) +![Version: 2.0.0](https://img.shields.io/badge/Version-2.0.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.0.0](https://img.shields.io/badge/AppVersion-v1.0.0-informational?style=flat-square) Kamaji is the Hosted Control Plane Manager for Kubernetes. @@ -20,6 +20,10 @@ Kamaji is the Hosted Control Plane Manager for Kubernetes. Kubernetes: `>=1.21.0-0` +| Repository | Name | Version | +|------------|------|---------| +| https://clastix.github.io/charts | kamaji-etcd | >=0.7.0 | + [Kamaji](https://github.com/clastix/kamaji) requires a [multi-tenant `etcd`](https://github.com/clastix/kamaji-internal/blob/master/deploy/getting-started-with-kamaji.md#setup-internal-multi-tenant-etcd) cluster. 
This Helm Chart starting from v0.1.1 provides the installation of an internal `etcd` in order to streamline the local test. If you'd like to use an externally managed etcd instance, you can specify the overrides and by setting the value `etcd.deploy=false`. @@ -66,49 +70,7 @@ Here the values you can override: | Key | Type | Default | Description | |-----|------|---------|-------------| | affinity | object | `{}` | Kubernetes affinity rules to apply to Kamaji controller pods | -| cfssl.image.repository | string | `"cfssl/cfssl"` | | -| cfssl.image.tag | string | `"latest"` | | -| datastore.basicAuth.passwordSecret.keyPath | string | `nil` | The Secret key where the data is stored. | -| datastore.basicAuth.passwordSecret.name | string | `nil` | The name of the Secret containing the password used to connect to the relational database. | -| datastore.basicAuth.passwordSecret.namespace | string | `nil` | The namespace of the Secret containing the password used to connect to the relational database. | -| datastore.basicAuth.usernameSecret.keyPath | string | `nil` | The Secret key where the data is stored. | -| datastore.basicAuth.usernameSecret.name | string | `nil` | The name of the Secret containing the username used to connect to the relational database. | -| datastore.basicAuth.usernameSecret.namespace | string | `nil` | The namespace of the Secret containing the username used to connect to the relational database. | -| datastore.driver | string | `"etcd"` | (string) The Kamaji Datastore driver, supported: etcd, MySQL, PostgreSQL (defaults=etcd). | -| datastore.enabled | bool | `true` | (bool) Enable the Kamaji Datastore creation (default=true) | -| datastore.endpoints | list | `[]` | (array) List of endpoints of the selected Datastore. When letting the Chart install the etcd datastore, this field is populated automatically. 
| -| datastore.nameOverride | string | `nil` | The Datastore name override, if empty and enabled=true defaults to `default`, if enabled=false, this is the name of the Datastore to connect to. | -| datastore.tlsConfig.certificateAuthority.certificate.keyPath | string | `nil` | Key of the Secret which contains the content of the certificate. | -| datastore.tlsConfig.certificateAuthority.certificate.name | string | `nil` | Name of the Secret containing the CA required to establish the mandatory SSL/TLS connection to the datastore. | -| datastore.tlsConfig.certificateAuthority.certificate.namespace | string | `nil` | Namespace of the Secret containing the CA required to establish the mandatory SSL/TLS connection to the datastore. | -| datastore.tlsConfig.certificateAuthority.privateKey.keyPath | string | `nil` | Key of the Secret which contains the content of the private key. | -| datastore.tlsConfig.certificateAuthority.privateKey.name | string | `nil` | Name of the Secret containing the CA private key required to establish the mandatory SSL/TLS connection to the datastore. | -| datastore.tlsConfig.certificateAuthority.privateKey.namespace | string | `nil` | Namespace of the Secret containing the CA private key required to establish the mandatory SSL/TLS connection to the datastore. | -| datastore.tlsConfig.clientCertificate.certificate.keyPath | string | `nil` | Key of the Secret which contains the content of the certificate. | -| datastore.tlsConfig.clientCertificate.certificate.name | string | `nil` | Name of the Secret containing the client certificate required to establish the mandatory SSL/TLS connection to the datastore. | -| datastore.tlsConfig.clientCertificate.certificate.namespace | string | `nil` | Namespace of the Secret containing the client certificate required to establish the mandatory SSL/TLS connection to the datastore. 
| -| datastore.tlsConfig.clientCertificate.privateKey.keyPath | string | `nil` | Key of the Secret which contains the content of the private key. | -| datastore.tlsConfig.clientCertificate.privateKey.name | string | `nil` | Name of the Secret containing the client certificate private key required to establish the mandatory SSL/TLS connection to the datastore. | -| datastore.tlsConfig.clientCertificate.privateKey.namespace | string | `nil` | Namespace of the Secret containing the client certificate private key required to establish the mandatory SSL/TLS connection to the datastore. | -| datastore.tlsConfig.enabled | bool | `true` | | -| etcd.compactionInterval | int | `0` | ETCD Compaction interval (e.g. "5m0s"). (default: "0" (disabled)) | -| etcd.deploy | bool | `true` | Install an etcd with enabled multi-tenancy along with Kamaji | -| etcd.image | object | `{"pullPolicy":"IfNotPresent","repository":"quay.io/coreos/etcd","tag":"v3.5.6"}` | Install specific etcd image | -| etcd.livenessProbe | object | `{"failureThreshold":8,"httpGet":{"path":"/health?serializable=true","port":2381,"scheme":"HTTP"},"initialDelaySeconds":10,"periodSeconds":10,"timeoutSeconds":15}` | The livenessProbe for the etcd container | -| etcd.overrides.caSecret.name | string | `"etcd-certs"` | Name of the secret which contains CA's certificate and private key. (default: "etcd-certs") | -| etcd.overrides.caSecret.namespace | string | `"kamaji-system"` | Namespace of the secret which contains CA's certificate and private key. (default: "kamaji-system") | -| etcd.overrides.clientSecret.name | string | `"root-client-certs"` | Name of the secret which contains ETCD client certificates. (default: "root-client-certs") | -| etcd.overrides.clientSecret.namespace | string | `"kamaji-system"` | Name of the namespace where the secret which contains ETCD client certificates is. 
(default: "kamaji-system") | -| etcd.overrides.endpoints | object | `{"etcd-0":"etcd-0.etcd.kamaji-system.svc.cluster.local","etcd-1":"etcd-1.etcd.kamaji-system.svc.cluster.local","etcd-2":"etcd-2.etcd.kamaji-system.svc.cluster.local"}` | (map) Dictionary of the endpoints for the etcd cluster's members, key is the name of the etcd server. Don't define the protocol (TLS is automatically inflected), or any port, inflected from .etcd.peerApiPort value. | -| etcd.peerApiPort | int | `2380` | The peer API port which servers are listening to. | -| etcd.persistence.accessModes[0] | string | `"ReadWriteOnce"` | | -| etcd.persistence.customAnnotations | object | `{}` | The custom annotations to add to the PVC | -| etcd.persistence.size | string | `"10Gi"` | | -| etcd.persistence.storageClassName | string | `""` | | -| etcd.port | int | `2379` | The client request port. | -| etcd.serviceAccount.create | bool | `true` | Create a ServiceAccount, required to install and provision the etcd backing storage (default: true) | -| etcd.serviceAccount.name | string | `""` | Define the ServiceAccount name to use during the setup and provision of the etcd backing storage (default: "") | -| etcd.tolerations | list | `[]` | (array) Kubernetes affinity rules to apply to Kamaji etcd pods | +| defaultDatastoreName | string | `"default"` | Specify the default DataStore name for the Kamaji instance. | | extraArgs | list | `[]` | A list of extra arguments to add to the kamaji controller default ones | | fullnameOverride | string | `""` | | | healthProbeBindAddress | string | `":8081"` | The address the probe endpoint binds to. (default ":8081") | @@ -116,9 +78,13 @@ Here the values you can override: | image.repository | string | `"clastix/kamaji"` | The container image of the Kamaji controller. | | image.tag | string | `nil` | Overrides the image tag whose default is the chart appVersion. 
| | imagePullSecrets | list | `[]` | | +| kamaji-etcd.datastore.enabled | bool | `true` | | +| kamaji-etcd.datastore.name | string | `"default"` | | +| kamaji-etcd.deploy | bool | `true` | | +| kamaji-etcd.fullnameOverride | string | `"kamaji-etcd"` | | | livenessProbe | object | `{"httpGet":{"path":"/healthz","port":"healthcheck"},"initialDelaySeconds":15,"periodSeconds":20}` | The livenessProbe for the controller container | -| loggingDevel.enable | bool | `false` | (string) Development Mode defaults(encoder=consoleEncoder,logLevel=Debug,stackTraceLevel=Warn). Production Mode defaults(encoder=jsonEncoder,logLevel=Info,stackTraceLevel=Error) (default false) | -| metricsBindAddress | string | `":8080"` | (string) The address the metric endpoint binds to. (default ":8080") | +| loggingDevel.enable | bool | `false` | Development Mode defaults(encoder=consoleEncoder,logLevel=Debug,stackTraceLevel=Warn). Production Mode defaults(encoder=jsonEncoder,logLevel=Info,stackTraceLevel=Error) (default false) | +| metricsBindAddress | string | `":8080"` | The address the metric endpoint binds to. (default ":8080") | | nameOverride | string | `""` | | | nodeSelector | object | `{}` | Kubernetes node selector rules to schedule Kamaji controller | | podAnnotations | object | `{}` | The annotations to apply to the Kamaji controller pods. 
| diff --git a/packages/system/kamaji/charts/kamaji/controller-gen/clusterrole.yaml b/packages/system/kamaji/charts/kamaji/controller-gen/clusterrole.yaml new file mode 100644 index 00000000..93530197 --- /dev/null +++ b/packages/system/kamaji/charts/kamaji/controller-gen/clusterrole.yaml @@ -0,0 +1,76 @@ +- apiGroups: + - apps + resources: + - deployments + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - batch + resources: + - jobs + verbs: + - create + - delete + - get + - list + - watch +- apiGroups: + - "" + resources: + - configmaps + - secrets + - services + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kamaji.clastix.io + resources: + - datastores + - tenantcontrolplanes + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kamaji.clastix.io + resources: + - datastores/status + - tenantcontrolplanes/status + verbs: + - get + - patch + - update +- apiGroups: + - kamaji.clastix.io + resources: + - tenantcontrolplanes/finalizers + verbs: + - update +- apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - create + - delete + - get + - list + - patch + - update + - watch diff --git a/packages/system/kamaji/charts/kamaji/controller-gen/crd-conversion.yaml b/packages/system/kamaji/charts/kamaji/controller-gen/crd-conversion.yaml new file mode 100644 index 00000000..be48bdb0 --- /dev/null +++ b/packages/system/kamaji/charts/kamaji/controller-gen/crd-conversion.yaml @@ -0,0 +1,11 @@ +spec: + conversion: + strategy: Webhook + webhook: + clientConfig: + service: + name: kamaji-webhook-service + namespace: kamaji-system + path: /convert + conversionReviewVersions: + - v1 diff --git a/packages/system/kamaji/charts/kamaji/controller-gen/mutating-webhook.yaml b/packages/system/kamaji/charts/kamaji/controller-gen/mutating-webhook.yaml new file mode 100644 index 00000000..88a152e7 --- /dev/null +++ 
b/packages/system/kamaji/charts/kamaji/controller-gen/mutating-webhook.yaml @@ -0,0 +1,20 @@ +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: '{{ include "kamaji.webhookServiceName" . }}' + namespace: '{{ .Release.Namespace }}' + path: /mutate-kamaji-clastix-io-v1alpha1-tenantcontrolplane + failurePolicy: Fail + name: mtenantcontrolplane.kb.io + rules: + - apiGroups: + - kamaji.clastix.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - tenantcontrolplanes + sideEffects: None diff --git a/packages/system/kamaji/charts/kamaji/controller-gen/validating-webhook.yaml b/packages/system/kamaji/charts/kamaji/controller-gen/validating-webhook.yaml new file mode 100644 index 00000000..7042df0b --- /dev/null +++ b/packages/system/kamaji/charts/kamaji/controller-gen/validating-webhook.yaml @@ -0,0 +1,81 @@ +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: '{{ include "kamaji.webhookServiceName" . }}' + namespace: '{{ .Release.Namespace }}' + path: /telemetry + failurePolicy: Ignore + name: telemetry.kamaji.clastix.io + rules: + - apiGroups: + - kamaji.clastix.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + - DELETE + resources: + - tenantcontrolplanes + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: '{{ include "kamaji.webhookServiceName" . }}' + namespace: '{{ .Release.Namespace }}' + path: /validate-kamaji-clastix-io-v1alpha1-datastore + failurePolicy: Fail + name: vdatastore.kb.io + rules: + - apiGroups: + - kamaji.clastix.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + - DELETE + resources: + - datastores + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: '{{ include "kamaji.webhookServiceName" . 
}}' + namespace: '{{ .Release.Namespace }}' + path: /validate--v1-secret + failurePolicy: Ignore + name: vdatastoresecrets.kb.io + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - DELETE + resources: + - secrets + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: '{{ include "kamaji.webhookServiceName" . }}' + namespace: '{{ .Release.Namespace }}' + path: /validate-kamaji-clastix-io-v1alpha1-tenantcontrolplane + failurePolicy: Fail + name: vtenantcontrolplane.kb.io + rules: + - apiGroups: + - kamaji.clastix.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - tenantcontrolplanes + sideEffects: None diff --git a/packages/system/kamaji/charts/kamaji/crds/datastore.yaml b/packages/system/kamaji/charts/kamaji/crds/kamaji.clastix.io_datastores.yaml similarity index 91% rename from packages/system/kamaji/charts/kamaji/crds/datastore.yaml rename to packages/system/kamaji/charts/kamaji/crds/kamaji.clastix.io_datastores.yaml index 97405495..f3071767 100644 --- a/packages/system/kamaji/charts/kamaji/crds/datastore.yaml +++ b/packages/system/kamaji/charts/kamaji/crds/kamaji.clastix.io_datastores.yaml @@ -4,7 +4,7 @@ kind: CustomResourceDefinition metadata: annotations: cert-manager.io/inject-ca-from: kamaji-system/kamaji-serving-cert - controller-gen.kubebuilder.io/version: v0.11.4 + controller-gen.kubebuilder.io/version: v0.16.1 name: datastores.kamaji.clastix.io spec: group: kamaji.clastix.io @@ -71,12 +71,10 @@ spec: minLength: 1 type: string name: - description: name is unique within a namespace to reference - a secret resource. + description: name is unique within a namespace to reference a secret resource. type: string namespace: - description: namespace defines the space within which - the secret name must be unique. + description: namespace defines the space within which the secret name must be unique. 
type: string required: - keyPath @@ -100,12 +98,10 @@ spec: minLength: 1 type: string name: - description: name is unique within a namespace to reference - a secret resource. + description: name is unique within a namespace to reference a secret resource. type: string namespace: - description: namespace defines the space within which - the secret name must be unique. + description: namespace defines the space within which the secret name must be unique. type: string required: - keyPath @@ -159,12 +155,10 @@ spec: minLength: 1 type: string name: - description: name is unique within a namespace to - reference a secret resource. + description: name is unique within a namespace to reference a secret resource. type: string namespace: - description: namespace defines the space within which - the secret name must be unique. + description: namespace defines the space within which the secret name must be unique. type: string required: - keyPath @@ -188,12 +182,10 @@ spec: minLength: 1 type: string name: - description: name is unique within a namespace to - reference a secret resource. + description: name is unique within a namespace to reference a secret resource. type: string namespace: - description: namespace defines the space within which - the secret name must be unique. + description: namespace defines the space within which the secret name must be unique. type: string required: - keyPath @@ -204,8 +196,7 @@ spec: - certificate type: object clientCertificate: - description: Specifies the SSL/TLS key and private key pair used - to connect to the data store. + description: Specifies the SSL/TLS key and private key pair used to connect to the data store. properties: certificate: properties: @@ -224,12 +215,10 @@ spec: minLength: 1 type: string name: - description: name is unique within a namespace to - reference a secret resource. + description: name is unique within a namespace to reference a secret resource. 
type: string namespace: - description: namespace defines the space within which - the secret name must be unique. + description: namespace defines the space within which the secret name must be unique. type: string required: - keyPath @@ -253,12 +242,10 @@ spec: minLength: 1 type: string name: - description: name is unique within a namespace to - reference a secret resource. + description: name is unique within a namespace to reference a secret resource. type: string namespace: - description: namespace defines the space within which - the secret name must be unique. + description: namespace defines the space within which the secret name must be unique. type: string required: - keyPath @@ -280,8 +267,7 @@ spec: description: DataStoreStatus defines the observed state of DataStore. properties: usedBy: - description: List of the Tenant Control Planes, namespaced named, - using this data store. + description: List of the Tenant Control Planes, namespaced named, using this data store. items: type: string type: array diff --git a/packages/system/kamaji/charts/kamaji/crds/tenantcontrolplane.yaml b/packages/system/kamaji/charts/kamaji/crds/kamaji.clastix.io_tenantcontrolplanes.yaml similarity index 91% rename from packages/system/kamaji/charts/kamaji/crds/tenantcontrolplane.yaml rename to packages/system/kamaji/charts/kamaji/crds/kamaji.clastix.io_tenantcontrolplanes.yaml index b94dd328..cc2c141f 100644 --- a/packages/system/kamaji/charts/kamaji/crds/tenantcontrolplane.yaml +++ b/packages/system/kamaji/charts/kamaji/crds/kamaji.clastix.io_tenantcontrolplanes.yaml @@ -1,24 +1,15 @@ ---- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: cert-manager.io/inject-ca-from: kamaji-system/kamaji-serving-cert - controller-gen.kubebuilder.io/version: v0.11.4 + controller-gen.kubebuilder.io/version: v0.16.1 name: tenantcontrolplanes.kamaji.clastix.io spec: - conversion: - strategy: Webhook - webhook: - clientConfig: - service: - name: 
kamaji-webhook-service - namespace: kamaji-system - path: /convert - conversionReviewVersions: - - v1 group: kamaji.clastix.io names: + categories: + - kamaji kind: TenantControlPlane listKind: TenantControlPlaneList plural: tenantcontrolplanes @@ -55,8 +46,7 @@ spec: name: v1alpha1 schema: openAPIV3Schema: - description: TenantControlPlane is the Schema for the tenantcontrolplanes - API. + description: TenantControlPlane is the Schema for the tenantcontrolplanes API. properties: apiVersion: description: |- @@ -98,13 +88,12 @@ spec: type: string type: object konnectivity: - description: Enables the Konnectivity addon in the Tenant Cluster, - required if the worker nodes are in a different network. + description: Enables the Konnectivity addon in the Tenant Cluster, required if the worker nodes are in a different network. properties: agent: default: image: registry.k8s.io/kas-network-proxy/proxy-agent - version: v0.0.32 + version: v0.28.6 properties: extraArgs: description: |- @@ -117,8 +106,7 @@ spec: type: array image: default: registry.k8s.io/kas-network-proxy/proxy-agent - description: AgentImage defines the container image for - Konnectivity's agent. + description: AgentImage defines the container image for Konnectivity's agent. type: string tolerations: default: @@ -165,7 +153,7 @@ spec: type: object type: array version: - default: v0.0.32 + default: v0.28.6 description: Version for Konnectivity agent. type: string type: object @@ -173,7 +161,7 @@ spec: default: image: registry.k8s.io/kas-network-proxy/proxy-server port: 8132 - version: v0.0.32 + version: v0.28.6 properties: extraArgs: description: |- @@ -186,32 +174,26 @@ spec: type: array image: default: registry.k8s.io/kas-network-proxy/proxy-server - description: Container image used by the Konnectivity - server. + description: Container image used by the Konnectivity server. type: string port: - description: The port which Konnectivity server is listening - to. 
+ description: The port which Konnectivity server is listening to. format: int32 type: integer resources: - description: Resources define the amount of CPU and memory - to allocate to the Konnectivity server. + description: Resources define the amount of CPU and memory to allocate to the Konnectivity server. properties: claims: description: |- Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - - + This is an alpha field and requires enabling the DynamicResourceAllocation feature gate. - - + This field is immutable. It can only be set for containers. items: - description: ResourceClaim references one entry - in PodSpec.ResourceClaims. + description: ResourceClaim references one entry in PodSpec.ResourceClaims. properties: name: description: |- @@ -219,6 +201,12 @@ spec: the Pod where this field is used. It makes that resource available inside a container. type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string required: - name type: object @@ -252,9 +240,8 @@ spec: type: object type: object version: - default: v0.0.32 - description: Container image version of the Konnectivity - server. + default: v0.28.6 + description: Container image version of the Konnectivity server. type: string required: - port @@ -283,15 +270,12 @@ spec: such as the number of Pod replicas, the Service resource, or the Ingress. properties: deployment: - description: Defining the options for the deployed Tenant Control - Plane as Deployment resource. + description: Defining the options for the deployed Tenant Control Plane as Deployment resource. properties: additionalContainers: - description: AdditionalContainers allows adding additional - containers to the Control Plane deployment. 
+ description: AdditionalContainers allows adding additional containers to the Control Plane deployment. items: - description: A single application container that you want - to run within a pod. + description: A single application container that you want to run within a pod. properties: args: description: |- @@ -326,12 +310,10 @@ spec: List of environment variables to set in the container. Cannot be updated. items: - description: EnvVar represents an environment variable - present in a Container. + description: EnvVar represents an environment variable present in a Container. properties: name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. + description: Name of the environment variable. Must be a C_IDENTIFIER. type: string value: description: |- @@ -346,8 +328,7 @@ spec: Defaults to "". type: string valueFrom: - description: Source for the environment variable's - value. Cannot be used if value is not empty. + description: Source for the environment variable's value. Cannot be used if value is not empty. properties: configMapKeyRef: description: Selects a key of a ConfigMap. @@ -356,14 +337,16 @@ spec: description: The key to select. type: string name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? type: string optional: - description: Specify whether the ConfigMap - or its key must be defined + description: Specify whether the ConfigMap or its key must be defined type: boolean required: - key @@ -375,13 +358,10 @@ spec: spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. 
properties: apiVersion: - description: Version of the schema the - FieldPath is written in terms of, defaults - to "v1". + description: Version of the schema the FieldPath is written in terms of, defaults to "v1". type: string fieldPath: - description: Path of the field to select - in the specified API version. + description: Path of the field to select in the specified API version. type: string required: - fieldPath @@ -393,16 +373,13 @@ spec: (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. properties: containerName: - description: 'Container name: required - for volumes, optional for env vars' + description: 'Container name: required for volumes, optional for env vars' type: string divisor: anyOf: - type: integer - type: string - description: Specifies the output format - of the exposed resources, defaults to - "1" + description: Specifies the output format of the exposed resources, defaults to "1" pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true resource: @@ -413,23 +390,22 @@ spec: type: object x-kubernetes-map-type: atomic secretKeyRef: - description: Selects a key of a secret in - the pod's namespace + description: Selects a key of a secret in the pod's namespace properties: key: - description: The key of the secret to - select from. Must be a valid secret - key. + description: The key of the secret to select from. Must be a valid secret key. type: string name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? 
type: string optional: - description: Specify whether the Secret - or its key must be defined + description: Specify whether the Secret or its key must be defined type: boolean required: - key @@ -452,40 +428,42 @@ spec: Values defined by an Env with a duplicate key will take precedence. Cannot be updated. items: - description: EnvFromSource represents the source of - a set of ConfigMaps + description: EnvFromSource represents the source of a set of ConfigMaps properties: configMapRef: description: The ConfigMap to select from properties: name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? type: string optional: - description: Specify whether the ConfigMap - must be defined + description: Specify whether the ConfigMap must be defined type: boolean type: object x-kubernetes-map-type: atomic prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be a C_IDENTIFIER. + description: An optional identifier to prepend to each key in the ConfigMap. Must be a C_IDENTIFIER. type: string secretRef: description: The Secret to select from properties: name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? 
type: string optional: - description: Specify whether the Secret must - be defined + description: Specify whether the Secret must be defined type: boolean type: object x-kubernetes-map-type: atomic @@ -535,8 +513,7 @@ spec: x-kubernetes-list-type: atomic type: object httpGet: - description: HTTPGet specifies the http request - to perform. + description: HTTPGet specifies the http request to perform. properties: host: description: |- @@ -544,11 +521,9 @@ spec: "Host" in httpHeaders instead. type: string httpHeaders: - description: Custom headers to set in the - request. HTTP allows repeated headers. + description: Custom headers to set in the request. HTTP allows repeated headers. items: - description: HTTPHeader describes a custom - header to be used in HTTP probes + description: HTTPHeader describes a custom header to be used in HTTP probes properties: name: description: |- @@ -565,8 +540,7 @@ spec: type: array x-kubernetes-list-type: atomic path: - description: Path to access on the HTTP - server. + description: Path to access on the HTTP server. type: string port: anyOf: @@ -586,12 +560,10 @@ spec: - port type: object sleep: - description: Sleep represents the duration that - the container should sleep before being terminated. + description: Sleep represents the duration that the container should sleep before being terminated. properties: seconds: - description: Seconds is the number of seconds - to sleep. + description: Seconds is the number of seconds to sleep. format: int64 type: integer required: @@ -604,8 +576,7 @@ spec: lifecycle hooks will fail in runtime when tcp handler is specified. properties: host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' + description: 'Optional: Host name to connect to, defaults to the pod IP.' type: string port: anyOf: @@ -648,8 +619,7 @@ spec: x-kubernetes-list-type: atomic type: object httpGet: - description: HTTPGet specifies the http request - to perform. 
+ description: HTTPGet specifies the http request to perform. properties: host: description: |- @@ -657,11 +627,9 @@ spec: "Host" in httpHeaders instead. type: string httpHeaders: - description: Custom headers to set in the - request. HTTP allows repeated headers. + description: Custom headers to set in the request. HTTP allows repeated headers. items: - description: HTTPHeader describes a custom - header to be used in HTTP probes + description: HTTPHeader describes a custom header to be used in HTTP probes properties: name: description: |- @@ -678,8 +646,7 @@ spec: type: array x-kubernetes-list-type: atomic path: - description: Path to access on the HTTP - server. + description: Path to access on the HTTP server. type: string port: anyOf: @@ -699,12 +666,10 @@ spec: - port type: object sleep: - description: Sleep represents the duration that - the container should sleep before being terminated. + description: Sleep represents the duration that the container should sleep before being terminated. properties: seconds: - description: Seconds is the number of seconds - to sleep. + description: Seconds is the number of seconds to sleep. format: int64 type: integer required: @@ -717,8 +682,7 @@ spec: lifecycle hooks will fail in runtime when tcp handler is specified. properties: host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' + description: 'Optional: Host name to connect to, defaults to the pod IP.' type: string port: anyOf: @@ -763,28 +727,25 @@ spec: format: int32 type: integer grpc: - description: GRPC specifies an action involving - a GRPC port. + description: GRPC specifies an action involving a GRPC port. properties: port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. + description: Port number of the gRPC service. Number must be in the range 1 to 65535. 
format: int32 type: integer service: + default: "" description: |- Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - + If this is not specified, the default behavior is defined by gRPC. type: string required: - port type: object httpGet: - description: HTTPGet specifies the http request - to perform. + description: HTTPGet specifies the http request to perform. properties: host: description: |- @@ -792,11 +753,9 @@ spec: "Host" in httpHeaders instead. type: string httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. + description: Custom headers to set in the request. HTTP allows repeated headers. items: - description: HTTPHeader describes a custom - header to be used in HTTP probes + description: HTTPHeader describes a custom header to be used in HTTP probes properties: name: description: |- @@ -851,12 +810,10 @@ spec: format: int32 type: integer tcpSocket: - description: TCPSocket specifies an action involving - a TCP port. + description: TCPSocket specifies an action involving a TCP port. properties: host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' + description: 'Optional: Host name to connect to, defaults to the pod IP.' type: string port: anyOf: @@ -908,8 +865,7 @@ spec: For more information See https://github.com/kubernetes/kubernetes/issues/108255. Cannot be updated. items: - description: ContainerPort represents a network port - in a single container. + description: ContainerPort represents a network port in a single container. properties: containerPort: description: |- @@ -918,8 +874,7 @@ spec: format: int32 type: integer hostIP: - description: What host IP to bind the external - port to. + description: What host IP to bind the external port to. 
type: string hostPort: description: |- @@ -978,28 +933,25 @@ spec: format: int32 type: integer grpc: - description: GRPC specifies an action involving - a GRPC port. + description: GRPC specifies an action involving a GRPC port. properties: port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. + description: Port number of the gRPC service. Number must be in the range 1 to 65535. format: int32 type: integer service: + default: "" description: |- Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - + If this is not specified, the default behavior is defined by gRPC. type: string required: - port type: object httpGet: - description: HTTPGet specifies the http request - to perform. + description: HTTPGet specifies the http request to perform. properties: host: description: |- @@ -1007,11 +959,9 @@ spec: "Host" in httpHeaders instead. type: string httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. + description: Custom headers to set in the request. HTTP allows repeated headers. items: - description: HTTPHeader describes a custom - header to be used in HTTP probes + description: HTTPHeader describes a custom header to be used in HTTP probes properties: name: description: |- @@ -1066,12 +1016,10 @@ spec: format: int32 type: integer tcpSocket: - description: TCPSocket specifies an action involving - a TCP port. + description: TCPSocket specifies an action involving a TCP port. properties: host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' + description: 'Optional: Host name to connect to, defaults to the pod IP.' type: string port: anyOf: @@ -1110,8 +1058,7 @@ spec: resizePolicy: description: Resources resize policy for the container. items: - description: ContainerResizePolicy represents resource - resize policy for the container. 
+ description: ContainerResizePolicy represents resource resize policy for the container. properties: resourceName: description: |- @@ -1139,16 +1086,13 @@ spec: description: |- Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - - + This is an alpha field and requires enabling the DynamicResourceAllocation feature gate. - - + This field is immutable. It can only be set for containers. items: - description: ResourceClaim references one entry - in PodSpec.ResourceClaims. + description: ResourceClaim references one entry in PodSpec.ResourceClaims. properties: name: description: |- @@ -1156,6 +1100,12 @@ spec: the Pod where this field is used. It makes that resource available inside a container. type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string required: - name type: object @@ -1255,16 +1205,14 @@ spec: add: description: Added capabilities items: - description: Capability represent POSIX capabilities - type + description: Capability represent POSIX capabilities type type: string type: array x-kubernetes-list-type: atomic drop: description: Removed capabilities items: - description: Capability represent POSIX capabilities - type + description: Capability represent POSIX capabilities type type: string type: array x-kubernetes-list-type: atomic @@ -1279,7 +1227,7 @@ spec: procMount: description: |- procMount denotes the type of proc mount to use for the containers. - The default is DefaultProcMount which uses the container runtime defaults for + The default value is Default which uses the container runtime defaults for readonly paths and masked paths. This requires the ProcMountType feature flag to be enabled. Note that this field cannot be set when spec.os.name is windows. 
@@ -1326,20 +1274,16 @@ spec: Note that this field cannot be set when spec.os.name is windows. properties: level: - description: Level is SELinux level label that - applies to the container. + description: Level is SELinux level label that applies to the container. type: string role: - description: Role is a SELinux role label that - applies to the container. + description: Role is a SELinux role label that applies to the container. type: string type: - description: Type is a SELinux type label that - applies to the container. + description: Type is a SELinux type label that applies to the container. type: string user: - description: User is a SELinux user label that - applies to the container. + description: User is a SELinux user label that applies to the container. type: string type: object seccompProfile: @@ -1360,8 +1304,7 @@ spec: description: |- type indicates which kind of seccomp profile will be applied. Valid options are: - - + Localhost - a profile defined in a file on the node should be used. RuntimeDefault - the container runtime default profile should be used. Unconfined - no profile should be applied. @@ -1383,8 +1326,7 @@ spec: GMSA credential spec named by the GMSACredentialSpecName field. type: string gmsaCredentialSpecName: - description: GMSACredentialSpecName is the name - of the GMSA credential spec to use. + description: GMSACredentialSpecName is the name of the GMSA credential spec to use. type: string hostProcess: description: |- @@ -1434,28 +1376,25 @@ spec: format: int32 type: integer grpc: - description: GRPC specifies an action involving - a GRPC port. + description: GRPC specifies an action involving a GRPC port. properties: port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. + description: Port number of the gRPC service. Number must be in the range 1 to 65535. 
format: int32 type: integer service: + default: "" description: |- Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - + If this is not specified, the default behavior is defined by gRPC. type: string required: - port type: object httpGet: - description: HTTPGet specifies the http request - to perform. + description: HTTPGet specifies the http request to perform. properties: host: description: |- @@ -1463,11 +1402,9 @@ spec: "Host" in httpHeaders instead. type: string httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. + description: Custom headers to set in the request. HTTP allows repeated headers. items: - description: HTTPHeader describes a custom - header to be used in HTTP probes + description: HTTPHeader describes a custom header to be used in HTTP probes properties: name: description: |- @@ -1522,12 +1459,10 @@ spec: format: int32 type: integer tcpSocket: - description: TCPSocket specifies an action involving - a TCP port. + description: TCPSocket specifies an action involving a TCP port. properties: host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' + description: 'Optional: Host name to connect to, defaults to the pod IP.' type: string port: anyOf: @@ -1605,20 +1540,15 @@ spec: Default is false. type: boolean volumeDevices: - description: volumeDevices is the list of block devices - to be used by the container. + description: volumeDevices is the list of block devices to be used by the container. items: - description: volumeDevice describes a mapping of a - raw block device within a container. + description: volumeDevice describes a mapping of a raw block device within a container. properties: devicePath: - description: devicePath is the path inside of - the container that the device will be mapped - to. 
+ description: devicePath is the path inside of the container that the device will be mapped to. type: string name: - description: name must match the name of a persistentVolumeClaim - in the pod + description: name must match the name of a persistentVolumeClaim in the pod type: string required: - devicePath @@ -1633,8 +1563,7 @@ spec: Pod volumes to mount into the container's filesystem. Cannot be updated. items: - description: VolumeMount describes a mounting of a - Volume within a container. + description: VolumeMount describes a mounting of a Volume within a container. properties: mountPath: description: |- @@ -1662,23 +1591,19 @@ spec: description: |- RecursiveReadOnly specifies whether read-only mounts should be handled recursively. - - + If ReadOnly is false, this field has no meaning and must be unspecified. - - + If ReadOnly is true, and this field is set to Disabled, the mount is not made recursively read-only. If this field is set to IfPossible, the mount is made recursively read-only, if it is supported by the container runtime. If this field is set to Enabled, the mount is made recursively read-only if it is supported by the container runtime, otherwise the pod will not be started and an error will be generated to indicate the reason. - - + If this field is set to IfPossible or Enabled, MountPropagation must be set to None (or be unspecified, which defaults to None). - - + If this field is not specified, it is treated as an equivalent of Disabled. type: string subPath: @@ -1713,11 +1638,9 @@ spec: type: object type: array additionalInitContainers: - description: AdditionalInitContainers allows adding additional - init containers to the Control Plane deployment. + description: AdditionalInitContainers allows adding additional init containers to the Control Plane deployment. items: - description: A single application container that you want - to run within a pod. + description: A single application container that you want to run within a pod. 
properties: args: description: |- @@ -1752,12 +1675,10 @@ spec: List of environment variables to set in the container. Cannot be updated. items: - description: EnvVar represents an environment variable - present in a Container. + description: EnvVar represents an environment variable present in a Container. properties: name: - description: Name of the environment variable. - Must be a C_IDENTIFIER. + description: Name of the environment variable. Must be a C_IDENTIFIER. type: string value: description: |- @@ -1772,8 +1693,7 @@ spec: Defaults to "". type: string valueFrom: - description: Source for the environment variable's - value. Cannot be used if value is not empty. + description: Source for the environment variable's value. Cannot be used if value is not empty. properties: configMapKeyRef: description: Selects a key of a ConfigMap. @@ -1782,14 +1702,16 @@ spec: description: The key to select. type: string name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? type: string optional: - description: Specify whether the ConfigMap - or its key must be defined + description: Specify whether the ConfigMap or its key must be defined type: boolean required: - key @@ -1801,13 +1723,10 @@ spec: spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. properties: apiVersion: - description: Version of the schema the - FieldPath is written in terms of, defaults - to "v1". + description: Version of the schema the FieldPath is written in terms of, defaults to "v1". type: string fieldPath: - description: Path of the field to select - in the specified API version. 
+ description: Path of the field to select in the specified API version. type: string required: - fieldPath @@ -1819,16 +1738,13 @@ spec: (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. properties: containerName: - description: 'Container name: required - for volumes, optional for env vars' + description: 'Container name: required for volumes, optional for env vars' type: string divisor: anyOf: - type: integer - type: string - description: Specifies the output format - of the exposed resources, defaults to - "1" + description: Specifies the output format of the exposed resources, defaults to "1" pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true resource: @@ -1839,23 +1755,22 @@ spec: type: object x-kubernetes-map-type: atomic secretKeyRef: - description: Selects a key of a secret in - the pod's namespace + description: Selects a key of a secret in the pod's namespace properties: key: - description: The key of the secret to - select from. Must be a valid secret - key. + description: The key of the secret to select from. Must be a valid secret key. type: string name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? type: string optional: - description: Specify whether the Secret - or its key must be defined + description: Specify whether the Secret or its key must be defined type: boolean required: - key @@ -1878,40 +1793,42 @@ spec: Values defined by an Env with a duplicate key will take precedence. Cannot be updated. 
items: - description: EnvFromSource represents the source of - a set of ConfigMaps + description: EnvFromSource represents the source of a set of ConfigMaps properties: configMapRef: description: The ConfigMap to select from properties: name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? type: string optional: - description: Specify whether the ConfigMap - must be defined + description: Specify whether the ConfigMap must be defined type: boolean type: object x-kubernetes-map-type: atomic prefix: - description: An optional identifier to prepend - to each key in the ConfigMap. Must be a C_IDENTIFIER. + description: An optional identifier to prepend to each key in the ConfigMap. Must be a C_IDENTIFIER. type: string secretRef: description: The Secret to select from properties: name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? type: string optional: - description: Specify whether the Secret must - be defined + description: Specify whether the Secret must be defined type: boolean type: object x-kubernetes-map-type: atomic @@ -1961,8 +1878,7 @@ spec: x-kubernetes-list-type: atomic type: object httpGet: - description: HTTPGet specifies the http request - to perform. + description: HTTPGet specifies the http request to perform. properties: host: description: |- @@ -1970,11 +1886,9 @@ spec: "Host" in httpHeaders instead. 
type: string httpHeaders: - description: Custom headers to set in the - request. HTTP allows repeated headers. + description: Custom headers to set in the request. HTTP allows repeated headers. items: - description: HTTPHeader describes a custom - header to be used in HTTP probes + description: HTTPHeader describes a custom header to be used in HTTP probes properties: name: description: |- @@ -1991,8 +1905,7 @@ spec: type: array x-kubernetes-list-type: atomic path: - description: Path to access on the HTTP - server. + description: Path to access on the HTTP server. type: string port: anyOf: @@ -2012,12 +1925,10 @@ spec: - port type: object sleep: - description: Sleep represents the duration that - the container should sleep before being terminated. + description: Sleep represents the duration that the container should sleep before being terminated. properties: seconds: - description: Seconds is the number of seconds - to sleep. + description: Seconds is the number of seconds to sleep. format: int64 type: integer required: @@ -2030,8 +1941,7 @@ spec: lifecycle hooks will fail in runtime when tcp handler is specified. properties: host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' + description: 'Optional: Host name to connect to, defaults to the pod IP.' type: string port: anyOf: @@ -2074,8 +1984,7 @@ spec: x-kubernetes-list-type: atomic type: object httpGet: - description: HTTPGet specifies the http request - to perform. + description: HTTPGet specifies the http request to perform. properties: host: description: |- @@ -2083,11 +1992,9 @@ spec: "Host" in httpHeaders instead. type: string httpHeaders: - description: Custom headers to set in the - request. HTTP allows repeated headers. + description: Custom headers to set in the request. HTTP allows repeated headers. 
items: - description: HTTPHeader describes a custom - header to be used in HTTP probes + description: HTTPHeader describes a custom header to be used in HTTP probes properties: name: description: |- @@ -2104,8 +2011,7 @@ spec: type: array x-kubernetes-list-type: atomic path: - description: Path to access on the HTTP - server. + description: Path to access on the HTTP server. type: string port: anyOf: @@ -2125,12 +2031,10 @@ spec: - port type: object sleep: - description: Sleep represents the duration that - the container should sleep before being terminated. + description: Sleep represents the duration that the container should sleep before being terminated. properties: seconds: - description: Seconds is the number of seconds - to sleep. + description: Seconds is the number of seconds to sleep. format: int64 type: integer required: @@ -2143,8 +2047,7 @@ spec: lifecycle hooks will fail in runtime when tcp handler is specified. properties: host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' + description: 'Optional: Host name to connect to, defaults to the pod IP.' type: string port: anyOf: @@ -2189,28 +2092,25 @@ spec: format: int32 type: integer grpc: - description: GRPC specifies an action involving - a GRPC port. + description: GRPC specifies an action involving a GRPC port. properties: port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. + description: Port number of the gRPC service. Number must be in the range 1 to 65535. format: int32 type: integer service: + default: "" description: |- Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - + If this is not specified, the default behavior is defined by gRPC. type: string required: - port type: object httpGet: - description: HTTPGet specifies the http request - to perform. + description: HTTPGet specifies the http request to perform. 
properties: host: description: |- @@ -2218,11 +2118,9 @@ spec: "Host" in httpHeaders instead. type: string httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. + description: Custom headers to set in the request. HTTP allows repeated headers. items: - description: HTTPHeader describes a custom - header to be used in HTTP probes + description: HTTPHeader describes a custom header to be used in HTTP probes properties: name: description: |- @@ -2277,12 +2175,10 @@ spec: format: int32 type: integer tcpSocket: - description: TCPSocket specifies an action involving - a TCP port. + description: TCPSocket specifies an action involving a TCP port. properties: host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' + description: 'Optional: Host name to connect to, defaults to the pod IP.' type: string port: anyOf: @@ -2334,8 +2230,7 @@ spec: For more information See https://github.com/kubernetes/kubernetes/issues/108255. Cannot be updated. items: - description: ContainerPort represents a network port - in a single container. + description: ContainerPort represents a network port in a single container. properties: containerPort: description: |- @@ -2344,8 +2239,7 @@ spec: format: int32 type: integer hostIP: - description: What host IP to bind the external - port to. + description: What host IP to bind the external port to. type: string hostPort: description: |- @@ -2404,28 +2298,25 @@ spec: format: int32 type: integer grpc: - description: GRPC specifies an action involving - a GRPC port. + description: GRPC specifies an action involving a GRPC port. properties: port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. + description: Port number of the gRPC service. Number must be in the range 1 to 65535. 
format: int32 type: integer service: + default: "" description: |- Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - + If this is not specified, the default behavior is defined by gRPC. type: string required: - port type: object httpGet: - description: HTTPGet specifies the http request - to perform. + description: HTTPGet specifies the http request to perform. properties: host: description: |- @@ -2433,11 +2324,9 @@ spec: "Host" in httpHeaders instead. type: string httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. + description: Custom headers to set in the request. HTTP allows repeated headers. items: - description: HTTPHeader describes a custom - header to be used in HTTP probes + description: HTTPHeader describes a custom header to be used in HTTP probes properties: name: description: |- @@ -2492,12 +2381,10 @@ spec: format: int32 type: integer tcpSocket: - description: TCPSocket specifies an action involving - a TCP port. + description: TCPSocket specifies an action involving a TCP port. properties: host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' + description: 'Optional: Host name to connect to, defaults to the pod IP.' type: string port: anyOf: @@ -2536,8 +2423,7 @@ spec: resizePolicy: description: Resources resize policy for the container. items: - description: ContainerResizePolicy represents resource - resize policy for the container. + description: ContainerResizePolicy represents resource resize policy for the container. properties: resourceName: description: |- @@ -2565,16 +2451,13 @@ spec: description: |- Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - - + This is an alpha field and requires enabling the DynamicResourceAllocation feature gate. - - + This field is immutable. It can only be set for containers. 
items: - description: ResourceClaim references one entry - in PodSpec.ResourceClaims. + description: ResourceClaim references one entry in PodSpec.ResourceClaims. properties: name: description: |- @@ -2582,6 +2465,12 @@ spec: the Pod where this field is used. It makes that resource available inside a container. type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string required: - name type: object @@ -2681,16 +2570,14 @@ spec: add: description: Added capabilities items: - description: Capability represent POSIX capabilities - type + description: Capability represent POSIX capabilities type type: string type: array x-kubernetes-list-type: atomic drop: description: Removed capabilities items: - description: Capability represent POSIX capabilities - type + description: Capability represent POSIX capabilities type type: string type: array x-kubernetes-list-type: atomic @@ -2705,7 +2592,7 @@ spec: procMount: description: |- procMount denotes the type of proc mount to use for the containers. - The default is DefaultProcMount which uses the container runtime defaults for + The default value is Default which uses the container runtime defaults for readonly paths and masked paths. This requires the ProcMountType feature flag to be enabled. Note that this field cannot be set when spec.os.name is windows. @@ -2752,20 +2639,16 @@ spec: Note that this field cannot be set when spec.os.name is windows. properties: level: - description: Level is SELinux level label that - applies to the container. + description: Level is SELinux level label that applies to the container. type: string role: - description: Role is a SELinux role label that - applies to the container. + description: Role is a SELinux role label that applies to the container. 
type: string type: - description: Type is a SELinux type label that - applies to the container. + description: Type is a SELinux type label that applies to the container. type: string user: - description: User is a SELinux user label that - applies to the container. + description: User is a SELinux user label that applies to the container. type: string type: object seccompProfile: @@ -2786,8 +2669,7 @@ spec: description: |- type indicates which kind of seccomp profile will be applied. Valid options are: - - + Localhost - a profile defined in a file on the node should be used. RuntimeDefault - the container runtime default profile should be used. Unconfined - no profile should be applied. @@ -2809,8 +2691,7 @@ spec: GMSA credential spec named by the GMSACredentialSpecName field. type: string gmsaCredentialSpecName: - description: GMSACredentialSpecName is the name - of the GMSA credential spec to use. + description: GMSACredentialSpecName is the name of the GMSA credential spec to use. type: string hostProcess: description: |- @@ -2860,28 +2741,25 @@ spec: format: int32 type: integer grpc: - description: GRPC specifies an action involving - a GRPC port. + description: GRPC specifies an action involving a GRPC port. properties: port: - description: Port number of the gRPC service. - Number must be in the range 1 to 65535. + description: Port number of the gRPC service. Number must be in the range 1 to 65535. format: int32 type: integer service: + default: "" description: |- Service is the name of the service to place in the gRPC HealthCheckRequest (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). - - + If this is not specified, the default behavior is defined by gRPC. type: string required: - port type: object httpGet: - description: HTTPGet specifies the http request - to perform. + description: HTTPGet specifies the http request to perform. properties: host: description: |- @@ -2889,11 +2767,9 @@ spec: "Host" in httpHeaders instead. 
type: string httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. + description: Custom headers to set in the request. HTTP allows repeated headers. items: - description: HTTPHeader describes a custom - header to be used in HTTP probes + description: HTTPHeader describes a custom header to be used in HTTP probes properties: name: description: |- @@ -2948,12 +2824,10 @@ spec: format: int32 type: integer tcpSocket: - description: TCPSocket specifies an action involving - a TCP port. + description: TCPSocket specifies an action involving a TCP port. properties: host: - description: 'Optional: Host name to connect - to, defaults to the pod IP.' + description: 'Optional: Host name to connect to, defaults to the pod IP.' type: string port: anyOf: @@ -3031,20 +2905,15 @@ spec: Default is false. type: boolean volumeDevices: - description: volumeDevices is the list of block devices - to be used by the container. + description: volumeDevices is the list of block devices to be used by the container. items: - description: volumeDevice describes a mapping of a - raw block device within a container. + description: volumeDevice describes a mapping of a raw block device within a container. properties: devicePath: - description: devicePath is the path inside of - the container that the device will be mapped - to. + description: devicePath is the path inside of the container that the device will be mapped to. type: string name: - description: name must match the name of a persistentVolumeClaim - in the pod + description: name must match the name of a persistentVolumeClaim in the pod type: string required: - devicePath @@ -3059,8 +2928,7 @@ spec: Pod volumes to mount into the container's filesystem. Cannot be updated. items: - description: VolumeMount describes a mounting of a - Volume within a container. + description: VolumeMount describes a mounting of a Volume within a container. 
properties: mountPath: description: |- @@ -3088,23 +2956,19 @@ spec: description: |- RecursiveReadOnly specifies whether read-only mounts should be handled recursively. - - + If ReadOnly is false, this field has no meaning and must be unspecified. - - + If ReadOnly is true, and this field is set to Disabled, the mount is not made recursively read-only. If this field is set to IfPossible, the mount is made recursively read-only, if it is supported by the container runtime. If this field is set to Enabled, the mount is made recursively read-only if it is supported by the container runtime, otherwise the pod will not be started and an error will be generated to indicate the reason. - - + If this field is set to IfPossible or Enabled, MountPropagation must be set to None (or be unspecified, which defaults to None). - - + If this field is not specified, it is treated as an equivalent of Disabled. type: string subPath: @@ -3139,9 +3003,7 @@ spec: type: object type: array additionalMetadata: - description: AdditionalMetadata defines which additional metadata, - such as labels and annotations, must be attached to the - created resource. + description: AdditionalMetadata defines which additional metadata, such as labels and annotations, must be attached to the created resource. properties: annotations: additionalProperties: @@ -3159,8 +3021,7 @@ spec: properties: apiServer: items: - description: VolumeMount describes a mounting of a Volume - within a container. + description: VolumeMount describes a mounting of a Volume within a container. properties: mountPath: description: |- @@ -3188,23 +3049,19 @@ spec: description: |- RecursiveReadOnly specifies whether read-only mounts should be handled recursively. - - + If ReadOnly is false, this field has no meaning and must be unspecified. - - + If ReadOnly is true, and this field is set to Disabled, the mount is not made recursively read-only. 
If this field is set to IfPossible, the mount is made recursively read-only, if it is supported by the container runtime. If this field is set to Enabled, the mount is made recursively read-only if it is supported by the container runtime, otherwise the pod will not be started and an error will be generated to indicate the reason. - - + If this field is set to IfPossible or Enabled, MountPropagation must be set to None (or be unspecified, which defaults to None). - - + If this field is not specified, it is treated as an equivalent of Disabled. type: string subPath: @@ -3226,8 +3083,7 @@ spec: type: array controllerManager: items: - description: VolumeMount describes a mounting of a Volume - within a container. + description: VolumeMount describes a mounting of a Volume within a container. properties: mountPath: description: |- @@ -3255,23 +3111,19 @@ spec: description: |- RecursiveReadOnly specifies whether read-only mounts should be handled recursively. - - + If ReadOnly is false, this field has no meaning and must be unspecified. - - + If ReadOnly is true, and this field is set to Disabled, the mount is not made recursively read-only. If this field is set to IfPossible, the mount is made recursively read-only, if it is supported by the container runtime. If this field is set to Enabled, the mount is made recursively read-only if it is supported by the container runtime, otherwise the pod will not be started and an error will be generated to indicate the reason. - - + If this field is set to IfPossible or Enabled, MountPropagation must be set to None (or be unspecified, which defaults to None). - - + If this field is not specified, it is treated as an equivalent of Disabled. type: string subPath: @@ -3293,8 +3145,7 @@ spec: type: array scheduler: items: - description: VolumeMount describes a mounting of a Volume - within a container. + description: VolumeMount describes a mounting of a Volume within a container. 
properties: mountPath: description: |- @@ -3322,23 +3173,19 @@ spec: description: |- RecursiveReadOnly specifies whether read-only mounts should be handled recursively. - - + If ReadOnly is false, this field has no meaning and must be unspecified. - - + If ReadOnly is true, and this field is set to Disabled, the mount is not made recursively read-only. If this field is set to IfPossible, the mount is made recursively read-only, if it is supported by the container runtime. If this field is set to Enabled, the mount is made recursively read-only if it is supported by the container runtime, otherwise the pod will not be started and an error will be generated to indicate the reason. - - + If this field is set to IfPossible or Enabled, MountPropagation must be set to None (or be unspecified, which defaults to None). - - + If this field is not specified, it is treated as an equivalent of Disabled. type: string subPath: @@ -3360,11 +3207,9 @@ spec: type: array type: object additionalVolumes: - description: AdditionalVolumes allows to add additional volumes - to the Control Plane deployment. + description: AdditionalVolumes allows to add additional volumes to the Control Plane deployment. items: - description: Volume represents a named volume in a pod that - may be accessed by any container in the pod. + description: Volume represents a named volume in a pod that may be accessed by any container in the pod. properties: awsElasticBlockStore: description: |- @@ -3378,7 +3223,6 @@ spec: Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. 
More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore - TODO: how do we prevent errors in the filesystem from compromising the machine type: string partition: description: |- @@ -3402,35 +3246,29 @@ spec: - volumeID type: object azureDisk: - description: azureDisk represents an Azure Data Disk - mount on the host and bind mount to the pod. + description: azureDisk represents an Azure Data Disk mount on the host and bind mount to the pod. properties: cachingMode: - description: 'cachingMode is the Host Caching mode: - None, Read Only, Read Write.' + description: 'cachingMode is the Host Caching mode: None, Read Only, Read Write.' type: string diskName: - description: diskName is the Name of the data disk - in the blob storage + description: diskName is the Name of the data disk in the blob storage type: string diskURI: - description: diskURI is the URI of data disk in - the blob storage + description: diskURI is the URI of data disk in the blob storage type: string fsType: + default: ext4 description: |- fsType is Filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. type: string kind: - description: 'kind expected values are Shared: multiple - blob disks per storage account Dedicated: single - blob disk per storage account Managed: azure - managed data disk (only in managed availability - set). defaults to shared' + description: 'kind expected values are Shared: multiple blob disks per storage account Dedicated: single blob disk per storage account Managed: azure managed data disk (only in managed availability set). defaults to shared' type: string readOnly: + default: false description: |- readOnly Defaults to false (read/write). ReadOnly here will force the ReadOnly setting in VolumeMounts. 
@@ -3440,8 +3278,7 @@ spec: - diskURI type: object azureFile: - description: azureFile represents an Azure File Service - mount on the host and bind mount to the pod. + description: azureFile represents an Azure File Service mount on the host and bind mount to the pod. properties: readOnly: description: |- @@ -3449,8 +3286,7 @@ spec: the ReadOnly setting in VolumeMounts. type: boolean secretName: - description: secretName is the name of secret that - contains Azure Storage Account Name and Key + description: secretName is the name of secret that contains Azure Storage Account Name and Key type: string shareName: description: shareName is the azure share Name @@ -3460,8 +3296,7 @@ spec: - shareName type: object cephfs: - description: cephFS represents a Ceph FS mount on the - host that shares a pod's lifetime + description: cephFS represents a Ceph FS mount on the host that shares a pod's lifetime properties: monitors: description: |- @@ -3472,9 +3307,7 @@ spec: type: array x-kubernetes-list-type: atomic path: - description: 'path is Optional: Used as the mounted - root, rather than the full Ceph tree, default - is /' + description: 'path is Optional: Used as the mounted root, rather than the full Ceph tree, default is /' type: string readOnly: description: |- @@ -3493,10 +3326,13 @@ spec: More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it properties: name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? type: string type: object x-kubernetes-map-type: atomic @@ -3532,10 +3368,13 @@ spec: to OpenStack. properties: name: + default: "" description: |- Name of the referent. 
+ This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? type: string type: object x-kubernetes-map-type: atomic @@ -3548,8 +3387,7 @@ spec: - volumeID type: object configMap: - description: configMap represents a configMap that should - populate this volume + description: configMap represents a configMap that should populate this volume properties: defaultMode: description: |- @@ -3572,8 +3410,7 @@ spec: the volume setup will error unless it is marked optional. Paths must be relative and may not contain the '..' path or start with '..'. items: - description: Maps a string key to a path within - a volume. + description: Maps a string key to a path within a volume. properties: key: description: key is the key to project. @@ -3602,21 +3439,21 @@ spec: type: array x-kubernetes-list-type: atomic name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? type: string optional: - description: optional specify whether the ConfigMap - or its keys must be defined + description: optional specify whether the ConfigMap or its keys must be defined type: boolean type: object x-kubernetes-map-type: atomic csi: - description: csi (Container Storage Interface) represents - ephemeral storage that is handled by certain external - CSI drivers (Beta feature). + description: csi (Container Storage Interface) represents ephemeral storage that is handled by certain external CSI drivers (Beta feature). 
properties: driver: description: |- @@ -3638,10 +3475,13 @@ spec: secret object contains more than one secret, all secret references are passed. properties: name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? type: string type: object x-kubernetes-map-type: atomic @@ -3661,8 +3501,7 @@ spec: - driver type: object downwardAPI: - description: downwardAPI represents downward API about - the pod that should populate this volume + description: downwardAPI represents downward API about the pod that should populate this volume properties: defaultMode: description: |- @@ -3677,26 +3516,18 @@ spec: format: int32 type: integer items: - description: Items is a list of downward API volume - file + description: Items is a list of downward API volume file items: - description: DownwardAPIVolumeFile represents - information to create the file containing the - pod field + description: DownwardAPIVolumeFile represents information to create the file containing the pod field properties: fieldRef: - description: 'Required: Selects a field of - the pod: only annotations, labels, name, - namespace and uid are supported.' + description: 'Required: Selects a field of the pod: only annotations, labels, name, namespace and uid are supported.' properties: apiVersion: - description: Version of the schema the - FieldPath is written in terms of, defaults - to "v1". + description: Version of the schema the FieldPath is written in terms of, defaults to "v1". type: string fieldPath: - description: Path of the field to select - in the specified API version. + description: Path of the field to select in the specified API version. 
type: string required: - fieldPath @@ -3713,11 +3544,7 @@ spec: format: int32 type: integer path: - description: 'Required: Path is the relative - path name of the file to be created. Must - not be absolute or contain the ''..'' path. - Must be utf-8 encoded. The first item of - the relative path must not start with ''..''' + description: 'Required: Path is the relative path name of the file to be created. Must not be absolute or contain the ''..'' path. Must be utf-8 encoded. The first item of the relative path must not start with ''..''' type: string resourceFieldRef: description: |- @@ -3725,16 +3552,13 @@ spec: (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. properties: containerName: - description: 'Container name: required - for volumes, optional for env vars' + description: 'Container name: required for volumes, optional for env vars' type: string divisor: anyOf: - type: integer - type: string - description: Specifies the output format - of the exposed resources, defaults to - "1" + description: Specifies the output format of the exposed resources, defaults to "1" pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true resource: @@ -3781,8 +3605,7 @@ spec: ephemeral represents a volume that is handled by a cluster storage driver. The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts, and deleted when the pod is removed. - - + Use this if: a) the volume is only needed while the pod runs, b) features of normal volumes like restoring from snapshot or capacity @@ -3792,18 +3615,15 @@ spec: a PersistentVolumeClaim (see EphemeralVolumeSource for more information on the connection between this volume type and PersistentVolumeClaim). - - + Use PersistentVolumeClaim or one of the vendor-specific APIs for volumes that persist for longer than the lifecycle of an individual pod. 
- - + Use CSI for light-weight local ephemeral volumes if the CSI driver is meant to be used that way - see the documentation of the driver for more information. - - + A pod can use both types of ephemeral volumes and persistent volumes at the same time. properties: @@ -3816,8 +3636,7 @@ spec: `` is the name from the `PodSpec.Volumes` array entry. Pod validation will reject the pod if the concatenated name is not valid for a PVC (for example, too long). - - + An existing PVC with that name that is not owned by the pod will *not* be used for the pod to avoid using an unrelated volume by mistake. Starting the pod is then blocked until @@ -3826,12 +3645,10 @@ spec: owner reference to the pod once the pod exists. Normally this should not be necessary, but it may be useful when manually reconstructing a broken cluster. - - + This field is read-only and no changes will be made by Kubernetes to the PVC after it has been created. - - + Required, must not be nil. properties: metadata: @@ -3873,12 +3690,10 @@ spec: For any other third-party types, APIGroup is required. type: string kind: - description: Kind is the type of resource - being referenced + description: Kind is the type of resource being referenced type: string name: - description: Name is the name of resource - being referenced + description: Name is the name of resource being referenced type: string required: - kind @@ -3918,12 +3733,10 @@ spec: For any other third-party types, APIGroup is required. type: string kind: - description: Kind is the type of resource - being referenced + description: Kind is the type of resource being referenced type: string name: - description: Name is the name of resource - being referenced + description: Name is the name of resource being referenced type: string namespace: description: |- @@ -3969,22 +3782,17 @@ spec: type: object type: object selector: - description: selector is a label query over - volumes to consider for binding. 
+ description: selector is a label query over volumes to consider for binding. properties: matchExpressions: - description: matchExpressions is a list - of label selector requirements. The - requirements are ANDed. + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. items: description: |- A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. properties: key: - description: key is the label - key that the selector applies - to. + description: key is the label key that the selector applies to. type: string operator: description: |- @@ -4035,7 +3843,7 @@ spec: set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Alpha) Using this field requires the VolumeAttributesClass feature gate to be enabled. + (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -4043,8 +3851,7 @@ spec: Value of Filesystem is implied when not included in claim spec. type: string volumeName: - description: volumeName is the binding reference - to the PersistentVolume backing this claim. + description: volumeName is the binding reference to the PersistentVolume backing this claim. type: string type: object required: @@ -4052,16 +3859,13 @@ spec: type: object type: object fc: - description: fc represents a Fibre Channel resource - that is attached to a kubelet's host machine and then - exposed to the pod. + description: fc represents a Fibre Channel resource that is attached to a kubelet's host machine and then exposed to the pod. properties: fsType: description: |- fsType is the filesystem type to mount. Must be a filesystem type supported by the host operating system. Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. 
- TODO: how do we prevent errors in the filesystem from compromising the machine type: string lun: description: 'lun is Optional: FC target lun number' @@ -4073,8 +3877,7 @@ spec: the ReadOnly setting in VolumeMounts. type: boolean targetWWNs: - description: 'targetWWNs is Optional: FC target - worldwide names (WWNs)' + description: 'targetWWNs is Optional: FC target worldwide names (WWNs)' items: type: string type: array @@ -4094,8 +3897,7 @@ spec: provisioned/attached using an exec based plugin. properties: driver: - description: driver is the name of the driver to - use for this volume. + description: driver is the name of the driver to use for this volume. type: string fsType: description: |- @@ -4106,8 +3908,7 @@ spec: options: additionalProperties: type: string - description: 'options is Optional: this field holds - extra command options if any.' + description: 'options is Optional: this field holds extra command options if any.' type: object readOnly: description: |- @@ -4123,10 +3924,13 @@ spec: scripts. properties: name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? type: string type: object x-kubernetes-map-type: atomic @@ -4134,9 +3938,7 @@ spec: - driver type: object flocker: - description: flocker represents a Flocker volume attached - to a kubelet's host machine. This depends on the Flocker - control service being running + description: flocker represents a Flocker volume attached to a kubelet's host machine. 
This depends on the Flocker control service being running properties: datasetName: description: |- @@ -4144,8 +3946,7 @@ spec: should be considered as deprecated type: string datasetUUID: - description: datasetUUID is the UUID of the dataset. - This is unique identifier of a Flocker dataset + description: datasetUUID is the UUID of the dataset. This is unique identifier of a Flocker dataset type: string type: object gcePersistentDisk: @@ -4160,7 +3961,6 @@ spec: Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk - TODO: how do we prevent errors in the filesystem from compromising the machine type: string partition: description: |- @@ -4203,8 +4003,7 @@ spec: description: repository is the URL type: string revision: - description: revision is the commit hash for the - specified revision. + description: revision is the commit hash for the specified revision. type: string required: - repository @@ -4241,9 +4040,6 @@ spec: used for system agents or other privileged things that are allowed to see the host machine. Most containers will NOT need this. More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath - --- - TODO(jonesdl) We need to restrict who can use host directory mounts and who can/can not - mount host directories as read/write. properties: path: description: |- @@ -4260,6 +4056,41 @@ spec: required: - path type: object + image: + description: |- + image represents an OCI object (a container image or artifact) pulled and mounted on the kubelet's host machine. + The volume is resolved at pod startup depending on which PullPolicy value is provided: + + - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. + - Never: the kubelet never pulls the reference and only uses a local image or artifact. 
Container creation will fail if the reference isn't present. + - IfNotPresent: the kubelet pulls if the reference isn't already present on disk. Container creation will fail if the reference isn't present and the pull fails. + + The volume gets re-resolved if the pod gets deleted and recreated, which means that new remote content will become available on pod recreation. + A failure to resolve or pull the image during pod startup will block containers from starting and may add significant latency. Failures will be retried using normal volume backoff and will be reported on the pod reason and message. + The types of objects that may be mounted by this volume are defined by the container runtime implementation on a host machine and at minimum must include all valid types supported by the container image field. + The OCI object gets mounted in a single directory (spec.containers[*].volumeMounts.mountPath) by merging the manifest layers in the same way as for container images. + The volume will be mounted read-only (ro) and non-executable files (noexec). + Sub path mounts for containers are not supported (spec.containers[*].volumeMounts.subpath). + The field spec.securityContext.fsGroupChangePolicy has no effect on this volume type. + properties: + pullPolicy: + description: |- + Policy for pulling OCI objects. Possible values are: + Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. + Never: the kubelet never pulls the reference and only uses a local image or artifact. Container creation will fail if the reference isn't present. + IfNotPresent: the kubelet pulls if the reference isn't already present on disk. Container creation will fail if the reference isn't present and the pull fails. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + type: string + reference: + description: |- + Required: Image or artifact reference to be used. 
+ Behaves in the same way as pod.spec.containers[*].image. + Pull secrets will be assembled in the same way as for the container image by looking up node credentials, SA image pull secrets, and pod spec image pull secrets. + More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management to default or override + container images in workload controllers like Deployments and StatefulSets. + type: string + type: object iscsi: description: |- iscsi represents an ISCSI Disk resource that is attached to a @@ -4267,12 +4098,10 @@ spec: More info: https://examples.k8s.io/volumes/iscsi/README.md properties: chapAuthDiscovery: - description: chapAuthDiscovery defines whether support - iSCSI Discovery CHAP authentication + description: chapAuthDiscovery defines whether support iSCSI Discovery CHAP authentication type: boolean chapAuthSession: - description: chapAuthSession defines whether support - iSCSI Session CHAP authentication + description: chapAuthSession defines whether support iSCSI Session CHAP authentication type: boolean fsType: description: |- @@ -4280,7 +4109,6 @@ spec: Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi - TODO: how do we prevent errors in the filesystem from compromising the machine type: string initiatorName: description: |- @@ -4292,6 +4120,7 @@ spec: description: iqn is the target iSCSI Qualified Name. type: string iscsiInterface: + default: default description: |- iscsiInterface is the interface Name that uses an iSCSI transport. Defaults to 'default' (tcp). @@ -4314,14 +4143,16 @@ spec: Defaults to false. 
type: boolean secretRef: - description: secretRef is the CHAP Secret for iSCSI - target and initiator authentication + description: secretRef is the CHAP Secret for iSCSI target and initiator authentication properties: name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? type: string type: object x-kubernetes-map-type: atomic @@ -4386,9 +4217,7 @@ spec: - claimName type: object photonPersistentDisk: - description: photonPersistentDisk represents a PhotonController - persistent disk attached and mounted on kubelets host - machine + description: photonPersistentDisk represents a PhotonController persistent disk attached and mounted on kubelets host machine properties: fsType: description: |- @@ -4397,15 +4226,13 @@ spec: Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. type: string pdID: - description: pdID is the ID that identifies Photon - Controller persistent disk + description: pdID is the ID that identifies Photon Controller persistent disk type: string required: - pdID type: object portworxVolume: - description: portworxVolume represents a portworx volume - attached and mounted on kubelets host machine + description: portworxVolume represents a portworx volume attached and mounted on kubelets host machine properties: fsType: description: |- @@ -4419,15 +4246,13 @@ spec: the ReadOnly setting in VolumeMounts. 
type: boolean volumeID: - description: volumeID uniquely identifies a Portworx - volume + description: volumeID uniquely identifies a Portworx volume type: string required: - volumeID type: object projected: - description: projected items for all in one resources - secrets, configmaps, and downward API + description: projected items for all in one resources secrets, configmaps, and downward API properties: defaultMode: description: |- @@ -4440,24 +4265,24 @@ spec: format: int32 type: integer sources: - description: sources is the list of volume projections + description: |- + sources is the list of volume projections. Each entry in this list + handles one source. items: - description: Projection that may be projected - along with other supported volume types + description: |- + Projection that may be projected along with other supported volume types. + Exactly one of these fields must be set. properties: clusterTrustBundle: description: |- ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field of ClusterTrustBundle objects in an auto-updating file. - - + Alpha, gated by the ClusterTrustBundleProjection feature gate. - - + ClusterTrustBundle objects can either be selected by name, or by the combination of signer name and a label selector. - - + Kubelet performs aggressive normalization of the PEM contents written into the pod filesystem. Esoteric PEM features such as inter-block comments and block headers are stripped. Certificates are deduplicated. @@ -4472,18 +4297,14 @@ spec: everything". properties: matchExpressions: - description: matchExpressions is a - list of label selector requirements. - The requirements are ANDed. + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. items: description: |- A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. 
properties: key: - description: key is the label - key that the selector applies - to. + description: key is the label key that the selector applies to. type: string operator: description: |- @@ -4530,8 +4351,7 @@ spec: ClusterTrustBundles. type: boolean path: - description: Relative path from the volume - root to write the bundle. + description: Relative path from the volume root to write the bundle. type: string signerName: description: |- @@ -4543,8 +4363,7 @@ spec: - path type: object configMap: - description: configMap information about the - configMap data to project + description: configMap information about the configMap data to project properties: items: description: |- @@ -4556,8 +4375,7 @@ spec: the volume setup will error unless it is marked optional. Paths must be relative and may not contain the '..' path or start with '..'. items: - description: Maps a string key to a - path within a volume. + description: Maps a string key to a path within a volume. properties: key: description: key is the key to project. @@ -4586,44 +4404,35 @@ spec: type: array x-kubernetes-list-type: atomic name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? 
type: string optional: - description: optional specify whether - the ConfigMap or its keys must be defined + description: optional specify whether the ConfigMap or its keys must be defined type: boolean type: object x-kubernetes-map-type: atomic downwardAPI: - description: downwardAPI information about - the downwardAPI data to project + description: downwardAPI information about the downwardAPI data to project properties: items: - description: Items is a list of DownwardAPIVolume - file + description: Items is a list of DownwardAPIVolume file items: - description: DownwardAPIVolumeFile represents - information to create the file containing - the pod field + description: DownwardAPIVolumeFile represents information to create the file containing the pod field properties: fieldRef: - description: 'Required: Selects - a field of the pod: only annotations, - labels, name, namespace and uid - are supported.' + description: 'Required: Selects a field of the pod: only annotations, labels, name, namespace and uid are supported.' properties: apiVersion: - description: Version of the - schema the FieldPath is written - in terms of, defaults to "v1". + description: Version of the schema the FieldPath is written in terms of, defaults to "v1". type: string fieldPath: - description: Path of the field - to select in the specified - API version. + description: Path of the field to select in the specified API version. type: string required: - fieldPath @@ -4640,13 +4449,7 @@ spec: format: int32 type: integer path: - description: 'Required: Path is the - relative path name of the file - to be created. Must not be absolute - or contain the ''..'' path. Must - be utf-8 encoded. The first item - of the relative path must not - start with ''..''' + description: 'Required: Path is the relative path name of the file to be created. Must not be absolute or contain the ''..'' path. Must be utf-8 encoded. 
The first item of the relative path must not start with ''..''' type: string resourceFieldRef: description: |- @@ -4654,22 +4457,17 @@ spec: (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. properties: containerName: - description: 'Container name: - required for volumes, optional - for env vars' + description: 'Container name: required for volumes, optional for env vars' type: string divisor: anyOf: - type: integer - type: string - description: Specifies the output - format of the exposed resources, - defaults to "1" + description: Specifies the output format of the exposed resources, defaults to "1" pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true resource: - description: 'Required: resource - to select' + description: 'Required: resource to select' type: string required: - resource @@ -4682,8 +4480,7 @@ spec: x-kubernetes-list-type: atomic type: object secret: - description: secret information about the - secret data to project + description: secret information about the secret data to project properties: items: description: |- @@ -4695,8 +4492,7 @@ spec: the volume setup will error unless it is marked optional. Paths must be relative and may not contain the '..' path or start with '..'. items: - description: Maps a string key to a - path within a volume. + description: Maps a string key to a path within a volume. properties: key: description: key is the key to project. @@ -4725,20 +4521,21 @@ spec: type: array x-kubernetes-list-type: atomic name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? 
type: string optional: - description: optional field specify whether - the Secret or its key must be defined + description: optional field specify whether the Secret or its key must be defined type: boolean type: object x-kubernetes-map-type: atomic serviceAccountToken: - description: serviceAccountToken is information - about the serviceAccountToken data to project + description: serviceAccountToken is information about the serviceAccountToken data to project properties: audience: description: |- @@ -4770,8 +4567,7 @@ spec: x-kubernetes-list-type: atomic type: object quobyte: - description: quobyte represents a Quobyte mount on the - host that shares a pod's lifetime + description: quobyte represents a Quobyte mount on the host that shares a pod's lifetime properties: group: description: |- @@ -4800,8 +4596,7 @@ spec: Defaults to serivceaccount user type: string volume: - description: volume is a string that references - an already created Quobyte volume by name. + description: volume is a string that references an already created Quobyte volume by name. type: string required: - registry @@ -4818,7 +4613,6 @@ spec: Tip: Ensure that the filesystem type is supported by the host operating system. Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd - TODO: how do we prevent errors in the filesystem from compromising the machine type: string image: description: |- @@ -4826,6 +4620,7 @@ spec: More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it type: string keyring: + default: /etc/ceph/keyring description: |- keyring is the path to key ring for RBDUser. Default is /etc/ceph/keyring. @@ -4840,6 +4635,7 @@ spec: type: array x-kubernetes-list-type: atomic pool: + default: rbd description: |- pool is the rados pool name. Default is rbd. 
@@ -4859,14 +4655,18 @@ spec: More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it properties: name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? type: string type: object x-kubernetes-map-type: atomic user: + default: admin description: |- user is the rados user name. Default is admin. @@ -4877,10 +4677,10 @@ spec: - monitors type: object scaleIO: - description: scaleIO represents a ScaleIO persistent - volume attached and mounted on Kubernetes nodes. + description: scaleIO represents a ScaleIO persistent volume attached and mounted on Kubernetes nodes. properties: fsType: + default: xfs description: |- fsType is the filesystem type to mount. Must be a filesystem type supported by the host operating system. @@ -4888,12 +4688,10 @@ spec: Default is "xfs". type: string gateway: - description: gateway is the host address of the - ScaleIO API Gateway. + description: gateway is the host address of the ScaleIO API Gateway. type: string protectionDomain: - description: protectionDomain is the name of the - ScaleIO Protection Domain for the configured storage. + description: protectionDomain is the name of the ScaleIO Protection Domain for the configured storage. type: string readOnly: description: |- @@ -4906,29 +4704,30 @@ spec: sensitive information. If this is not provided, Login operation will fail. properties: name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? type: string type: object x-kubernetes-map-type: atomic sslEnabled: - description: sslEnabled Flag enable/disable SSL - communication with Gateway, default false + description: sslEnabled Flag enable/disable SSL communication with Gateway, default false type: boolean storageMode: + default: ThinProvisioned description: |- storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. Default is ThinProvisioned. type: string storagePool: - description: storagePool is the ScaleIO Storage - Pool associated with the protection domain. + description: storagePool is the ScaleIO Storage Pool associated with the protection domain. type: string system: - description: system is the name of the storage system - as configured in ScaleIO. + description: system is the name of the storage system as configured in ScaleIO. type: string volumeName: description: |- @@ -4966,8 +4765,7 @@ spec: the volume setup will error unless it is marked optional. Paths must be relative and may not contain the '..' path or start with '..'. items: - description: Maps a string key to a path within - a volume. + description: Maps a string key to a path within a volume. properties: key: description: key is the key to project. @@ -4996,8 +4794,7 @@ spec: type: array x-kubernetes-list-type: atomic optional: - description: optional field specify whether the - Secret or its keys must be defined + description: optional field specify whether the Secret or its keys must be defined type: boolean secretName: description: |- @@ -5006,8 +4803,7 @@ spec: type: string type: object storageos: - description: storageOS represents a StorageOS volume - attached and mounted on Kubernetes nodes. + description: storageOS represents a StorageOS volume attached and mounted on Kubernetes nodes. 
properties: fsType: description: |- @@ -5026,10 +4822,13 @@ spec: credentials. If not specified, default values will be attempted. properties: name: + default: "" description: |- Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? type: string type: object x-kubernetes-map-type: atomic @@ -5049,8 +4848,7 @@ spec: type: string type: object vsphereVolume: - description: vsphereVolume represents a vSphere volume - attached and mounted on kubelets host machine + description: vsphereVolume represents a vSphere volume attached and mounted on kubelets host machine properties: fsType: description: |- @@ -5059,17 +4857,13 @@ spec: Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. type: string storagePolicyID: - description: storagePolicyID is the storage Policy - Based Management (SPBM) profile ID associated - with the StoragePolicyName. + description: storagePolicyID is the storage Policy Based Management (SPBM) profile ID associated with the StoragePolicyName. type: string storagePolicyName: - description: storagePolicyName is the storage Policy - Based Management (SPBM) profile name. + description: storagePolicyName is the storage Policy Based Management (SPBM) profile name. type: string volumePath: - description: volumePath is the path that identifies - vSphere volume vmdk + description: volumePath is the path that identifies vSphere volume vmdk type: string required: - volumePath @@ -5084,8 +4878,7 @@ spec: More info: https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes-using-node-affinity/ properties: nodeAffinity: - description: Describes node affinity scheduling rules - for the pod. 
+ description: Describes node affinity scheduling rules for the pod. properties: preferredDuringSchedulingIgnoredDuringExecution: description: |- @@ -5104,20 +4897,17 @@ spec: (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). properties: preference: - description: A node selector term, associated - with the corresponding weight. + description: A node selector term, associated with the corresponding weight. properties: matchExpressions: - description: A list of node selector requirements - by node's labels. + description: A list of node selector requirements by node's labels. items: description: |- A node selector requirement is a selector that contains values, a key, and an operator that relates the key and values. properties: key: - description: The label key that the - selector applies to. + description: The label key that the selector applies to. type: string operator: description: |- @@ -5142,16 +4932,14 @@ spec: type: array x-kubernetes-list-type: atomic matchFields: - description: A list of node selector requirements - by node's fields. + description: A list of node selector requirements by node's fields. items: description: |- A node selector requirement is a selector that contains values, a key, and an operator that relates the key and values. properties: key: - description: The label key that the - selector applies to. + description: The label key that the selector applies to. type: string operator: description: |- @@ -5178,9 +4966,7 @@ spec: type: object x-kubernetes-map-type: atomic weight: - description: Weight associated with matching - the corresponding nodeSelectorTerm, in the - range 1-100. + description: Weight associated with matching the corresponding nodeSelectorTerm, in the range 1-100. format: int32 type: integer required: @@ -5198,8 +4984,7 @@ spec: may or may not try to eventually evict the pod from its node. properties: nodeSelectorTerms: - description: Required. 
A list of node selector - terms. The terms are ORed. + description: Required. A list of node selector terms. The terms are ORed. items: description: |- A null or empty node selector term matches no objects. The requirements of @@ -5207,16 +4992,14 @@ spec: The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. properties: matchExpressions: - description: A list of node selector requirements - by node's labels. + description: A list of node selector requirements by node's labels. items: description: |- A node selector requirement is a selector that contains values, a key, and an operator that relates the key and values. properties: key: - description: The label key that the - selector applies to. + description: The label key that the selector applies to. type: string operator: description: |- @@ -5241,16 +5024,14 @@ spec: type: array x-kubernetes-list-type: atomic matchFields: - description: A list of node selector requirements - by node's fields. + description: A list of node selector requirements by node's fields. items: description: |- A node selector requirement is a selector that contains values, a key, and an operator that relates the key and values. properties: key: - description: The label key that the - selector applies to. + description: The label key that the selector applies to. type: string operator: description: |- @@ -5284,9 +5065,7 @@ spec: x-kubernetes-map-type: atomic type: object podAffinity: - description: Describes pod affinity scheduling rules (e.g. - co-locate this pod in the same node, zone, etc. as some - other pod(s)). + description: Describes pod affinity scheduling rules (e.g. co-locate this pod in the same node, zone, etc. as some other pod(s)). properties: preferredDuringSchedulingIgnoredDuringExecution: description: |- @@ -5300,13 +5079,10 @@ spec: "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. 
items: - description: The weights of all of the matched WeightedPodAffinityTerm - fields are added per-node to find the most preferred - node(s) + description: The weights of all of the matched WeightedPodAffinityTerm fields are added per-node to find the most preferred node(s) properties: podAffinityTerm: - description: Required. A pod affinity term, - associated with the corresponding weight. + description: Required. A pod affinity term, associated with the corresponding weight. properties: labelSelector: description: |- @@ -5314,18 +5090,14 @@ spec: If it's null, this PodAffinityTerm matches with no Pods. properties: matchExpressions: - description: matchExpressions is a list - of label selector requirements. The - requirements are ANDed. + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. items: description: |- A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. properties: key: - description: key is the label - key that the selector applies - to. + description: key is the label key that the selector applies to. type: string operator: description: |- @@ -5368,7 +5140,7 @@ spec: pod labels will be ignored. The default value is empty. The same key is forbidden to exist in both matchLabelKeys and labelSelector. Also, matchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). items: type: string type: array @@ -5383,7 +5155,7 @@ spec: pod labels will be ignored. The default value is empty. The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. Also, mismatchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. 
+ This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). items: type: string type: array @@ -5397,18 +5169,14 @@ spec: An empty selector ({}) matches all namespaces. properties: matchExpressions: - description: matchExpressions is a list - of label selector requirements. The - requirements are ANDed. + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. items: description: |- A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. properties: key: - description: key is the label - key that the selector applies - to. + description: key is the label key that the selector applies to. type: string operator: description: |- @@ -5498,17 +5266,14 @@ spec: If it's null, this PodAffinityTerm matches with no Pods. properties: matchExpressions: - description: matchExpressions is a list - of label selector requirements. The requirements - are ANDed. + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. items: description: |- A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. properties: key: - description: key is the label key - that the selector applies to. + description: key is the label key that the selector applies to. type: string operator: description: |- @@ -5551,7 +5316,7 @@ spec: pod labels will be ignored. The default value is empty. The same key is forbidden to exist in both matchLabelKeys and labelSelector. Also, matchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). items: type: string type: array @@ -5566,7 +5331,7 @@ spec: pod labels will be ignored. The default value is empty. 
The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. Also, mismatchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). items: type: string type: array @@ -5580,17 +5345,14 @@ spec: An empty selector ({}) matches all namespaces. properties: matchExpressions: - description: matchExpressions is a list - of label selector requirements. The requirements - are ANDed. + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. items: description: |- A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. properties: key: - description: key is the label key - that the selector applies to. + description: key is the label key that the selector applies to. type: string operator: description: |- @@ -5648,9 +5410,7 @@ spec: x-kubernetes-list-type: atomic type: object podAntiAffinity: - description: Describes pod anti-affinity scheduling rules - (e.g. avoid putting this pod in the same node, zone, - etc. as some other pod(s)). + description: Describes pod anti-affinity scheduling rules (e.g. avoid putting this pod in the same node, zone, etc. as some other pod(s)). properties: preferredDuringSchedulingIgnoredDuringExecution: description: |- @@ -5664,13 +5424,10 @@ spec: "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: - description: The weights of all of the matched WeightedPodAffinityTerm - fields are added per-node to find the most preferred - node(s) + description: The weights of all of the matched WeightedPodAffinityTerm fields are added per-node to find the most preferred node(s) properties: podAffinityTerm: - description: Required. 
A pod affinity term, - associated with the corresponding weight. + description: Required. A pod affinity term, associated with the corresponding weight. properties: labelSelector: description: |- @@ -5678,18 +5435,14 @@ spec: If it's null, this PodAffinityTerm matches with no Pods. properties: matchExpressions: - description: matchExpressions is a list - of label selector requirements. The - requirements are ANDed. + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. items: description: |- A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. properties: key: - description: key is the label - key that the selector applies - to. + description: key is the label key that the selector applies to. type: string operator: description: |- @@ -5732,7 +5485,7 @@ spec: pod labels will be ignored. The default value is empty. The same key is forbidden to exist in both matchLabelKeys and labelSelector. Also, matchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). items: type: string type: array @@ -5747,7 +5500,7 @@ spec: pod labels will be ignored. The default value is empty. The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. Also, mismatchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). items: type: string type: array @@ -5761,18 +5514,14 @@ spec: An empty selector ({}) matches all namespaces. properties: matchExpressions: - description: matchExpressions is a list - of label selector requirements. The - requirements are ANDed. 
+ description: matchExpressions is a list of label selector requirements. The requirements are ANDed. items: description: |- A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. properties: key: - description: key is the label - key that the selector applies - to. + description: key is the label key that the selector applies to. type: string operator: description: |- @@ -5862,17 +5611,14 @@ spec: If it's null, this PodAffinityTerm matches with no Pods. properties: matchExpressions: - description: matchExpressions is a list - of label selector requirements. The requirements - are ANDed. + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. items: description: |- A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. properties: key: - description: key is the label key - that the selector applies to. + description: key is the label key that the selector applies to. type: string operator: description: |- @@ -5915,7 +5661,7 @@ spec: pod labels will be ignored. The default value is empty. The same key is forbidden to exist in both matchLabelKeys and labelSelector. Also, matchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). items: type: string type: array @@ -5930,7 +5676,7 @@ spec: pod labels will be ignored. The default value is empty. The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. Also, mismatchLabelKeys cannot be set when labelSelector isn't set. - This is an alpha field and requires enabling MatchLabelKeysInPodAffinity feature gate. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). 
items: type: string type: array @@ -5944,17 +5690,14 @@ spec: An empty selector ({}) matches all namespaces. properties: matchExpressions: - description: matchExpressions is a list - of label selector requirements. The requirements - are ANDed. + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. items: description: |- A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. properties: key: - description: key is the label key - that the selector applies to. + description: key is the label key that the selector applies to. type: string operator: description: |- @@ -6028,8 +5771,7 @@ spec: type: string type: array kine: - description: Available only if Kamaji is running using - Kine as backing storage. + description: Available only if Kamaji is running using Kine as backing storage. items: type: string type: array @@ -6047,9 +5789,7 @@ spec: More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ type: object podAdditionalMetadata: - description: AdditionalMetadata defines which additional metadata, - such as labels and annotations, must be attached to the - created resource. + description: AdditionalMetadata defines which additional metadata, such as labels and annotations, must be attached to the created resource. properties: annotations: additionalProperties: @@ -6098,23 +5838,19 @@ spec: (kube-apiserver, controller-manager, and scheduler). properties: apiServer: - description: ResourceRequirements describes the compute - resource requirements. + description: ResourceRequirements describes the compute resource requirements. properties: claims: description: |- Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - - + This is an alpha field and requires enabling the DynamicResourceAllocation feature gate. - - + This field is immutable. It can only be set for containers. 
items: - description: ResourceClaim references one entry - in PodSpec.ResourceClaims. + description: ResourceClaim references one entry in PodSpec.ResourceClaims. properties: name: description: |- @@ -6122,6 +5858,12 @@ spec: the Pod where this field is used. It makes that resource available inside a container. type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string required: - name type: object @@ -6155,23 +5897,19 @@ spec: type: object type: object controllerManager: - description: ResourceRequirements describes the compute - resource requirements. + description: ResourceRequirements describes the compute resource requirements. properties: claims: description: |- Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - - + This is an alpha field and requires enabling the DynamicResourceAllocation feature gate. - - + This field is immutable. It can only be set for containers. items: - description: ResourceClaim references one entry - in PodSpec.ResourceClaims. + description: ResourceClaim references one entry in PodSpec.ResourceClaims. properties: name: description: |- @@ -6179,6 +5917,12 @@ spec: the Pod where this field is used. It makes that resource available inside a container. type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string required: - name type: object @@ -6220,16 +5964,13 @@ spec: description: |- Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - - + This is an alpha field and requires enabling the DynamicResourceAllocation feature gate. - - + This field is immutable. It can only be set for containers. 
items: - description: ResourceClaim references one entry - in PodSpec.ResourceClaims. + description: ResourceClaim references one entry in PodSpec.ResourceClaims. properties: name: description: |- @@ -6237,6 +5978,12 @@ spec: the Pod where this field is used. It makes that resource available inside a container. type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string required: - name type: object @@ -6270,23 +6017,19 @@ spec: type: object type: object scheduler: - description: ResourceRequirements describes the compute - resource requirements. + description: ResourceRequirements describes the compute resource requirements. properties: claims: description: |- Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - - + This is an alpha field and requires enabling the DynamicResourceAllocation feature gate. - - + This field is immutable. It can only be set for containers. items: - description: ResourceClaim references one entry - in PodSpec.ResourceClaims. + description: ResourceClaim references one entry in PodSpec.ResourceClaims. properties: name: description: |- @@ -6294,6 +6037,12 @@ spec: the Pod where this field is used. It makes that resource available inside a container. type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. 
+ type: string required: - name type: object @@ -6337,8 +6086,7 @@ spec: type: string serviceAccountName: default: default - description: ServiceAccountName allows to specify the service - account to be mounted to the pods of the Control plane deployment + description: ServiceAccountName allows to specify the service account to be mounted to the pods of the Control plane deployment type: string strategy: default: @@ -6354,9 +6102,6 @@ spec: description: |- Rolling update config params. Present only if DeploymentStrategyType = RollingUpdate. - --- - TODO: Update this to follow our convention for oneOf, whatever we decide it - to be. properties: maxSurge: anyOf: @@ -6393,8 +6138,7 @@ spec: x-kubernetes-int-or-string: true type: object type: - description: Type of deployment. Can be "Recreate" or - "RollingUpdate". Default is RollingUpdate. + description: Type of deployment. Can be "Recreate" or "RollingUpdate". Default is RollingUpdate. type: string type: object tolerations: @@ -6445,8 +6189,7 @@ spec: In case of nil underlying LabelSelector, the Kamaji one for the given Tenant Control Plane will be used. All topologySpreadConstraints are ANDed. items: - description: TopologySpreadConstraint specifies how to spread - matching pods among the given topology. + description: TopologySpreadConstraint specifies how to spread matching pods among the given topology. properties: labelSelector: description: |- @@ -6455,16 +6198,14 @@ spec: in their corresponding topology domain. properties: matchExpressions: - description: matchExpressions is a list of label - selector requirements. The requirements are ANDed. + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. items: description: |- A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values. properties: key: - description: key is the label key that the - selector applies to. 
+ description: key is the label key that the selector applies to. type: string operator: description: |- @@ -6507,8 +6248,7 @@ spec: MatchLabelKeys cannot be set when LabelSelector isn't set. Keys that don't exist in the incoming pod labels will be ignored. A null or empty list means only match against labelSelector. - - + This is a beta field and requires the MatchLabelKeysInPodTopologySpread feature gate to be enabled (enabled by default). items: type: string @@ -6547,8 +6287,7 @@ spec: If value is nil, the constraint behaves as if MinDomains is equal to 1. Valid values are integers greater than 0. When value is not nil, WhenUnsatisfiable must be DoNotSchedule. - - + For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same labelSelector spread as 2/2/2: | zone1 | zone2 | zone3 | @@ -6565,8 +6304,7 @@ spec: when calculating pod topology spread skew. Options are: - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. - - + If this value is nil, the behavior is equivalent to the Honor policy. This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag. type: string @@ -6577,8 +6315,7 @@ spec: - Honor: nodes without taints, along with tainted nodes for which the incoming pod has a toleration, are included. - Ignore: node taints are ignored. All nodes are included. - - + If this value is nil, the behavior is equivalent to the Ignore policy. This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag. 
type: string @@ -6624,13 +6361,10 @@ spec: type: array type: object ingress: - description: Defining the options for an Optional Ingress which - will expose API Server of the Tenant Control Plane + description: Defining the options for an Optional Ingress which will expose API Server of the Tenant Control Plane properties: additionalMetadata: - description: AdditionalMetadata defines which additional metadata, - such as labels and annotations, must be attached to the - created resource. + description: AdditionalMetadata defines which additional metadata, such as labels and annotations, must be attached to the created resource. properties: annotations: additionalProperties: @@ -6650,13 +6384,10 @@ spec: type: string type: object service: - description: Defining the options for the Tenant Control Plane - Service resource. + description: Defining the options for the Tenant Control Plane Service resource. properties: additionalMetadata: - description: AdditionalMetadata defines which additional metadata, - such as labels and annotations, must be attached to the - created resource. + description: AdditionalMetadata defines which additional metadata, such as labels and annotations, must be attached to the created resource. properties: annotations: additionalProperties: @@ -6668,8 +6399,7 @@ spec: type: object type: object serviceType: - description: ServiceType allows specifying how to expose the - Tenant Control Plane. + description: ServiceType allows specifying how to expose the Tenant Control Plane. enum: - ClusterIP - NodePort @@ -6851,8 +6581,7 @@ spec: - enabled type: object konnectivity: - description: KonnectivityStatus defines the status of Konnectivity - as Addon. + description: KonnectivityStatus defines the status of Konnectivity as Addon. properties: agent: properties: @@ -6897,8 +6626,7 @@ spec: enabled: type: boolean kubeconfig: - description: KubeconfigStatus contains information about the - generated kubeconfig. 
+ description: KubeconfigStatus contains information about the generated kubeconfig. properties: checksum: type: string @@ -6920,24 +6648,12 @@ spec: type: string type: object service: - description: KubernetesServiceStatus defines the status for - the Tenant Control Plane Service in the management cluster. + description: KubernetesServiceStatus defines the status for the Tenant Control Plane Service in the management cluster. properties: conditions: description: Current service state items: - description: "Condition contains details for one aspect - of the current state of this API Resource.\n---\nThis - struct is intended for direct use as an array at the - field path .status.conditions. For example,\n\n\n\ttype - FooStatus struct{\n\t // Represents the observations - of a foo's current state.\n\t // Known .status.conditions.type - are: \"Available\", \"Progressing\", and \"Degraded\"\n\t - \ // +patchMergeKey=type\n\t // +patchStrategy=merge\n\t - \ // +listType=map\n\t // +listMapKey=type\n\t - \ Conditions []metav1.Condition `json:\"conditions,omitempty\" - patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t - \ // other fields\n\t}" + description: Condition contains details for one aspect of the current state of this API Resource. properties: lastTransitionTime: description: |- @@ -6971,20 +6687,14 @@ spec: pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ type: string status: - description: status of the condition, one of True, - False, Unknown. + description: status of the condition, one of True, False, Unknown. enum: - "True" - "False" - Unknown type: string type: - description: |- - type of condition in CamelCase or in foo.example.com/CamelCase. - --- - Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be - useful (see .node.status.conditions), the ability to deconflict is important. 
- The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + description: type of condition in CamelCase or in foo.example.com/CamelCase. maxLength: 316 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ type: string @@ -7046,24 +6756,20 @@ spec: CamelCase names - cloud provider specific error values must have names that comply with the format foo.example.com/CamelCase. - --- - The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) maxLength: 316 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ type: string port: - description: Port is the port number of - the service port of which status is - recorded here + description: Port is the port number of the service port of which status is recorded here format: int32 type: integer protocol: - default: TCP description: |- Protocol is the protocol of the service port of which status is recorded here The supported values are: "TCP", "UDP", "SCTP" type: string required: + - error - port - protocol type: object @@ -7077,8 +6783,7 @@ spec: description: The name of the Service for the given cluster. type: string namespace: - description: The namespace which the Service for the given - cluster is deployed. + description: The namespace which the Service for the given cluster is deployed. type: string port: description: The port where the service is running @@ -7143,12 +6848,10 @@ spec: type: string type: object etcd: - description: ETCDCertificatesStatus defines the observed state - of ETCD Certificate for API server. + description: ETCDCertificatesStatus defines the observed state of ETCD Certificate for API server. properties: apiServer: - description: APIServerCertificatesStatus defines the observed - state of ETCD Certificate for API server. + description: APIServerCertificatesStatus defines the observed state of ETCD Certificate for API server. 
properties: checksum: type: string @@ -7159,8 +6862,7 @@ spec: type: string type: object ca: - description: ETCDCertificateStatus defines the observed state - of ETCD Certificate for API server. + description: ETCDCertificateStatus defines the observed state of ETCD Certificate for API server. properties: checksum: type: string @@ -7206,16 +6908,13 @@ spec: type: object type: object controlPlaneEndpoint: - description: ControlPlaneEndpoint contains the status of the kubernetes - control plane + description: ControlPlaneEndpoint contains the status of the kubernetes control plane type: string kubeadmPhase: - description: KubeadmPhase contains the status of the kubeadm phases - action + description: KubeadmPhase contains the status of the kubeadm phases action properties: bootstrapToken: - description: KubeadmPhaseStatus contains the status of a kubeadm - phase action. + description: KubeadmPhaseStatus contains the status of a kubeadm phase action. properties: checksum: type: string @@ -7227,8 +6926,7 @@ spec: - bootstrapToken type: object kubeadmconfig: - description: KubeadmConfig contains the status of the configuration - required by kubeadm + description: KubeadmConfig contains the status of the configuration required by kubeadm properties: checksum: description: Checksum of the kubeadm configuration to detect changes @@ -7240,12 +6938,10 @@ spec: type: string type: object kubeconfig: - description: KubeConfig contains information about the kubenconfigs - that control plane pieces need + description: KubeConfig contains information about the kubenconfigs that control plane pieces need properties: admin: - description: KubeconfigStatus contains information about the generated - kubeconfig. + description: KubeconfigStatus contains information about the generated kubeconfig. properties: checksum: type: string @@ -7256,8 +6952,7 @@ spec: type: string type: object controllerManager: - description: KubeconfigStatus contains information about the generated - kubeconfig. 
+ description: KubeconfigStatus contains information about the generated kubeconfig. properties: checksum: type: string @@ -7268,8 +6963,7 @@ spec: type: string type: object scheduler: - description: KubeconfigStatus contains information about the generated - kubeconfig. + description: KubeconfigStatus contains information about the generated kubeconfig. properties: checksum: type: string @@ -7281,16 +6975,13 @@ spec: type: object type: object kubernetesResources: - description: Kubernetes contains information about the reconciliation - of the required Kubernetes resources deployed in the admin cluster + description: Kubernetes contains information about the reconciliation of the required Kubernetes resources deployed in the admin cluster properties: deployment: - description: KubernetesDeploymentStatus defines the status for - the Tenant Control Plane Deployment in the management cluster. + description: KubernetesDeploymentStatus defines the status for the Tenant Control Plane Deployment in the management cluster. properties: availableReplicas: - description: Total number of available pods (ready for at - least minReadySeconds) targeted by this deployment. + description: Total number of available pods (ready for at least minReadySeconds) targeted by this deployment. format: int32 type: integer collisionCount: @@ -7301,15 +6992,12 @@ spec: format: int32 type: integer conditions: - description: Represents the latest available observations - of a deployment's current state. + description: Represents the latest available observations of a deployment's current state. items: - description: DeploymentCondition describes the state of - a deployment at a certain point. + description: DeploymentCondition describes the state of a deployment at a certain point. properties: lastTransitionTime: - description: Last time the condition transitioned from - one status to another. + description: Last time the condition transitioned from one status to another. 
format: date-time type: string lastUpdateTime: @@ -7317,15 +7005,13 @@ spec: format: date-time type: string message: - description: A human readable message indicating details - about the transition. + description: A human readable message indicating details about the transition. type: string reason: description: The reason for the condition's last transition. type: string status: - description: Status of the condition, one of True, False, - Unknown. + description: Status of the condition, one of True, False, Unknown. type: string type: description: Type of deployment condition. @@ -7346,26 +7032,22 @@ spec: description: The name of the Deployment for the given cluster. type: string namespace: - description: The namespace which the Deployment for the given - cluster is deployed. + description: The namespace which the Deployment for the given cluster is deployed. type: string observedGeneration: description: The generation observed by the deployment controller. format: int64 type: integer readyReplicas: - description: readyReplicas is the number of pods targeted - by this Deployment with a Ready Condition. + description: readyReplicas is the number of pods targeted by this Deployment with a Ready Condition. format: int32 type: integer replicas: - description: Total number of non-terminated pods targeted - by this deployment (their labels match the selector). + description: Total number of non-terminated pods targeted by this deployment (their labels match the selector). format: int32 type: integer selector: - description: Selector is the label selector used to group - the Tenant Control Plane Pods used by the scale subresource. + description: Selector is the label selector used to group the Tenant Control Plane Pods used by the scale subresource. 
type: string unavailableReplicas: description: |- @@ -7375,8 +7057,7 @@ spec: format: int32 type: integer updatedReplicas: - description: Total number of non-terminated pods targeted - by this deployment that have the desired template spec. + description: Total number of non-terminated pods targeted by this deployment that have the desired template spec. format: int32 type: integer required: @@ -7385,34 +7066,26 @@ spec: - selector type: object ingress: - description: KubernetesIngressStatus defines the status for the - Tenant Control Plane Ingress in the management cluster. + description: KubernetesIngressStatus defines the status for the Tenant Control Plane Ingress in the management cluster. properties: loadBalancer: - description: loadBalancer contains the current status of the - load-balancer. + description: loadBalancer contains the current status of the load-balancer. properties: ingress: - description: ingress is a list containing ingress points - for the load-balancer. + description: ingress is a list containing ingress points for the load-balancer. items: - description: IngressLoadBalancerIngress represents the - status of a load-balancer ingress point. + description: IngressLoadBalancerIngress represents the status of a load-balancer ingress point. properties: hostname: - description: hostname is set for load-balancer ingress - points that are DNS based. + description: hostname is set for load-balancer ingress points that are DNS based. type: string ip: - description: ip is set for load-balancer ingress - points that are IP based. + description: ip is set for load-balancer ingress points that are IP based. type: string ports: - description: ports provides information about the - ports exposed by this LoadBalancer. + description: ports provides information about the ports exposed by this LoadBalancer. 
items: - description: IngressPortStatus represents the - error condition of a service port + description: IngressPortStatus represents the error condition of a service port properties: error: description: |- @@ -7422,23 +7095,20 @@ spec: CamelCase names - cloud provider specific error values must have names that comply with the format foo.example.com/CamelCase. - --- - The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) maxLength: 316 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ type: string port: - description: port is the port number of the - ingress port. + description: port is the port number of the ingress port. format: int32 type: integer protocol: - default: TCP description: |- protocol is the protocol of the ingress port. The supported values are: "TCP", "UDP", "SCTP" type: string required: + - error - port - protocol type: object @@ -7452,32 +7122,19 @@ spec: description: The name of the Ingress for the given cluster. type: string namespace: - description: The namespace which the Ingress for the given - cluster is deployed. + description: The namespace which the Ingress for the given cluster is deployed. type: string required: - name - namespace type: object service: - description: KubernetesServiceStatus defines the status for the - Tenant Control Plane Service in the management cluster. + description: KubernetesServiceStatus defines the status for the Tenant Control Plane Service in the management cluster. properties: conditions: description: Current service state items: - description: "Condition contains details for one aspect - of the current state of this API Resource.\n---\nThis - struct is intended for direct use as an array at the field - path .status.conditions. 
For example,\n\n\n\ttype FooStatus - struct{\n\t // Represents the observations of a foo's - current state.\n\t // Known .status.conditions.type - are: \"Available\", \"Progressing\", and \"Degraded\"\n\t - \ // +patchMergeKey=type\n\t // +patchStrategy=merge\n\t - \ // +listType=map\n\t // +listMapKey=type\n\t Conditions - []metav1.Condition `json:\"conditions,omitempty\" patchStrategy:\"merge\" - patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t - \ // other fields\n\t}" + description: Condition contains details for one aspect of the current state of this API Resource. properties: lastTransitionTime: description: |- @@ -7511,20 +7168,14 @@ spec: pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ type: string status: - description: status of the condition, one of True, False, - Unknown. + description: status of the condition, one of True, False, Unknown. enum: - "True" - "False" - Unknown type: string type: - description: |- - type of condition in CamelCase or in foo.example.com/CamelCase. - --- - Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be - useful (see .node.status.conditions), the ability to deconflict is important. - The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + description: type of condition in CamelCase or in foo.example.com/CamelCase. maxLength: 316 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ type: string @@ -7586,24 +7237,20 @@ spec: CamelCase names - cloud provider specific error values must have names that comply with the format foo.example.com/CamelCase. 
- --- - The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) maxLength: 316 pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ type: string port: - description: Port is the port number of the - service port of which status is recorded - here + description: Port is the port number of the service port of which status is recorded here format: int32 type: integer protocol: - default: TCP description: |- Protocol is the protocol of the service port of which status is recorded here The supported values are: "TCP", "UDP", "SCTP" type: string required: + - error - port - protocol type: object @@ -7617,8 +7264,7 @@ spec: description: The name of the Service for the given cluster. type: string namespace: - description: The namespace which the Service for the given - cluster is deployed. + description: The namespace which the Service for the given cluster is deployed. type: string port: description: The port where the service is running @@ -7630,13 +7276,11 @@ spec: - port type: object version: - description: KubernetesVersion contains the information regarding - the running Kubernetes version, and its upgrade status. + description: KubernetesVersion contains the information regarding the running Kubernetes version, and its upgrade status. properties: status: default: Provisioning - description: Status returns the current status of the Kubernetes - version, such as its provisioning state, or completed upgrade. + description: Status returns the current status of the Kubernetes version, such as its provisioning state, or completed upgrade. enum: - Provisioning - CertificateAuthorityRotating @@ -7646,14 +7290,12 @@ spec: - NotReady type: string version: - description: Version is the running Kubernetes version of - the Tenant Control Plane. + description: Version is the running Kubernetes version of the Tenant Control Plane. 
type: string type: object type: object storage: - description: Storage Status contains information about Kubernetes - storage system + description: Storage Status contains information about Kubernetes storage system properties: certificate: properties: @@ -7699,3 +7341,13 @@ spec: specReplicasPath: .spec.controlPlane.deployment.replicas statusReplicasPath: .status.kubernetesResources.deployment.replicas status: {} + conversion: + strategy: Webhook + webhook: + clientConfig: + service: + name: kamaji-webhook-service + namespace: kamaji-system + path: /convert + conversionReviewVersions: + - v1 diff --git a/packages/system/kamaji/charts/kamaji/templates/_helpers_datastore.tpl b/packages/system/kamaji/charts/kamaji/templates/_helpers_datastore.tpl deleted file mode 100644 index 3ed2c16e..00000000 --- a/packages/system/kamaji/charts/kamaji/templates/_helpers_datastore.tpl +++ /dev/null @@ -1,94 +0,0 @@ -{{/* -Create a default fully qualified datastore name. -*/}} -{{- define "datastore.fullname" -}} -{{- if .Values.datastore.enabled }} -{{- default "default" .Values.datastore.nameOverride | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- required "A valid .Values.datastore.nameOverride required!" .Values.datastore.nameOverride }} -{{- end }} -{{- end }} - -{{/* -Common labels -*/}} -{{- define "datastore.labels" -}} -kamaji.clastix.io/datastore: {{ .Values.datastore.driver }} -helm.sh/chart: {{ include "kamaji.chart" . }} -{{ include "kamaji.selectorLabels" . }} -{{- if .Chart.AppVersion }} -app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} -{{- end }} -app.kubernetes.io/managed-by: {{ .Release.Service }} -{{- end }} - -{{/* -Datastore endpoints, in case of ETCD, retrieving the one provided by the chart. -*/}} -{{- define "datastore.endpoints" -}} -{{- if eq .Values.datastore.driver "etcd" }} -{{ include "etcd.endpoints" . 
}} -{{- else }} -{{ .Values.datastore.endpoints }} -{{- end }} -{{- end }} - -{{/* -The Certificate Authority section for the DataSource object. -*/}} -{{- define "datastore.certificateAuthority" -}} -{{- if eq .Values.datastore.driver "etcd" }} -certificate: - secretReference: - name: {{ include "etcd.caSecretName" . }} - namespace: {{ include "etcd.caSecretNamespace" . }} - keyPath: ca.crt -privateKey: - secretReference: - name: {{ include "etcd.caSecretName" . }} - namespace: {{ include "etcd.caSecretNamespace" . }} - keyPath: ca.key -{{- else }} -certificate: - secretReference: - name: {{ .Values.datastore.tlsConfig.certificateAuthority.certificate.name }} - namespace: {{ .Values.datastore.tlsConfig.certificateAuthority.certificate.namespace }} - keyPath: {{ .Values.datastore.tlsConfig.certificateAuthority.certificate.keyPath }} -{{- if .Values.datastore.tlsConfig.certificateAuthority.privateKey.name }} -privateKey: - secretReference: - name: {{ .Values.datastore.tlsConfig.certificateAuthority.privateKey.name }} - namespace: {{ .Values.datastore.tlsConfig.certificateAuthority.privateKey.namespace }} - keyPath: {{ .Values.datastore.tlsConfig.certificateAuthority.privateKey.keyPath }} -{{- end }} -{{- end }} -{{- end }} - -{{/* -The Client Certificate section for the DataSource object. -*/}} -{{- define "datastore.clientCertificate" -}} -{{- if eq .Values.datastore.driver "etcd" }} -certificate: - secretReference: - name: {{ include "etcd.clientSecretName" . }} - namespace: {{ include "etcd.clientSecretNamespace" . }} - keyPath: tls.crt -privateKey: - secretReference: - name: {{ include "etcd.clientSecretName" . }} - namespace: {{ include "etcd.clientSecretNamespace" . 
}} - keyPath: tls.key -{{- else }} -certificate: - secretReference: - name: {{ .Values.datastore.tlsConfig.clientCertificate.certificate.name }} - namespace: {{ .Values.datastore.tlsConfig.clientCertificate.certificate.namespace }} - keyPath: {{ .Values.datastore.tlsConfig.clientCertificate.certificate.keyPath }} -privateKey: - secretReference: - name: {{ .Values.datastore.tlsConfig.clientCertificate.privateKey.name }} - namespace: {{ .Values.datastore.tlsConfig.clientCertificate.privateKey.namespace }} - keyPath: {{ .Values.datastore.tlsConfig.clientCertificate.privateKey.keyPath }} -{{- end }} -{{- end }} diff --git a/packages/system/kamaji/charts/kamaji/templates/_helpers_etcd.tpl b/packages/system/kamaji/charts/kamaji/templates/_helpers_etcd.tpl deleted file mode 100644 index 2a6b5247..00000000 --- a/packages/system/kamaji/charts/kamaji/templates/_helpers_etcd.tpl +++ /dev/null @@ -1,142 +0,0 @@ -{{/* -Create a default fully qualified etcd name. -*/}} -{{- define "etcd.fullname" -}} -{{- printf "etcd" }} -{{- end }} - -{{/* -Create the name of the service account to use -*/}} -{{- define "etcd.serviceAccountName" -}} -{{- if .Values.etcd.serviceAccount.create }} -{{- default (include "etcd.fullname" .) .Values.etcd.serviceAccount.name }} -{{- else }} -{{- default "default" .Values.etcd.serviceAccount.name }} -{{- end }} -{{- end }} - -{{/* -Create the name of the Service to use -*/}} -{{- define "etcd.serviceName" -}} -{{- printf "%s" (include "etcd.fullname" .) | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Common labels -*/}} -{{- define "etcd.labels" -}} -app.kubernetes.io/name: {{ include "kamaji.name" . }} -app.kubernetes.io/instance: {{ .Release.Name }} -app.kubernetes.io/components: etcd -{{- end }} - -{{/* -Selector labels. -*/}} -{{- define "etcd.selectorLabels" -}} -app.kubernetes.io/name: {{ include "kamaji.name" . 
}} -app.kubernetes.io/instance: {{ .Release.Name }} -app.kubernetes.io/component: etcd -{{- end }} - -{{/* -Name of the etcd CA secret. -*/}} -{{- define "etcd.caSecretName" }} -{{- if .Values.etcd.deploy }} -{{- printf "%s-%s" (include "etcd.fullname" .) "certs" | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- required "A valid .Values.etcd.overrides.caSecret.name required!" .Values.etcd.overrides.caSecret.name }} -{{- end }} -{{- end }} - -{{/* -Namespace of the etcd CA secret. -*/}} -{{- define "etcd.caSecretNamespace" }} -{{- if .Values.etcd.deploy }} -{{- .Release.Namespace }} -{{- else }} -{{- required "A valid .Values.etcd.overrides.caSecret.namespace required!" .Values.etcd.overrides.caSecret.namespace }} -{{- end }} -{{- end }} - -{{/* -Name of the certificate signing requests for the certificates required by etcd. -*/}} -{{- define "etcd.csrConfigMapName" }} -{{- printf "%s-csr" (include "etcd.fullname" .) }} -{{- end }} - -{{/* -Name of the etcd root-client secret. -*/}} -{{- define "etcd.clientSecretName" }} -{{- if .Values.etcd.deploy }} -{{- printf "root-client-certs" }} -{{- else }} -{{- required "A valid .Values.etcd.overrides.clientSecret.name required!" .Values.etcd.overrides.clientSecret.name }} -{{- end }} -{{- end }} - -{{/* -Namespace of the etcd root-client secret. -*/}} -{{- define "etcd.clientSecretNamespace" }} -{{- if .Values.etcd.deploy }} -{{- .Release.Namespace }} -{{- else }} -{{- required "A valid .Values.etcd.overrides.clientSecret.namespace required!" .Values.etcd.overrides.clientSecret.namespace }} -{{- end }} -{{- end }} - -{{/* -Comma separated list of etcd endpoints, using the overrides in case of unmanaged etcd. -*/}} -{{- define "etcd.endpoints" }} -{{- $list := list -}} -{{- if .Values.etcd.deploy }} - {{- range $count := until 3 -}} - {{- $list = append $list (printf "%s-%d.%s.%s.svc.cluster.local:%d" "etcd" $count ( include "etcd.serviceName" . 
) $.Release.Namespace (int $.Values.etcd.port) ) -}} - {{- end }} -{{- else if .Values.etcd.overrides.endpoints }} - {{- range $v := .Values.etcd.overrides.endpoints -}} - {{- $list = append $list (printf "%s:%d" $v (int $.Values.etcd.port) ) -}} - {{- end -}} -{{- else if not .Values.etcd.overrides.endpoints }} - {{- fail "A valid .Values.etcd.overrides.endpoints required!" }} -{{- end }} -{{- $list | toYaml }} -{{- end }} - -{{/* -Key-value of the etcd peers, using the overrides in case of unmanaged etcd. -*/}} -{{- define "etcd.initialCluster" }} -{{- $list := list -}} -{{- if .Values.etcd.deploy }} - {{- range $i, $count := until 3 -}} - {{- $list = append $list ( printf "etcd-%d=https://%s-%d.%s.%s.svc.cluster.local:%d" $i "etcd" $count ( include "etcd.serviceName" . ) $.Release.Namespace (int $.Values.etcd.peerApiPort) ) -}} - {{- end }} -{{- else if .Values.etcd.overrides.endpoints }} - {{- range $k, $v := .Values.etcd.overrides.endpoints -}} - {{- $list = append $list ( printf "%s=%s:%d" $k $v (int $.Values.etcd.peerApiPort) ) -}} - {{- end -}} -{{- else if not .Values.etcd.overrides.endpoints }} - {{- fail "A valid .Values.etcd.overrides.endpoints required!" }} -{{- end }} -{{- join "," $list -}} -{{- end }} - -{{/* -Retrieve the current Kubernetes version to launch a kubectl container with the minimum version skew possible. -*/}} -{{- define "etcd.jobsTagKubeVersion" -}} -{{- if contains "-eks-" .Capabilities.KubeVersion.GitVersion }} -{{- print "v" .Capabilities.KubeVersion.Major "." (.Capabilities.KubeVersion.Minor | replace "+" "") -}} -{{- else }} -{{- print "v" .Capabilities.KubeVersion.Major "." 
.Capabilities.KubeVersion.Minor -}} -{{- end }} -{{- end }} diff --git a/packages/system/kamaji/charts/kamaji/templates/controller.yaml b/packages/system/kamaji/charts/kamaji/templates/controller.yaml index e1a73e67..d4ca6dc5 100644 --- a/packages/system/kamaji/charts/kamaji/templates/controller.yaml +++ b/packages/system/kamaji/charts/kamaji/templates/controller.yaml @@ -33,7 +33,8 @@ spec: - --leader-elect - --metrics-bind-address={{ .Values.metricsBindAddress }} - --tmp-directory={{ .Values.temporaryDirectoryPath }} - - --datastore={{ include "datastore.fullname" . }} + {{- $datastoreName := .Values.defaultDatastoreName | required ".Values.defaultDatastoreName is required!" }} + - --datastore={{ $datastoreName }} {{- if .Values.telemetry.disabled }} - --disable-telemetry {{- end }} @@ -43,8 +44,6 @@ spec: {{- with .Values.extraArgs }} {{- toYaml . | nindent 8 }} {{- end }} - command: - - /kamaji env: - name: POD_NAMESPACE valueFrom: diff --git a/packages/system/kamaji/charts/kamaji/templates/datastore.yaml b/packages/system/kamaji/charts/kamaji/templates/datastore.yaml deleted file mode 100644 index 08631b98..00000000 --- a/packages/system/kamaji/charts/kamaji/templates/datastore.yaml +++ /dev/null @@ -1,33 +0,0 @@ -{{- if .Values.datastore.enabled}} -apiVersion: kamaji.clastix.io/v1alpha1 -kind: DataStore -metadata: - name: {{ include "datastore.fullname" . }} - annotations: - "helm.sh/hook": pre-install - labels: - {{- include "datastore.labels" . | nindent 4 }} -spec: - driver: {{ .Values.datastore.driver }} - endpoints: - {{- include "datastore.endpoints" . 
| indent 4 }} -{{- if (and .Values.datastore.basicAuth.usernameSecret.name .Values.datastore.basicAuth.passwordSecret.name) }} - basicAuth: - username: - secretReference: - {{- .Values.datastore.basicAuth.usernameSecret | toYaml | nindent 8 }} - password: - secretReference: - {{- .Values.datastore.basicAuth.passwordSecret | toYaml | nindent 8 }} -{{- end }} -{{- if .Values.datastore.tlsConfig.enabled }} - tlsConfig: - certificateAuthority: - {{- include "datastore.certificateAuthority" . | indent 6 }} - - {{- if .Values.datastore.tlsConfig.clientCertificate }} - clientCertificate: - {{- include "datastore.clientCertificate" . | indent 6 }} - {{- end }} -{{- end}} -{{- end}} diff --git a/packages/system/kamaji/charts/kamaji/templates/etcd_cm.yaml b/packages/system/kamaji/charts/kamaji/templates/etcd_cm.yaml deleted file mode 100644 index 53e48b5a..00000000 --- a/packages/system/kamaji/charts/kamaji/templates/etcd_cm.yaml +++ /dev/null @@ -1,98 +0,0 @@ -{{- if .Values.etcd.deploy }} -apiVersion: v1 -kind: ConfigMap -metadata: - labels: - {{- include "etcd.labels" . | nindent 4 }} - name: {{ include "etcd.csrConfigMapName" . 
}} - namespace: {{ .Release.Namespace }} - annotations: - "helm.sh/hook": pre-install - "helm.sh/hook-weight": "-5" - "helm.sh/hook-delete-policy": "hook-succeeded,hook-failed" -data: - ca-csr.json: |- - { - "CN": "Clastix CA", - "key": { - "algo": "rsa", - "size": 2048 - }, - "names": [ - { - "C": "IT", - "ST": "Italy", - "L": "Milan" - } - ] - } - config.json: |- - { - "signing": { - "default": { - "expiry": "8760h" - }, - "profiles": { - "server-authentication": { - "usages": ["signing", "key encipherment", "server auth"], - "expiry": "8760h" - }, - "client-authentication": { - "usages": ["signing", "key encipherment", "client auth"], - "expiry": "8760h" - }, - "peer-authentication": { - "usages": ["signing", "key encipherment", "server auth", "client auth"], - "expiry": "8760h" - } - } - } - } - server-csr.json: |- - { - "CN": "etcd", - "key": { - "algo": "rsa", - "size": 2048 - }, - "hosts": [ -{{- range $count := until 3 -}} - {{ printf "\"etcd-%d.%s.%s.svc.cluster.local\"," $count (include "etcd.serviceName" .) $.Release.Namespace }} -{{- end }} - "etcd-server.{{ .Release.Namespace }}.svc.cluster.local", - "etcd-server.{{ .Release.Namespace }}.svc", - "etcd-server", - "127.0.0.1" - ] - } - peer-csr.json: |- - { - "CN": "etcd", - "key": { - "algo": "rsa", - "size": 2048 - }, - "hosts": [ -{{- range $count := until 3 -}} - {{ printf "\"etcd-%d\"," $count }} - {{ printf "\"etcd-%d.%s\"," $count (include "etcd.serviceName" .) }} - {{ printf "\"etcd-%d.%s.%s.svc\"," $count (include "etcd.serviceName" .) $.Release.Namespace }} - {{ printf "\"etcd-%d.%s.%s.svc.cluster.local\"," $count (include "etcd.serviceName" .) 
$.Release.Namespace }} -{{- end }} - "127.0.0.1" - ] - } - root-client-csr.json: |- - { - "CN": "root", - "key": { - "algo": "rsa", - "size": 2048 - }, - "names": [ - { - "O": "system:masters" - } - ] - } -{{- end }} diff --git a/packages/system/kamaji/charts/kamaji/templates/etcd_job_postdelete.yaml b/packages/system/kamaji/charts/kamaji/templates/etcd_job_postdelete.yaml deleted file mode 100644 index 45a8b4e6..00000000 --- a/packages/system/kamaji/charts/kamaji/templates/etcd_job_postdelete.yaml +++ /dev/null @@ -1,35 +0,0 @@ -{{- if .Values.etcd.deploy }} -apiVersion: batch/v1 -kind: Job -metadata: - labels: - {{- include "etcd.labels" . | nindent 4 }} - annotations: - "helm.sh/hook": pre-delete - "helm.sh/hook-weight": "-5" - "helm.sh/hook-delete-policy": "hook-succeeded,hook-failed" - name: "{{ .Release.Name }}-etcd-teardown" - namespace: {{ .Release.Namespace }} -spec: - template: - metadata: - name: "{{ .Release.Name }}" - spec: - serviceAccountName: {{ include "etcd.serviceAccountName" . }} - restartPolicy: Never - containers: - - name: kubectl - image: {{ printf "clastix/kubectl:%s" (include "etcd.jobsTagKubeVersion" .) }} - command: - - kubectl - - --namespace={{ .Release.Namespace }} - - delete - - secret - - --ignore-not-found=true - - {{ include "etcd.caSecretName" . }} - - {{ include "etcd.clientSecretName" . }} - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} -{{- end }} diff --git a/packages/system/kamaji/charts/kamaji/templates/etcd_job_postinstall.yaml b/packages/system/kamaji/charts/kamaji/templates/etcd_job_postinstall.yaml deleted file mode 100644 index 0ecd928f..00000000 --- a/packages/system/kamaji/charts/kamaji/templates/etcd_job_postinstall.yaml +++ /dev/null @@ -1,74 +0,0 @@ -{{- if .Values.etcd.deploy }} -apiVersion: batch/v1 -kind: Job -metadata: - labels: - {{- include "etcd.labels" . 
| nindent 4 }} - annotations: - "helm.sh/hook": post-install - "helm.sh/hook-weight": "-5" - "helm.sh/hook-delete-policy": "hook-succeeded,hook-failed" - name: "{{ .Release.Name }}-etcd-setup" - namespace: {{ .Release.Namespace }} -spec: - template: - metadata: - name: "{{ .Release.Name }}" - spec: - serviceAccountName: {{ include "etcd.serviceAccountName" . }} - restartPolicy: Never - initContainers: - - name: kubectl - image: {{ printf "clastix/kubectl:%s" (include "etcd.jobsTagKubeVersion" .) }} - command: - - sh - - -c - - |- - kubectl --namespace={{ .Release.Namespace }} rollout status sts/etcd --timeout=300s - containers: - - command: - - bash - - -c - - |- - etcdctl member list -w table - if etcdctl user get root &>/dev/null; then - echo "User already exists, nothing to do" - else - etcdctl user add --no-password=true root && - etcdctl role add root && - etcdctl user grant-role root root && - etcdctl auth enable - fi - env: - - name: ETCDCTL_ENDPOINTS - value: https://etcd-0.{{ include "etcd.serviceName" . }}.{{ .Release.Namespace }}.svc.cluster.local:2379 - - name: ETCDCTL_CACERT - value: /opt/certs/ca/ca.crt - - name: ETCDCTL_CERT - value: /opt/certs/root-certs/tls.crt - - name: ETCDCTL_KEY - value: /opt/certs/root-certs/tls.key - image: quay.io/coreos/etcd:v3.5.1 - imagePullPolicy: Always - name: etcd-client - volumeMounts: - - name: root-certs - mountPath: /opt/certs/root-certs - - name: certs - mountPath: /opt/certs/ca - securityContext: - runAsUser: 1000 - runAsGroup: 1000 - fsGroup: 1000 - volumes: - - name: root-certs - secret: - secretName: {{ include "etcd.clientSecretName" . }} - - name: certs - secret: - secretName: {{ include "etcd.caSecretName" . }} - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . 
| nindent 8 }} - {{- end }} -{{- end }} diff --git a/packages/system/kamaji/charts/kamaji/templates/etcd_job_preinstall.yaml b/packages/system/kamaji/charts/kamaji/templates/etcd_job_preinstall.yaml deleted file mode 100644 index 3344a58a..00000000 --- a/packages/system/kamaji/charts/kamaji/templates/etcd_job_preinstall.yaml +++ /dev/null @@ -1,72 +0,0 @@ -{{- if .Values.etcd.deploy }} -apiVersion: batch/v1 -kind: Job -metadata: - labels: - {{- include "etcd.labels" . | nindent 4 }} - annotations: - "helm.sh/hook": pre-install - "helm.sh/hook-weight": "-5" - "helm.sh/hook-delete-policy": "hook-succeeded" - name: "{{ .Release.Name }}-etcd-certs" - namespace: {{ .Release.Namespace }} -spec: - template: - metadata: - name: "{{ .Release.Name }}" - spec: - serviceAccountName: {{ include "etcd.serviceAccountName" . }} - restartPolicy: Never - initContainers: - - name: cfssl - image: "{{ .Values.cfssl.image.repository }}:{{ .Values.cfssl.image.tag }}" - command: - - bash - - -c - - |- - cfssl gencert -initca /csr/ca-csr.json | cfssljson -bare /certs/ca && - mv /certs/ca.pem /certs/ca.crt && mv /certs/ca-key.pem /certs/ca.key && - cfssl gencert -ca=/certs/ca.crt -ca-key=/certs/ca.key -config=/csr/config.json -profile=peer-authentication /csr/peer-csr.json | cfssljson -bare /certs/peer && - cfssl gencert -ca=/certs/ca.crt -ca-key=/certs/ca.key -config=/csr/config.json -profile=peer-authentication /csr/server-csr.json | cfssljson -bare /certs/server && - cfssl gencert -ca=/certs/ca.crt -ca-key=/certs/ca.key -config=/csr/config.json -profile=client-authentication /csr/root-client-csr.json | cfssljson -bare /certs/root-client - volumeMounts: - - mountPath: /certs - name: certs - - mountPath: /csr - name: csr - containers: - - name: kubectl - image: {{ printf "clastix/kubectl:%s" (include "etcd.jobsTagKubeVersion" .) }} - command: ["/bin/sh", "-c"] - args: - - | - if kubectl get secret {{ include "etcd.caSecretName" . 
}} --namespace={{ .Release.Namespace }} &>/dev/null; then - echo "Secret {{ include "etcd.caSecretName" . }} already exists" - else - echo "Creating secret {{ include "etcd.caSecretName" . }}" - kubectl --namespace={{ .Release.Namespace }} create secret generic {{ include "etcd.caSecretName" . }} --from-file=/certs/ca.crt --from-file=/certs/ca.key --from-file=/certs/peer-key.pem --from-file=/certs/peer.pem --from-file=/certs/server-key.pem --from-file=/certs/server.pem - fi - if kubectl get secret {{ include "etcd.clientSecretName" . }} --namespace={{ .Release.Namespace }} &>/dev/null; then - echo "Secret {{ include "etcd.clientSecretName" . }} already exists" - else - echo "Creating secret {{ include "etcd.clientSecretName" . }}" - kubectl --namespace={{ .Release.Namespace }} create secret tls {{ include "etcd.clientSecretName" . }} --key=/certs/root-client-key.pem --cert=/certs/root-client.pem - fi - volumeMounts: - - mountPath: /certs - name: certs - securityContext: - runAsUser: 1000 - runAsGroup: 1000 - fsGroup: 1000 - volumes: - - name: csr - configMap: - name: {{ include "etcd.csrConfigMapName" . }} - - name: certs - emptyDir: {} - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} -{{- end }} diff --git a/packages/system/kamaji/charts/kamaji/templates/etcd_rbac.yaml b/packages/system/kamaji/charts/kamaji/templates/etcd_rbac.yaml deleted file mode 100644 index c34d3bcc..00000000 --- a/packages/system/kamaji/charts/kamaji/templates/etcd_rbac.yaml +++ /dev/null @@ -1,56 +0,0 @@ -{{- if .Values.etcd.deploy }} -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - labels: - {{- include "etcd.labels" . | nindent 4 }} - name: etcd-gen-certs-role - annotations: - "helm.sh/hook": pre-install - "helm.sh/hook-weight": "-5" - namespace: {{ .Release.Namespace }} -rules: - - apiGroups: - - "" - resources: - - secrets - verbs: - - get - - delete - resourceNames: - - {{ include "etcd.caSecretName" . 
}} - - {{ include "etcd.clientSecretName" . }} - - apiGroups: - - "" - resources: - - secrets - verbs: - - create - - apiGroups: - - apps - resources: - - statefulsets - verbs: - - get - - list - - watch ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - labels: - {{- include "etcd.labels" . | nindent 4 }} - name: etcd-gen-certs-rolebiding - namespace: {{ .Release.Namespace }} - annotations: - "helm.sh/hook": pre-install - "helm.sh/hook-weight": "-5" -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: etcd-gen-certs-role -subjects: - - kind: ServiceAccount - name: {{ include "etcd.serviceAccountName" . }} - namespace: {{ .Release.Namespace }} -{{- end }} diff --git a/packages/system/kamaji/charts/kamaji/templates/etcd_sa.yaml b/packages/system/kamaji/charts/kamaji/templates/etcd_sa.yaml deleted file mode 100644 index e511ef2a..00000000 --- a/packages/system/kamaji/charts/kamaji/templates/etcd_sa.yaml +++ /dev/null @@ -1,12 +0,0 @@ -{{- if .Values.etcd.deploy }} -apiVersion: v1 -kind: ServiceAccount -metadata: - labels: - {{- include "etcd.labels" . | nindent 4 }} - name: {{ include "etcd.serviceAccountName" . }} - annotations: - "helm.sh/hook": pre-install - "helm.sh/hook-weight": "-5" - namespace: {{ .Release.Namespace }} -{{- end }} diff --git a/packages/system/kamaji/charts/kamaji/templates/etcd_service.yaml b/packages/system/kamaji/charts/kamaji/templates/etcd_service.yaml deleted file mode 100644 index 96342427..00000000 --- a/packages/system/kamaji/charts/kamaji/templates/etcd_service.yaml +++ /dev/null @@ -1,18 +0,0 @@ -{{- if .Values.etcd.deploy }} -apiVersion: v1 -kind: Service -metadata: - labels: - {{- include "etcd.labels" . | nindent 4 }} - name: {{ include "etcd.serviceName" . }} - namespace: {{ .Release.Namespace }} -spec: - clusterIP: None - ports: - - port: {{ .Values.etcd.port }} - name: client - - port: {{ .Values.etcd.peerApiPort }} - name: peer - selector: - {{- include "etcd.selectorLabels" . 
| nindent 4 }} -{{- end }} diff --git a/packages/system/kamaji/charts/kamaji/templates/etcd_sts.yaml b/packages/system/kamaji/charts/kamaji/templates/etcd_sts.yaml deleted file mode 100644 index 516873bf..00000000 --- a/packages/system/kamaji/charts/kamaji/templates/etcd_sts.yaml +++ /dev/null @@ -1,101 +0,0 @@ -{{- if .Values.etcd.deploy }} -apiVersion: apps/v1 -kind: StatefulSet -metadata: - labels: - {{- include "etcd.labels" . | nindent 4 }} - name: {{ include "etcd.fullname" . }} - namespace: {{ .Release.Namespace }} -spec: - serviceName: {{ include "etcd.serviceName" . }} - selector: - matchLabels: - {{- include "etcd.selectorLabels" . | nindent 6 }} - replicas: 3 - template: - metadata: - name: etcd - labels: - {{- include "etcd.selectorLabels" . | nindent 8 }} - spec: - volumes: - - name: certs - secret: - secretName: {{ include "etcd.caSecretName" . }} - {{- with .Values.etcd.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: etcd - image: {{ .Values.etcd.image.repository }}:{{ .Values.etcd.image.tag | default "v3.5.4" }} - imagePullPolicy: {{ .Values.etcd.image.pullPolicy }} - ports: - - containerPort: 2379 - name: client - - containerPort: 2380 - name: peer - volumeMounts: - - name: data - mountPath: /var/run/etcd - - name: certs - mountPath: /etc/etcd/pki - command: - - etcd - - --data-dir=/var/run/etcd - - --name=$(POD_NAME) - - --initial-cluster-state=new - - --initial-cluster={{ include "etcd.initialCluster" . 
}} - - --initial-advertise-peer-urls=https://$(POD_NAME).etcd.$(POD_NAMESPACE).svc.cluster.local:2380 - - --advertise-client-urls=https://$(POD_NAME).etcd.$(POD_NAMESPACE).svc.cluster.local:2379 - - --initial-cluster-token=kamaji - - --listen-client-urls=https://0.0.0.0:2379 - - --listen-metrics-urls=http://0.0.0.0:2381 - - --listen-peer-urls=https://0.0.0.0:2380 - - --client-cert-auth=true - - --peer-client-cert-auth=true - - --trusted-ca-file=/etc/etcd/pki/ca.crt - - --cert-file=/etc/etcd/pki/server.pem - - --key-file=/etc/etcd/pki/server-key.pem - - --peer-trusted-ca-file=/etc/etcd/pki/ca.crt - - --peer-cert-file=/etc/etcd/pki/peer.pem - - --peer-key-file=/etc/etcd/pki/peer-key.pem - - --auto-compaction-mode=periodic - - --auto-compaction-retention=5m - - --snapshot-count=10000 - - --quota-backend-bytes=8589934592 - - --v=8 - env: - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - {{- with .Values.etcd.livenessProbe }} - livenessProbe: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.etcd.startupProbe }} - startupProbe: - {{- toYaml . | nindent 12 }} - {{- end }} - volumeClaimTemplates: - - metadata: - name: data - {{- with .Values.etcd.persistence.customAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - spec: - storageClassName: {{ .Values.etcd.persistence.storageClassName }} - accessModes: - {{- range .Values.etcd.persistence.accessModes }} - - {{ . 
| quote }} - {{- end }} - resources: - requests: - storage: {{ .Values.etcd.persistence.size }} -{{- end }} diff --git a/packages/system/kamaji/charts/kamaji/templates/mutatingwebhookconfiguration.yaml b/packages/system/kamaji/charts/kamaji/templates/mutatingwebhookconfiguration.yaml index f68843eb..7fa4237d 100644 --- a/packages/system/kamaji/charts/kamaji/templates/mutatingwebhookconfiguration.yaml +++ b/packages/system/kamaji/charts/kamaji/templates/mutatingwebhookconfiguration.yaml @@ -8,23 +8,4 @@ metadata: {{- include "kamaji.labels" $data | nindent 4 }} name: kamaji-mutating-webhook-configuration webhooks: - - admissionReviewVersions: - - v1 - clientConfig: - service: - name: {{ include "kamaji.webhookServiceName" . }} - namespace: {{ .Release.Namespace }} - path: /mutate-kamaji-clastix-io-v1alpha1-tenantcontrolplane - failurePolicy: Fail - name: mtenantcontrolplane.kb.io - rules: - - apiGroups: - - kamaji.clastix.io - apiVersions: - - v1alpha1 - operations: - - CREATE - - UPDATE - resources: - - tenantcontrolplanes - sideEffects: None \ No newline at end of file +{{ tpl (.Files.Get "controller-gen/mutating-webhook.yaml") . 
}} diff --git a/packages/system/kamaji/charts/kamaji/templates/rbac.yaml b/packages/system/kamaji/charts/kamaji/templates/rbac.yaml index 8fb939c3..aa4301db 100644 --- a/packages/system/kamaji/charts/kamaji/templates/rbac.yaml +++ b/packages/system/kamaji/charts/kamaji/templates/rbac.yaml @@ -54,122 +54,7 @@ metadata: creationTimestamp: null name: kamaji-manager-role rules: -- apiGroups: - - apps - resources: - - deployments - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - batch - resources: - - jobs - verbs: - - create - - delete - - get - - list - - watch -- apiGroups: - - "" - resources: - - configmaps - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - "" - resources: - - secrets - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - "" - resources: - - services - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - kamaji.clastix.io - resources: - - datastores - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - kamaji.clastix.io - resources: - - datastores/status - verbs: - - get - - patch - - update -- apiGroups: - - kamaji.clastix.io - resources: - - tenantcontrolplanes - verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - kamaji.clastix.io - resources: - - tenantcontrolplanes/finalizers - verbs: - - update -- apiGroups: - - kamaji.clastix.io - resources: - - tenantcontrolplanes/status - verbs: - - get - - patch - - update -- apiGroups: - - networking.k8s.io - resources: - - ingresses - verbs: - - create - - delete - - get - - list - - patch - - update - - watch +{{ tpl (.Files.Get "controller-gen/clusterrole.yaml") . 
}} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/packages/system/kamaji/charts/kamaji/templates/validatingwebhookconfiguration.yaml b/packages/system/kamaji/charts/kamaji/templates/validatingwebhookconfiguration.yaml index a347443f..eacf9d72 100644 --- a/packages/system/kamaji/charts/kamaji/templates/validatingwebhookconfiguration.yaml +++ b/packages/system/kamaji/charts/kamaji/templates/validatingwebhookconfiguration.yaml @@ -8,84 +8,4 @@ metadata: {{- include "kamaji.labels" $data | nindent 4 }} name: kamaji-validating-webhook-configuration webhooks: - - admissionReviewVersions: - - v1 - clientConfig: - service: - name: {{ include "kamaji.webhookServiceName" . }} - namespace: {{ .Release.Namespace }} - path: /telemetry - failurePolicy: Ignore - name: telemetry.kamaji.clastix.io - rules: - - apiGroups: - - kamaji.clastix.io - apiVersions: - - v1alpha1 - operations: - - CREATE - - UPDATE - - DELETE - resources: - - tenantcontrolplanes - sideEffects: None - - admissionReviewVersions: - - v1 - clientConfig: - service: - name: {{ include "kamaji.webhookServiceName" . }} - namespace: {{ .Release.Namespace }} - path: /validate--v1-secret - failurePolicy: Ignore - name: vdatastoresecrets.kb.io - rules: - - apiGroups: - - "" - apiVersions: - - v1 - operations: - - DELETE - resources: - - secrets - sideEffects: None - - admissionReviewVersions: - - v1 - clientConfig: - service: - name: {{ include "kamaji.webhookServiceName" . }} - namespace: {{ .Release.Namespace }} - path: /validate-kamaji-clastix-io-v1alpha1-datastore - failurePolicy: Fail - name: vdatastore.kb.io - rules: - - apiGroups: - - kamaji.clastix.io - apiVersions: - - v1alpha1 - operations: - - CREATE - - UPDATE - - DELETE - resources: - - datastores - sideEffects: None - - admissionReviewVersions: - - v1 - clientConfig: - service: - name: {{ include "kamaji.webhookServiceName" . 
}} - namespace: {{ .Release.Namespace }} - path: /validate-kamaji-clastix-io-v1alpha1-tenantcontrolplane - failurePolicy: Fail - name: vtenantcontrolplane.kb.io - rules: - - apiGroups: - - kamaji.clastix.io - apiVersions: - - v1alpha1 - operations: - - CREATE - - UPDATE - resources: - - tenantcontrolplanes - sideEffects: None \ No newline at end of file +{{ tpl (.Files.Get "controller-gen/validating-webhook.yaml") . }} diff --git a/packages/system/kamaji/charts/kamaji/values.yaml b/packages/system/kamaji/charts/kamaji/values.yaml index 03d4f635..21b529e0 100644 --- a/packages/system/kamaji/charts/kamaji/values.yaml +++ b/packages/system/kamaji/charts/kamaji/values.yaml @@ -15,74 +15,10 @@ image: # -- A list of extra arguments to add to the kamaji controller default ones extraArgs: [] - serviceMonitor: # -- Toggle the ServiceMonitor true if you have Prometheus Operator installed and configured enabled: false -etcd: - # -- Install an etcd with enabled multi-tenancy along with Kamaji - deploy: true - - # -- The peer API port which servers are listening to. - peerApiPort: 2380 - - # -- The client request port. 
- port: 2379 - - # -- Install specific etcd image - image: - repository: quay.io/coreos/etcd - tag: "v3.5.6" - pullPolicy: IfNotPresent - - # -- The livenessProbe for the etcd container - livenessProbe: - failureThreshold: 8 - httpGet: - path: /health?serializable=true - port: 2381 - scheme: HTTP - initialDelaySeconds: 10 - periodSeconds: 10 - timeoutSeconds: 15 - - serviceAccount: - # -- Create a ServiceAccount, required to install and provision the etcd backing storage (default: true) - create: true - # -- Define the ServiceAccount name to use during the setup and provision of the etcd backing storage (default: "") - name: "" - persistence: - size: 10Gi - storageClassName: "" - accessModes: - - ReadWriteOnce - # -- The custom annotations to add to the PVC - customAnnotations: {} - # volumeType: local - - # -- (array) Kubernetes affinity rules to apply to Kamaji etcd pods - tolerations: [] - - overrides: - caSecret: - # -- Name of the secret which contains CA's certificate and private key. (default: "etcd-certs") - name: etcd-certs - # -- Namespace of the secret which contains CA's certificate and private key. (default: "kamaji-system") - namespace: kamaji-system - clientSecret: - # -- Name of the secret which contains ETCD client certificates. (default: "root-client-certs") - name: root-client-certs - # -- Name of the namespace where the secret which contains ETCD client certificates is. (default: "kamaji-system") - namespace: kamaji-system - # -- (map) Dictionary of the endpoints for the etcd cluster's members, key is the name of the etcd server. Don't define the protocol (TLS is automatically inflected), or any port, inflected from .etcd.peerApiPort value. - endpoints: - etcd-0: etcd-0.etcd.kamaji-system.svc.cluster.local - etcd-1: etcd-1.etcd.kamaji-system.svc.cluster.local - etcd-2: etcd-2.etcd.kamaji-system.svc.cluster.local - # -- ETCD Compaction interval (e.g. "5m0s"). 
(default: "0" (disabled)) - compactionInterval: 0 - # -- The address the probe endpoint binds to. (default ":8081") healthProbeBindAddress: ":8081" @@ -102,7 +38,7 @@ readinessProbe: initialDelaySeconds: 5 periodSeconds: 10 -# -- (string) The address the metric endpoint binds to. (default ":8080") +# -- The address the metric endpoint binds to. (default ":8080") metricsBindAddress: ":8080" imagePullSecrets: [] @@ -156,72 +92,20 @@ affinity: {} temporaryDirectoryPath: "/tmp/kamaji" loggingDevel: - # -- (string) Development Mode defaults(encoder=consoleEncoder,logLevel=Debug,stackTraceLevel=Warn). Production Mode defaults(encoder=jsonEncoder,logLevel=Info,stackTraceLevel=Error) (default false) + # -- Development Mode defaults(encoder=consoleEncoder,logLevel=Debug,stackTraceLevel=Warn). Production Mode defaults(encoder=jsonEncoder,logLevel=Info,stackTraceLevel=Error) (default false) enable: false -datastore: - # -- (bool) Enable the Kamaji Datastore creation (default=true) - enabled: true - # -- (string) The Datastore name override, if empty and enabled=true defaults to `default`, if enabled=false, this is the name of the Datastore to connect to. - nameOverride: - # -- (string) The Kamaji Datastore driver, supported: etcd, MySQL, PostgreSQL (defaults=etcd). - driver: etcd - # -- (array) List of endpoints of the selected Datastore. When letting the Chart install the etcd datastore, this field is populated automatically. - endpoints: [] - basicAuth: - usernameSecret: - # -- The name of the Secret containing the username used to connect to the relational database. - name: - # -- The namespace of the Secret containing the username used to connect to the relational database. - namespace: - # -- The Secret key where the data is stored. - keyPath: - passwordSecret: - # -- The name of the Secret containing the password used to connect to the relational database. - name: - # -- The namespace of the Secret containing the password used to connect to the relational database. 
- namespace: - # -- The Secret key where the data is stored. - keyPath: - tlsConfig: - enabled: true - certificateAuthority: - certificate: - # -- Name of the Secret containing the CA required to establish the mandatory SSL/TLS connection to the datastore. - name: - # -- Namespace of the Secret containing the CA required to establish the mandatory SSL/TLS connection to the datastore. - namespace: - # -- Key of the Secret which contains the content of the certificate. - keyPath: - privateKey: - # -- Name of the Secret containing the CA private key required to establish the mandatory SSL/TLS connection to the datastore. - name: - # -- Namespace of the Secret containing the CA private key required to establish the mandatory SSL/TLS connection to the datastore. - namespace: - # -- Key of the Secret which contains the content of the private key. - keyPath: - clientCertificate: - certificate: - # -- Name of the Secret containing the client certificate required to establish the mandatory SSL/TLS connection to the datastore. - name: - # -- Namespace of the Secret containing the client certificate required to establish the mandatory SSL/TLS connection to the datastore. - namespace: - # -- Key of the Secret which contains the content of the certificate. - keyPath: - privateKey: - # -- Name of the Secret containing the client certificate private key required to establish the mandatory SSL/TLS connection to the datastore. - name: - # -- Namespace of the Secret containing the client certificate private key required to establish the mandatory SSL/TLS connection to the datastore. - namespace: - # -- Key of the Secret which contains the content of the private key. - keyPath: +# -- Specify the default DataStore name for the Kamaji instance. 
+defaultDatastoreName: default -cfssl: - image: - repository: cfssl/cfssl - tag: latest +kamaji-etcd: + deploy: true + fullnameOverride: kamaji-etcd + datastore: + enabled: true + name: default # -- Disable the analytics traces collection telemetry: - disabled: false + disabled: false \ No newline at end of file diff --git a/packages/system/kamaji/images/kamaji/Dockerfile b/packages/system/kamaji/images/kamaji/Dockerfile index 966226d7..e453967e 100644 --- a/packages/system/kamaji/images/kamaji/Dockerfile +++ b/packages/system/kamaji/images/kamaji/Dockerfile @@ -1,25 +1 @@ -# Build the manager binary -FROM golang:1.22 as builder - -ARG VERSION=v1.0.0 -ARG TARGETOS TARGETARCH - -WORKDIR /workspace - -RUN curl -sSL https://github.com/clastix/kamaji/archive/refs/tags/v1.0.0.tar.gz | tar -xzvf- --strip=1 - -COPY patches /patches -RUN git apply /patches/enable-gc.diff - -RUN CGO_ENABLED=0 GOOS=linux GOARCH=$TARGETARCH go build \ - -ldflags "-X github.com/clastix/kamaji/internal.GitRepo=$GIT_REPO -X github.com/clastix/kamaji/internal.GitTag=$GIT_LAST_TAG -X github.com/clastix/kamaji/internal.GitCommit=$GIT_HEAD_COMMIT -X github.com/clastix/kamaji/internal.GitDirty=$GIT_MODIFIED -X github.com/clastix/kamaji/internal.BuildTime=$BUILD_DATE" \ - -a -o kamaji main.go - -# Use distroless as minimal base image to package the manager binary -# Refer to https://github.com/GoogleContainerTools/distroless for more details -FROM gcr.io/distroless/static:nonroot -WORKDIR / -COPY --from=builder /workspace/kamaji . 
-USER 65532:65532 - -ENTRYPOINT ["/kamaji"] +FROM clastix/kamaji:edge-24.9.2 diff --git a/packages/system/kamaji/images/kamaji/patches/enable-gc.diff b/packages/system/kamaji/images/kamaji/patches/enable-gc.diff deleted file mode 100644 index 20159f53..00000000 --- a/packages/system/kamaji/images/kamaji/patches/enable-gc.diff +++ /dev/null @@ -1,30 +0,0 @@ -diff --git a/internal/kubeadm/uploadconfig.go b/internal/kubeadm/uploadconfig.go -index 0dc9e71..e516390 100644 ---- a/internal/kubeadm/uploadconfig.go -+++ b/internal/kubeadm/uploadconfig.go -@@ -98,21 +98,15 @@ func getKubeletConfigmapContent(kubeletConfiguration KubeletConfiguration) ([]by - CacheUnauthorizedTTL: zeroDuration, - }, - }, -- CgroupDriver: kubeletConfiguration.TenantControlPlaneCgroupDriver, -- ClusterDNS: kubeletConfiguration.TenantControlPlaneDNSServiceIPs, -- ClusterDomain: kubeletConfiguration.TenantControlPlaneDomain, -- CPUManagerReconcilePeriod: zeroDuration, -- EvictionHard: map[string]string{ -- "imagefs.available": "0%", -- "nodefs.available": "0%", -- "nodefs.inodesFree": "0%", -- }, -+ CgroupDriver: kubeletConfiguration.TenantControlPlaneCgroupDriver, -+ ClusterDNS: kubeletConfiguration.TenantControlPlaneDNSServiceIPs, -+ ClusterDomain: kubeletConfiguration.TenantControlPlaneDomain, -+ CPUManagerReconcilePeriod: zeroDuration, - EvictionPressureTransitionPeriod: zeroDuration, - FileCheckFrequency: zeroDuration, - HealthzBindAddress: "127.0.0.1", - HealthzPort: pointer.To(int32(10248)), - HTTPCheckFrequency: zeroDuration, -- ImageGCHighThresholdPercent: pointer.To(int32(100)), - NodeStatusUpdateFrequency: zeroDuration, - NodeStatusReportFrequency: zeroDuration, - RotateCertificates: true, diff --git a/packages/system/kamaji/values.yaml b/packages/system/kamaji/values.yaml index d03fca66..8adc2918 100644 --- a/packages/system/kamaji/values.yaml +++ b/packages/system/kamaji/values.yaml @@ -3,7 +3,7 @@ kamaji: deploy: false image: pullPolicy: IfNotPresent - tag: 
v0.15.0@sha256:631b38ed012edab7ba3044ea67a57ab7cd303b8176d0d8a9b7f87e501c9aa085 + tag: latest@sha256:bb45d953a8ba46a19c8941ccc9fc8498d91435c77db439d8b1d6bde9fea8802a repository: ghcr.io/aenix-io/cozystack/kamaji resources: limits: From 35536bb26aa37eda85eb43ee1b73ab7cd9149e95 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Thu, 26 Sep 2024 16:50:59 +0200 Subject: [PATCH 08/41] Update Kubernetes service, build containers and enable externalTrafficPolicy: Local (#364) - Update Kubernetes service - Build kubevirt-cloud-provider, kubevirt-csi-driver, ubuntu-container-disk - Enable support for `externalTrafficPolicy: Local` This PR includes patch from upstream project https://github.com/kubevirt/cloud-provider-kubevirt/pull/330 Signed-off-by: Andrei Kvapil --- packages/apps/kubernetes/Chart.yaml | 2 +- packages/apps/kubernetes/Makefile | 46 +- .../kubernetes/images/cluster-autoscaler.tag | 1 + .../images/cluster-autoscaler/Dockerfile | 17 + .../images/kubevirt-cloud-provider.tag | 1 + .../images/kubevirt-cloud-provider/Dockerfile | 21 + .../external-traffic-policy-local.diff | 1889 +++++++++++++++++ .../kubernetes/images/kubevirt-csi-driver.tag | 1 + .../images/kubevirt-csi-driver/Dockerfile | 25 + .../kubernetes/templates/cloud-config.yaml | 2 + .../cluster-autoscaler/deployment.yaml | 2 +- .../apps/kubernetes/templates/csi/deploy.yaml | 2 +- .../kubernetes/templates/kccm/kccm_role.yaml | 6 + .../kubernetes/templates/kccm/manager.yaml | 2 +- .../capi-providers/templates/providers.yaml | 8 +- 15 files changed, 2016 insertions(+), 9 deletions(-) create mode 100644 packages/apps/kubernetes/images/cluster-autoscaler.tag create mode 100644 packages/apps/kubernetes/images/cluster-autoscaler/Dockerfile create mode 100644 packages/apps/kubernetes/images/kubevirt-cloud-provider.tag create mode 100644 packages/apps/kubernetes/images/kubevirt-cloud-provider/Dockerfile create mode 100644 
packages/apps/kubernetes/images/kubevirt-cloud-provider/patches/external-traffic-policy-local.diff create mode 100644 packages/apps/kubernetes/images/kubevirt-csi-driver.tag create mode 100644 packages/apps/kubernetes/images/kubevirt-csi-driver/Dockerfile diff --git a/packages/apps/kubernetes/Chart.yaml b/packages/apps/kubernetes/Chart.yaml index fcd0f184..7265232a 100644 --- a/packages/apps/kubernetes/Chart.yaml +++ b/packages/apps/kubernetes/Chart.yaml @@ -16,7 +16,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.10.0 +version: 0.11.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/packages/apps/kubernetes/Makefile b/packages/apps/kubernetes/Makefile index 5c90cc15..38caaa7c 100644 --- a/packages/apps/kubernetes/Makefile +++ b/packages/apps/kubernetes/Makefile @@ -1,4 +1,5 @@ UBUNTU_CONTAINER_DISK_TAG = v1.30.1 +KUBERNETES_PKG_TAG = $(shell awk '$$1 == "version:" {print $$2}' Chart.yaml) include ../../../scripts/common-envs.mk include ../../../scripts/package.mk @@ -6,7 +7,7 @@ include ../../../scripts/package.mk generate: readme-generator -v values.yaml -s values.schema.json -r README.md -image: image-ubuntu-container-disk +image: image-ubuntu-container-disk image-kubevirt-cloud-provider image-kubevirt-csi-driver image-cluster-autoscaler image-ubuntu-container-disk: docker buildx build --platform linux/amd64 --build-arg ARCH=amd64 images/ubuntu-container-disk \ @@ -21,3 +22,46 @@ image-ubuntu-container-disk: echo "$(REGISTRY)/ubuntu-container-disk:$(call settag,$(UBUNTU_CONTAINER_DISK_TAG))@$$(yq e '."containerimage.digest"' images/ubuntu-container-disk.json -o json -r)" \ > 
images/ubuntu-container-disk.tag rm -f images/ubuntu-container-disk.json + +image-kubevirt-cloud-provider: + docker buildx build --platform linux/amd64 --build-arg ARCH=amd64 images/kubevirt-cloud-provider \ + --provenance false \ + --tag $(REGISTRY)/kubevirt-cloud-provider:$(call settag,$(KUBERNETES_PKG_TAG)) \ + --tag $(REGISTRY)/kubevirt-cloud-provider:$(call settag,$(KUBERNETES_PKG_TAG)-$(TAG)) \ + --cache-from type=registry,ref=$(REGISTRY)/kubevirt-cloud-provider:latest \ + --cache-to type=inline \ + --metadata-file images/kubevirt-cloud-provider.json \ + --push=$(PUSH) \ + --load=$(LOAD) + echo "$(REGISTRY)/kubevirt-cloud-provider:$(call settag,$(KUBERNETES_PKG_TAG))@$$(yq e '."containerimage.digest"' images/kubevirt-cloud-provider.json -o json -r)" \ + > images/kubevirt-cloud-provider.tag + rm -f images/kubevirt-cloud-provider.json + +image-kubevirt-csi-driver: + docker buildx build --platform linux/amd64 --build-arg ARCH=amd64 images/kubevirt-csi-driver \ + --provenance false \ + --tag $(REGISTRY)/kubevirt-csi-driver:$(call settag,$(KUBERNETES_PKG_TAG)) \ + --tag $(REGISTRY)/kubevirt-csi-driver:$(call settag,$(KUBERNETES_PKG_TAG)-$(TAG)) \ + --cache-from type=registry,ref=$(REGISTRY)/kubevirt-csi-driver:latest \ + --cache-to type=inline \ + --metadata-file images/kubevirt-csi-driver.json \ + --push=$(PUSH) \ + --load=$(LOAD) + echo "$(REGISTRY)/kubevirt-csi-driver:$(call settag,$(KUBERNETES_PKG_TAG))@$$(yq e '."containerimage.digest"' images/kubevirt-csi-driver.json -o json -r)" \ + > images/kubevirt-csi-driver.tag + rm -f images/kubevirt-csi-driver.json + + +image-cluster-autoscaler: + docker buildx build --platform linux/amd64 --build-arg ARCH=amd64 images/cluster-autoscaler \ + --provenance false \ + --tag $(REGISTRY)/cluster-autoscaler:$(call settag,$(KUBERNETES_PKG_TAG)) \ + --tag $(REGISTRY)/cluster-autoscaler:$(call settag,$(KUBERNETES_PKG_TAG)-$(TAG)) \ + --cache-from type=registry,ref=$(REGISTRY)/cluster-autoscaler:latest \ + --cache-to type=inline 
\ + --metadata-file images/cluster-autoscaler.json \ + --push=$(PUSH) \ + --load=$(LOAD) + echo "$(REGISTRY)/cluster-autoscaler:$(call settag,$(KUBERNETES_PKG_TAG))@$$(yq e '."containerimage.digest"' images/cluster-autoscaler.json -o json -r)" \ + > images/cluster-autoscaler.tag + rm -f images/cluster-autoscaler.json diff --git a/packages/apps/kubernetes/images/cluster-autoscaler.tag b/packages/apps/kubernetes/images/cluster-autoscaler.tag new file mode 100644 index 00000000..12fa6d8a --- /dev/null +++ b/packages/apps/kubernetes/images/cluster-autoscaler.tag @@ -0,0 +1 @@ +ghcr.io/aenix-io/cozystack/cluster-autoscaler:latest@sha256:7f617de5a24de790a15d9e97c6287ff2b390922e6e74c7a665cbf498f634514d diff --git a/packages/apps/kubernetes/images/cluster-autoscaler/Dockerfile b/packages/apps/kubernetes/images/cluster-autoscaler/Dockerfile new file mode 100644 index 00000000..41b4c291 --- /dev/null +++ b/packages/apps/kubernetes/images/cluster-autoscaler/Dockerfile @@ -0,0 +1,17 @@ +# Source: https://raw.githubusercontent.com/kubernetes/autoscaler/refs/heads/master/cluster-autoscaler/Dockerfile.amd64 +ARG builder_image=docker.io/library/golang:1.22.5 +ARG BASEIMAGE=gcr.io/distroless/static:nonroot-amd64 +FROM ${builder_image} AS builder +RUN git clone https://github.com/kubernetes/autoscaler /src/autoscaler \ + && cd /src/autoscaler/cluster-autoscaler \ + && git checkout cluster-autoscaler-1.31.0 + +WORKDIR /src/autoscaler/cluster-autoscaler +RUN make build + +FROM $BASEIMAGE +LABEL maintainer="Marcin Wielgus " + +COPY --from=builder /src/autoscaler/cluster-autoscaler/cluster-autoscaler-amd64 /cluster-autoscaler +WORKDIR / +CMD ["/cluster-autoscaler"] diff --git a/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag b/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag new file mode 100644 index 00000000..3a925cd7 --- /dev/null +++ b/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag @@ -0,0 +1 @@ 
+ghcr.io/aenix-io/cozystack/kubevirt-cloud-provider:latest@sha256:735aa8092501fc0f2904b685b15bc0137ea294cb08301ca1185d3dec5f467f0f diff --git a/packages/apps/kubernetes/images/kubevirt-cloud-provider/Dockerfile b/packages/apps/kubernetes/images/kubevirt-cloud-provider/Dockerfile new file mode 100644 index 00000000..fc6cb30c --- /dev/null +++ b/packages/apps/kubernetes/images/kubevirt-cloud-provider/Dockerfile @@ -0,0 +1,21 @@ +# Source: https://github.com/kubevirt/cloud-provider-kubevirt/blob/main/build/images/kubevirt-cloud-controller-manager/Dockerfile +FROM --platform=linux/amd64 golang:1.20.6 AS builder + +RUN git clone https://github.com/kubevirt/cloud-provider-kubevirt /go/src/kubevirt.io/cloud-provider-kubevirt \ + && cd /go/src/kubevirt.io/cloud-provider-kubevirt \ + && git checkout adbd6c27468b86b020cf38490e84f124ef24ab62 + +WORKDIR /go/src/kubevirt.io/cloud-provider-kubevirt + +# see: https://github.com/kubevirt/cloud-provider-kubevirt/pull/291 +ADD patches /patches +RUN git apply /patches/external-traffic-policy-local.diff +RUN go get 'k8s.io/endpointslice/util@v0.28' 'k8s.io/apiserver@v0.28' +RUN go mod tidy +RUN go mod vendor + +RUN CGO_ENABLED=0 GOOS=linux go build -mod=vendor -ldflags="-s -w" -o bin/kubevirt-cloud-controller-manager ./cmd/kubevirt-cloud-controller-manager + +FROM registry.access.redhat.com/ubi9/ubi-micro +COPY --from=builder /go/src/kubevirt.io/cloud-provider-kubevirt/bin/kubevirt-cloud-controller-manager /bin/kubevirt-cloud-controller-manager +ENTRYPOINT [ "/bin/kubevirt-cloud-controller-manager" ] diff --git a/packages/apps/kubernetes/images/kubevirt-cloud-provider/patches/external-traffic-policy-local.diff b/packages/apps/kubernetes/images/kubevirt-cloud-provider/patches/external-traffic-policy-local.diff new file mode 100644 index 00000000..6e71ad22 --- /dev/null +++ b/packages/apps/kubernetes/images/kubevirt-cloud-provider/patches/external-traffic-policy-local.diff @@ -0,0 +1,1889 @@ +diff --git 
a/cmd/kubevirt-cloud-controller-manager/kubevirteps.go b/cmd/kubevirt-cloud-controller-manager/kubevirteps.go
+new file mode 100644
+index 00000000..f8a5999b
+--- /dev/null
++++ b/cmd/kubevirt-cloud-controller-manager/kubevirteps.go
+@@ -0,0 +1,120 @@
++package main
++
++import (
++	"context"
++	"fmt"
++
++	"k8s.io/client-go/dynamic"
++	"k8s.io/client-go/kubernetes"
++	"k8s.io/client-go/rest"
++	"k8s.io/client-go/tools/clientcmd"
++	cloudprovider "k8s.io/cloud-provider"
++	"k8s.io/cloud-provider/app"
++	"k8s.io/cloud-provider/app/config"
++	genericcontrollermanager "k8s.io/controller-manager/app"
++	"k8s.io/controller-manager/controller"
++	"k8s.io/klog/v2"
++	"kubevirt.io/cloud-provider-kubevirt/pkg/controller/kubevirteps"
++	kubevirt "kubevirt.io/cloud-provider-kubevirt/pkg/provider"
++)
++
++// StartKubevirtCloudControllerWrapper adapts startKubevirtCloudController to the
++// app.InitFunc signature by closing over the init context, the completed
++// config and the cloud provider.
++func StartKubevirtCloudControllerWrapper(initContext app.ControllerInitContext, completedConfig *config.CompletedConfig, cloud cloudprovider.Interface) app.InitFunc {
++	return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) {
++		return startKubevirtCloudController(ctx, initContext, controllerContext, completedConfig, cloud)
++	}
++}
++
++// infraRESTConfig returns the REST config for the infra cluster: the in-cluster
++// config when the cloud config has an empty Kubeconfig, otherwise the config
++// parsed from the kubeconfig returned by GetInfraKubeconfig.
++func infraRESTConfig(kubevirtCloud *kubevirt.Cloud) (*rest.Config, error) {
++	if kubevirtCloud.GetCloudConfig().Kubeconfig == "" {
++		restConfig, err := rest.InClusterConfig()
++		if err != nil {
++			klog.Errorf("Failed to get in-cluster config: %v", err)
++			return nil, err
++		}
++		return restConfig, nil
++	}
++
++	infraKubeConfig, err := kubevirtCloud.GetInfraKubeconfig()
++	if err != nil {
++		klog.Errorf("Failed to get infra kubeconfig: %v", err)
++		return nil, err
++	}
++	clientConfig, err := clientcmd.NewClientConfigFromBytes([]byte(infraKubeConfig))
++	if err != nil {
++		klog.Errorf("Failed to create client config from infra kubeconfig: %v", err)
++		return nil, err
++	}
++	restConfig, err := clientConfig.ClientConfig()
++	if err != nil {
++		klog.Errorf("Failed to create rest config for infra cluster: %v", err)
++		return nil, err
++	}
++	return restConfig, nil
++}
++
++// startKubevirtCloudController builds the tenant and infra clients, initializes
++// the KubevirtEPS controller and runs it in a goroutine with one worker.
++//
++// It returns (nil, false, nil) both when the controller is disabled in the
++// cloud config and after the controller was started successfully; an error is
++// returned when the cloud is not the KubeVirt provider or any client or the
++// controller itself cannot be set up.
++func startKubevirtCloudController(
++	ctx context.Context,
++	initContext app.ControllerInitContext,
++	controllerContext genericcontrollermanager.ControllerContext,
++	ccmConfig *config.CompletedConfig,
++	cloud cloudprovider.Interface) (controller.Interface, bool, error) {
++
++	// Pass the arguments straight to Infof; wrapping them in fmt.Sprintf would
++	// turn the already formatted text into a format string.
++	klog.Infof("Starting %s.", kubevirteps.ControllerName)
++
++	kubevirtCloud, ok := cloud.(*kubevirt.Cloud)
++	if !ok {
++		err := fmt.Errorf("%s does not support %v provider", kubevirteps.ControllerName, cloud.ProviderName())
++		return nil, false, err
++	}
++
++	// The controller is opt-in: a missing or false EnableEPSController disables it.
++	enabled := kubevirtCloud.GetCloudConfig().LoadBalancer.EnableEPSController
++	if enabled == nil || !*enabled {
++		klog.Infof("%s is not enabled.", kubevirteps.ControllerName)
++		return nil, false, nil
++	}
++
++	klog.Infof("Setting up tenant client.")
++
++	// This is the kubeconfig for the tenant (in-cluster) cluster
++	var tenantClient kubernetes.Interface
++	tenantClient, err := kubernetes.NewForConfig(ccmConfig.Complete().Kubeconfig)
++	if err != nil {
++		return nil, false, err
++	}
++
++	klog.Infof("Setting up infra client.")
++
++	// This is the kubeconfig for the infra cluster
++	restConfig, err := infraRESTConfig(kubevirtCloud)
++	if err != nil {
++		return nil, false, err
++	}
++
++	// create new client for the infra cluster
++	var infraClient kubernetes.Interface
++	infraClient, err = kubernetes.NewForConfig(restConfig)
++	if err != nil {
++		klog.Errorf("Failed to create infra cluster client: %v", err)
++		return nil, false, err
++	}
++
++	var infraDynamic dynamic.Interface
++	infraDynamic, err = dynamic.NewForConfig(restConfig)
++	if err != nil {
++		klog.Errorf("Failed to create dynamic client for infra cluster: %v", err)
++		return nil, false, err
++	}
++
++	klog.Infof("Setting up kubevirtEPSController")
++
++	kubevirtEPSController := kubevirteps.NewKubevirtEPSController(tenantClient, infraClient, infraDynamic, kubevirtCloud.Namespace())
++
++	klog.Infof("Initializing kubevirtEPSController")
++
++	err = kubevirtEPSController.Init()
++	if err != nil {
++		klog.Errorf("Failed to initialize kubevirtEPSController: %v", err)
++		return nil, false, err
++	}
++
++	klog.Infof("Running kubevirtEPSController")
++	go kubevirtEPSController.Run(1, controllerContext.Stop, controllerContext.ControllerManagerMetrics)
++
++	return nil, false, nil
++}
+diff --git a/cmd/kubevirt-cloud-controller-manager/main.go b/cmd/kubevirt-cloud-controller-manager/main.go
+index 8e2c2467..a6f8d749 100644
+--- a/cmd/kubevirt-cloud-controller-manager/main.go
++++ b/cmd/kubevirt-cloud-controller-manager/main.go
+@@ -21,6 +21,7 @@ limitations under the License.
+ package main
+ 
+ import (
++	"kubevirt.io/cloud-provider-kubevirt/pkg/controller/kubevirteps"
+ 	"os"
+ 
+ 	"k8s.io/apimachinery/pkg/util/wait"
+@@ -33,8 +34,6 @@ import (
+ 	_ "k8s.io/component-base/metrics/prometheus/clientgo" // load all the prometheus client-go plugins
+ 	_ "k8s.io/component-base/metrics/prometheus/version"  // for version metric registration
+ 	"k8s.io/klog/v2"
+-
+-	_ "kubevirt.io/cloud-provider-kubevirt/pkg/provider"
+ )
+ 
+ func main() {
+@@ -46,6 +45,11 @@ func main() {
+ 	fss := cliflag.NamedFlagSets{}
+ 	controllerInitializers := app.DefaultInitFuncConstructors
+ 
++	// add kubevirt-cloud-controller to the list of controllers
++	controllerInitializers[kubevirteps.ControllerName.String()] = app.ControllerInitFuncConstructor{
++		Constructor: StartKubevirtCloudControllerWrapper,
++	}
++
+ 	command := app.NewCloudControllerManagerCommand(ccmOptions, cloudInitializer, controllerInitializers, fss, wait.NeverStop)
+ 	code := cli.Run(command)
+ 	os.Exit(code)
+diff --git a/pkg/controller/kubevirteps/kubevirteps_controller.go b/pkg/controller/kubevirteps/kubevirteps_controller.go
+new file mode 100644
+index 00000000..055b35ab
+--- /dev/null
++++ b/pkg/controller/kubevirteps/kubevirteps_controller.go
+@@ -0,0 +1,682 @@
++package kubevirteps
++
++import (
++	"context"
++	"errors"
++	"fmt"
++	"strings"
++	"time"
++
++	v1 "k8s.io/api/core/v1"
++	discovery "k8s.io/api/discovery/v1"
++	apiequality "k8s.io/apimachinery/pkg/api/equality"
++	k8serrors "k8s.io/apimachinery/pkg/api/errors"
++	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
++	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
++	"k8s.io/apimachinery/pkg/runtime"
++	"k8s.io/apimachinery/pkg/runtime/schema"
++	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
++	"k8s.io/apimachinery/pkg/util/sets"
++	"k8s.io/apimachinery/pkg/util/wait"
++	"k8s.io/client-go/dynamic"
++	"k8s.io/client-go/informers"
++	"k8s.io/client-go/kubernetes"
++	"k8s.io/client-go/tools/cache"
++	"k8s.io/client-go/util/workqueue"
++	controllersmetrics "k8s.io/component-base/metrics/prometheus/controllers"
++	endpointsliceutil "k8s.io/endpointslice/util"
++	"k8s.io/klog/v2"
++	kubevirtv1 "kubevirt.io/api/core/v1"
++	kubevirt "kubevirt.io/cloud-provider-kubevirt/pkg/provider"
++)
++
++const (
++	// ControllerName is the name this controller registers and logs under.
++	ControllerName = controllerName("kubevirt_eps_controller")
++)
++
++// controllerName is a controller name that can be rendered with underscores
++// (String) or with dashes (dashed).
++type controllerName string
++
++// dashed returns the name with every underscore replaced by a dash.
++func (c controllerName) dashed() string {
++	// replace underscores with dashes
++	return strings.ReplaceAll(string(c), "_", "-")
++}
++
++// String returns the name unchanged.
++func (c controllerName) String() string {
++	return string(c)
++}
++
++// Controller mirrors tenant-cluster EndpointSlices into EndpointSlices of the
++// matching LoadBalancer Services in the infra cluster.
++type Controller struct {
++	tenantClient     kubernetes.Interface
++	tenantFactory    informers.SharedInformerFactory
++	tenantEPSTracker tenantEPSTracker
++
++	infraClient  kubernetes.Interface
++	infraDynamic dynamic.Interface
++	infraFactory informers.SharedInformerFactory
++
++	infraNamespace string
++	queue          workqueue.RateLimitingInterface
++	// maxRetries bounds how often a failed request is re-queued before it is dropped.
++	maxRetries int
++
++	// maxEndPointsPerSlice caps the number of endpoints written into one EndpointSlice.
++	maxEndPointsPerSlice int
++}
++
++// NewKubevirtEPSController wires a Controller with informer factories for the
++// tenant cluster (all namespaces) and the infra cluster (restricted to
++// infraNamespace), a rate-limited work queue, 25 retries and at most 100
++// endpoints per slice.
++func NewKubevirtEPSController(
++	tenantClient kubernetes.Interface,
++	infraClient kubernetes.Interface,
++	infraDynamic dynamic.Interface,
++	infraNamespace string) *Controller {
++
++	tenantFactory := informers.NewSharedInformerFactory(tenantClient, 0)
++	infraFactory := informers.NewSharedInformerFactoryWithOptions(infraClient, 0, informers.WithNamespace(infraNamespace))
++	queue := workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
++
++	return &Controller{
++		tenantClient:         tenantClient,
++		tenantFactory:        tenantFactory,
++		tenantEPSTracker:     tenantEPSTracker{},
++		infraClient:          infraClient,
++		infraDynamic:         infraDynamic,
++		infraFactory:         infraFactory,
++		infraNamespace:       infraNamespace,
++		queue:                queue,
++		maxRetries:           25,
++		maxEndPointsPerSlice: 100,
++	}
++}
++
++// ReqType identifies the kind of event that produced a Request.
++type ReqType string
++
++const (
++	AddReq    ReqType = "add"
++	UpdateReq ReqType = "update"
++	DeleteReq ReqType = "delete"
++)
++
++// Request is the work-queue item: the event type, the object it concerns and,
++// for updates, the previous version of that object.
++type Request struct {
++	ReqType ReqType
++	Obj     interface{}
++	OldObj  interface{}
++}
++
++// newRequest returns a Request populated with the given fields.
++func newRequest(reqType ReqType, obj interface{}, oldObj interface{}) *Request {
++	return &Request{
++		ReqType: reqType,
++		Obj:     obj,
++		OldObj:  oldObj,
++	}
++}
++
++// serviceFromEvent extracts the Service carried by an informer event. Delete
++// events may deliver a cache.DeletedFinalStateUnknown tombstone instead of the
++// object itself, so that wrapper is unwrapped first. ok is false when no
++// Service could be obtained.
++func serviceFromEvent(obj interface{}) (svc *v1.Service, ok bool) {
++	if tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown); isTombstone {
++		obj = tombstone.Obj
++	}
++	svc, ok = obj.(*v1.Service)
++	return svc, ok && svc != nil
++}
++
++// endpointSliceFromEvent is the EndpointSlice counterpart of serviceFromEvent.
++func endpointSliceFromEvent(obj interface{}) (eps *discovery.EndpointSlice, ok bool) {
++	if tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown); isTombstone {
++		obj = tombstone.Obj
++	}
++	eps, ok = obj.(*discovery.EndpointSlice)
++	return eps, ok && eps != nil
++}
++
++// enqueueTenantEPSEvent looks up the infra Service that belongs to the given
++// tenant EndpointSlice and queues a request of reqType for it. event is the
++// verb used in the log line ("added", "updated", "deleted"). Nothing is queued
++// when the lookup fails or finds no Service, because reconcile rejects a nil
++// Service and would only burn through its retries.
++func (c *Controller) enqueueTenantEPSEvent(reqType ReqType, event string, eps *discovery.EndpointSlice) {
++	klog.Infof("get Infra Service for Tenant EndpointSlice: %v/%v", eps.Namespace, eps.Name)
++	infraSvc, err := c.getInfraServiceFromTenantEPS(context.TODO(), eps)
++	if err != nil {
++		klog.Errorf("Failed to get Service in Infra cluster for EndpointSlice %s/%s: %v", eps.Namespace, eps.Name, err)
++		return
++	}
++	if infraSvc == nil {
++		klog.Infof("No Service in Infra cluster for EndpointSlice %s/%s. Skipping.", eps.Namespace, eps.Name)
++		return
++	}
++	klog.Infof("EndpointSlice %s: %v/%v", event, eps.Namespace, eps.Name)
++	c.queue.Add(newRequest(reqType, infraSvc, nil))
++}
++
++// Init registers the event handlers on the infra Service informer and on the
++// tenant EndpointSlice informer. It returns the error of the first handler
++// registration that fails.
++func (c *Controller) Init() error {
++
++	// Act on events from Services on the infra cluster. These are created by the EnsureLoadBalancer function.
++	// We need to watch for these events so that we can update the EndpointSlices in the infra cluster accordingly.
++	_, err := c.infraFactory.Core().V1().Services().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
++		AddFunc: func(obj interface{}) {
++			svc, ok := serviceFromEvent(obj)
++			if !ok {
++				return
++			}
++			// Only act on Services of type LoadBalancer
++			if svc.Spec.Type == v1.ServiceTypeLoadBalancer {
++				klog.Infof("Service added: %v/%v", svc.Namespace, svc.Name)
++				c.queue.Add(newRequest(AddReq, svc, nil))
++			}
++		},
++		UpdateFunc: func(oldObj, newObj interface{}) {
++			newSvc, ok := serviceFromEvent(newObj)
++			if !ok {
++				return
++			}
++			// Only act on Services of type LoadBalancer
++			if newSvc.Spec.Type == v1.ServiceTypeLoadBalancer {
++				klog.Infof("Service updated: %v/%v", newSvc.Namespace, newSvc.Name)
++				c.queue.Add(newRequest(UpdateReq, newSvc, oldObj))
++			}
++		},
++		DeleteFunc: func(obj interface{}) {
++			// obj may be a tombstone here; queue the unwrapped Service because
++			// reconcile type-asserts Request.Obj to *v1.Service.
++			svc, ok := serviceFromEvent(obj)
++			if !ok {
++				klog.Errorf("Unexpected object in Service delete event: %T", obj)
++				return
++			}
++			// Only act on Services of type LoadBalancer
++			if svc.Spec.Type == v1.ServiceTypeLoadBalancer {
++				klog.Infof("Service deleted: %v/%v", svc.Namespace, svc.Name)
++				c.queue.Add(newRequest(DeleteReq, svc, nil))
++			}
++		},
++	})
++	if err != nil {
++		return err
++	}
++
++	// Monitor endpoint slices that we are interested in based on known services in the infra cluster
++	_, err = c.tenantFactory.Discovery().V1().EndpointSlices().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
++		AddFunc: func(obj interface{}) {
++			eps, ok := endpointSliceFromEvent(obj)
++			if ok && c.tenantEPSTracker.contains(eps) {
++				c.enqueueTenantEPSEvent(AddReq, "added", eps)
++			}
++		},
++		UpdateFunc: func(oldObj, newObj interface{}) {
++			eps, ok := endpointSliceFromEvent(newObj)
++			if ok && c.tenantEPSTracker.contains(eps) {
++				c.enqueueTenantEPSEvent(UpdateReq, "updated", eps)
++			}
++		},
++		DeleteFunc: func(obj interface{}) {
++			eps, ok := endpointSliceFromEvent(obj)
++			if !ok {
++				klog.Errorf("Unexpected object in EndpointSlice delete event: %T", obj)
++				return
++			}
++			if c.tenantEPSTracker.contains(eps) {
++				// Stop tracking the slice before the infra Service is re-queued.
++				c.tenantEPSTracker.remove(eps)
++				c.enqueueTenantEPSEvent(DeleteReq, "deleted", eps)
++			}
++		},
++	})
++	if err != nil {
++		return err
++	}
++
++	//TODO: Add informer for EndpointSlices in the infra cluster to watch for (unwanted) changes
++	return nil
++}
++
++// Run starts the informers, waits for their caches to sync, launches numWorkers workers that reconcile queued requests, and blocks until stopCh is closed.
++func (c *Controller) Run(numWorkers int, stopCh <-chan struct{}, controllerManagerMetrics *controllersmetrics.ControllerManagerMetrics) {
++	defer utilruntime.HandleCrash()
++
++	// The workers run on ctx; cancelling it and shutting the queue down on
++	// return makes them stop once stopCh is closed.
++	ctx, cancelFn := context.WithCancel(context.Background())
++	defer cancelFn()
++	defer c.queue.ShutDown()
++
++	// Hand the arguments to Infof directly instead of pre-formatting them with
++	// fmt.Sprintf, which would reuse the formatted text as a format string.
++	klog.Infof("Starting %s", ControllerName)
++	defer klog.Infof("Shutting down %s", ControllerName)
++	controllerManagerMetrics.ControllerStarted(ControllerName.String())
++	defer controllerManagerMetrics.ControllerStopped(ControllerName.String())
++
++	c.tenantFactory.Start(stopCh)
++	c.infraFactory.Start(stopCh)
++
++	// Do not start any worker before both informer caches are populated.
++	if !cache.WaitForNamedCacheSync(ControllerName.String(), stopCh,
++		c.infraFactory.Core().V1().Services().Informer().HasSynced,
++		c.tenantFactory.Discovery().V1().EndpointSlices().Informer().HasSynced) {
++		return
++	}
++
++	for i := 0; i < numWorkers; i++ {
++		go wait.UntilWithContext(ctx, c.runWorker, time.Second)
++	}
++
++	<-stopCh
++}
++
++// worker pattern adapted from https://github.com/kubernetes/client-go/blob/master/examples/workqueue/main.go
++//
++// runWorker processes queue items until processNextItem reports that the
++// queue has been shut down.
++func (c *Controller) runWorker(ctx context.Context) {
++	for c.processNextItem(ctx) {
++	}
++}
++
++// processNextItem takes one request from the queue and reconciles it. A failed
++// request is re-queued with rate limiting until it has been re-queued
++// maxRetries times; after that it is dropped and the error is reported. It
++// returns false only when the queue has been shut down.
++func (c *Controller) processNextItem(ctx context.Context) bool {
++	req, quit := c.queue.Get()
++	if quit {
++		return false
++	}
++
++	defer c.queue.Done(req)
++
++	err := c.reconcile(ctx, req.(*Request))
++
++	switch {
++	case err == nil:
++		c.queue.Forget(req)
++	case c.queue.NumRequeues(req) < c.maxRetries:
++		c.queue.AddRateLimited(req)
++	default:
++		c.queue.Forget(req)
++		klog.Errorf("Dropping object out of queue after too many retries: %v", req)
++		utilruntime.HandleError(err)
++	}
++
++	return true
++}
++
++// getInfraServiceFromTenantEPS returns the Service in the infra cluster that is associated with the given tenant endpoint slice.
++func (c *Controller) getInfraServiceFromTenantEPS(ctx context.Context, slice *discovery.EndpointSlice) (*v1.Service, error) {
++	// Infra Services carry the tenant Service name and namespace as labels;
++	// select on both so that exactly one Service can match.
++	selector := fmt.Sprintf("%s=%s,%s=%s",
++		kubevirt.TenantServiceNameLabelKey, slice.Labels["kubernetes.io/service-name"],
++		kubevirt.TenantServiceNamespaceLabelKey, slice.Namespace)
++
++	found, err := c.infraClient.CoreV1().Services(c.infraNamespace).List(ctx, metav1.ListOptions{LabelSelector: selector})
++	if err != nil {
++		klog.Errorf("Failed to get Service in Infra for EndpointSlice %s in namespace %s: %v", slice.Name, slice.Namespace, err)
++		return nil, err
++	}
++
++	switch matches := len(found.Items); {
++	case matches > 1:
++		// This should never be possible, only one service should exist for a given tenant endpoint slice
++		klog.Errorf("Multiple services found for tenant endpoint slice %s in namespace %s", slice.Name, slice.Namespace)
++		return nil, errors.New("multiple services found for tenant endpoint slice")
++	case matches == 1:
++		return &found.Items[0], nil
++	default:
++		// No service found, possible if service is deleted.
++		return nil, nil
++	}
++}
++
++// getTenantEPSFromInfraService returns the EndpointSlices in the tenant cluster that are associated with the given infra service.
++func (c *Controller) getTenantEPSFromInfraService(ctx context.Context, svc *v1.Service) ([]*discovery.EndpointSlice, error) {
++	// The tenant Service coordinates are stored as labels on the infra Service.
++	tenantServiceName := svc.Labels[kubevirt.TenantServiceNameLabelKey]
++	tenantServiceNamespace := svc.Labels[kubevirt.TenantServiceNamespaceLabelKey]
++	clusterName := svc.Labels[kubevirt.TenantClusterNameLabelKey]
++	klog.Infof("Searching for endpoints on tenant cluster %s for service %s in namespace %s.", clusterName, tenantServiceName, tenantServiceNamespace)
++
++	result, err := c.tenantClient.DiscoveryV1().EndpointSlices(tenantServiceNamespace).List(ctx,
++		metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", discovery.LabelServiceName, tenantServiceName)})
++	if err != nil {
++		klog.Errorf("Failed to get EndpointSlices for Service %s in namespace %s: %v", tenantServiceName,
++			tenantServiceNamespace, err)
++		return nil, err
++	}
++
++	// Take the address of each list element by index. Taking the address of
++	// the range variable would make every stored pointer refer to the same
++	// variable (and therefore to the last slice) on toolchains where the loop
++	// variable is shared across iterations.
++	tenantEPSSlices := make([]*discovery.EndpointSlice, 0, len(result.Items))
++	for i := range result.Items {
++		eps := &result.Items[i]
++		// Remember the slice so that later tenant events for it are acted upon.
++		c.tenantEPSTracker.add(eps)
++		tenantEPSSlices = append(tenantEPSSlices, eps)
++	}
++	return tenantEPSSlices, nil
++}
++
++// getInfraEPSFromInfraService returns the EndpointSlices in the infra cluster that are associated with the given infra service.
++func (c *Controller) getInfraEPSFromInfraService(ctx context.Context, svc *v1.Service) ([]*discovery.EndpointSlice, error) {
++	klog.Infof("Searching for endpoints on infra cluster for service %s in namespace %s.", svc.Name, svc.Namespace)
++	result, err := c.infraClient.DiscoveryV1().EndpointSlices(svc.Namespace).List(ctx,
++		metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", discovery.LabelServiceName, svc.Name)})
++	if err != nil {
++		klog.Errorf("Failed to get EndpointSlices for Service %s in namespace %s: %v", svc.Name, svc.Namespace, err)
++		return nil, err
++	}
++	// Address the list elements by index: pointers to the range variable would
++	// all alias one variable when the loop variable is shared across iterations.
++	infraEPSSlices := make([]*discovery.EndpointSlice, 0, len(result.Items))
++	for i := range result.Items {
++		infraEPSSlices = append(infraEPSSlices, &result.Items[i])
++	}
++	return infraEPSSlices, nil
++}
++
++// reconcile brings the infra EndpointSlices of the Service carried by r in line
++// with the tenant EndpointSlices of the corresponding tenant Service.
++//
++// Services lacking any of the tenant name, tenant namespace or cluster name
++// labels are skipped. When the Service is no longer in the informer cache, or
++// has a selector, the slices managed by this controller are deleted.
++// Otherwise the slices are reconciled per supported address type.
++func (c *Controller) reconcile(ctx context.Context, r *Request) error {
++	service, ok := r.Obj.(*v1.Service)
++	if !ok || service == nil {
++		return errors.New("could not cast object to service")
++	}
++
++	if service.Labels[kubevirt.TenantServiceNameLabelKey] == "" ||
++		service.Labels[kubevirt.TenantServiceNamespaceLabelKey] == "" ||
++		service.Labels[kubevirt.TenantClusterNameLabelKey] == "" {
++		klog.Infof("This LoadBalancer Service: %s is not managed by the %s. Skipping.", service.Name, ControllerName)
++		return nil
++	}
++	klog.Infof("Reconciling: %v", service.Name)
++
++	// Prefer the cached copy of the Service; if the lister cannot return it,
++	// keep working with the queued copy and treat the Service as deleted.
++	serviceDeleted := false
++	svc, err := c.infraFactory.Core().V1().Services().Lister().Services(c.infraNamespace).Get(service.Name)
++	if err != nil {
++		klog.Infof("Service %s in namespace %s is deleted.", service.Name, service.Namespace)
++		serviceDeleted = true
++	} else {
++		service = svc
++	}
++
++	infraExistingEpSlices, err := c.getInfraEPSFromInfraService(ctx, service)
++	if err != nil {
++		return err
++	}
++
++	// At this point we have the current state of the 3 main objects we are interested in:
++	// 1. The Service in the infra cluster, the one created by the KubevirtCloudController.
++	// 2. The EndpointSlices in the tenant cluster, created for the tenant cluster's Service.
++	// 3. The EndpointSlices in the infra cluster, managed by this controller.
++
++	slicesToDelete := []*discovery.EndpointSlice{}
++	slicesByAddressType := make(map[discovery.AddressType][]*discovery.EndpointSlice)
++
++	serviceSupportedAddressesTypes := getAddressTypesForService(service)
++	// If the services switched to a different address type, we need to delete the old ones, because it's immutable.
++	// If the services switched to a different externalTrafficPolicy, we need to delete the old ones.
++	for _, eps := range infraExistingEpSlices {
++		if service.Spec.Selector != nil || serviceDeleted {
++			// to be sure we don't delete any slice that is not managed by us
++			if c.managedByController(eps) {
++				klog.Infof("Added for deletion EndpointSlice %s in namespace %s because the service has a selector or was deleted", eps.Name, eps.Namespace)
++				slicesToDelete = append(slicesToDelete, eps)
++			}
++			continue
++		}
++		if !serviceSupportedAddressesTypes.Has(eps.AddressType) {
++			klog.Infof("Added for deletion EndpointSlice %s in namespace %s because it has an unsupported address type: %v", eps.Name, eps.Namespace, eps.AddressType)
++			slicesToDelete = append(slicesToDelete, eps)
++			continue
++		}
++		slicesByAddressType[eps.AddressType] = append(slicesByAddressType[eps.AddressType], eps)
++	}
++
++	if !serviceDeleted {
++		// Get tenant's endpoint slices for this service
++		tenantEpSlices, err := c.getTenantEPSFromInfraService(ctx, service)
++		if err != nil {
++			return err
++		}
++
++		// Reconcile the EndpointSlices for each address type e.g. ipv4, ipv6
++		for addressType := range serviceSupportedAddressesTypes {
++			existingSlices := slicesByAddressType[addressType]
++			err := c.reconcileByAddressType(service, tenantEpSlices, existingSlices, addressType)
++			if err != nil {
++				return err
++			}
++		}
++	}
++
++	// Delete the EndpointSlices that are no longer needed
++	for _, eps := range slicesToDelete {
++		err := c.infraClient.DiscoveryV1().EndpointSlices(eps.Namespace).Delete(ctx, eps.Name, metav1.DeleteOptions{})
++		if err != nil {
++			klog.Errorf("Failed to delete EndpointSlice %s in namespace %s: %v", eps.Name, eps.Namespace, err)
++			return err
++		}
++		klog.Infof("Deleted EndpointSlice %s in namespace %s", eps.Name, eps.Namespace)
++	}
++
++	return nil
++}
++
++//TODO: From here cleanup!
++
++// reconcileByAddressType computes which infra EndpointSlices of one address
++// type have to be created, updated or deleted so that together they hold the
++// desired endpoints of the Service, and applies the result through finalize.
++func (c *Controller) reconcileByAddressType(service *v1.Service, tenantSlices []*discovery.EndpointSlice, existingSlices []*discovery.EndpointSlice, addressType discovery.AddressType) error {
++
++	slicesToCreate := []*discovery.EndpointSlice{}
++	slicesToUpdate := []*discovery.EndpointSlice{}
++	slicesToDelete := []*discovery.EndpointSlice{}
++	slicesUntouched := []*discovery.EndpointSlice{}
++
++	// Create the desired port configuration
++	var desiredPorts []discovery.EndpointPort
++
++	for _, port := range service.Spec.Ports {
++		// Copy the loop variable: the EndpointPort keeps pointers into it, and
++		// without a per-iteration copy every entry would point at the same
++		// (last) port when the loop variable is shared across iterations.
++		port := port
++		desiredPorts = append(desiredPorts, discovery.EndpointPort{
++			Port:     &port.TargetPort.IntVal,
++			Protocol: &port.Protocol,
++			Name:     &port.Name,
++		})
++	}
++
++	// Create the desired endpoint configuration
++	desiredEndpoints := c.getDesiredEndpoints(service, tenantSlices)
++	desiredEndpointSet := endpointsliceutil.EndpointSet{}
++	desiredEndpointSet.Insert(desiredEndpoints...)
++
++	// 1. Iterate through existing slices, delete endpoints that are no longer
++	// desired and update matching endpoints that have changed. It also checks
++	// if the slices have the labels of the parent services, and updates them if not.
++	for _, existingSlice := range existingSlices {
++		var coveredEndpoints []discovery.Endpoint
++		sliceUpdated := false
++		// first enforce the right portmapping
++		if !apiequality.Semantic.DeepEqual(existingSlice.Ports, desiredPorts) {
++			existingSlice.Ports = desiredPorts
++			sliceUpdated = true
++		}
++		for _, endpoint := range existingSlice.Endpoints {
++			present := desiredEndpointSet.Get(&endpoint)
++			if present != nil {
++				// one of the desired endpoint is covered by this slice
++				coveredEndpoints = append(coveredEndpoints, *present)
++				// Check if the endpoint needs updating
++				if !endpointsliceutil.EndpointsEqualBeyondHash(present, &endpoint) {
++					sliceUpdated = true
++				}
++				// remove endpoint from desired set because it's already covered.
++				desiredEndpointSet.Delete(&endpoint)
++			}
++		}
++		// Check if the labels need updating
++		labels, labelsChanged := c.ensureEndpointSliceLabels(existingSlice, service)
++
++		// If an endpoint was updated or removed, mark for update or delete
++		if sliceUpdated || labelsChanged || len(existingSlice.Endpoints) != len(coveredEndpoints) {
++			if len(coveredEndpoints) == 0 {
++				// No endpoint that is desired is covered by this slice, so it should be deleted
++				slicesToDelete = append(slicesToDelete, existingSlice)
++			} else {
++				// Here we override the existing endpoints with the covered ones
++				// This also deletes the unwanted endpoints from the existing slice
++				existingSlice.Endpoints = coveredEndpoints
++				existingSlice.Labels = labels
++				slicesToUpdate = append(slicesToUpdate, existingSlice)
++			}
++		} else {
++			slicesUntouched = append(slicesUntouched, existingSlice)
++		}
++	}
++	// 2. Iterate through slices that have been modified in 1 and fill them up with
++	// any remaining desired endpoints.
++	// FillAlreadyUpdatedSlicesWithDesiredEndpoints
++	if desiredEndpointSet.Len() > 0 {
++		for _, existingUpdatedSlice := range slicesToUpdate {
++			for len(existingUpdatedSlice.Endpoints) < c.maxEndPointsPerSlice {
++				endpoint, ok := desiredEndpointSet.PopAny()
++				if !ok {
++					break
++				}
++				existingUpdatedSlice.Endpoints = append(existingUpdatedSlice.Endpoints, *endpoint)
++			}
++		}
++	}
++
++	// 3. If there still desired endpoints left, try to fit them into a previously
++	// unchanged slice and/or create new ones.
++	// FillUntouchedSlicesWithDesiredEndpoints
++	if desiredEndpointSet.Len() > 0 {
++		for _, untouchedSlice := range slicesUntouched {
++			for len(untouchedSlice.Endpoints) < c.maxEndPointsPerSlice {
++				endpoint, ok := desiredEndpointSet.PopAny()
++				if !ok {
++					break
++				}
++				untouchedSlice.Endpoints = append(untouchedSlice.Endpoints, *endpoint)
++			}
++			slicesToUpdate = append(slicesToUpdate, untouchedSlice)
++		}
++	}
++
++	// 4. If there still desired endpoints left, create new slices. Keep creating
++	// slices until the set is empty so that endpoints beyond the first
++	// maxEndPointsPerSlice are not silently dropped.
++	for desiredEndpointSet.Len() > 0 {
++		slice := c.newSlice(service, desiredPorts, addressType)
++		slice.Labels, _ = c.ensureEndpointSliceLabels(slice, service)
++		for len(slice.Endpoints) < c.maxEndPointsPerSlice {
++			endpoint, ok := desiredEndpointSet.PopAny()
++			if !ok {
++				break
++			}
++			slice.Endpoints = append(slice.Endpoints, *endpoint)
++		}
++		slicesToCreate = append(slicesToCreate, slice)
++		if len(slice.Endpoints) == 0 {
++			// Nothing fits into a slice (non-positive limit); stop instead of looping forever.
++			break
++		}
++	}
++
++	return c.finalize(service, slicesToCreate, slicesToUpdate, slicesToDelete)
++}
++
++// ownedBy reports whether the EndpointSlice has an owner reference to the
++// given Service (matching UID, kind "Service" and API version "v1").
++func ownedBy(endpointSlice *discovery.EndpointSlice, svc *v1.Service) bool {
++	for _, o := range endpointSlice.OwnerReferences {
++		if o.UID == svc.UID && o.Kind == "Service" && o.APIVersion == "v1" {
++			return true
++		}
++	}
++	return false
++}
++
++// finalize applies the computed changes to the infra cluster: creates (unless
++// the Service is being deleted), then updates, then deletes. A pending create
++// and a pending delete of the same address type are merged into one update
++// that reuses the name of the slice that would have been deleted. A create
++// that fails because the namespace is terminating ends the call without error.
++func (c *Controller) finalize(service *v1.Service, slicesToCreate []*discovery.EndpointSlice, slicesToUpdate []*discovery.EndpointSlice, slicesToDelete []*discovery.EndpointSlice) error {
++	// If there are slices to delete and slices to create, make them as update
++	for i := 0; i < len(slicesToDelete); {
++		if len(slicesToCreate) == 0 {
++			break
++		}
++		// Capture the candidate before re-slicing slicesToCreate; indexing
++		// element 0 afterwards would pick the next slice, or panic when this
++		// was the only one.
++		replacement := slicesToCreate[0]
++		if slicesToDelete[i].AddressType == replacement.AddressType && ownedBy(slicesToDelete[i], service) {
++			replacement.Name = slicesToDelete[i].Name
++			slicesToCreate = slicesToCreate[1:]
++			slicesToUpdate = append(slicesToUpdate, replacement)
++			slicesToDelete = append(slicesToDelete[:i], slicesToDelete[i+1:]...)
++		} else {
++			i++
++		}
++	}
++
++	// Create the new slices if service is not marked for deletion
++	if service.DeletionTimestamp == nil {
++		for _, slice := range slicesToCreate {
++			createdSlice, err := c.infraClient.DiscoveryV1().EndpointSlices(slice.Namespace).Create(context.TODO(), slice, metav1.CreateOptions{})
++			if err != nil {
++				klog.Errorf("Failed to create EndpointSlice %s in namespace %s: %v", slice.Name, slice.Namespace, err)
++				if k8serrors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
++					return nil
++				}
++				return err
++			}
++			klog.Infof("Created EndpointSlice %s in namespace %s", createdSlice.Name, createdSlice.Namespace)
++		}
++	}
++
++	// Update slices
++	for _, slice := range slicesToUpdate {
++		_, err := c.infraClient.DiscoveryV1().EndpointSlices(slice.Namespace).Update(context.TODO(), slice, metav1.UpdateOptions{})
++		if err != nil {
++			klog.Errorf("Failed to update EndpointSlice %s in namespace %s: %v", slice.Name, slice.Namespace, err)
++			return err
++		}
++		klog.Infof("Updated EndpointSlice %s in namespace %s", slice.Name, slice.Namespace)
++	}
++
++	// Delete slices
++	for _, slice := range slicesToDelete {
++		err := c.infraClient.DiscoveryV1().EndpointSlices(slice.Namespace).Delete(context.TODO(), slice.Name, metav1.DeleteOptions{})
++		if err != nil {
++			klog.Errorf("Failed to delete EndpointSlice %s in namespace %s: %v", slice.Name, slice.Namespace, err)
++			return err
++		}
++		klog.Infof("Deleted EndpointSlice %s in namespace %s", slice.Name, slice.Namespace)
++	}
++
++	return nil
++}
++
++// newSlice returns an empty EndpointSlice for the Service with the given ports
++// and address type. The slice lives in the Service's namespace, uses the
++// Service name as GenerateName and is owned by the Service.
++func (c *Controller) newSlice(service *v1.Service, desiredPorts []discovery.EndpointPort, addressType discovery.AddressType) *discovery.EndpointSlice {
++	ownerRef := metav1.NewControllerRef(service, schema.GroupVersionKind{Version: "v1", Kind: "Service"})
++
++	slice := &discovery.EndpointSlice{
++		ObjectMeta: metav1.ObjectMeta{
++			Labels:          map[string]string{},
++			GenerateName:    service.Name,
++			Namespace:       service.Namespace,
++			OwnerReferences: []metav1.OwnerReference{*ownerRef},
++		},
++		Ports:       desiredPorts,
++		AddressType: addressType,
++		Endpoints:   []discovery.Endpoint{},
++	}
++	return slice
++}
++
++// getDesiredEndpoints returns the endpoints the infra slices should contain.
++// For a Service without selector it collects the node names referenced by the
++// tenant slices, fetches the VirtualMachineInstance of that name from the
++// infra namespace and emits one endpoint for its "default" interface. VMIs
++// that cannot be fetched or decoded are logged and skipped. For a Service with
++// a selector the result is empty.
++func (c *Controller) getDesiredEndpoints(service *v1.Service, tenantSlices []*discovery.EndpointSlice) []*discovery.Endpoint {
++	var desiredEndpoints []*discovery.Endpoint
++	if service.Spec.Selector == nil {
++		// Extract the desired endpoints from the tenant EndpointSlices
++		// for extracting the nodes it does not matter what type of address we are dealing with
++		// all nodes with an endpoint for a corresponding slice will be selected.
++		nodeSet := sets.Set[string]{}
++		for _, slice := range tenantSlices {
++			for _, endpoint := range slice.Endpoints {
++				// NodeName is optional on an Endpoint; without it there is no VMI to look up.
++				if endpoint.NodeName == nil {
++					continue
++				}
++				// find all unique nodes that correspond to an endpoint in a tenant slice
++				nodeSet.Insert(*endpoint.NodeName)
++			}
++		}
++
++		klog.Infof("Desired nodes for service %s in namespace %s: %v", service.Name, service.Namespace, sets.List(nodeSet))
++
++		for _, node := range sets.List(nodeSet) {
++			// find vmi for node name
++			var obj *unstructured.Unstructured
++			vmi := &kubevirtv1.VirtualMachineInstance{}
++
++			obj, err := c.infraDynamic.Resource(kubevirtv1.VirtualMachineInstanceGroupVersionKind.GroupVersion().WithResource("virtualmachineinstances")).Namespace(c.infraNamespace).Get(context.TODO(), node, metav1.GetOptions{})
++			if err != nil {
++				klog.Errorf("Failed to get VirtualMachineInstance %s in namespace %s:%v", node, c.infraNamespace, err)
++				continue
++			}
++
++			err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj.Object, vmi)
++			if err != nil {
++				// Skip the one VMI that cannot be decoded, like a failed Get
++				// above, rather than terminating the whole process.
++				klog.Errorf("Failed to convert Unstructured to VirtualMachineInstance: %v", err)
++				continue
++			}
++
++			ready := vmi.Status.Phase == kubevirtv1.Running
++			serving := vmi.Status.Phase == kubevirtv1.Running
++			terminating := vmi.Status.Phase == kubevirtv1.Failed || vmi.Status.Phase == kubevirtv1.Succeeded
++
++			for _, i := range vmi.Status.Interfaces {
++				if i.Name == "default" {
++					desiredEndpoints = append(desiredEndpoints, &discovery.Endpoint{
++						Addresses: []string{i.IP},
++						Conditions: discovery.EndpointConditions{
++							Ready:       &ready,
++							Serving:     &serving,
++							Terminating: &terminating,
++						},
++						NodeName: &vmi.Status.NodeName,
++					})
++				}
++			}
++		}
++	}
++
++	return desiredEndpoints
++}
++
++// ensureEndpointSliceLabels returns the labels the slice should carry and
++// whether they differ from its current labels. The result is the slice's own
++// labels overlaid with the Service's labels, plus the service-name and
++// managed-by labels; the headless-service label is set when the Service has
++// no cluster IP and removed otherwise.
++func (c *Controller) ensureEndpointSliceLabels(slice *discovery.EndpointSlice, svc *v1.Service) (map[string]string, bool) {
++	labels := make(map[string]string)
++	labelsChanged := false
++
++	for k, v := range slice.Labels {
++		labels[k] = v
++	}
++
++	for k, v := range svc.ObjectMeta.Labels {
++		labels[k] = v
++	}
++
++	labels[discovery.LabelServiceName] = svc.Name
++	labels[discovery.LabelManagedBy] = ControllerName.dashed()
++	if svc.Spec.ClusterIP == "" || svc.Spec.ClusterIP == v1.ClusterIPNone {
++		labels[v1.IsHeadlessService] = ""
++	} else {
++		delete(labels, v1.IsHeadlessService)
++	}
++
++	if !apiequality.Semantic.DeepEqual(slice.Labels, labels) {
++		labelsChanged = true
++	}
++	return labels, labelsChanged
++}
++
++// managedByController reports whether the slice carries this controller's
++// managed-by label.
++func (c *Controller) managedByController(slice *discovery.EndpointSlice) bool {
++	return slice.Labels[discovery.LabelManagedBy] == ControllerName.dashed()
++}
+diff --git a/pkg/controller/kubevirteps/kubevirteps_controller_suite_test.go b/pkg/controller/kubevirteps/kubevirteps_controller_suite_test.go
+new file mode 100644
+index 00000000..59cb0da0
+--- /dev/null
++++ b/pkg/controller/kubevirteps/kubevirteps_controller_suite_test.go
+@@ -0,0 +1,13 @@
++package kubevirteps_test
++
++import (
++	. "github.com/onsi/ginkgo/v2"
++	.
"github.com/onsi/gomega" ++ ++ "testing" ++) ++ ++func TestProvider(t *testing.T) { ++ RegisterFailHandler(Fail) ++ RunSpecs(t, "KubevirtEPS Controller Suite") ++} +diff --git a/pkg/controller/kubevirteps/kubevirteps_controller_test.go b/pkg/controller/kubevirteps/kubevirteps_controller_test.go +new file mode 100644 +index 00000000..7e645a0b +--- /dev/null ++++ b/pkg/controller/kubevirteps/kubevirteps_controller_test.go +@@ -0,0 +1,635 @@ ++package kubevirteps ++ ++import ( ++ "context" ++ g "github.com/onsi/ginkgo/v2" ++ . "github.com/onsi/gomega" ++ v1 "k8s.io/api/core/v1" ++ discoveryv1 "k8s.io/api/discovery/v1" ++ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ++ "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ++ "k8s.io/apimachinery/pkg/runtime" ++ "k8s.io/apimachinery/pkg/runtime/schema" ++ "k8s.io/apimachinery/pkg/util/intstr" ++ dfake "k8s.io/client-go/dynamic/fake" ++ "k8s.io/client-go/kubernetes/fake" ++ "k8s.io/client-go/testing" ++ "k8s.io/client-go/tools/cache" ++ "k8s.io/component-base/metrics/prometheus/controllers" ++ "k8s.io/klog/v2" ++ kubevirtv1 "kubevirt.io/api/core/v1" ++ kubevirt "kubevirt.io/cloud-provider-kubevirt/pkg/provider" ++) ++ ++const ( ++ tenantNamespace = "tenant-namespace" ++ infraNamespace = "test" ++) ++ ++type testKubevirtEPSController struct { ++ controller *Controller ++ tenantClient *fake.Clientset ++ infraClient *fake.Clientset ++ infraDynamic *dfake.FakeDynamicClient ++} ++ ++func createInfraServiceLB(name, tenantServiceName, clusterName string, servicePort v1.ServicePort, externalTrafficPolicy v1.ServiceExternalTrafficPolicy) *v1.Service { ++ return &v1.Service{ ++ ObjectMeta: metav1.ObjectMeta{ ++ Name: name, ++ Namespace: infraNamespace, ++ Labels: map[string]string{ ++ kubevirt.TenantServiceNameLabelKey: tenantServiceName, ++ kubevirt.TenantServiceNamespaceLabelKey: tenantNamespace, ++ kubevirt.TenantClusterNameLabelKey: clusterName, ++ }, ++ }, ++ Spec: v1.ServiceSpec{ ++ Ports: []v1.ServicePort{ ++ servicePort, ++ 
},
++			Type:                  v1.ServiceTypeLoadBalancer,
++			ExternalTrafficPolicy: externalTrafficPolicy,
++			IPFamilies: []v1.IPFamily{
++				v1.IPv4Protocol,
++			},
++		},
++	}
++}
++
++// createUnstructuredVMINode returns an unstructured VirtualMachineInstance in
++// the infra namespace, in phase Running on nodeName, with one interface named
++// "default" that carries ip.
++func createUnstructuredVMINode(name, nodeName, ip string) *unstructured.Unstructured {
++	vmi := &unstructured.Unstructured{}
++	vmi.SetUnstructuredContent(map[string]interface{}{
++		"apiVersion": "kubevirt.io/v1",
++		"kind":       "VirtualMachineInstance",
++		"metadata": map[string]interface{}{
++			"name":      name,
++			"namespace": infraNamespace,
++		},
++		"status": map[string]interface{}{
++			"phase":    "Running",
++			"nodeName": nodeName,
++			"interfaces": []interface{}{
++				map[string]interface{}{
++					"name":      "default",
++					"ipAddress": ip,
++				},
++			},
++		},
++	})
++	return vmi
++}
++
++// createPort returns an EndpointPort with the given name, port and protocol.
++func createPort(name string, port int32, protocol v1.Protocol) *discoveryv1.EndpointPort {
++	return &discoveryv1.EndpointPort{
++		Name:     &name,
++		Port:     &port,
++		Protocol: &protocol,
++	}
++}
++
++// createEndpoint returns an Endpoint with a single address, the given node name and conditions.
++func createEndpoint(ip, nodeName string, ready, serving, terminating bool) *discoveryv1.Endpoint {
++	return &discoveryv1.Endpoint{
++		Addresses: []string{ip},
++		Conditions: discoveryv1.EndpointConditions{
++			Ready:       &ready,
++			Serving:     &serving,
++			Terminating: &terminating,
++		},
++		NodeName: &nodeName,
++	}
++}
++
++// createTenantEPSlice returns an EndpointSlice in the tenant namespace that is
++// labelled as belonging to the service labelServiceName.
++func createTenantEPSlice(
++	name, labelServiceName string, addressType discoveryv1.AddressType,
++	port discoveryv1.EndpointPort, endpoints []discoveryv1.Endpoint) *discoveryv1.EndpointSlice {
++	return &discoveryv1.EndpointSlice{
++		ObjectMeta: metav1.ObjectMeta{
++			Name:      name,
++			Namespace: tenantNamespace,
++			Labels: map[string]string{
++				discoveryv1.LabelServiceName: labelServiceName,
++			},
++		},
++		AddressType: addressType,
++		Ports: []discoveryv1.EndpointPort{
++			port,
++		},
++		Endpoints: endpoints,
++	}
++}
++
++// createAndAssertVMI creates the VMI through the fake dynamic infra client and
++// waits until it can be read back.
++func createAndAssertVMI(node, nodeName, ip string) {
++	vmi := createUnstructuredVMINode(node, nodeName, ip)
++	_, err := testVals.infraDynamic.Resource(kubevirtv1.VirtualMachineInstanceGroupVersionKind.GroupVersion().WithResource("virtualmachineinstances")).
++		Namespace(infraNamespace).Create(context.TODO(), vmi, metav1.CreateOptions{})
++	Expect(err).To(BeNil())
++
++	Eventually(func() (bool, error) {
++		vmiList, err := testVals.infraDynamic.Resource(kubevirtv1.VirtualMachineInstanceGroupVersionKind.GroupVersion().WithResource("virtualmachineinstances")).
++			Namespace(infraNamespace).Get(context.TODO(), node, metav1.GetOptions{})
++		// The object exists only when the Get succeeded AND returned something.
++		if err == nil && vmiList != nil {
++			return true, err
++		}
++		return false, err
++	}).Should(BeTrue(), "VMI in infra cluster should be created")
++}
++
++// createAndAssertTenantSlice creates the EndpointSlice through the fake tenant
++// client and waits until it can be read back.
++func createAndAssertTenantSlice(name, labelServiceName string, addressType discoveryv1.AddressType, port discoveryv1.EndpointPort, endpoints []discoveryv1.Endpoint) {
++	epSlice := createTenantEPSlice(name, labelServiceName, addressType, port, endpoints)
++	_, err := testVals.tenantClient.DiscoveryV1().EndpointSlices(tenantNamespace).Create(context.TODO(), epSlice, metav1.CreateOptions{})
++	Expect(err).To(BeNil())
++	// Check if tenant Endpointslice is created
++	Eventually(func() (bool, error) {
++		eps, err := testVals.tenantClient.DiscoveryV1().EndpointSlices(tenantNamespace).Get(context.TODO(), name, metav1.GetOptions{})
++		if err == nil && eps != nil {
++			return true, err
++		}
++		return false, err
++	}).Should(BeTrue(), "EndpointSlice in tenant cluster should be created")
++}
++
++// createAndAssertInfraServiceLB creates the LoadBalancer service through the
++// fake infra client and waits until it can be read back.
++func createAndAssertInfraServiceLB(name, tenantServiceName, clusterName string, servicePort v1.ServicePort, externalTrafficPolicy v1.ServiceExternalTrafficPolicy) {
++	svc := createInfraServiceLB(name, tenantServiceName, clusterName, servicePort, externalTrafficPolicy)
++	_, err := testVals.infraClient.CoreV1().Services(infraNamespace).Create(context.TODO(), svc, metav1.CreateOptions{})
++	Expect(err).To(BeNil())
++	// Check if the service is created
++	Eventually(func() (bool, error) {
++		svc, err := testVals.infraClient.CoreV1().Services(infraNamespace).Get(context.TODO(), name, metav1.GetOptions{})
++		if err == nil && svc != nil {
++			return true, err
++		}
++		return false, err
++
++	}).Should(BeTrue(), "Service in infra cluster should be created")
++}
++
++// setupTestKubevirtEPSController wires a Controller to fake tenant, infra and
++// dynamic clients and initializes it; initialization failure aborts the test binary.
++func setupTestKubevirtEPSController(ctx context.Context) *testKubevirtEPSController {
++	var tenantClient *fake.Clientset
++	var infraClient *fake.Clientset
++
++	tenantClient = fake.NewSimpleClientset()
++	infraClient = fake.NewSimpleClientset()
++
++	s := runtime.NewScheme()
++	infraDynamic := dfake.NewSimpleDynamicClientWithCustomListKinds(s, map[schema.GroupVersionResource]string{
++		schema.GroupVersionResource{
++			Group:    kubevirtv1.GroupVersion.Group,
++			Version:  kubevirtv1.GroupVersion.Version,
++			Resource: "virtualmachineinstances",
++		}: "VirtualMachineInstanceList",
++	})
++
++	controller := NewKubevirtEPSController(tenantClient, infraClient, infraDynamic, "test")
++
++	err := controller.Init()
++	if err != nil {
++		klog.Errorf("Failed to initialize kubevirtEPSController: %v", err)
++		klog.Fatal(err)
++	}
++
++	return &testKubevirtEPSController{
++		controller:   controller,
++		tenantClient: tenantClient,
++		infraClient:  infraClient,
++		infraDynamic: infraDynamic,
++	}
++}
++
++// runKubevirtEPSController starts the controller with one worker in a
++// goroutine; it stops when ctx is cancelled.
++func (testVals *testKubevirtEPSController) runKubevirtEPSController(ctx context.Context) {
++	metrics := controllers.NewControllerManagerMetrics("test")
++	go testVals.controller.Run(1, ctx.Done(), metrics)
++}
++
++var _ = g.Describe("KubevirtEPSController start", g.Ordered, func() {
++	g.Context("With starting the controller", g.Ordered, func() {
++
++		g.It("Should start the controller", func() {
++			ctx, stop := context.WithCancel(context.Background())
++			defer stop()
++			testVals := setupTestKubevirtEPSController(ctx)
++			testVals.runKubevirtEPSController(ctx)
++		})
++	})
++})
++
++var (
++	stop     context.CancelFunc
++	ctx      context.Context
++	testVals *testKubevirtEPSController
++)
++
++var _ = g.Describe("KubevirtEPSController", g.Ordered, func() {
++
++	g.Context("With starting the controller", g.Ordered, func() {
++		g.It("Should start the
controller", func() { ++ ctx, stop = context.WithCancel(context.Background()) ++ defer stop() ++ testVals := setupTestKubevirtEPSController(ctx) ++ testVals.runKubevirtEPSController(ctx) ++ ++ cache.WaitForCacheSync(ctx.Done(), ++ testVals.controller.tenantFactory.Discovery().V1().EndpointSlices().Informer().HasSynced, ++ testVals.controller.infraFactory.Core().V1().Services().Informer().HasSynced) ++ }) ++ }) ++ ++ g.Context("With adding an infraService", g.Ordered, func() { ++ // Startup and wait for cache sync ++ g.BeforeEach(func() { ++ ctx, stop = context.WithCancel(context.Background()) ++ testVals = setupTestKubevirtEPSController(ctx) ++ testVals.runKubevirtEPSController(ctx) ++ ++ cache.WaitForCacheSync(ctx.Done(), ++ testVals.controller.tenantFactory.Discovery().V1().EndpointSlices().Informer().HasSynced, ++ testVals.controller.infraFactory.Core().V1().Services().Informer().HasSynced) ++ ++ }) ++ ++ // Stop the controller ++ g.AfterEach(func() { ++ stop() ++ }) ++ ++ g.It("Should reconcile a new Endpointslice on the infra cluster", func() { ++ // Create VMI in infra cluster ++ createAndAssertVMI("worker-0-test", "ip-10-32-5-13", "123.45.67.89") ++ ++ // Create Endpoinslices in tenant cluster ++ createAndAssertTenantSlice("test-epslice", "tenant-service-name", discoveryv1.AddressTypeIPv4, ++ *createPort("http", 80, v1.ProtocolTCP), ++ []discoveryv1.Endpoint{*createEndpoint("123.45.67.89", "worker-0-test", true, true, false)}) ++ ++ // Create service in infra cluster ++ createAndAssertInfraServiceLB("infra-service-name", "tenant-service-name", "test-cluster", ++ v1.ServicePort{Name: "web", Port: 80, NodePort: 31900, Protocol: v1.ProtocolTCP, TargetPort: intstr.IntOrString{IntVal: 30390}}, ++ v1.ServiceExternalTrafficPolicyLocal) ++ ++ var epsList *discoveryv1.EndpointSliceList ++ var err error ++ // Check if the controller creates the EndpointSlice in the infra cluster ++ Eventually(func() (bool, error) { ++ epsList, err = 
testVals.infraClient.DiscoveryV1().EndpointSlices(infraNamespace).List(context.TODO(), metav1.ListOptions{}) ++ if len(epsList.Items) == 1 { ++ return true, err ++ } else { ++ return false, err ++ } ++ }).Should(BeTrue(), "EndpointSlice in infra cluster should be created by the controller reconciler") ++ ++ Expect(*epsList.Items[0].Endpoints[0].NodeName).To(Equal("ip-10-32-5-13")) ++ }) ++ ++ g.It("Should update the Endpointslice when a tenant Endpointslice is updated", func() { ++ ++ ipAddr1 := "123.45.67.11" ++ ipAddr2 := "123.99.99.99" ++ // Create VMI in infra cluster ++ createAndAssertVMI("worker-0-test", "ip-10-32-5-13", ipAddr1) ++ createAndAssertVMI("worker-1-test", "ip-10-32-5-15", ipAddr2) ++ ++ // Create Endpoinslices in tenant cluster ++ createAndAssertTenantSlice("test-epslice", "tenant-service-name", discoveryv1.AddressTypeIPv4, ++ *createPort("http", 80, v1.ProtocolTCP), ++ []discoveryv1.Endpoint{*createEndpoint(ipAddr1, "worker-0-test", true, true, false)}) ++ ++ // Create service in infra cluster ++ createAndAssertInfraServiceLB("infra-service-name", "tenant-service-name", "test-cluster", ++ v1.ServicePort{Name: "web", Port: 80, NodePort: 31900, Protocol: v1.ProtocolTCP, TargetPort: intstr.IntOrString{IntVal: 30390}}, ++ v1.ServiceExternalTrafficPolicyLocal) ++ ++ // Check if the controller creates the EndpointSlice in the infra cluster ++ Eventually(func() (bool, error) { ++ epsList, err := testVals.infraClient.DiscoveryV1().EndpointSlices(infraNamespace).List(context.TODO(), metav1.ListOptions{}) ++ if len(epsList.Items) == 1 && ++ len(epsList.Items[0].Endpoints) == 1 && ++ *epsList.Items[0].Endpoints[0].NodeName == "ip-10-32-5-13" { ++ return true, err ++ } else { ++ return false, err ++ } ++ }).Should(BeTrue(), "EndpointSlice in infra cluster should be created by the controller reconciler") ++ ++ // Update the tenant Endpointslice ++ epSlice := createTenantEPSlice("test-epslice", "tenant-service-name", discoveryv1.AddressTypeIPv4, ++ 
*createPort("http", 80, v1.ProtocolTCP), ++ []discoveryv1.Endpoint{ ++ *createEndpoint(ipAddr1, "worker-0-test", true, true, false), ++ *createEndpoint(ipAddr2, "worker-1-test", true, true, false), ++ }) ++ _, err := testVals.tenantClient.DiscoveryV1().EndpointSlices(tenantNamespace).Update(context.TODO(), epSlice, metav1.UpdateOptions{}) ++ Expect(err).To(BeNil()) ++ ++ // Check if tenant Endpointslice is updated ++ Eventually(func() (bool, error) { ++ epsList, err := testVals.tenantClient.DiscoveryV1().EndpointSlices(tenantNamespace).List(context.TODO(), metav1.ListOptions{}) ++ if len(epsList.Items) == 1 && len(epsList.Items[0].Endpoints) == 2 { ++ return true, err ++ } else { ++ return false, err ++ } ++ }).Should(BeTrue(), "EndpointSlice in tenant cluster should be updated") ++ ++ // Check if the controller updates the EndpointSlice in the infra cluster ++ Eventually(func() (bool, error) { ++ epsList, err := testVals.infraClient.DiscoveryV1().EndpointSlices(infraNamespace).List(context.TODO(), metav1.ListOptions{}) ++ if len(epsList.Items) == 1 && len(epsList.Items[0].Endpoints) == 2 { ++ return true, err ++ } else { ++ return false, err ++ } ++ }).Should(BeTrue(), "EndpointSlice in infra cluster should be updated by the controller reconciler") ++ }) ++ ++ g.It("Should update the Endpointslice when the infra Service external traffic policy changes.", func() { ++ // Create VMI in infra cluster ++ createAndAssertVMI("worker-0-test", "ip-10-32-5-13", "123.45.67.89") ++ ++ // Create Endpoinslices in tenant cluster ++ createAndAssertTenantSlice("test-epslice", "tenant-service-name", discoveryv1.AddressTypeIPv4, ++ *createPort("http", 80, v1.ProtocolTCP), ++ []discoveryv1.Endpoint{*createEndpoint("123.45.67.89", "worker-0-test", true, true, false)}) ++ ++ // Create service in infra cluster ++ createAndAssertInfraServiceLB("infra-service-name", "tenant-service-name", "test-cluster", ++ v1.ServicePort{Name: "web", Port: 80, NodePort: 31900, Protocol: v1.ProtocolTCP, 
TargetPort: intstr.IntOrString{IntVal: 30390}}, ++ v1.ServiceExternalTrafficPolicyLocal) ++ ++ var epsList *discoveryv1.EndpointSliceList ++ var err error ++ // Check if the controller creates the EndpointSlice in the infra cluster ++ Eventually(func() (bool, error) { ++ epsList, err = testVals.infraClient.DiscoveryV1().EndpointSlices(infraNamespace).List(context.TODO(), metav1.ListOptions{}) ++ if len(epsList.Items) == 1 { ++ return true, err ++ } else { ++ return false, err ++ } ++ }).Should(BeTrue(), "EndpointSlice in infra cluster should be created by the controller reconciler") ++ ++ Expect(*epsList.Items[0].Endpoints[0].NodeName).To(Equal("ip-10-32-5-13")) ++ ++ // Update the service's external traffic policy to Cluster ++ svc := createInfraServiceLB("infra-service-name", "tenant-service-name", "test-cluster", ++ v1.ServicePort{Name: "web", Port: 80, NodePort: 31900, Protocol: v1.ProtocolTCP, TargetPort: intstr.IntOrString{IntVal: 30390}}, ++ v1.ServiceExternalTrafficPolicyCluster) ++ ++ _, err = testVals.infraClient.CoreV1().Services(infraNamespace).Update(context.TODO(), svc, metav1.UpdateOptions{}) ++ Expect(err).To(BeNil()) ++ ++ Eventually(func() (bool, error) { ++ epsList, err = testVals.infraClient.DiscoveryV1().EndpointSlices(infraNamespace).List(context.TODO(), metav1.ListOptions{}) ++ if len(epsList.Items) == 0 { ++ return true, err ++ } else { ++ return false, err ++ } ++ }).Should(BeTrue(), "EndpointSlice in infra cluster should be deleted by the controller reconciler") ++ ++ // Update the service's external traffic policy to Local ++ svc = createInfraServiceLB("infra-service-name", "tenant-service-name", "test-cluster", ++ v1.ServicePort{Name: "web", Port: 80, NodePort: 31900, Protocol: v1.ProtocolTCP, TargetPort: intstr.IntOrString{IntVal: 30390}}, ++ v1.ServiceExternalTrafficPolicyLocal) ++ ++ _, err = testVals.infraClient.CoreV1().Services(infraNamespace).Update(context.TODO(), svc, metav1.UpdateOptions{}) ++ Expect(err).To(BeNil()) ++ ++ 
Eventually(func() (bool, error) { ++ epsList, err = testVals.infraClient.DiscoveryV1().EndpointSlices(infraNamespace).List(context.TODO(), metav1.ListOptions{}) ++ if len(epsList.Items) == 1 { ++ return true, err ++ } else { ++ return false, err ++ } ++ }).Should(BeTrue(), "EndpointSlice in infra cluster should be created by the controller reconciler") ++ }) ++ ++ g.It("Should update the Endpointslice when the infra Service labels are updated.", func() { ++ // Create VMI in infra cluster ++ createAndAssertVMI("worker-0-test", "ip-10-32-5-13", "123.45.67.89") ++ ++ // Create Endpoinslices in tenant cluster ++ createAndAssertTenantSlice("test-epslice", "tenant-service-name", discoveryv1.AddressTypeIPv4, ++ *createPort("http", 80, v1.ProtocolTCP), ++ []discoveryv1.Endpoint{*createEndpoint("123.45.67.89", "worker-0-test", true, true, false)}) ++ ++ // Create service in infra cluster ++ createAndAssertInfraServiceLB("infra-service-name", "tenant-service-name", "test-cluster", ++ v1.ServicePort{Name: "web", Port: 80, NodePort: 31900, Protocol: v1.ProtocolTCP, TargetPort: intstr.IntOrString{IntVal: 30390}}, ++ v1.ServiceExternalTrafficPolicyLocal) ++ ++ var epsList *discoveryv1.EndpointSliceList ++ var err error ++ // Check if the controller creates the EndpointSlice in the infra cluster ++ Eventually(func() (bool, error) { ++ epsList, err = testVals.infraClient.DiscoveryV1().EndpointSlices(infraNamespace).List(context.TODO(), metav1.ListOptions{}) ++ if len(epsList.Items) == 1 { ++ return true, err ++ } else { ++ return false, err ++ } ++ }).Should(BeTrue(), "EndpointSlice in infra cluster should be created by the controller reconciler") ++ ++ Expect(*epsList.Items[0].Endpoints[0].NodeName).To(Equal("ip-10-32-5-13")) ++ ++ // Update the service's labels ++ svc := createInfraServiceLB("infra-service-name", "tenant-service-name", "test-cluster", ++ v1.ServicePort{Name: "web", Port: 80, NodePort: 31900, Protocol: v1.ProtocolTCP, TargetPort: intstr.IntOrString{IntVal: 
30390}}, ++ v1.ServiceExternalTrafficPolicyLocal) ++ svc.Labels["test-label"] = "test-value" ++ svc.Labels["test-label-2"] = "test-value-2" ++ ++ _, err = testVals.infraClient.CoreV1().Services(infraNamespace).Update(context.TODO(), svc, metav1.UpdateOptions{}) ++ Expect(err).To(BeNil()) ++ ++ Eventually(func() (bool, error) { ++ epsList, err = testVals.infraClient.DiscoveryV1().EndpointSlices(infraNamespace).List(context.TODO(), metav1.ListOptions{}) ++ if len(epsList.Items) == 1 { ++ if epsList.Items[0].Labels["test-label"] == "test-value" && epsList.Items[0].Labels["test-label-2"] == "test-value-2" { ++ return true, err ++ } ++ return false, err ++ } else { ++ return false, err ++ } ++ }).Should(BeTrue(), "EndpointSlice in infra cluster should have the two added labels") ++ ++ // Update the service's external traffic policy to Cluster ++ svc = createInfraServiceLB("infra-service-name", "tenant-service-name", "test-cluster", ++ v1.ServicePort{Name: "web", Port: 80, NodePort: 31900, Protocol: v1.ProtocolTCP, TargetPort: intstr.IntOrString{IntVal: 30390}}, ++ v1.ServiceExternalTrafficPolicyLocal) ++ svc.Labels["test-label"] = "test-value" ++ ++ _, err = testVals.infraClient.CoreV1().Services(infraNamespace).Update(context.TODO(), svc, metav1.UpdateOptions{}) ++ Expect(err).To(BeNil()) ++ ++ Eventually(func() (bool, error) { ++ epsList, err = testVals.infraClient.DiscoveryV1().EndpointSlices(infraNamespace).List(context.TODO(), metav1.ListOptions{}) ++ if len(epsList.Items) == 1 { ++ if epsList.Items[0].Labels["test-label"] == "test-value" && epsList.Items[0].Labels["test-label-2"] == "test-value-2" { ++ return true, err ++ } ++ return false, err ++ } else { ++ return false, err ++ } ++ }).Should(BeTrue(), "EndpointSlice in infra cluster still has the two added labels") ++ }) ++ ++ g.It("Should update the Endpointslice when the infra Service port is updated.", func() { ++ // Create VMI in infra cluster ++ createAndAssertVMI("worker-0-test", "ip-10-32-5-13", 
"123.45.67.89") ++ ++ // Create Endpoinslices in tenant cluster ++ createAndAssertTenantSlice("test-epslice", "tenant-service-name", discoveryv1.AddressTypeIPv4, ++ *createPort("http", 80, v1.ProtocolTCP), ++ []discoveryv1.Endpoint{*createEndpoint("123.45.67.89", "worker-0-test", true, true, false)}) ++ ++ // Create service in infra cluster ++ createAndAssertInfraServiceLB("infra-service-name", "tenant-service-name", "test-cluster", ++ v1.ServicePort{Name: "web", Port: 80, NodePort: 31900, Protocol: v1.ProtocolTCP, TargetPort: intstr.IntOrString{IntVal: 30390}}, ++ v1.ServiceExternalTrafficPolicyLocal) ++ ++ var epsList *discoveryv1.EndpointSliceList ++ var err error ++ // Check if the controller creates the EndpointSlice in the infra cluster ++ Eventually(func() (bool, error) { ++ epsList, err = testVals.infraClient.DiscoveryV1().EndpointSlices(infraNamespace).List(context.TODO(), metav1.ListOptions{}) ++ if len(epsList.Items) == 1 { ++ if *epsList.Items[0].Ports[0].Port == 30390 { ++ return true, err ++ } ++ return false, err ++ } else { ++ return false, err ++ } ++ }).Should(BeTrue(), "EndpointSlice in infra cluster should be created by the controller reconciler") ++ ++ Expect(*epsList.Items[0].Endpoints[0].NodeName).To(Equal("ip-10-32-5-13")) ++ ++ // Update the service's port ++ svc := createInfraServiceLB("infra-service-name", "tenant-service-name", "test-cluster", ++ v1.ServicePort{Name: "web", Port: 80, NodePort: 31900, Protocol: v1.ProtocolTCP, TargetPort: intstr.IntOrString{IntVal: 30440}}, ++ v1.ServiceExternalTrafficPolicyLocal) ++ ++ _, err = testVals.infraClient.CoreV1().Services(infraNamespace).Update(context.TODO(), svc, metav1.UpdateOptions{}) ++ Expect(err).To(BeNil()) ++ ++ Eventually(func() (bool, error) { ++ epsList, err = testVals.infraClient.DiscoveryV1().EndpointSlices(infraNamespace).List(context.TODO(), metav1.ListOptions{}) ++ if len(epsList.Items) == 1 { ++ if *epsList.Items[0].Ports[0].Port == 30440 { ++ return true, err ++ } ++ return 
false, err ++ } else { ++ return false, err ++ } ++ }).Should(BeTrue(), "EndpointSlice in infra cluster should have the two added labels") ++ }) ++ ++ g.It("Should delete the Endpointslice when the Service in infra is deleted", func() { ++ // Create VMI in infra cluster ++ createAndAssertVMI("worker-0-test", "ip-10-32-5-13", "123.45.67.89") ++ ++ // Create Endpoinslices in tenant cluster ++ createAndAssertTenantSlice("test-epslice", "tenant-service-name", discoveryv1.AddressTypeIPv4, ++ *createPort("http", 80, v1.ProtocolTCP), ++ []discoveryv1.Endpoint{*createEndpoint("123.45.67.89", "worker-0-test", true, true, false)}) ++ ++ // Create service in infra cluster ++ createAndAssertInfraServiceLB("infra-service-name", "tenant-service-name", "test-cluster", ++ v1.ServicePort{Name: "web", Port: 80, NodePort: 31900, Protocol: v1.ProtocolTCP, TargetPort: intstr.IntOrString{IntVal: 30390}}, ++ v1.ServiceExternalTrafficPolicyLocal) ++ ++ var epsList *discoveryv1.EndpointSliceList ++ var err error ++ // Check if the controller creates the EndpointSlice in the infra cluster ++ Eventually(func() (bool, error) { ++ epsList, err = testVals.infraClient.DiscoveryV1().EndpointSlices(infraNamespace).List(context.TODO(), metav1.ListOptions{}) ++ if len(epsList.Items) == 1 { ++ if *epsList.Items[0].Ports[0].Port == 30390 { ++ return true, err ++ } ++ return false, err ++ } else { ++ return false, err ++ } ++ }).Should(BeTrue(), "EndpointSlice in infra cluster should be created by the controller reconciler") ++ ++ Expect(*epsList.Items[0].Endpoints[0].NodeName).To(Equal("ip-10-32-5-13")) ++ ++ // Delete the service ++ err = testVals.infraClient.CoreV1().Services(infraNamespace).Delete(context.TODO(), "infra-service-name", metav1.DeleteOptions{}) ++ Expect(err).To(BeNil()) ++ ++ Eventually(func() (bool, error) { ++ epsList, err = testVals.infraClient.DiscoveryV1().EndpointSlices(infraNamespace).List(context.TODO(), metav1.ListOptions{}) ++ if len(epsList.Items) == 0 { ++ return true, 
err ++ } else { ++ return false, err ++ } ++ }).Should(BeTrue(), "EndpointSlice in infra cluster should be deleted.") ++ }) ++ ++ g.It("Should not update the Endpointslice on the infra cluster because VMI is not present", func() { ++ // Create VMI in infra cluster ++ createAndAssertVMI("worker-0-test", "ip-10-32-5-13", "123.45.67.89") ++ ++ // Create Endpoinslices in tenant cluster ++ createAndAssertTenantSlice("test-epslice", "tenant-service-name", discoveryv1.AddressTypeIPv4, ++ *createPort("http", 80, v1.ProtocolTCP), ++ []discoveryv1.Endpoint{*createEndpoint("123.45.67.89", "worker-0-test", true, true, false)}) ++ ++ // Create service in infra cluster ++ createAndAssertInfraServiceLB("infra-service-name", "tenant-service-name", "test-cluster", ++ v1.ServicePort{Name: "web", Port: 80, NodePort: 31900, Protocol: v1.ProtocolTCP, TargetPort: intstr.IntOrString{IntVal: 30390}}, v1.ServiceExternalTrafficPolicyLocal) ++ ++ // Check if the controller creates the EndpointSlice in the infra cluster ++ Eventually(func() (bool, error) { ++ epsList, err := testVals.infraClient.DiscoveryV1().EndpointSlices(infraNamespace).List(context.TODO(), metav1.ListOptions{}) ++ if len(epsList.Items) == 1 { ++ return true, err ++ } else { ++ return false, err ++ } ++ }).Should(BeTrue(), "EndpointSlice in infra cluster should be created by the controller reconciler") ++ ++ // ++ epSlice := createTenantEPSlice("test-epslice", "tenant-service-name", discoveryv1.AddressTypeIPv4, ++ *createPort("http", 80, v1.ProtocolTCP), ++ []discoveryv1.Endpoint{ ++ *createEndpoint("123.45.67.89", "worker-0-test", true, true, false), ++ *createEndpoint("112.34.56.78", "worker-1-test", true, true, false), ++ }) ++ ++ _, err := testVals.tenantClient.DiscoveryV1().EndpointSlices(tenantNamespace).Update(context.TODO(), epSlice, metav1.UpdateOptions{}) ++ Expect(err).To(BeNil()) ++ ++ // Check if tenant Endpointslice is updated ++ Eventually(func() (bool, error) { ++ epsList, err := 
testVals.tenantClient.DiscoveryV1().EndpointSlices(tenantNamespace).List(context.TODO(), metav1.ListOptions{})
++				if len(epsList.Items) == 1 && len(epsList.Items[0].Endpoints) == 2 {
++					return true, err
++				} else {
++					return false, err
++				}
++			}).Should(BeTrue(), "EndpointSlice in tenant cluster should be updated")
++
++			//Expect call to the infraDynamic.Get to return the VMI
++			Eventually(func() (bool, error) {
++				for _, action := range testVals.infraDynamic.Actions() {
++					if action.Matches("get", "virtualmachineinstances") &&
++						action.GetNamespace() == infraNamespace {
++						getAction := action.(testing.GetAction)
++						if getAction.GetName() == "worker-1-test" {
++							return true, nil
++						}
++					}
++				}
++				return false, nil
++			}).Should(BeTrue(), "Expect call to the infraDynamic.Get to return the VMI")
++
++		})
++	})
++})
+diff --git a/pkg/controller/kubevirteps/kubevirteps_controller_utils.go b/pkg/controller/kubevirteps/kubevirteps_controller_utils.go
+new file mode 100644
+index 00000000..0d3dbfd5
+--- /dev/null
++++ b/pkg/controller/kubevirteps/kubevirteps_controller_utils.go
+@@ -0,0 +1,98 @@
++package kubevirteps
++
++import (
++	v1 "k8s.io/api/core/v1"
++	discovery "k8s.io/api/discovery/v1"
++	"k8s.io/apimachinery/pkg/types"
++	"k8s.io/apimachinery/pkg/util/sets"
++	"k8s.io/klog/v2"
++	utilnet "k8s.io/utils/net"
++	"sync"
++)
++
++// getAddressTypesForService returns the EndpointSlice address types a service
++// needs. The IP families declared on the service win; without them the family
++// of the ClusterIP is used; a service with neither (headless, no families)
++// gets both IPv4 and IPv6.
++//
++// source: https://github.com/kubernetes/endpointslice/blob/master/utils.go#L280
++func getAddressTypesForService(service *v1.Service) sets.Set[discovery.AddressType] {
++	serviceSupportedAddresses := sets.New[discovery.AddressType]()
++
++	// Prefer the IP families declared on the service spec.
++	for _, family := range service.Spec.IPFamilies {
++		if family == v1.IPv4Protocol {
++			serviceSupportedAddresses.Insert(discovery.AddressTypeIPv4)
++		}
++
++		if family == v1.IPv6Protocol {
++			serviceSupportedAddresses.Insert(discovery.AddressTypeIPv6)
++		}
++	}
++
++	if serviceSupportedAddresses.Len() > 0 {
++		return serviceSupportedAddresses // we have found families for this service
++	}
++
++	// If no families are found, we will use the ClusterIP to determine the address type
++	if len(service.Spec.ClusterIP) > 0 && service.Spec.ClusterIP != v1.ClusterIPNone { // not headless
++		addrType := discovery.AddressTypeIPv4
++		if utilnet.IsIPv6String(service.Spec.ClusterIP) {
++			addrType = discovery.AddressTypeIPv6
++		}
++		serviceSupportedAddresses.Insert(addrType)
++		// InfoS, not Info: the trailing arguments are key/value pairs, which Info would
++		// simply concatenate onto the message.
++		klog.V(2).InfoS("Couldn't find ipfamilies for service. This could happen if controller manager is connected to an old apiserver that does not support ip families yet. EndpointSlices for this Service will use addressType as the IP Family based on familyOf(ClusterIP).", "service", klog.KObj(service), "addressType", addrType, "clusterIP", service.Spec.ClusterIP)
++		return serviceSupportedAddresses
++	}
++
++	serviceSupportedAddresses.Insert(discovery.AddressTypeIPv4)
++	serviceSupportedAddresses.Insert(discovery.AddressTypeIPv6)
++	klog.V(2).InfoS("Couldn't find ipfamilies for headless service, likely because controller manager is likely connected to an old apiserver that does not support ip families yet. The service endpoint slice will use dual stack families until api-server default it correctly", "service", klog.KObj(service))
++	return serviceSupportedAddresses
++}
++
++// The tenantEPSTracker is used to keep track of which EndpointSlices are being watched by the KubevirtCloudController.
++// This is necessary because the KubevirtCloudController needs to watch EndpointSlices in the tenant cluster that correspond
++// to Services in the infra cluster. The KubevirtCloudController needs to know which EndpointSlices to watch so that it can
++// update the corresponding EndpointSlices in the infra cluster when the tenant cluster's EndpointSlices change.
++type tenantEPSTracker struct { ++ sync.RWMutex ++ register []types.NamespacedName ++} ++ ++func (t *tenantEPSTracker) add(eps *discovery.EndpointSlice) { ++ t.Lock() ++ defer t.Unlock() ++ klog.Infof("Adding EndpointSlice %s to the tenantEPSTracker", eps.Name) ++ name := types.NamespacedName{ ++ Namespace: eps.Namespace, ++ Name: eps.Name, ++ } ++ t.register = append(t.register, name) ++} ++ ++func (t *tenantEPSTracker) remove(eps *discovery.EndpointSlice) { ++ t.Lock() ++ defer t.Unlock() ++ klog.Infof("Remove EndpointSlice %s to the tenantEPSTracker", eps.Name) ++ name := types.NamespacedName{ ++ Namespace: eps.Namespace, ++ Name: eps.Name, ++ } ++ for i, n := range t.register { ++ if n == name { ++ t.register = append(t.register[:i], t.register[i+1:]...) ++ return ++ } ++ } ++} ++ ++func (t *tenantEPSTracker) contains(eps *discovery.EndpointSlice) bool { ++ t.RLock() ++ defer t.RUnlock() ++ name := types.NamespacedName{ ++ Namespace: eps.Namespace, ++ Name: eps.Name, ++ } ++ for _, n := range t.register { ++ if n == name { ++ return true ++ } ++ } ++ return false ++} +diff --git a/pkg/provider/cloud.go b/pkg/provider/cloud.go +index 23400c2d..30b78784 100644 +--- a/pkg/provider/cloud.go ++++ b/pkg/provider/cloud.go +@@ -35,7 +35,7 @@ func init() { + } + } + +-type cloud struct { ++type Cloud struct { + namespace string + client client.Client + config CloudConfig +@@ -62,6 +62,11 @@ type LoadBalancerConfig struct { + // Selectorless delegate endpointslices creation on third party by + // skipping service selector creation + Selectorless *bool `yaml:"selectorless,omitempty"` ++ ++ // EnableEPSController determines if the EPS controller is enabled ++ // This is a temporary flag to enable/disable the EPS controller ++ // When disabled the service selector is used. 
++ EnableEPSController *bool `yaml:"enableEPSController,omitempty"` + } + + type InstancesV2Config struct { +@@ -119,7 +124,7 @@ func kubevirtCloudProviderFactory(config io.Reader) (cloudprovider.Interface, er + } + } else { + var infraKubeConfig string +- infraKubeConfig, err = getInfraKubeConfig(cloudConf.Kubeconfig) ++ infraKubeConfig, err = GetInfraKubeConfig(cloudConf.Kubeconfig) + if err != nil { + return nil, err + } +@@ -146,20 +151,20 @@ func kubevirtCloudProviderFactory(config io.Reader) (cloudprovider.Interface, er + if err != nil { + return nil, err + } +- return &cloud{ ++ return &Cloud{ + namespace: namespace, + client: c, + config: cloudConf, + }, nil + } + +-// Initialize provides the cloud with a kubernetes client builder and may spawn goroutines +-// to perform housekeeping activities within the cloud provider. +-func (c *cloud) Initialize(clientBuilder cloudprovider.ControllerClientBuilder, stop <-chan struct{}) { ++// Initialize provides the Cloud with a kubernetes client builder and may spawn goroutines ++// to perform housekeeping activities within the Cloud provider. ++func (c *Cloud) Initialize(clientBuilder cloudprovider.ControllerClientBuilder, stop <-chan struct{}) { + } + + // LoadBalancer returns a balancer interface. Also returns true if the interface is supported, false otherwise. +-func (c *cloud) LoadBalancer() (cloudprovider.LoadBalancer, bool) { ++func (c *Cloud) LoadBalancer() (cloudprovider.LoadBalancer, bool) { + if !c.config.LoadBalancer.Enabled { + return nil, false + } +@@ -172,11 +177,11 @@ func (c *cloud) LoadBalancer() (cloudprovider.LoadBalancer, bool) { + } + + // Instances returns an instances interface. Also returns true if the interface is supported, false otherwise. 
+-func (c *cloud) Instances() (cloudprovider.Instances, bool) { ++func (c *Cloud) Instances() (cloudprovider.Instances, bool) { + return nil, false + } + +-func (c *cloud) InstancesV2() (cloudprovider.InstancesV2, bool) { ++func (c *Cloud) InstancesV2() (cloudprovider.InstancesV2, bool) { + if !c.config.InstancesV2.Enabled { + return nil, false + } +@@ -189,31 +194,43 @@ func (c *cloud) InstancesV2() (cloudprovider.InstancesV2, bool) { + + // Zones returns a zones interface. Also returns true if the interface is supported, false otherwise. + // DEPRECATED: Zones is deprecated in favor of retrieving zone/region information from InstancesV2. +-func (c *cloud) Zones() (cloudprovider.Zones, bool) { ++func (c *Cloud) Zones() (cloudprovider.Zones, bool) { + return nil, false + } + + // Clusters returns a clusters interface. Also returns true if the interface is supported, false otherwise. +-func (c *cloud) Clusters() (cloudprovider.Clusters, bool) { ++func (c *Cloud) Clusters() (cloudprovider.Clusters, bool) { + return nil, false + } + + // Routes returns a routes interface along with whether the interface is supported. +-func (c *cloud) Routes() (cloudprovider.Routes, bool) { ++func (c *Cloud) Routes() (cloudprovider.Routes, bool) { + return nil, false + } + +-// ProviderName returns the cloud provider ID. +-func (c *cloud) ProviderName() string { ++// ProviderName returns the Cloud provider ID. 
++func (c *Cloud) ProviderName() string { + return ProviderName + } + + // HasClusterID returns true if a ClusterID is required and set +-func (c *cloud) HasClusterID() bool { ++func (c *Cloud) HasClusterID() bool { + return true + } + +-func getInfraKubeConfig(infraKubeConfigPath string) (string, error) { ++func (c *Cloud) GetInfraKubeconfig() (string, error) { ++ return GetInfraKubeConfig(c.config.Kubeconfig) ++} ++ ++func (c *Cloud) Namespace() string { ++ return c.namespace ++} ++ ++func (c *Cloud) GetCloudConfig() CloudConfig { ++ return c.config ++} ++ ++func GetInfraKubeConfig(infraKubeConfigPath string) (string, error) { + config, err := os.Open(infraKubeConfigPath) + if err != nil { + return "", fmt.Errorf("Couldn't open infra-kubeconfig: %v", err) +diff --git a/pkg/provider/cloud_test.go b/pkg/provider/cloud_test.go +index 53d88d33..88b5e654 100644 +--- a/pkg/provider/cloud_test.go ++++ b/pkg/provider/cloud_test.go +@@ -22,7 +22,7 @@ var ( + invalidKubeconf = "bla" + ) + +-func makeCloudConfig(kubeconfig, namespace string, loadbalancerEnabled, instancesEnabled bool, zoneAndRegionEnabled bool, lbCreationPollInterval int, lbCreationPollTimeout int) CloudConfig { ++func makeCloudConfig(kubeconfig, namespace string, loadbalancerEnabled, instancesEnabled, zoneAndRegionEnabled bool, lbCreationPollInterval int, lbCreationPollTimeout int) CloudConfig { + return CloudConfig{ + Kubeconfig: kubeconfig, + LoadBalancer: LoadBalancerConfig{ +diff --git a/pkg/provider/loadbalancer.go b/pkg/provider/loadbalancer.go +index 56cc5587..4ae521a4 100644 +--- a/pkg/provider/loadbalancer.go ++++ b/pkg/provider/loadbalancer.go +@@ -21,6 +21,11 @@ const ( + + // Default timeout between polling the service after creation + defaultLoadBalancerCreatePollTimeout = 5 * time.Minute ++ ++ TenantServiceNameLabelKey = "cluster.x-k8s.io/tenant-service-name" ++ TenantServiceNamespaceLabelKey = "cluster.x-k8s.io/tenant-service-namespace" ++ TenantClusterNameLabelKey = 
"cluster.x-k8s.io/cluster-name" ++ TenantNodeRoleLabelKey = "cluster.x-k8s.io/role" + ) + + type loadbalancer struct { +@@ -75,14 +80,14 @@ func (lb *loadbalancer) EnsureLoadBalancer(ctx context.Context, clusterName stri + } + + vmiLabels := map[string]string{ +- "cluster.x-k8s.io/role": "worker", +- "cluster.x-k8s.io/cluster-name": clusterName, ++ TenantNodeRoleLabelKey: "worker", ++ TenantClusterNameLabelKey: clusterName, + } + + lbLabels := map[string]string{ +- "cluster.x-k8s.io/tenant-service-name": service.Name, +- "cluster.x-k8s.io/tenant-service-namespace": service.Namespace, +- "cluster.x-k8s.io/cluster-name": clusterName, ++ TenantServiceNameLabelKey: service.Name, ++ TenantServiceNamespaceLabelKey: service.Namespace, ++ TenantClusterNameLabelKey: clusterName, + } + + for key, val := range lb.infraLabels { +@@ -202,7 +207,12 @@ func (lb *loadbalancer) createLoadBalancerService(ctx context.Context, lbName st + ExternalTrafficPolicy: service.Spec.ExternalTrafficPolicy, + }, + } +- if lb.config.Selectorless == nil || !*lb.config.Selectorless { ++ // Give controller privilege above selectorless ++ if lb.config.EnableEPSController != nil && *lb.config.EnableEPSController && service.Spec.ExternalTrafficPolicy == corev1.ServiceExternalTrafficPolicyTypeLocal { ++ lbService.Spec.Selector = nil ++ } else if lb.config.Selectorless != nil && *lb.config.Selectorless { ++ lbService.Spec.Selector = nil ++ } else { + lbService.Spec.Selector = vmiLabels + } + if len(service.Spec.ExternalIPs) > 0 { +diff --git a/pkg/provider/loadbalancer_test.go b/pkg/provider/loadbalancer_test.go +index c7297b9f..c51df803 100644 +--- a/pkg/provider/loadbalancer_test.go ++++ b/pkg/provider/loadbalancer_test.go +@@ -127,7 +127,7 @@ func cmpLoadBalancerStatuses(a, b *corev1.LoadBalancerStatus) bool { + } + + func generateInfraService(tenantSvc *corev1.Service, ports []corev1.ServicePort) *corev1.Service { +- return &corev1.Service{ ++ svc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ 
+ Name: lbServiceName, + Namespace: lbServiceNamespace, +@@ -142,12 +142,15 @@ func generateInfraService(tenantSvc *corev1.Service, ports []corev1.ServicePort) + Type: corev1.ServiceTypeLoadBalancer, + Ports: ports, + ExternalTrafficPolicy: tenantSvc.Spec.ExternalTrafficPolicy, +- Selector: map[string]string{ +- "cluster.x-k8s.io/role": "worker", +- "cluster.x-k8s.io/cluster-name": clusterName, +- }, + }, + } ++ if tenantSvc.Spec.ExternalTrafficPolicy != corev1.ServiceExternalTrafficPolicyLocal { ++ svc.Spec.Selector = map[string]string{ ++ TenantNodeRoleLabelKey: "worker", ++ TenantClusterNameLabelKey: clusterName, ++ } ++ } ++ return svc + } + + var _ = Describe("LoadBalancer", func() { +@@ -278,6 +281,56 @@ var _ = Describe("LoadBalancer", func() { + + }) + ++ It("Should create a loadbalancer without selectors when ExternalTrafficPolicy is local and eps controller is enabled", func() { ++ checkSvcExistErr := notFoundErr ++ getCount := 3 ++ ++ tenantService.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeLocal ++ lb.config.EnableEPSController = pointer.Bool(true) ++ lb.config.Selectorless = pointer.Bool(true) ++ ++ c.EXPECT(). ++ Get(ctx, client.ObjectKey{Name: "af6ebf1722bb111e9b210d663bd873d9", Namespace: "test"}, gomock.AssignableToTypeOf(&corev1.Service{})). 
++ Return(checkSvcExistErr) ++ ++ infraService1 := generateInfraService( ++ tenantService, ++ []corev1.ServicePort{ ++ {Name: "port1", Protocol: corev1.ProtocolTCP, Port: 80, TargetPort: intstr.IntOrString{Type: intstr.Int, IntVal: 30001}}, ++ }, ++ ) ++ infraService1.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeLocal ++ ++ c.EXPECT().Create(ctx, infraService1) ++ ++ for i := 0; i < getCount; i++ { ++ infraService2 := infraService1.DeepCopy() ++ if i == getCount-1 { ++ infraService2.Status = corev1.ServiceStatus{ ++ LoadBalancer: corev1.LoadBalancerStatus{ ++ Ingress: []corev1.LoadBalancerIngress{ ++ { ++ IP: loadBalancerIP, ++ }, ++ }, ++ }, ++ } ++ } ++ c.EXPECT().Get( ++ ctx, ++ client.ObjectKey{Name: "af6ebf1722bb111e9b210d663bd873d9", Namespace: "test"}, ++ gomock.AssignableToTypeOf(&corev1.Service{}), ++ ).Do(func(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) { ++ infraService2.DeepCopyInto(obj.(*corev1.Service)) ++ }) ++ } ++ ++ lbStatus, err := lb.EnsureLoadBalancer(ctx, clusterName, tenantService, nodes) ++ Expect(err).To(BeNil()) ++ Expect(len(lbStatus.Ingress)).Should(Equal(1)) ++ Expect(lbStatus.Ingress[0].IP).Should(Equal(loadBalancerIP)) ++ }) ++ + It("Should create new Service and poll LoadBalancer service 1 time", func() { + checkSvcExistErr := notFoundErr + getCount := 1 diff --git a/packages/apps/kubernetes/images/kubevirt-csi-driver.tag b/packages/apps/kubernetes/images/kubevirt-csi-driver.tag new file mode 100644 index 00000000..529d404f --- /dev/null +++ b/packages/apps/kubernetes/images/kubevirt-csi-driver.tag @@ -0,0 +1 @@ +ghcr.io/aenix-io/cozystack/kubevirt-csi-driver:latest@sha256:e56b46591cdf9140e97c3220a0c2681aadd4a4b3f7ea8473fb2504dc96e8b53a diff --git a/packages/apps/kubernetes/images/kubevirt-csi-driver/Dockerfile b/packages/apps/kubernetes/images/kubevirt-csi-driver/Dockerfile new file mode 100644 index 00000000..b53c7b3d --- /dev/null +++ 
b/packages/apps/kubernetes/images/kubevirt-csi-driver/Dockerfile @@ -0,0 +1,25 @@ +# Source: https://github.com/kubevirt/csi-driver/blob/main/Dockerfile +ARG builder_image=docker.io/library/golang:1.22.5 +FROM ${builder_image} AS builder +RUN git clone https://github.com/kubevirt/csi-driver /src/kubevirt-csi-driver \ + && cd /src/kubevirt-csi-driver \ + && git checkout 35836e0c8b68d9916d29a838ea60cdd3fc6199cf + +WORKDIR /src/kubevirt-csi-driver +RUN make build + +FROM quay.io/centos/centos:stream9 +ARG git_url=https://github.com/kubevirt/csi-driver.git + +LABEL maintainers="The KubeVirt Project " \ + description="KubeVirt CSI Driver" \ + multi.GIT_URL=${git_url} + +ENTRYPOINT ["./kubevirt-csi-driver"] + +RUN dnf install -y e2fsprogs xfsprogs && dnf clean all + +ARG git_sha=NONE +LABEL multi.GIT_SHA=${git_sha} + +COPY --from=builder /src/kubevirt-csi-driver/kubevirt-csi-driver . diff --git a/packages/apps/kubernetes/templates/cloud-config.yaml b/packages/apps/kubernetes/templates/cloud-config.yaml index 9f16548a..b1399b11 100644 --- a/packages/apps/kubernetes/templates/cloud-config.yaml +++ b/packages/apps/kubernetes/templates/cloud-config.yaml @@ -7,4 +7,6 @@ data: loadBalancer: creationPollInterval: 5 creationPollTimeout: 60 + enableEPSController: true + selectorless: true namespace: {{ .Release.Namespace }} diff --git a/packages/apps/kubernetes/templates/cluster-autoscaler/deployment.yaml b/packages/apps/kubernetes/templates/cluster-autoscaler/deployment.yaml index 9ecf1bf4..934abe36 100644 --- a/packages/apps/kubernetes/templates/cluster-autoscaler/deployment.yaml +++ b/packages/apps/kubernetes/templates/cluster-autoscaler/deployment.yaml @@ -23,7 +23,7 @@ spec: operator: Exists effect: "NoSchedule" containers: - - image: ghcr.io/kvaps/test:cluster-autoscaller + - image: "{{ $.Files.Get "images/cluster-autoscaler.tag" | trim }}" name: cluster-autoscaler command: - /cluster-autoscaler diff --git a/packages/apps/kubernetes/templates/csi/deploy.yaml 
b/packages/apps/kubernetes/templates/csi/deploy.yaml index 225935fa..0b1a4c9a 100644 --- a/packages/apps/kubernetes/templates/csi/deploy.yaml +++ b/packages/apps/kubernetes/templates/csi/deploy.yaml @@ -26,7 +26,7 @@ spec: containers: - name: csi-driver imagePullPolicy: Always - image: ghcr.io/kvaps/test:kubevirt-csi-driver + image: "{{ $.Files.Get "images/kubevirt-csi-driver.tag" | trim }}" args: - "--endpoint=$(CSI_ENDPOINT)" - "--infra-cluster-namespace=$(INFRACLUSTER_NAMESPACE)" diff --git a/packages/apps/kubernetes/templates/kccm/kccm_role.yaml b/packages/apps/kubernetes/templates/kccm/kccm_role.yaml index fee06003..314c0f55 100644 --- a/packages/apps/kubernetes/templates/kccm/kccm_role.yaml +++ b/packages/apps/kubernetes/templates/kccm/kccm_role.yaml @@ -34,6 +34,12 @@ rules: - services verbs: - "*" +- apiGroups: + - "discovery.k8s.io" + resources: + - "endpointslices" + verbs: + - "*" - apiGroups: - "" resources: diff --git a/packages/apps/kubernetes/templates/kccm/manager.yaml b/packages/apps/kubernetes/templates/kccm/manager.yaml index 69dd58aa..0f581243 100644 --- a/packages/apps/kubernetes/templates/kccm/manager.yaml +++ b/packages/apps/kubernetes/templates/kccm/manager.yaml @@ -30,7 +30,7 @@ spec: - --cluster-name={{ .Release.Name }} command: - /bin/kubevirt-cloud-controller-manager - image: ghcr.io/kvaps/test:kubevirt-cloud-provider + image: "{{ $.Files.Get "images/kubevirt-cloud-provider.tag" | trim }}" imagePullPolicy: Always #securityContext: # privileged: true diff --git a/packages/system/capi-providers/templates/providers.yaml b/packages/system/capi-providers/templates/providers.yaml index 7173bef2..06a605f2 100644 --- a/packages/system/capi-providers/templates/providers.yaml +++ b/packages/system/capi-providers/templates/providers.yaml @@ -5,7 +5,7 @@ metadata: name: cluster-api spec: # https://github.com/kubernetes-sigs/cluster-api - version: v1.7.3 + version: v1.8.3 --- apiVersion: operator.cluster.x-k8s.io/v1alpha2 kind: ControlPlaneProvider 
@@ -13,7 +13,7 @@ metadata: name: kamaji spec: # https://github.com/clastix/cluster-api-control-plane-provider-kamaji - version: v0.10.0 + version: v0.11.0 --- apiVersion: operator.cluster.x-k8s.io/v1alpha2 kind: BootstrapProvider @@ -21,7 +21,7 @@ metadata: name: kubeadm spec: # https://github.com/kubernetes-sigs/cluster-api - version: v1.7.3 + version: v1.8.3 --- apiVersion: operator.cluster.x-k8s.io/v1alpha2 kind: InfrastructureProvider @@ -29,4 +29,4 @@ metadata: name: kubevirt spec: # https://github.com/kubernetes-sigs/cluster-api-provider-kubevirt - version: v0.1.8 + version: v0.1.9 From 00b2834efcd5d4871e55cf2fbbb367ade26f7ffc Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Thu, 26 Sep 2024 20:26:28 +0200 Subject: [PATCH 09/41] Fix rabbitmq users creation (#367) --- packages/apps/rabbitmq/Chart.yaml | 2 +- packages/apps/rabbitmq/templates/rabbitmq.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/apps/rabbitmq/Chart.yaml b/packages/apps/rabbitmq/Chart.yaml index 71576874..07a799e4 100644 --- a/packages/apps/rabbitmq/Chart.yaml +++ b/packages/apps/rabbitmq/Chart.yaml @@ -16,7 +16,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.4.1 +version: 0.4.2 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
Versions are not expected to diff --git a/packages/apps/rabbitmq/templates/rabbitmq.yaml b/packages/apps/rabbitmq/templates/rabbitmq.yaml index 5392c4ec..1417af76 100644 --- a/packages/apps/rabbitmq/templates/rabbitmq.yaml +++ b/packages/apps/rabbitmq/templates/rabbitmq.yaml @@ -47,7 +47,7 @@ metadata: config: '{{ printf "%s %s" $user $password | sha256sum }}' spec: importCredentialsSecret: - name: {{ $.Release.Name }}-{{ $user }}-credentials + name: {{ $.Release.Name }}-{{ kebabcase $user }}-credentials rabbitmqClusterReference: name: {{ $.Release.Name }} --- From 01ce122ada9b27a5b258a8de68d934ffb2297478 Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Thu, 26 Sep 2024 14:40:34 -0400 Subject: [PATCH 10/41] Adopt flux-instance from upstream (#363) Builds on #362 The main issue we will have to solve (maybe with a patch) is that `cluster.domain` is always specified in this chart; I'm reading to try to recall how we solved this last time. ## Summary by CodeRabbit ## Release Notes - **New Features** - Updated the Flux Operator Helm chart to version 0.9.0, introducing enhanced configuration options for service monitoring and resource management. - Added a new `ServiceMonitor` resource for Prometheus integration. - Introduced a `serviceMonitor` configuration option with default values for scraping settings. - New `FluxInstance` resource configuration file added for deploying a Flux instance. - **Documentation** - Updated README files to reflect new version and provide installation instructions for the Flux instance. - Added a `NOTES.txt` file directing users to Flux CD operator documentation. - **Bug Fixes** - Corrected links in documentation and ensured proper metadata for the new chart. - **Chores** - Restructured configuration files for improved organization and clarity. - Introduced a `.helmignore` file to streamline package building. 
--------- Signed-off-by: Kingdon Barrett Signed-off-by: Andrei Kvapil Co-authored-by: Andrei Kvapil --- packages/system/fluxcd/Makefile | 6 +- .../fluxcd/charts/flux-instance/.helmignore | 24 +++ .../fluxcd/charts/flux-instance/Chart.yaml | 28 ++++ .../fluxcd/charts/flux-instance/README.md | 52 ++++++ .../charts/flux-instance/templates/NOTES.txt | 1 + .../flux-instance/templates/_helpers.tpl | 51 ++++++ .../flux-instance/templates/instance.yaml | 43 +++++ .../charts/flux-instance/values.schema.json | 153 ++++++++++++++++++ .../fluxcd/charts/flux-instance/values.yaml | 49 ++++++ .../fluxcd/templates/flux-instance.yaml | 25 --- packages/system/fluxcd/values.yaml | 96 +++++------ 11 files changed, 455 insertions(+), 73 deletions(-) create mode 100644 packages/system/fluxcd/charts/flux-instance/.helmignore create mode 100644 packages/system/fluxcd/charts/flux-instance/Chart.yaml create mode 100644 packages/system/fluxcd/charts/flux-instance/README.md create mode 100644 packages/system/fluxcd/charts/flux-instance/templates/NOTES.txt create mode 100644 packages/system/fluxcd/charts/flux-instance/templates/_helpers.tpl create mode 100644 packages/system/fluxcd/charts/flux-instance/templates/instance.yaml create mode 100644 packages/system/fluxcd/charts/flux-instance/values.schema.json create mode 100644 packages/system/fluxcd/charts/flux-instance/values.yaml delete mode 100644 packages/system/fluxcd/templates/flux-instance.yaml diff --git a/packages/system/fluxcd/Makefile b/packages/system/fluxcd/Makefile index 2aca5f05..907ab40a 100644 --- a/packages/system/fluxcd/Makefile +++ b/packages/system/fluxcd/Makefile @@ -1,7 +1,11 @@ NAME=fluxcd NAMESPACE=cozy-$(NAME) +include ../../../scripts/package.mk + apply-locally: helm upgrade -i -n $(NAMESPACE) $(NAME) . 
-include ../../../scripts/package.mk +update: + rm -rf charts + helm pull oci://ghcr.io/controlplaneio-fluxcd/charts/flux-instance --untar --untardir charts diff --git a/packages/system/fluxcd/charts/flux-instance/.helmignore b/packages/system/fluxcd/charts/flux-instance/.helmignore new file mode 100644 index 00000000..f24ae1c9 --- /dev/null +++ b/packages/system/fluxcd/charts/flux-instance/.helmignore @@ -0,0 +1,24 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ +helmdocs.gotmpl diff --git a/packages/system/fluxcd/charts/flux-instance/Chart.yaml b/packages/system/fluxcd/charts/flux-instance/Chart.yaml new file mode 100644 index 00000000..8994b5cd --- /dev/null +++ b/packages/system/fluxcd/charts/flux-instance/Chart.yaml @@ -0,0 +1,28 @@ +annotations: + artifacthub.io/license: AGPL-3.0 + artifacthub.io/links: | + - name: Documentation + url: https://fluxcd.control-plane.io/operator + - name: Chart Source + url: https://github.com/controlplaneio-fluxcd/charts + - name: Upstream Project + url: https://github.com/controlplaneio-fluxcd/flux-operator +apiVersion: v2 +appVersion: v0.9.0 +description: 'A Helm chart for deploying a Flux instance managed by Flux Operator. 
' +home: https://github.com/controlplaneio-fluxcd +icon: https://raw.githubusercontent.com/cncf/artwork/main/projects/flux/icon/color/flux-icon-color.png +keywords: +- flux +- fluxcd +- gitops +kubeVersion: '>=1.22.0-0' +maintainers: +- email: flux-enterprise@control-plane.io + name: ControlPlane Flux Team +name: flux-instance +sources: +- https://github.com/controlplaneio-fluxcd/flux-operator +- https://github.com/controlplaneio-fluxcd/charts +type: application +version: 0.9.0 diff --git a/packages/system/fluxcd/charts/flux-instance/README.md b/packages/system/fluxcd/charts/flux-instance/README.md new file mode 100644 index 00000000..68dbba33 --- /dev/null +++ b/packages/system/fluxcd/charts/flux-instance/README.md @@ -0,0 +1,52 @@ +# flux-instance + +![Version: 0.9.0](https://img.shields.io/badge/Version-0.9.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v0.9.0](https://img.shields.io/badge/AppVersion-v0.9.0-informational?style=flat-square) + +This chart is a thin wrapper around the `FluxInstance` custom resource, which is +used by the [Flux Operator](https://github.com/controlplaneio-fluxcd/flux-operator) +to install, configure and automatically upgrade Flux. + +## Prerequisites + +- Kubernetes 1.22+ +- Helm 3.8+ + +## Installing the Chart + +To deploy Flux in the `flux-system` namespace: + +```console +helm -n flux-system install flux oci://ghcr.io/controlplaneio-fluxcd/charts/flux-instance +``` + +For more information on the available configuration options, +see the [Flux Instance documentation](https://fluxcd.control-plane.io/operator/fluxinstance/). 
+ +## Uninstalling the Chart + +To uninstall Flux without affecting the resources it manages: + +```console +helm -n flux-system uninstall flux +``` + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| commonAnnotations | object | `{}` | Common annotations to add to all deployed objects including pods. | +| commonLabels | object | `{}` | Common labels to add to all deployed objects including pods. | +| fullnameOverride | string | `"flux"` | | +| instance.cluster | object | `{"domain":"cluster.local","multitenant":false,"networkPolicy":true,"tenantDefaultServiceAccount":"default","type":"kubernetes"}` | Cluster https://fluxcd.control-plane.io/operator/fluxinstance/#cluster-configuration | +| instance.components | list | `["source-controller","kustomize-controller","helm-controller","notification-controller"]` | Components https://fluxcd.control-plane.io/operator/fluxinstance/#components-configuration | +| instance.distribution | object | `{"artifact":"oci://ghcr.io/controlplaneio-fluxcd/flux-operator-manifests:latest","imagePullSecret":"","registry":"ghcr.io/fluxcd","version":"2.x"}` | Distribution https://fluxcd.control-plane.io/operator/fluxinstance/#distribution-configuration | +| instance.kustomize.patches | list | `[]` | Kustomize patches https://fluxcd.control-plane.io/operator/fluxinstance/#kustomize-patches | +| instance.sharding | object | `{"key":"sharding.fluxcd.io/key","shards":[]}` | Sharding https://fluxcd.control-plane.io/operator/fluxinstance/#sharding-configuration | +| instance.storage | object | `{"class":"","size":""}` | Storage https://fluxcd.control-plane.io/operator/fluxinstance/#storage-configuration | +| instance.sync | object | `{"kind":"GitRepository","path":"","pullSecret":"","ref":"","url":""}` | Sync https://fluxcd.control-plane.io/operator/fluxinstance/#sync-configuration | +| nameOverride | string | `""` | | + +## Source Code + +* <https://github.com/controlplaneio-fluxcd/flux-operator> +* <https://github.com/controlplaneio-fluxcd/charts> diff --git
a/packages/system/fluxcd/charts/flux-instance/templates/NOTES.txt b/packages/system/fluxcd/charts/flux-instance/templates/NOTES.txt new file mode 100644 index 00000000..b09e5e52 --- /dev/null +++ b/packages/system/fluxcd/charts/flux-instance/templates/NOTES.txt @@ -0,0 +1 @@ +Documentation at https://fluxcd.control-plane.io/operator/ diff --git a/packages/system/fluxcd/charts/flux-instance/templates/_helpers.tpl b/packages/system/fluxcd/charts/flux-instance/templates/_helpers.tpl new file mode 100644 index 00000000..f04148b5 --- /dev/null +++ b/packages/system/fluxcd/charts/flux-instance/templates/_helpers.tpl @@ -0,0 +1,51 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "flux-instance.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "flux-instance.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "flux-instance.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "flux-instance.labels" -}} +helm.sh/chart: {{ include "flux-instance.chart" . }} +{{ include "flux-instance.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "flux-instance.selectorLabels" -}} +app.kubernetes.io/name: {{ include "flux-instance.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/packages/system/fluxcd/charts/flux-instance/templates/instance.yaml b/packages/system/fluxcd/charts/flux-instance/templates/instance.yaml new file mode 100644 index 00000000..ebcd0629 --- /dev/null +++ b/packages/system/fluxcd/charts/flux-instance/templates/instance.yaml @@ -0,0 +1,43 @@ +apiVersion: fluxcd.controlplane.io/v1 +kind: FluxInstance +metadata: + name: {{ include "flux-instance.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "flux-instance.labels" . | nindent 4 }} + {{- with .Values.commonLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.commonAnnotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + distribution: + version: {{ .Values.instance.distribution.version }} + registry: {{ .Values.instance.distribution.registry }} + artifact: {{ .Values.instance.distribution.artifact }} + {{- if .Values.instance.distribution.imagePullSecret }} + imagePullSecret: {{ .Values.instance.distribution.imagePullSecret }} + {{- end }} + components: {{ .Values.instance.components | toYaml | nindent 4 }} + cluster: {{ .Values.instance.cluster | toYaml | nindent 4 }} + kustomize: {{ .Values.instance.kustomize | toYaml | nindent 4 }} + {{- if .Values.instance.sync.url }} + sync: + kind: {{ .Values.instance.sync.kind }} + url: {{ .Values.instance.sync.url }} + ref: {{ .Values.instance.sync.ref }} + path: {{ .Values.instance.sync.path }} + {{- if .Values.instance.sync.pullSecret }} + pullSecret: {{ .Values.instance.sync.pullSecret }} + {{- end }} + {{- end }} + {{- if .Values.instance.storage.size }} + storage: {{ .Values.instance.storage | toYaml | nindent 4 }} + {{- end }} + {{- if .Values.instance.sharding.shards }} + sharding: + key: {{ .Values.instance.sharding.key }} + shards: {{ .Values.instance.sharding.shards | toYaml | nindent 4 }} + {{- end }} diff --git a/packages/system/fluxcd/charts/flux-instance/values.schema.json b/packages/system/fluxcd/charts/flux-instance/values.schema.json new file mode 100644 index 00000000..a3cd648c --- /dev/null +++ b/packages/system/fluxcd/charts/flux-instance/values.schema.json @@ -0,0 +1,153 @@ +{ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "properties": { + "commonAnnotations": { + "properties": {}, + "type": "object" + }, + "commonLabels": { + "properties": {}, + "type": "object" + }, + "fullnameOverride": { + "type": "string" + }, + "instance": { + "properties": { + "cluster": { + "properties": { + "domain": { + "type": "string" + }, + "multitenant": { + "type": "boolean" + }, + "networkPolicy": { + "type": "boolean" + }, + "tenantDefaultServiceAccount": { + "type": "string" + }, + 
"type": { + "enum": [ + "kubernetes", + "openshift", + "aws", + "azure", + "gcp" + ], + "type": "string" + } + }, + "type": "object" + }, + "components": { + "items": { + "enum": [ + "source-controller", + "kustomize-controller", + "helm-controller", + "notification-controller", + "image-reflector-controller", + "image-automation-controller" + ], + "type": "string" + }, + "type": "array", + "uniqueItems": true + }, + "distribution": { + "properties": { + "artifact": { + "type": "string" + }, + "imagePullSecret": { + "type": "string" + }, + "registry": { + "type": "string" + }, + "version": { + "type": "string" + } + }, + "required": [ + "version", + "registry" + ], + "type": "object" + }, + "kustomize": { + "properties": { + "patches": { + "items": { + "type": "object" + }, + "type": "array" + } + }, + "type": "object" + }, + "sharding": { + "properties": { + "key": { + "type": "string" + }, + "shards": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "type": "object" + }, + "storage": { + "properties": { + "class": { + "type": "string" + }, + "size": { + "type": "string" + } + }, + "type": "object" + }, + "sync": { + "properties": { + "kind": { + "enum": [ + "GitRepository", + "OCIRepository", + "Bucket" + ], + "type": "string" + }, + "path": { + "type": "string" + }, + "pullSecret": { + "type": "string" + }, + "ref": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "type": "object" + } + }, + "required": [ + "distribution", + "cluster" + ], + "type": "object" + }, + "nameOverride": { + "type": "string" + } + }, + "type": "object" +} diff --git a/packages/system/fluxcd/charts/flux-instance/values.yaml b/packages/system/fluxcd/charts/flux-instance/values.yaml new file mode 100644 index 00000000..c6426a13 --- /dev/null +++ b/packages/system/fluxcd/charts/flux-instance/values.yaml @@ -0,0 +1,49 @@ +# Default values for flux-instance. 
+ +nameOverride: "" +fullnameOverride: "flux" + +instance: + # -- Distribution https://fluxcd.control-plane.io/operator/fluxinstance/#distribution-configuration + distribution: # @schema required: true + version: "2.x" # @schema required: true + registry: "ghcr.io/fluxcd" # @schema required: true + artifact: "oci://ghcr.io/controlplaneio-fluxcd/flux-operator-manifests:latest" + imagePullSecret: "" + # -- Components https://fluxcd.control-plane.io/operator/fluxinstance/#components-configuration + components: # @schema item: string; uniqueItems: true; itemEnum: [source-controller,kustomize-controller,helm-controller,notification-controller,image-reflector-controller,image-automation-controller] + - source-controller + - kustomize-controller + - helm-controller + - notification-controller + # -- Cluster https://fluxcd.control-plane.io/operator/fluxinstance/#cluster-configuration + cluster: # @schema required: true + type: kubernetes # @schema enum:[kubernetes,openshift,aws,azure,gcp] + domain: "cluster.local" + networkPolicy: true + multitenant: false + tenantDefaultServiceAccount: "default" + # -- Storage https://fluxcd.control-plane.io/operator/fluxinstance/#storage-configuration + storage: # @schema required: false + class: "" + size: "" + # -- Sharding https://fluxcd.control-plane.io/operator/fluxinstance/#sharding-configuration + sharding: # @schema required: false + key: "sharding.fluxcd.io/key" + shards: [] # @schema item: string + # -- Sync https://fluxcd.control-plane.io/operator/fluxinstance/#sync-configuration + sync: # @schema required: false + kind: "GitRepository" # @schema enum:[GitRepository,OCIRepository,Bucket] + url: "" + ref: "" + path: "" + pullSecret: "" + kustomize: # @schema required: false + # -- Kustomize patches https://fluxcd.control-plane.io/operator/fluxinstance/#kustomize-patches + patches: [] # @schema item: object + +# -- Common annotations to add to all deployed objects including pods. 
+commonAnnotations: { } + +# -- Common labels to add to all deployed objects including pods. +commonLabels: { } diff --git a/packages/system/fluxcd/templates/flux-instance.yaml b/packages/system/fluxcd/templates/flux-instance.yaml deleted file mode 100644 index ce0fd133..00000000 --- a/packages/system/fluxcd/templates/flux-instance.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: fluxcd.controlplane.io/v1 -kind: FluxInstance -metadata: - name: flux -spec: - {{- with .Values.cluster }} - cluster: - {{- with .networkPolicy }} - networkPolicy: {{ . }} - {{- end }} - {{- with .domain }} - domain: {{ . }} - {{- end }} - {{- end }} - distribution: - version: {{ .Values.distribution.version }} - registry: {{ .Values.distribution.registry }} - components: - {{- if .Values.components }} - {{- toYaml .Values.components | nindent 4 }} - {{- end }} - kustomize: - {{- if .Values.kustomize }} - {{- toYaml .Values.kustomize | nindent 4 }} - {{- end }} diff --git a/packages/system/fluxcd/values.yaml b/packages/system/fluxcd/values.yaml index 9eba35ec..c766a685 100644 --- a/packages/system/fluxcd/values.yaml +++ b/packages/system/fluxcd/values.yaml @@ -1,47 +1,49 @@ -cluster: - networkPolicy: true -# domain: cozy.local -distribution: - version: 2.3.x - registry: ghcr.io/fluxcd -components: - - source-controller - - kustomize-controller - - helm-controller - - notification-controller - - image-reflector-controller - - image-automation-controller -kustomize: - patches: - - target: - kind: Deployment - name: "(kustomize-controller|helm-controller|source-controller)" - patch: | - - op: add - path: /spec/template/spec/containers/0/args/- - value: --concurrent=20 - - op: add - path: /spec/template/spec/containers/0/args/- - value: --requeue-dependency=5s - - op: replace - path: /spec/template/spec/containers/0/resources/limits - value: - cpu: 2000m - memory: 2048Mi - - target: - kind: Deployment - name: source-controller - patch: | - - op: add - path: 
/spec/template/spec/containers/0/args/- - value: --storage-adv-addr=source-controller.cozy-fluxcd.svc - - op: add - path: /spec/template/spec/containers/0/args/- - value: --events-addr=http://notification-controller.cozy-fluxcd.svc/ - - target: - kind: Deployment - name: (kustomize-controller|helm-controller|image-reflector-controller|image-automation-controller) - patch: | - - op: add - path: /spec/template/spec/containers/0/args/- - value: --events-addr=http://notification-controller.cozy-fluxcd.svc/ +flux-instance: + instance: + cluster: + networkPolicy: true + domain: cozy.local # -- default value is overridden in patches + distribution: + version: 2.3.x + registry: ghcr.io/fluxcd + components: + - source-controller + - kustomize-controller + - helm-controller + - notification-controller + - image-reflector-controller + - image-automation-controller + kustomize: + patches: + - target: + kind: Deployment + name: "(kustomize-controller|helm-controller|source-controller)" + patch: | + - op: add + path: /spec/template/spec/containers/0/args/- + value: --concurrent=20 + - op: add + path: /spec/template/spec/containers/0/args/- + value: --requeue-dependency=5s + - op: replace + path: /spec/template/spec/containers/0/resources/limits + value: + cpu: 2000m + memory: 2048Mi + - target: + kind: Deployment + name: source-controller + patch: | + - op: add + path: /spec/template/spec/containers/0/args/- + value: --storage-adv-addr=source-controller.cozy-fluxcd.svc + - op: add + path: /spec/template/spec/containers/0/args/- + value: --events-addr=http://notification-controller.cozy-fluxcd.svc/ + - target: + kind: Deployment + name: (kustomize-controller|helm-controller|image-reflector-controller|image-automation-controller) + patch: | + - op: add + path: /spec/template/spec/containers/0/args/- + value: --events-addr=http://notification-controller.cozy-fluxcd.svc/ From ecfa4f8005daa5e3cf01087bcf098ec536c68e8e Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Fri, 27 Sep 2024
11:49:25 +0200 Subject: [PATCH 11/41] Seaweedfs: Fix attributes for bucket creation (#371) Signed-off-by: Andrei Kvapil Signed-off-by: Andrei Kvapil --- packages/system/seaweedfs/Makefile | 4 ++++ packages/system/seaweedfs/charts/seaweedfs/values.yaml | 2 +- packages/system/seaweedfs/values.yaml | 1 - 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/system/seaweedfs/Makefile b/packages/system/seaweedfs/Makefile index e079734e..1868ccdd 100644 --- a/packages/system/seaweedfs/Makefile +++ b/packages/system/seaweedfs/Makefile @@ -1,3 +1,7 @@ +NAME=seaweedfs-system + +include ../../../scripts/package.mk + update: rm -rf charts mkdir -p charts diff --git a/packages/system/seaweedfs/charts/seaweedfs/values.yaml b/packages/system/seaweedfs/charts/seaweedfs/values.yaml index e53fa596..3aecc5a7 100644 --- a/packages/system/seaweedfs/charts/seaweedfs/values.yaml +++ b/packages/system/seaweedfs/charts/seaweedfs/values.yaml @@ -877,7 +877,7 @@ s3: # For more information, visit: https://container-object-storage-interface.github.io/docs/deployment-guide cosi: enabled: false - image: "ghcr.io/seaweedfs/seaweedfs-cosi-driver:v0.1.1" + image: "ghcr.io/seaweedfs/seaweedfs-cosi-driver:v0.1.2" driverName: "seaweedfs.objectstorage.k8s.io" bucketClassName: "seaweedfs" endpoint: "" diff --git a/packages/system/seaweedfs/values.yaml b/packages/system/seaweedfs/values.yaml index 9fb8f886..056e9bb7 100644 --- a/packages/system/seaweedfs/values.yaml +++ b/packages/system/seaweedfs/values.yaml @@ -114,7 +114,6 @@ seaweedfs: policy.cozystack.io/allow-to-apiserver: "true" driverName: "seaweedfs.objectstorage.k8s.io" bucketClassName: "seaweedfs" - image: "ghcr.io/seaweedfs/seaweedfs-cosi-driver:v0.1.1" region: "" sidecar: From 8cb225604269ac99b11c862a7ed00a56f368af03 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Fri, 27 Sep 2024 15:47:55 +0200 Subject: [PATCH 12/41] Nginx-ingress: fix tls-passthrough if ClientHello is fragmented (#372) Fixed nginx-ingress image to 
include this patch: - https://github.com/kubernetes/ingress-nginx/pull/11843 Signed-off-by: Andrei Kvapil --- packages/system/ingress-nginx/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/system/ingress-nginx/values.yaml b/packages/system/ingress-nginx/values.yaml index 6066cd29..8ed5b271 100644 --- a/packages/system/ingress-nginx/values.yaml +++ b/packages/system/ingress-nginx/values.yaml @@ -6,7 +6,7 @@ ingress-nginx: registry: ghcr.io image: kvaps/ingress-nginx-with-protobuf-exporter/controller tag: v1.11.2 - digest: sha256:f4194edb06a43c82405167427ebd552b90af9698bd295845418680aebc13f600 + digest: sha256:e80856ece4e30e9646d65c8d92c25a3446a0bba1c2468cd026f17df9e60d2c0f allowSnippetAnnotations: true replicaCount: 2 admissionWebhooks: From 3d928611ed393686ce1a4f7e1ecd7c4b40669ea7 Mon Sep 17 00:00:00 2001 From: klinch0 <68821526+klinch0@users.noreply.github.com> Date: Mon, 30 Sep 2024 19:03:23 +0300 Subject: [PATCH 13/41] fix postgres max_connections (#376) ## Summary by CodeRabbit - **New Features** - Updated the `max_connections` parameter to accept numeric values for improved clarity and correctness in PostgreSQL configurations. - **Bug Fixes** - Corrected the data type for `max_connections` from string to number in both schema and configuration files to ensure proper interpretation by the PostgreSQL server. Co-authored-by: Kirill Klinchenkov --- packages/apps/postgres/values.schema.json | 6 +++--- packages/apps/postgres/values.yaml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/apps/postgres/values.schema.json b/packages/apps/postgres/values.schema.json index 006d5982..c63184ca 100644 --- a/packages/apps/postgres/values.schema.json +++ b/packages/apps/postgres/values.schema.json @@ -29,9 +29,9 @@ "type": "object", "properties": { "max_connections": { - "type": "string", + "type": "number", "description": "Determines the maximum number of concurrent connections to the database server. 
The default is typically 100 connections", - "default": "100" + "default": 100 } } } @@ -103,4 +103,4 @@ } } } -} \ No newline at end of file +} diff --git a/packages/apps/postgres/values.yaml b/packages/apps/postgres/values.yaml index f5d6dada..50adc1fa 100644 --- a/packages/apps/postgres/values.yaml +++ b/packages/apps/postgres/values.yaml @@ -14,7 +14,7 @@ storageClass: "" ## @param postgresql.parameters.max_connections Determines the maximum number of concurrent connections to the database server. The default is typically 100 connections postgresql: parameters: - max_connections: "100" + max_connections: 100 ## Configuration for the quorum-based synchronous replication ## @param quorum.minSyncReplicas Minimum number of synchronous replicas that must acknowledge a transaction before it is considered committed. From d657ca62b84fbb4337bd3ca024c17b68d8d45040 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Tue, 1 Oct 2024 13:32:18 +0200 Subject: [PATCH 14/41] Update Cilium v1.16.2 and enable genev_sys_6081 interface (#378) This PR includes the upstream fix: - https://github.com/kubeovn/kube-ovn/pull/4575 Signed-off-by: Andrei Kvapil --- .../system/cilium/charts/cilium/Chart.yaml | 4 +- .../system/cilium/charts/cilium/README.md | 16 +++---- .../templates/cilium-envoy/daemonset.yaml | 4 -- .../templates/cilium-envoy/service.yaml | 33 ++++++++++++++ .../templates/cilium-operator/deployment.yaml | 25 ++++++++++- .../charts/cilium/templates/validate.yaml | 44 +++++++++++++++++++ .../system/cilium/charts/cilium/values.yaml | 32 +++++++------- .../system/cilium/images/cilium/Dockerfile | 2 +- packages/system/cilium/values-kubeovn.yaml | 2 +- packages/system/cilium/values.yaml | 4 +- 10 files changed, 131 insertions(+), 35 deletions(-) create mode 100644 packages/system/cilium/charts/cilium/templates/cilium-envoy/service.yaml diff --git a/packages/system/cilium/charts/cilium/Chart.yaml b/packages/system/cilium/charts/cilium/Chart.yaml index 2364c1c5..92282fb6 100644 --- 
a/packages/system/cilium/charts/cilium/Chart.yaml +++ b/packages/system/cilium/charts/cilium/Chart.yaml @@ -79,7 +79,7 @@ annotations: Pod IP Pool\n description: |\n CiliumPodIPPool defines an IP pool that can be used for pooled IPAM (i.e. the multi-pool IPAM mode).\n" apiVersion: v2 -appVersion: 1.16.1 +appVersion: 1.16.2 description: eBPF-based Networking, Security, and Observability home: https://cilium.io/ icon: https://cdn.jsdelivr.net/gh/cilium/cilium@main/Documentation/images/logo-solo.svg @@ -95,4 +95,4 @@ kubeVersion: '>= 1.21.0-0' name: cilium sources: - https://github.com/cilium/cilium -version: 1.16.1 +version: 1.16.2 diff --git a/packages/system/cilium/charts/cilium/README.md b/packages/system/cilium/charts/cilium/README.md index 464701c1..c6f177a0 100644 --- a/packages/system/cilium/charts/cilium/README.md +++ b/packages/system/cilium/charts/cilium/README.md @@ -1,6 +1,6 @@ # cilium -![Version: 1.16.1](https://img.shields.io/badge/Version-1.16.1-informational?style=flat-square) ![AppVersion: 1.16.1](https://img.shields.io/badge/AppVersion-1.16.1-informational?style=flat-square) +![Version: 1.16.2](https://img.shields.io/badge/Version-1.16.2-informational?style=flat-square) ![AppVersion: 1.16.2](https://img.shields.io/badge/AppVersion-1.16.2-informational?style=flat-square) Cilium is open source software for providing and transparently securing network connectivity and loadbalancing between application workloads such as @@ -83,7 +83,7 @@ contributors across the globe, there is almost always someone available to help. 
| authentication.mutual.spire.install.agent.tolerations | list | `[{"effect":"NoSchedule","key":"node.kubernetes.io/not-ready"},{"effect":"NoSchedule","key":"node-role.kubernetes.io/master"},{"effect":"NoSchedule","key":"node-role.kubernetes.io/control-plane"},{"effect":"NoSchedule","key":"node.cloudprovider.kubernetes.io/uninitialized","value":"true"},{"key":"CriticalAddonsOnly","operator":"Exists"}]` | SPIRE agent tolerations configuration By default it follows the same tolerations as the agent itself to allow the Cilium agent on this node to connect to SPIRE. ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ | | authentication.mutual.spire.install.enabled | bool | `true` | Enable SPIRE installation. This will only take effect only if authentication.mutual.spire.enabled is true | | authentication.mutual.spire.install.existingNamespace | bool | `false` | SPIRE namespace already exists. Set to true if Helm should not create, manage, and import the SPIRE namespace. 
| -| authentication.mutual.spire.install.initImage | object | `{"digest":"sha256:9ae97d36d26566ff84e8893c64a6dc4fe8ca6d1144bf5b87b2b85a32def253c7","override":null,"pullPolicy":"IfNotPresent","repository":"docker.io/library/busybox","tag":"1.36.1","useDigest":true}` | init container image of SPIRE agent and server | +| authentication.mutual.spire.install.initImage | object | `{"digest":"sha256:c230832bd3b0be59a6c47ed64294f9ce71e91b327957920b6929a0caa8353140","override":null,"pullPolicy":"IfNotPresent","repository":"docker.io/library/busybox","tag":"1.36.1","useDigest":true}` | init container image of SPIRE agent and server | | authentication.mutual.spire.install.namespace | string | `"cilium-spire"` | SPIRE namespace to install into | | authentication.mutual.spire.install.server.affinity | object | `{}` | SPIRE server affinity configuration | | authentication.mutual.spire.install.server.annotations | object | `{}` | SPIRE server annotations | @@ -182,7 +182,7 @@ contributors across the globe, there is almost always someone available to help. | clustermesh.apiserver.extraVolumeMounts | list | `[]` | Additional clustermesh-apiserver volumeMounts. | | clustermesh.apiserver.extraVolumes | list | `[]` | Additional clustermesh-apiserver volumes. | | clustermesh.apiserver.healthPort | int | `9880` | TCP port for the clustermesh-apiserver health API. | -| clustermesh.apiserver.image | object | `{"digest":"sha256:e9c77417cd474cc943b2303a76c5cf584ac7024dd513ebb8d608cb62fe28896f","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/clustermesh-apiserver","tag":"v1.16.1","useDigest":true}` | Clustermesh API server image. | +| clustermesh.apiserver.image | object | `{"digest":"sha256:cc84190fed92e03a2b3a33bc670b2447b521ee258ad9b076baaad13be312ea73","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/clustermesh-apiserver","tag":"v1.16.2","useDigest":true}` | Clustermesh API server image. 
| | clustermesh.apiserver.kvstoremesh.enabled | bool | `true` | Enable KVStoreMesh. KVStoreMesh caches the information retrieved from the remote clusters in the local etcd instance. | | clustermesh.apiserver.kvstoremesh.extraArgs | list | `[]` | Additional KVStoreMesh arguments. | | clustermesh.apiserver.kvstoremesh.extraEnv | list | `[]` | Additional KVStoreMesh environment variables. | @@ -353,7 +353,7 @@ contributors across the globe, there is almost always someone available to help. | envoy.extraVolumes | list | `[]` | Additional envoy volumes. | | envoy.healthPort | int | `9878` | TCP port for the health API. | | envoy.idleTimeoutDurationSeconds | int | `60` | Set Envoy upstream HTTP idle connection timeout seconds. Does not apply to connections with pending requests. Default 60s | -| envoy.image | object | `{"digest":"sha256:bd5ff8c66716080028f414ec1cb4f7dc66f40d2fb5a009fff187f4a9b90b566b","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/cilium-envoy","tag":"v1.29.7-39a2a56bbd5b3a591f69dbca51d3e30ef97e0e51","useDigest":true}` | Envoy container image. | +| envoy.image | object | `{"digest":"sha256:9762041c3760de226a8b00cc12f27dacc28b7691ea926748f9b5c18862db503f","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/cilium-envoy","tag":"v1.29.9-1726784081-a90146d13b4cd7d168d573396ccf2b3db5a3b047","useDigest":true}` | Envoy container image. | | envoy.livenessProbe.failureThreshold | int | `10` | failure threshold of liveness probe | | envoy.livenessProbe.periodSeconds | int | `30` | interval between checks of the liveness probe | | envoy.log.format | string | `"[%Y-%m-%d %T.%e][%t][%l][%n] [%g:%#] %v"` | The format string to use for laying out the log message metadata of Envoy. | @@ -484,7 +484,7 @@ contributors across the globe, there is almost always someone available to help. | hubble.relay.extraVolumes | list | `[]` | Additional hubble-relay volumes. 
| | hubble.relay.gops.enabled | bool | `true` | Enable gops for hubble-relay | | hubble.relay.gops.port | int | `9893` | Configure gops listen port for hubble-relay | -| hubble.relay.image | object | `{"digest":"sha256:2e1b4c739a676ae187d4c2bfc45c3e865bda2567cc0320a90cb666657fcfcc35","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/hubble-relay","tag":"v1.16.1","useDigest":true}` | Hubble-relay container image. | +| hubble.relay.image | object | `{"digest":"sha256:4b559907b378ac18af82541dafab430a857d94f1057f2598645624e6e7ea286c","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/hubble-relay","tag":"v1.16.2","useDigest":true}` | Hubble-relay container image. | | hubble.relay.listenHost | string | `""` | Host to listen to. Specify an empty string to bind to all the interfaces. | | hubble.relay.listenPort | string | `"4245"` | Port to listen to. | | hubble.relay.nodeSelector | object | `{"kubernetes.io/os":"linux"}` | Node labels for pod assignment ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector | @@ -590,7 +590,7 @@ contributors across the globe, there is almost always someone available to help. | hubble.ui.updateStrategy | object | `{"rollingUpdate":{"maxUnavailable":1},"type":"RollingUpdate"}` | hubble-ui update strategy. | | identityAllocationMode | string | `"crd"` | Method to use for identity allocation (`crd` or `kvstore`). | | identityChangeGracePeriod | string | `"5s"` | Time to wait before using new identity on endpoint identity change. | -| image | object | `{"digest":"sha256:0b4a3ab41a4760d86b7fc945b8783747ba27f29dac30dd434d94f2c9e3679f39","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/cilium","tag":"v1.16.1","useDigest":true}` | Agent container image. 
| +| image | object | `{"digest":"sha256:4386a8580d8d86934908eea022b0523f812e6a542f30a86a47edd8bed90d51ea","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/cilium","tag":"v1.16.2","useDigest":true}` | Agent container image. | | imagePullSecrets | list | `[]` | Configure image pull secrets for pulling container images | | ingressController.default | bool | `false` | Set cilium ingress controller to be the default ingress controller This will let cilium ingress controller route entries without ingress class set | | ingressController.defaultSecretName | string | `nil` | Default secret name for ingresses without .spec.tls[].secretName set. | @@ -717,7 +717,7 @@ contributors across the globe, there is almost always someone available to help. | operator.hostNetwork | bool | `true` | HostNetwork setting | | operator.identityGCInterval | string | `"15m0s"` | Interval for identity garbage collection. | | operator.identityHeartbeatTimeout | string | `"30m0s"` | Timeout for identity heartbeats. | -| operator.image | object | `{"alibabacloudDigest":"sha256:4381adf48d76ec482551183947e537d44bcac9b6c31a635a9ac63f696d978804","awsDigest":"sha256:e3876fcaf2d6ccc8d5b4aaaded7b1efa971f3f4175eaa2c8a499878d58c39df4","azureDigest":"sha256:e55c222654a44ceb52db7ade3a7b9e8ef05681ff84c14ad1d46fea34869a7a22","genericDigest":"sha256:3bc7e7a43bc4a4d8989cb7936c5d96675dd2d02c306adf925ce0a7c35aa27dc4","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/operator","suffix":"","tag":"v1.16.1","useDigest":true}` | cilium-operator image. 
| +| operator.image | object | `{"alibabacloudDigest":"sha256:16e33abb6b8381e2f66388b6d7141399f06c9b51b9ffa08fd159b8d321929716","awsDigest":"sha256:b6a73ec94407a56cccc8a395225e2aecc3ca3611e7acfeec86201c19fc0727dd","azureDigest":"sha256:fde7cf8bb887e106cd388bb5c3327e92682b2ec3ab4f03bb57b87f495b99f727","genericDigest":"sha256:cccfd3b886d52cb132c06acca8ca559f0fce91a6bd99016219b1a81fdbc4813a","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/operator","suffix":"","tag":"v1.16.2","useDigest":true}` | cilium-operator image. | | operator.nodeGCInterval | string | `"5m0s"` | Interval for cilium node garbage collection. | | operator.nodeSelector | object | `{"kubernetes.io/os":"linux"}` | Node labels for cilium-operator pod assignment ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector | | operator.podAnnotations | object | `{}` | Annotations to be added to cilium-operator pods | @@ -767,7 +767,7 @@ contributors across the globe, there is almost always someone available to help. | preflight.extraEnv | list | `[]` | Additional preflight environment variables. | | preflight.extraVolumeMounts | list | `[]` | Additional preflight volumeMounts. | | preflight.extraVolumes | list | `[]` | Additional preflight volumes. | -| preflight.image | object | `{"digest":"sha256:0b4a3ab41a4760d86b7fc945b8783747ba27f29dac30dd434d94f2c9e3679f39","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/cilium","tag":"v1.16.1","useDigest":true}` | Cilium pre-flight image. | +| preflight.image | object | `{"digest":"sha256:4386a8580d8d86934908eea022b0523f812e6a542f30a86a47edd8bed90d51ea","override":null,"pullPolicy":"IfNotPresent","repository":"quay.io/cilium/cilium","tag":"v1.16.2","useDigest":true}` | Cilium pre-flight image. 
| | preflight.nodeSelector | object | `{"kubernetes.io/os":"linux"}` | Node labels for preflight pod assignment ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector | | preflight.podAnnotations | object | `{}` | Annotations to be added to preflight pods | | preflight.podDisruptionBudget.enabled | bool | `false` | enable PodDisruptionBudget ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/ | diff --git a/packages/system/cilium/charts/cilium/templates/cilium-envoy/daemonset.yaml b/packages/system/cilium/charts/cilium/templates/cilium-envoy/daemonset.yaml index 2dfb7ab8..c62dea3d 100644 --- a/packages/system/cilium/charts/cilium/templates/cilium-envoy/daemonset.yaml +++ b/packages/system/cilium/charts/cilium/templates/cilium-envoy/daemonset.yaml @@ -26,10 +26,6 @@ spec: template: metadata: annotations: - {{- if and .Values.envoy.prometheus.enabled (not .Values.envoy.prometheus.serviceMonitor.enabled) }} - prometheus.io/port: "{{ .Values.envoy.prometheus.port }}" - prometheus.io/scrape: "true" - {{- end }} {{- if .Values.envoy.rollOutPods }} # ensure pods roll when configmap updates cilium.io/cilium-envoy-configmap-checksum: {{ include (print $.Template.BasePath "/cilium-envoy/configmap.yaml") . | sha256sum | quote }} diff --git a/packages/system/cilium/charts/cilium/templates/cilium-envoy/service.yaml b/packages/system/cilium/charts/cilium/templates/cilium-envoy/service.yaml new file mode 100644 index 00000000..a55202a5 --- /dev/null +++ b/packages/system/cilium/charts/cilium/templates/cilium-envoy/service.yaml @@ -0,0 +1,33 @@ +{{- $envoyDS := eq (include "envoyDaemonSetEnabled" .) 
"true" -}} +{{- if and $envoyDS (not .Values.preflight.enabled) .Values.envoy.prometheus.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: cilium-envoy + namespace: {{ .Release.Namespace }} + {{- if or (not .Values.envoy.prometheus.serviceMonitor.enabled) .Values.envoy.annotations }} + annotations: + {{- if not .Values.envoy.prometheus.serviceMonitor.enabled }} + prometheus.io/scrape: "true" + prometheus.io/port: {{ .Values.envoy.prometheus.port | quote }} + {{- end }} + {{- with .Values.envoy.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- end }} + labels: + k8s-app: cilium-envoy + app.kubernetes.io/name: cilium-envoy + app.kubernetes.io/part-of: cilium + io.cilium/app: proxy +spec: + clusterIP: None + type: ClusterIP + selector: + k8s-app: cilium-envoy + ports: + - name: envoy-metrics + port: {{ .Values.envoy.prometheus.port }} + protocol: TCP + targetPort: envoy-metrics +{{- end }} diff --git a/packages/system/cilium/charts/cilium/templates/cilium-operator/deployment.yaml b/packages/system/cilium/charts/cilium/templates/cilium-operator/deployment.yaml index 2b0b536b..627a63ce 100644 --- a/packages/system/cilium/charts/cilium/templates/cilium-operator/deployment.yaml +++ b/packages/system/cilium/charts/cilium/templates/cilium-operator/deployment.yaml @@ -362,7 +362,7 @@ spec: name: cilium-clustermesh optional: true # note: items are not explicitly listed here, since the entries of this secret - # depend on the peers configured, and that would cause a restart of all agents + # depend on the peers configured, and that would cause a restart of all operators # at every addition/removal. Leaving the field empty makes each secret entry # to be automatically projected into the volume as a file whose name is the key. 
- secret: @@ -384,5 +384,28 @@ spec: - key: {{ .Values.tls.caBundle.key }} path: common-etcd-client-ca.crt {{- end }} + # note: we configure the volume for the kvstoremesh-specific certificate + # regardless of whether KVStoreMesh is enabled or not, so that it can be + # automatically mounted in case KVStoreMesh gets subsequently enabled, + # without requiring an operator restart. + - secret: + name: clustermesh-apiserver-local-cert + optional: true + items: + - key: tls.key + path: local-etcd-client.key + - key: tls.crt + path: local-etcd-client.crt + {{- if not .Values.tls.caBundle.enabled }} + - key: ca.crt + path: local-etcd-client-ca.crt + {{- else }} + - {{ .Values.tls.caBundle.useSecret | ternary "secret" "configMap" }}: + name: {{ .Values.tls.caBundle.name }} + optional: true + items: + - key: {{ .Values.tls.caBundle.key }} + path: local-etcd-client-ca.crt + {{- end }} {{- end }} {{- end }} diff --git a/packages/system/cilium/charts/cilium/templates/validate.yaml b/packages/system/cilium/charts/cilium/templates/validate.yaml index 8bc687db..3afc14f1 100644 --- a/packages/system/cilium/charts/cilium/templates/validate.yaml +++ b/packages/system/cilium/charts/cilium/templates/validate.yaml @@ -1,3 +1,47 @@ +{{/* validate deprecated options are not being used */}} + +{{/* Options deprecated in v1.15 and removed in v1.16 */}} +{{- if or + (dig "encryption" "keyFile" "" .Values.AsMap) + (dig "encryption" "mountPath" "" .Values.AsMap) + (dig "encryption" "secretName" "" .Values.AsMap) + (dig "encryption" "interface" "" .Values.AsMap) +}} + {{ fail "encryption.{keyFile,mountPath,secretName,interface} were deprecated in v1.14 and has been removed in v1.16. 
For details please refer to https://docs.cilium.io/en/v1.16/operations/upgrade/#helm-options" }} +{{- end }} +{{- if or + ((dig "proxy" "prometheus" "enabled" "" .Values.AsMap) | toString) + (dig "proxy" "prometheus" "port" "" .Values.AsMap) +}} + {{ fail "proxy.prometheus.enabled and proxy.prometheus.port were deprecated in v1.14 and has been removed in v1.16. For details please refer to https://docs.cilium.io/en/v1.16/operations/upgrade/#helm-options" }} +{{- end }} +{{- if (dig "endpointStatus" "" .Values.AsMap) }} + {{ fail "endpointStatus has been removed in v1.16. For details please refer to https://docs.cilium.io/en/v1.16/operations/upgrade/#helm-options" }} +{{- end }} +{{- if (dig "remoteNodeIdentity" "" .Values.AsMap) }} + {{ fail "remoteNodeIdentity was deprecated in v1.15 and has been removed in v1.16. For details please refer to https://docs.cilium.io/en/v1.16/operations/upgrade/#helm-options" }} +{{- end }} +{{- if (dig "containerRuntime" "integration" "" .Values.AsMap) }} + {{ fail "containerRuntime.integration was deprecated in v1.14 and has been removed in v1.16. For details please refer to https://docs.cilium.io/en/v1.16/operations/upgrade/#helm-options" }} +{{- end }} +{{- if (dig "etcd" "managed" "" .Values.AsMap) }} + {{ fail "etcd.managed was deprecated in v1.10 has been removed in v1.16. For details please refer to https://docs.cilium.io/en/v1.16/operations/upgrade/#helm-options" }} +{{- end }} + +{{/* Options deprecated in v1.14 and removed in v1.15 */}} +{{- if .Values.tunnel }} + {{ fail "tunnel was deprecated in v1.14 and has been removed in v1.15. For details please refer to https://docs.cilium.io/en/v1.15/operations/upgrade/#helm-options" }} +{{- end }} +{{- if or (dig "clustermesh" "apiserver" "tls" "ca" "cert" "" .Values.AsMap) (dig "clustermesh" "apiserver" "tls" "ca" "key" "" .Values.AsMap) }} + {{ fail "clustermesh.apiserver.tls.ca.cert and clustermesh.apiserver.tls.ca.key were deprecated in v1.14 and has been removed in v1.15. 
For details please refer to https://docs.cilium.io/en/v1.15/operations/upgrade/#helm-options" }} +{{- end }} +{{- if .Values.enableK8sEventHandover }} + {{ fail "enableK8sEventHandover was deprecated in v1.14 and has been removed in v1.15. For details please refer to https://docs.cilium.io/en/v1.15/operations/upgrade/#helm-options" }} +{{- end }} +{{- if .Values.enableCnpStatusUpdates }} + {{ fail "enableCnpStatusUpdates was deprecated in v1.14 and has been removed in v1.15. For details please refer to https://docs.cilium.io/en/v1.15/operations/upgrade/#helm-options" }} +{{- end }} + {{/* validate hubble config */}} {{- if and .Values.hubble.ui.enabled (not .Values.hubble.ui.standalone.enabled) }} {{- if not .Values.hubble.relay.enabled }} diff --git a/packages/system/cilium/charts/cilium/values.yaml b/packages/system/cilium/charts/cilium/values.yaml index dbaa6c6e..d320deda 100644 --- a/packages/system/cilium/charts/cilium/values.yaml +++ b/packages/system/cilium/charts/cilium/values.yaml @@ -153,10 +153,10 @@ image: # @schema override: ~ repository: "quay.io/cilium/cilium" - tag: "v1.16.1" + tag: "v1.16.2" pullPolicy: "IfNotPresent" # cilium-digest - digest: "sha256:0b4a3ab41a4760d86b7fc945b8783747ba27f29dac30dd434d94f2c9e3679f39" + digest: "sha256:4386a8580d8d86934908eea022b0523f812e6a542f30a86a47edd8bed90d51ea" useDigest: true # -- Affinity for cilium-agent. 
affinity: @@ -1309,9 +1309,9 @@ hubble: # @schema override: ~ repository: "quay.io/cilium/hubble-relay" - tag: "v1.16.1" + tag: "v1.16.2" # hubble-relay-digest - digest: "sha256:2e1b4c739a676ae187d4c2bfc45c3e865bda2567cc0320a90cb666657fcfcc35" + digest: "sha256:4b559907b378ac18af82541dafab430a857d94f1057f2598645624e6e7ea286c" useDigest: true pullPolicy: "IfNotPresent" # -- Specifies the resources for the hubble-relay pods @@ -2158,9 +2158,9 @@ envoy: # @schema override: ~ repository: "quay.io/cilium/cilium-envoy" - tag: "v1.29.7-39a2a56bbd5b3a591f69dbca51d3e30ef97e0e51" + tag: "v1.29.9-1726784081-a90146d13b4cd7d168d573396ccf2b3db5a3b047" pullPolicy: "IfNotPresent" - digest: "sha256:bd5ff8c66716080028f414ec1cb4f7dc66f40d2fb5a009fff187f4a9b90b566b" + digest: "sha256:9762041c3760de226a8b00cc12f27dacc28b7691ea926748f9b5c18862db503f" useDigest: true # -- Additional containers added to the cilium Envoy DaemonSet. extraContainers: [] @@ -2474,15 +2474,15 @@ operator: # @schema override: ~ repository: "quay.io/cilium/operator" - tag: "v1.16.1" + tag: "v1.16.2" # operator-generic-digest - genericDigest: "sha256:3bc7e7a43bc4a4d8989cb7936c5d96675dd2d02c306adf925ce0a7c35aa27dc4" + genericDigest: "sha256:cccfd3b886d52cb132c06acca8ca559f0fce91a6bd99016219b1a81fdbc4813a" # operator-azure-digest - azureDigest: "sha256:e55c222654a44ceb52db7ade3a7b9e8ef05681ff84c14ad1d46fea34869a7a22" + azureDigest: "sha256:fde7cf8bb887e106cd388bb5c3327e92682b2ec3ab4f03bb57b87f495b99f727" # operator-aws-digest - awsDigest: "sha256:e3876fcaf2d6ccc8d5b4aaaded7b1efa971f3f4175eaa2c8a499878d58c39df4" + awsDigest: "sha256:b6a73ec94407a56cccc8a395225e2aecc3ca3611e7acfeec86201c19fc0727dd" # operator-alibabacloud-digest - alibabacloudDigest: "sha256:4381adf48d76ec482551183947e537d44bcac9b6c31a635a9ac63f696d978804" + alibabacloudDigest: "sha256:16e33abb6b8381e2f66388b6d7141399f06c9b51b9ffa08fd159b8d321929716" useDigest: true pullPolicy: "IfNotPresent" suffix: "" @@ -2756,9 +2756,9 @@ preflight: # @schema 
override: ~ repository: "quay.io/cilium/cilium" - tag: "v1.16.1" + tag: "v1.16.2" # cilium-digest - digest: "sha256:0b4a3ab41a4760d86b7fc945b8783747ba27f29dac30dd434d94f2c9e3679f39" + digest: "sha256:4386a8580d8d86934908eea022b0523f812e6a542f30a86a47edd8bed90d51ea" useDigest: true pullPolicy: "IfNotPresent" # -- The priority class to use for the preflight pod. @@ -2905,9 +2905,9 @@ clustermesh: # @schema override: ~ repository: "quay.io/cilium/clustermesh-apiserver" - tag: "v1.16.1" + tag: "v1.16.2" # clustermesh-apiserver-digest - digest: "sha256:e9c77417cd474cc943b2303a76c5cf584ac7024dd513ebb8d608cb62fe28896f" + digest: "sha256:cc84190fed92e03a2b3a33bc670b2447b521ee258ad9b076baaad13be312ea73" useDigest: true pullPolicy: "IfNotPresent" # -- TCP port for the clustermesh-apiserver health API. @@ -3406,7 +3406,7 @@ authentication: override: ~ repository: "docker.io/library/busybox" tag: "1.36.1" - digest: "sha256:9ae97d36d26566ff84e8893c64a6dc4fe8ca6d1144bf5b87b2b85a32def253c7" + digest: "sha256:c230832bd3b0be59a6c47ed64294f9ce71e91b327957920b6929a0caa8353140" useDigest: true pullPolicy: "IfNotPresent" # SPIRE agent configuration diff --git a/packages/system/cilium/images/cilium/Dockerfile b/packages/system/cilium/images/cilium/Dockerfile index fe63bd05..e5dad7b3 100644 --- a/packages/system/cilium/images/cilium/Dockerfile +++ b/packages/system/cilium/images/cilium/Dockerfile @@ -1,2 +1,2 @@ -ARG VERSION=v1.16.1 +ARG VERSION=v1.16.2 FROM quay.io/cilium/cilium:${VERSION} diff --git a/packages/system/cilium/values-kubeovn.yaml b/packages/system/cilium/values-kubeovn.yaml index 23afc406..3fa940bb 100644 --- a/packages/system/cilium/values-kubeovn.yaml +++ b/packages/system/cilium/values-kubeovn.yaml @@ -15,4 +15,4 @@ cilium: enableIdentityMark: false enableRuntimeDeviceDetection: true forceDeviceDetection: true - devices: ovn0 + devices: "ovn0 genev_sys_6081" diff --git a/packages/system/cilium/values.yaml b/packages/system/cilium/values.yaml index c638e791..2cdb9edb 
100644 --- a/packages/system/cilium/values.yaml +++ b/packages/system/cilium/values.yaml @@ -12,7 +12,7 @@ cilium: mode: "kubernetes" image: repository: ghcr.io/aenix-io/cozystack/cilium - tag: 1.16.1 - digest: "sha256:9593dbc3bd25487b52d8f43330d4a308e450605479a8384a32117e9613289892" + tag: latest + digest: "sha256:534c5b04fef356a6be59234243c23c0c09702fe1e2c8872012afb391ce2965c4" envoy: enabled: false From ab8394140cee8602dec3e556016640cb2b214486 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Tue, 1 Oct 2024 13:35:47 +0200 Subject: [PATCH 15/41] Update fluxcd v2.4.0 (#379) Signed-off-by: Andrei Kvapil Signed-off-by: Andrei Kvapil --- packages/system/fluxcd/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/system/fluxcd/values.yaml b/packages/system/fluxcd/values.yaml index c766a685..2d0580d0 100644 --- a/packages/system/fluxcd/values.yaml +++ b/packages/system/fluxcd/values.yaml @@ -4,7 +4,7 @@ flux-instance: networkPolicy: true domain: cozy.local # -- default value is overriden in patches distribution: - version: 2.3.x + version: 2.4.x registry: ghcr.io/fluxcd components: - source-controller From b6e32034462e6f712ac3eb62ca3cbcc1e10d2e11 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Tue, 1 Oct 2024 17:12:07 +0200 Subject: [PATCH 16/41] Update Talos Linux v1.8.0 (#380) Signed-off-by: Andrei Kvapil Signed-off-by: Andrei Kvapil --- .../images/talos/profiles/initramfs.yaml | 22 +++++++++---------- .../images/talos/profiles/installer.yaml | 22 +++++++++---------- .../installer/images/talos/profiles/iso.yaml | 22 +++++++++---------- .../images/talos/profiles/kernel.yaml | 22 +++++++++---------- .../images/talos/profiles/metal.yaml | 22 +++++++++---------- .../images/talos/profiles/nocloud.yaml | 22 +++++++++---------- 6 files changed, 66 insertions(+), 66 deletions(-) diff --git a/packages/core/installer/images/talos/profiles/initramfs.yaml b/packages/core/installer/images/talos/profiles/initramfs.yaml index 
a825eb81..7ce305d1 100644 --- a/packages/core/installer/images/talos/profiles/initramfs.yaml +++ b/packages/core/installer/images/talos/profiles/initramfs.yaml @@ -3,24 +3,24 @@ arch: amd64 platform: metal secureboot: false -version: v1.7.6 +version: v1.8.0 input: kernel: path: /usr/install/amd64/vmlinuz initramfs: path: /usr/install/amd64/initramfs.xz baseInstaller: - imageRef: ghcr.io/siderolabs/installer:v1.7.6 + imageRef: ghcr.io/siderolabs/installer:v1.8.0 systemExtensions: - - imageRef: ghcr.io/siderolabs/amd-ucode:20240811 - - imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240811 - - imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240811 - - imageRef: ghcr.io/siderolabs/i915-ucode:20240811 - - imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240811 - - imageRef: ghcr.io/siderolabs/intel-ucode:20240813 - - imageRef: ghcr.io/siderolabs/qlogic-firmware:20240811 - - imageRef: ghcr.io/siderolabs/drbd:9.2.8-v1.7.6 - - imageRef: ghcr.io/siderolabs/zfs:2.2.4-v1.7.6 + - imageRef: ghcr.io/siderolabs/amd-ucode:20240909 + - imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240909 + - imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240909 + - imageRef: ghcr.io/siderolabs/i915-ucode:20240909 + - imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240909 + - imageRef: ghcr.io/siderolabs/intel-ucode:20240910 + - imageRef: ghcr.io/siderolabs/qlogic-firmware:20240909 + - imageRef: ghcr.io/siderolabs/drbd:9.2.11-v1.8.0 + - imageRef: ghcr.io/siderolabs/zfs:2.2.6-v1.8.0 output: kind: initramfs imageOptions: {} diff --git a/packages/core/installer/images/talos/profiles/installer.yaml b/packages/core/installer/images/talos/profiles/installer.yaml index 8183557f..6f0f8a42 100644 --- a/packages/core/installer/images/talos/profiles/installer.yaml +++ b/packages/core/installer/images/talos/profiles/installer.yaml @@ -3,24 +3,24 @@ arch: amd64 platform: metal secureboot: false -version: v1.7.6 +version: v1.8.0 input: kernel: path: /usr/install/amd64/vmlinuz initramfs: path: 
/usr/install/amd64/initramfs.xz baseInstaller: - imageRef: ghcr.io/siderolabs/installer:v1.7.6 + imageRef: ghcr.io/siderolabs/installer:v1.8.0 systemExtensions: - - imageRef: ghcr.io/siderolabs/amd-ucode:20240811 - - imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240811 - - imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240811 - - imageRef: ghcr.io/siderolabs/i915-ucode:20240811 - - imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240811 - - imageRef: ghcr.io/siderolabs/intel-ucode:20240813 - - imageRef: ghcr.io/siderolabs/qlogic-firmware:20240811 - - imageRef: ghcr.io/siderolabs/drbd:9.2.8-v1.7.6 - - imageRef: ghcr.io/siderolabs/zfs:2.2.4-v1.7.6 + - imageRef: ghcr.io/siderolabs/amd-ucode:20240909 + - imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240909 + - imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240909 + - imageRef: ghcr.io/siderolabs/i915-ucode:20240909 + - imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240909 + - imageRef: ghcr.io/siderolabs/intel-ucode:20240910 + - imageRef: ghcr.io/siderolabs/qlogic-firmware:20240909 + - imageRef: ghcr.io/siderolabs/drbd:9.2.11-v1.8.0 + - imageRef: ghcr.io/siderolabs/zfs:2.2.6-v1.8.0 output: kind: installer imageOptions: {} diff --git a/packages/core/installer/images/talos/profiles/iso.yaml b/packages/core/installer/images/talos/profiles/iso.yaml index f673e1e3..e3eec7ab 100644 --- a/packages/core/installer/images/talos/profiles/iso.yaml +++ b/packages/core/installer/images/talos/profiles/iso.yaml @@ -3,24 +3,24 @@ arch: amd64 platform: metal secureboot: false -version: v1.7.6 +version: v1.8.0 input: kernel: path: /usr/install/amd64/vmlinuz initramfs: path: /usr/install/amd64/initramfs.xz baseInstaller: - imageRef: ghcr.io/siderolabs/installer:v1.7.6 + imageRef: ghcr.io/siderolabs/installer:v1.8.0 systemExtensions: - - imageRef: ghcr.io/siderolabs/amd-ucode:20240811 - - imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240811 - - imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240811 - - imageRef: 
ghcr.io/siderolabs/i915-ucode:20240811 - - imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240811 - - imageRef: ghcr.io/siderolabs/intel-ucode:20240813 - - imageRef: ghcr.io/siderolabs/qlogic-firmware:20240811 - - imageRef: ghcr.io/siderolabs/drbd:9.2.8-v1.7.6 - - imageRef: ghcr.io/siderolabs/zfs:2.2.4-v1.7.6 + - imageRef: ghcr.io/siderolabs/amd-ucode:20240909 + - imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240909 + - imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240909 + - imageRef: ghcr.io/siderolabs/i915-ucode:20240909 + - imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240909 + - imageRef: ghcr.io/siderolabs/intel-ucode:20240910 + - imageRef: ghcr.io/siderolabs/qlogic-firmware:20240909 + - imageRef: ghcr.io/siderolabs/drbd:9.2.11-v1.8.0 + - imageRef: ghcr.io/siderolabs/zfs:2.2.6-v1.8.0 output: kind: iso imageOptions: {} diff --git a/packages/core/installer/images/talos/profiles/kernel.yaml b/packages/core/installer/images/talos/profiles/kernel.yaml index 421d017b..829d23dd 100644 --- a/packages/core/installer/images/talos/profiles/kernel.yaml +++ b/packages/core/installer/images/talos/profiles/kernel.yaml @@ -3,24 +3,24 @@ arch: amd64 platform: metal secureboot: false -version: v1.7.6 +version: v1.8.0 input: kernel: path: /usr/install/amd64/vmlinuz initramfs: path: /usr/install/amd64/initramfs.xz baseInstaller: - imageRef: ghcr.io/siderolabs/installer:v1.7.6 + imageRef: ghcr.io/siderolabs/installer:v1.8.0 systemExtensions: - - imageRef: ghcr.io/siderolabs/amd-ucode:20240811 - - imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240811 - - imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240811 - - imageRef: ghcr.io/siderolabs/i915-ucode:20240811 - - imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240811 - - imageRef: ghcr.io/siderolabs/intel-ucode:20240813 - - imageRef: ghcr.io/siderolabs/qlogic-firmware:20240811 - - imageRef: ghcr.io/siderolabs/drbd:9.2.8-v1.7.6 - - imageRef: ghcr.io/siderolabs/zfs:2.2.4-v1.7.6 + - imageRef: ghcr.io/siderolabs/amd-ucode:20240909 + - 
imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240909 + - imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240909 + - imageRef: ghcr.io/siderolabs/i915-ucode:20240909 + - imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240909 + - imageRef: ghcr.io/siderolabs/intel-ucode:20240910 + - imageRef: ghcr.io/siderolabs/qlogic-firmware:20240909 + - imageRef: ghcr.io/siderolabs/drbd:9.2.11-v1.8.0 + - imageRef: ghcr.io/siderolabs/zfs:2.2.6-v1.8.0 output: kind: kernel imageOptions: {} diff --git a/packages/core/installer/images/talos/profiles/metal.yaml b/packages/core/installer/images/talos/profiles/metal.yaml index faaff221..f1218487 100644 --- a/packages/core/installer/images/talos/profiles/metal.yaml +++ b/packages/core/installer/images/talos/profiles/metal.yaml @@ -3,24 +3,24 @@ arch: amd64 platform: metal secureboot: false -version: v1.7.6 +version: v1.8.0 input: kernel: path: /usr/install/amd64/vmlinuz initramfs: path: /usr/install/amd64/initramfs.xz baseInstaller: - imageRef: ghcr.io/siderolabs/installer:v1.7.6 + imageRef: ghcr.io/siderolabs/installer:v1.8.0 systemExtensions: - - imageRef: ghcr.io/siderolabs/amd-ucode:20240811 - - imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240811 - - imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240811 - - imageRef: ghcr.io/siderolabs/i915-ucode:20240811 - - imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240811 - - imageRef: ghcr.io/siderolabs/intel-ucode:20240813 - - imageRef: ghcr.io/siderolabs/qlogic-firmware:20240811 - - imageRef: ghcr.io/siderolabs/drbd:9.2.8-v1.7.6 - - imageRef: ghcr.io/siderolabs/zfs:2.2.4-v1.7.6 + - imageRef: ghcr.io/siderolabs/amd-ucode:20240909 + - imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240909 + - imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240909 + - imageRef: ghcr.io/siderolabs/i915-ucode:20240909 + - imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240909 + - imageRef: ghcr.io/siderolabs/intel-ucode:20240910 + - imageRef: ghcr.io/siderolabs/qlogic-firmware:20240909 + - imageRef: 
ghcr.io/siderolabs/drbd:9.2.11-v1.8.0 + - imageRef: ghcr.io/siderolabs/zfs:2.2.6-v1.8.0 output: kind: image imageOptions: { diskSize: 1306525696, diskFormat: raw } diff --git a/packages/core/installer/images/talos/profiles/nocloud.yaml b/packages/core/installer/images/talos/profiles/nocloud.yaml index ba0a9e06..4e964715 100644 --- a/packages/core/installer/images/talos/profiles/nocloud.yaml +++ b/packages/core/installer/images/talos/profiles/nocloud.yaml @@ -3,24 +3,24 @@ arch: amd64 platform: nocloud secureboot: false -version: v1.7.6 +version: v1.8.0 input: kernel: path: /usr/install/amd64/vmlinuz initramfs: path: /usr/install/amd64/initramfs.xz baseInstaller: - imageRef: ghcr.io/siderolabs/installer:v1.7.6 + imageRef: ghcr.io/siderolabs/installer:v1.8.0 systemExtensions: - - imageRef: ghcr.io/siderolabs/amd-ucode:20240811 - - imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240811 - - imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240811 - - imageRef: ghcr.io/siderolabs/i915-ucode:20240811 - - imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240811 - - imageRef: ghcr.io/siderolabs/intel-ucode:20240813 - - imageRef: ghcr.io/siderolabs/qlogic-firmware:20240811 - - imageRef: ghcr.io/siderolabs/drbd:9.2.8-v1.7.6 - - imageRef: ghcr.io/siderolabs/zfs:2.2.4-v1.7.6 + - imageRef: ghcr.io/siderolabs/amd-ucode:20240909 + - imageRef: ghcr.io/siderolabs/amdgpu-firmware:20240909 + - imageRef: ghcr.io/siderolabs/bnx2-bnx2x:20240909 + - imageRef: ghcr.io/siderolabs/i915-ucode:20240909 + - imageRef: ghcr.io/siderolabs/intel-ice-firmware:20240909 + - imageRef: ghcr.io/siderolabs/intel-ucode:20240910 + - imageRef: ghcr.io/siderolabs/qlogic-firmware:20240909 + - imageRef: ghcr.io/siderolabs/drbd:9.2.11-v1.8.0 + - imageRef: ghcr.io/siderolabs/zfs:2.2.6-v1.8.0 output: kind: image imageOptions: { diskSize: 1306525696, diskFormat: raw } From 7161b4db0656495f028136976aa3caef8f609d58 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Tue, 1 Oct 2024 17:52:07 +0200 Subject: [PATCH 17/41] 
Disable Kamaji default datastore check (#381) Signed-off-by: Andrei Kvapil Signed-off-by: Andrei Kvapil --- .../system/kamaji/images/kamaji/Dockerfile | 26 ++++++++++++++++++- .../patches/disable-datastore-check.diff | 23 ++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 packages/system/kamaji/images/kamaji/patches/disable-datastore-check.diff diff --git a/packages/system/kamaji/images/kamaji/Dockerfile b/packages/system/kamaji/images/kamaji/Dockerfile index e453967e..7946e225 100644 --- a/packages/system/kamaji/images/kamaji/Dockerfile +++ b/packages/system/kamaji/images/kamaji/Dockerfile @@ -1 +1,25 @@ -FROM clastix/kamaji:edge-24.9.2 +# Build the manager binary +FROM golang:1.22 as builder + +ARG VERSION=edge-24.9.2 +ARG TARGETOS TARGETARCH + +WORKDIR /workspace + +RUN curl -sSL https://github.com/clastix/kamaji/archive/refs/tags/${VERSION}.tar.gz | tar -xzvf- --strip=1 + +COPY patches /patches +RUN git apply /patches/disable-datastore-check.diff + +RUN CGO_ENABLED=0 GOOS=linux GOARCH=$TARGETARCH go build \ + -ldflags "-X github.com/clastix/kamaji/internal.GitRepo=$GIT_REPO -X github.com/clastix/kamaji/internal.GitTag=$GIT_LAST_TAG -X github.com/clastix/kamaji/internal.GitCommit=$GIT_HEAD_COMMIT -X github.com/clastix/kamaji/internal.GitDirty=$GIT_MODIFIED -X github.com/clastix/kamaji/internal.BuildTime=$BUILD_DATE" \ + -a -o kamaji main.go + +# Use distroless as minimal base image to package the manager binary +# Refer to https://github.com/GoogleContainerTools/distroless for more details +FROM gcr.io/distroless/static:nonroot +WORKDIR / +COPY --from=builder /workspace/kamaji . 
+USER 65532:65532 + +ENTRYPOINT ["/kamaji"] diff --git a/packages/system/kamaji/images/kamaji/patches/disable-datastore-check.diff b/packages/system/kamaji/images/kamaji/patches/disable-datastore-check.diff new file mode 100644 index 00000000..0d28b780 --- /dev/null +++ b/packages/system/kamaji/images/kamaji/patches/disable-datastore-check.diff @@ -0,0 +1,23 @@ +diff --git a/cmd/manager/cmd.go b/cmd/manager/cmd.go +index 9a24d4e..a03a4e0 100644 +--- a/cmd/manager/cmd.go ++++ b/cmd/manager/cmd.go +@@ -31,7 +31,6 @@ import ( + "github.com/clastix/kamaji/controllers/soot" + "github.com/clastix/kamaji/internal" + "github.com/clastix/kamaji/internal/builders/controlplane" +- datastoreutils "github.com/clastix/kamaji/internal/datastore/utils" + "github.com/clastix/kamaji/internal/webhook" + "github.com/clastix/kamaji/internal/webhook/handlers" + "github.com/clastix/kamaji/internal/webhook/routes" +@@ -80,10 +79,6 @@ func NewCmd(scheme *runtime.Scheme) *cobra.Command { + return fmt.Errorf("unable to read webhook CA: %w", err) + } + +- if err = datastoreutils.CheckExists(ctx, scheme, datastore); err != nil { +- return err +- } +- + if controllerReconcileTimeout.Seconds() == 0 { + return fmt.Errorf("the controller reconcile timeout must be greater than zero") + } From 7a1b56fa784b95a37b3a1d34b12c3dd1a9a67faf Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Tue, 1 Oct 2024 18:38:03 +0200 Subject: [PATCH 18/41] postgres: fix setting max_connections (#382) fix regression introduced by https://github.com/aenix-io/cozystack/pull/376 ## Summary by CodeRabbit - **New Features** - Enhanced flexibility in PostgreSQL configuration with conditional handling of the `max_connections` parameter. - **Bug Fixes** - Improved parameter assignment logic for better configuration management. 
--- packages/apps/postgres/templates/db.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/apps/postgres/templates/db.yaml b/packages/apps/postgres/templates/db.yaml index 287c8b74..8319ac4f 100644 --- a/packages/apps/postgres/templates/db.yaml +++ b/packages/apps/postgres/templates/db.yaml @@ -10,7 +10,9 @@ spec: postgresql: parameters: max_wal_senders: "30" - max_connections: “{{ .Values.postgresql.parameters.max_connections }}” + {{- with .Values.postgresql.parameters.max_connections }} + max_connections: "{{ . }}" + {{- end }} minSyncReplicas: {{ .Values.quorum.minSyncReplicas }} maxSyncReplicas: {{ .Values.quorum.maxSyncReplicas }} From 4b90bf5aacf7d33848a0478a9dfa6c7fbeeca1eb Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Tue, 1 Oct 2024 18:53:30 +0200 Subject: [PATCH 19/41] Prepare release v0.16.0 (#375) Signed-off-by: Andrei Kvapil --- hack/e2e.sh | 2 +- manifests/cozystack-installer.yaml | 4 ++-- packages/apps/ferretdb/images/postgres-backup.tag | 2 +- packages/apps/http-cache/images/nginx-cache.tag | 2 +- packages/apps/kubernetes/images/cluster-autoscaler.tag | 2 +- .../apps/kubernetes/images/kubevirt-cloud-provider.tag | 2 +- packages/apps/kubernetes/images/kubevirt-csi-driver.tag | 2 +- .../apps/kubernetes/images/ubuntu-container-disk.tag | 2 +- packages/apps/mysql/images/mariadb-backup.tag | 2 +- packages/apps/postgres/images/postgres-backup.tag | 2 +- packages/apps/versions_map | 9 ++++++--- packages/core/installer/values.yaml | 2 +- packages/core/testing/values.yaml | 2 +- packages/extra/monitoring/Chart.yaml | 2 +- packages/extra/versions_map | 3 ++- packages/system/cilium/values.yaml | 2 +- packages/system/dashboard/values.yaml | 6 +++--- packages/system/fluxcd/values.yaml | 2 +- packages/system/kamaji/values.yaml | 2 +- packages/system/kubeovn/values.yaml | 2 +- 20 files changed, 29 insertions(+), 25 deletions(-) diff --git a/hack/e2e.sh b/hack/e2e.sh index 9b3958c5..89637949 100755 --- a/hack/e2e.sh +++ 
b/hack/e2e.sh @@ -114,7 +114,7 @@ machine: - name: zfs - name: spl install: - image: ghcr.io/aenix-io/cozystack/talos:v1.7.1 + image: ghcr.io/aenix-io/cozystack/talos:v1.8.0 files: - content: | [plugins] diff --git a/manifests/cozystack-installer.yaml b/manifests/cozystack-installer.yaml index 64e45661..860ca3e0 100644 --- a/manifests/cozystack-installer.yaml +++ b/manifests/cozystack-installer.yaml @@ -68,7 +68,7 @@ spec: serviceAccountName: cozystack containers: - name: cozystack - image: "ghcr.io/aenix-io/cozystack/cozystack:v0.15.0" + image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.0" env: - name: KUBERNETES_SERVICE_HOST value: localhost @@ -87,7 +87,7 @@ spec: fieldRef: fieldPath: metadata.name - name: darkhttpd - image: "ghcr.io/aenix-io/cozystack/cozystack:v0.15.0" + image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.0" command: - /usr/bin/darkhttpd - /cozystack/assets diff --git a/packages/apps/ferretdb/images/postgres-backup.tag b/packages/apps/ferretdb/images/postgres-backup.tag index 71761adf..48a111cb 100644 --- a/packages/apps/ferretdb/images/postgres-backup.tag +++ b/packages/apps/ferretdb/images/postgres-backup.tag @@ -1 +1 @@ -ghcr.io/aenix-io/cozystack/postgres-backup:0.6.2@sha256:d2015c6dba92293bda652d055e97d1be80e8414c2dc78037c12812d1a2e2cba1 +ghcr.io/aenix-io/cozystack/postgres-backup:0.7.0@sha256:d2015c6dba92293bda652d055e97d1be80e8414c2dc78037c12812d1a2e2cba1 diff --git a/packages/apps/http-cache/images/nginx-cache.tag b/packages/apps/http-cache/images/nginx-cache.tag index 48c03bea..285502bd 100644 --- a/packages/apps/http-cache/images/nginx-cache.tag +++ b/packages/apps/http-cache/images/nginx-cache.tag @@ -1 +1 @@ -ghcr.io/aenix-io/cozystack/nginx-cache:0.3.1@sha256:556bc8d29ee9e90b3d64d0481dcfc66483d055803315bba3d9ece17c0d97f32b +ghcr.io/aenix-io/cozystack/nginx-cache:0.3.1@sha256:cd744b2d1d50191f4908f2db83079b32973d1c009fe9468627be72efbfa0a107 diff --git a/packages/apps/kubernetes/images/cluster-autoscaler.tag 
b/packages/apps/kubernetes/images/cluster-autoscaler.tag index 12fa6d8a..5f8ef3e0 100644 --- a/packages/apps/kubernetes/images/cluster-autoscaler.tag +++ b/packages/apps/kubernetes/images/cluster-autoscaler.tag @@ -1 +1 @@ -ghcr.io/aenix-io/cozystack/cluster-autoscaler:latest@sha256:7f617de5a24de790a15d9e97c6287ff2b390922e6e74c7a665cbf498f634514d +ghcr.io/aenix-io/cozystack/cluster-autoscaler:0.11.0@sha256:7f617de5a24de790a15d9e97c6287ff2b390922e6e74c7a665cbf498f634514d diff --git a/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag b/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag index 3a925cd7..8117b8c6 100644 --- a/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag +++ b/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag @@ -1 +1 @@ -ghcr.io/aenix-io/cozystack/kubevirt-cloud-provider:latest@sha256:735aa8092501fc0f2904b685b15bc0137ea294cb08301ca1185d3dec5f467f0f +ghcr.io/aenix-io/cozystack/kubevirt-cloud-provider:0.11.0@sha256:ba567212f9fe5e7c32af9857edd79eb012f3eb39c2eae0fc831b14d5b7879427 diff --git a/packages/apps/kubernetes/images/kubevirt-csi-driver.tag b/packages/apps/kubernetes/images/kubevirt-csi-driver.tag index 529d404f..d6150e58 100644 --- a/packages/apps/kubernetes/images/kubevirt-csi-driver.tag +++ b/packages/apps/kubernetes/images/kubevirt-csi-driver.tag @@ -1 +1 @@ -ghcr.io/aenix-io/cozystack/kubevirt-csi-driver:latest@sha256:e56b46591cdf9140e97c3220a0c2681aadd4a4b3f7ea8473fb2504dc96e8b53a +ghcr.io/aenix-io/cozystack/kubevirt-csi-driver:0.11.0@sha256:1a9e6592fc035dbaae27f308b934206858c2e0025d4c99cd906b51615cc9766c diff --git a/packages/apps/kubernetes/images/ubuntu-container-disk.tag b/packages/apps/kubernetes/images/ubuntu-container-disk.tag index 64d015f6..6e91eba2 100644 --- a/packages/apps/kubernetes/images/ubuntu-container-disk.tag +++ b/packages/apps/kubernetes/images/ubuntu-container-disk.tag @@ -1 +1 @@ 
-ghcr.io/aenix-io/cozystack/ubuntu-container-disk:v1.30.1@sha256:5ce80a453073c4f44347409133fc7b15f1d2f37a564d189871a4082fc552ff0f +ghcr.io/aenix-io/cozystack/ubuntu-container-disk:v1.30.1@sha256:1f249fbe52821a62f706c6038b13401234e1b758ac498e53395b8f9a642b015f diff --git a/packages/apps/mysql/images/mariadb-backup.tag b/packages/apps/mysql/images/mariadb-backup.tag index c36ab455..73ced6f4 100644 --- a/packages/apps/mysql/images/mariadb-backup.tag +++ b/packages/apps/mysql/images/mariadb-backup.tag @@ -1 +1 @@ -ghcr.io/aenix-io/cozystack/mariadb-backup:0.5.1@sha256:fa2b3195521cffa55eb6d71a50b875d3c234a45e5dff71b2b9002674175bea93 +ghcr.io/aenix-io/cozystack/mariadb-backup:0.5.1@sha256:793edb25a29cbc00781e40af883815ca36937e736e2b0d202ea9c9619fb6ca11 diff --git a/packages/apps/postgres/images/postgres-backup.tag b/packages/apps/postgres/images/postgres-backup.tag index 71761adf..48a111cb 100644 --- a/packages/apps/postgres/images/postgres-backup.tag +++ b/packages/apps/postgres/images/postgres-backup.tag @@ -1 +1 @@ -ghcr.io/aenix-io/cozystack/postgres-backup:0.6.2@sha256:d2015c6dba92293bda652d055e97d1be80e8414c2dc78037c12812d1a2e2cba1 +ghcr.io/aenix-io/cozystack/postgres-backup:0.7.0@sha256:d2015c6dba92293bda652d055e97d1be80e8414c2dc78037c12812d1a2e2cba1 diff --git a/packages/apps/versions_map b/packages/apps/versions_map index 8f36849b..492cd1c1 100644 --- a/packages/apps/versions_map +++ b/packages/apps/versions_map @@ -31,7 +31,8 @@ kubernetes 0.8.0 ac11056e kubernetes 0.8.1 e54608d8 kubernetes 0.8.2 5ca8823 kubernetes 0.9.0 9b6dd19 -kubernetes 0.10.0 HEAD +kubernetes 0.10.0 ac5c38b +kubernetes 0.11.0 HEAD mysql 0.1.0 f642698 mysql 0.2.0 8b975ff0 mysql 0.3.0 5ca8823 @@ -48,12 +49,14 @@ postgres 0.4.0 ec283c33 postgres 0.4.1 5ca8823 postgres 0.5.0 c07c4bbd postgres 0.6.0 2a4768a -postgres 0.6.2 HEAD +postgres 0.6.2 54fd61c +postgres 0.7.0 HEAD rabbitmq 0.1.0 f642698 rabbitmq 0.2.0 5ca8823 rabbitmq 0.3.0 9e33dc0 rabbitmq 0.4.0 36d8855 -rabbitmq 0.4.1 HEAD +rabbitmq 
0.4.1 35536bb +rabbitmq 0.4.2 HEAD redis 0.1.1 f642698 redis 0.2.0 5ca8823 redis 0.3.0 HEAD diff --git a/packages/core/installer/values.yaml b/packages/core/installer/values.yaml index 5d7b658e..05385327 100644 --- a/packages/core/installer/values.yaml +++ b/packages/core/installer/values.yaml @@ -1,2 +1,2 @@ cozystack: - image: ghcr.io/aenix-io/cozystack/cozystack:v0.15.0@sha256:aeff26a80f84b4323578e613b3bf03caa842d617ec8d9ca98706867c1e70609f + image: ghcr.io/aenix-io/cozystack/cozystack:v0.16.0@sha256:1119f30a50b3fea1ac7d8068009ca233df6214d709c7861f7ce8fbf0402cdc72 diff --git a/packages/core/testing/values.yaml b/packages/core/testing/values.yaml index ea4a7fd9..61e3ddbc 100644 --- a/packages/core/testing/values.yaml +++ b/packages/core/testing/values.yaml @@ -1,2 +1,2 @@ e2e: - image: ghcr.io/aenix-io/cozystack/e2e-sandbox:v0.15.0@sha256:20cc84e4a11db31434881355c070113a7823501a28a6114ca02830b18607ad21 + image: ghcr.io/aenix-io/cozystack/e2e-sandbox:v0.16.0@sha256:25b298d621ec79431d106184d59849bbae634588742583d111628126ad8615c5 diff --git a/packages/extra/monitoring/Chart.yaml b/packages/extra/monitoring/Chart.yaml index 93d238a1..41b09747 100644 --- a/packages/extra/monitoring/Chart.yaml +++ b/packages/extra/monitoring/Chart.yaml @@ -3,4 +3,4 @@ name: monitoring description: Monitoring and observability stack icon: /logos/monitoring.svg type: application -version: 1.4.0 +version: 1.5.0 diff --git a/packages/extra/versions_map b/packages/extra/versions_map index 0f1c54dd..fc446b7e 100644 --- a/packages/extra/versions_map +++ b/packages/extra/versions_map @@ -12,6 +12,7 @@ monitoring 1.1.0 15478a88 monitoring 1.2.0 c9e0d63b monitoring 1.2.1 4471b4ba monitoring 1.3.0 6c5cf5b -monitoring 1.4.0 HEAD +monitoring 1.4.0 adaf603b +monitoring 1.5.0 HEAD seaweedfs 0.1.0 5ca8823 seaweedfs 0.2.0 HEAD diff --git a/packages/system/cilium/values.yaml b/packages/system/cilium/values.yaml index 2cdb9edb..2ef5bb45 100644 --- a/packages/system/cilium/values.yaml +++ 
b/packages/system/cilium/values.yaml @@ -12,7 +12,7 @@ cilium: mode: "kubernetes" image: repository: ghcr.io/aenix-io/cozystack/cilium - tag: latest + tag: 1.16.2 digest: "sha256:534c5b04fef356a6be59234243c23c0c09702fe1e2c8872012afb391ce2965c4" envoy: enabled: false diff --git a/packages/system/dashboard/values.yaml b/packages/system/dashboard/values.yaml index a3b74894..c4c35bf7 100644 --- a/packages/system/dashboard/values.yaml +++ b/packages/system/dashboard/values.yaml @@ -33,11 +33,11 @@ kubeapps: image: registry: ghcr.io/aenix-io/cozystack repository: dashboard - tag: v0.15.0 + tag: v0.16.0 digest: "sha256:4818712e9fc9c57cc321512760c3226af564a04e69d4b3ec9229ab91fd39abeb" kubeappsapis: image: registry: ghcr.io/aenix-io/cozystack repository: kubeapps-apis - tag: v0.15.0 - digest: "sha256:70c095c8f7e3ecfa11433a3a2c8f57f6ff5a0053f006939a2c171c180cc50baf" + tag: v0.16.0 + digest: "sha256:55bc8e2495933112c7cb4bb9e3b1fcb8df46aa14e27fa007f78388a9757e3238" diff --git a/packages/system/fluxcd/values.yaml b/packages/system/fluxcd/values.yaml index 2d0580d0..c766a685 100644 --- a/packages/system/fluxcd/values.yaml +++ b/packages/system/fluxcd/values.yaml @@ -4,7 +4,7 @@ flux-instance: networkPolicy: true domain: cozy.local # -- default value is overriden in patches distribution: - version: 2.4.x + version: 2.3.x registry: ghcr.io/fluxcd components: - source-controller diff --git a/packages/system/kamaji/values.yaml b/packages/system/kamaji/values.yaml index 8adc2918..c9fc799f 100644 --- a/packages/system/kamaji/values.yaml +++ b/packages/system/kamaji/values.yaml @@ -3,7 +3,7 @@ kamaji: deploy: false image: pullPolicy: IfNotPresent - tag: latest@sha256:bb45d953a8ba46a19c8941ccc9fc8498d91435c77db439d8b1d6bde9fea8802a + tag: v0.16.0@sha256:241e6cdf60905e53f0cb47aadcab69dd8ffa97d316faac6bdeb704c1b13c24db repository: ghcr.io/aenix-io/cozystack/kamaji resources: limits: diff --git a/packages/system/kubeovn/values.yaml b/packages/system/kubeovn/values.yaml index 
88a03fc9..4d7d705e 100644 --- a/packages/system/kubeovn/values.yaml +++ b/packages/system/kubeovn/values.yaml @@ -22,4 +22,4 @@ global: images: kubeovn: repository: kubeovn - tag: v1.13.0@sha256:11c4ef0f71c73df4703743c0f63b7ff0ec67af6342caf1e7db8ebd5546071855 + tag: v1.13.0@sha256:6676da12917c3645a08fb2adb7def5fcb68995338126b257e0a0a9570516cd5c From 929ab5c5ebd707a414ebbcd7b49a69bfb1858cb4 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Wed, 2 Oct 2024 15:21:59 +0200 Subject: [PATCH 20/41] cilium: enable native routing in distro-full bundle (#384) Signed-off-by: Andrei Kvapil --- packages/core/platform/bundles/distro-full.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/core/platform/bundles/distro-full.yaml b/packages/core/platform/bundles/distro-full.yaml index 7e4d61a6..5ac57835 100644 --- a/packages/core/platform/bundles/distro-full.yaml +++ b/packages/core/platform/bundles/distro-full.yaml @@ -29,6 +29,7 @@ releases: enableIdentityMark: true ipv4NativeRoutingCIDR: "{{ index $cozyConfig.data "ipv4-pod-cidr" }}" autoDirectNodeRoutes: true + routingMode: native - name: cert-manager releaseName: cert-manager From b605c85eb206d413ea8451f34fd11f5adf81dcab Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Thu, 3 Oct 2024 15:59:49 +0200 Subject: [PATCH 21/41] Rework alerts; Add fluxcd alerts (#388) - Rework alerts - Add fluxcd alerts --------- Signed-off-by: Andrei Kvapil --- packages/system/monitoring/Makefile | 23 - packages/system/monitoring/alerts/etcd.yaml | 221 ++ packages/system/monitoring/alerts/flux.yaml | 128 + .../monitoring/alerts/general.rules.yaml | 57 + ...les.container_cpu_usage_seconds_total.yaml | 18 + .../k8s.rules.container_memory_cache.yaml | 17 + .../k8s.rules.container_memory_rss.yaml | 17 + .../k8s.rules.container_memory_swap.yaml | 17 + ...es.container_memory_working_set_bytes.yaml | 17 + .../alerts/k8s.rules.container_resource.yaml | 93 + .../alerts/k8s.rules.pod_owner.yaml | 60 + .../kube-apiserver-availability.rules.yaml 
| 146 + .../alerts/kube-apiserver-burnrate.rules.yaml | 324 +++ .../kube-apiserver-histogram.rules.yaml | 23 + .../alerts/kube-apiserver-slos.yaml | 73 + .../alerts/kube-prometheus-general.rules.yaml | 17 + .../kube-prometheus-node-recording.rules.yaml | 37 + .../alerts/kube-scheduler.rules.yaml | 63 + .../monitoring/alerts/kube-state-metrics.yaml | 73 + .../monitoring/alerts/kubelet.rules.yaml | 30 + .../monitoring/alerts/kubernetes-apps.yaml | 304 ++ .../alerts/kubernetes-resources.yaml | 138 + .../monitoring/alerts/kubernetes-storage.yaml | 130 + .../alerts/kubernetes-system-apiserver.yaml | 91 + .../kubernetes-system-controller-manager.yaml | 21 + .../alerts/kubernetes-system-kubelet.yaml | 175 ++ .../alerts/kubernetes-system-scheduler.yaml | 20 + .../monitoring/alerts/kubernetes-system.yaml | 37 + .../alerts/node-exporter.rules.yaml | 93 + .../monitoring/alerts/node-exporter.yaml | 396 +++ .../monitoring/alerts/node-network.yaml | 21 + .../system/monitoring/alerts/node.rules.yaml | 55 + .../victoria-metrics-k8s-stack/.helmignore | 26 - .../victoria-metrics-k8s-stack/CHANGELOG.md | 688 ----- .../victoria-metrics-k8s-stack/Chart.lock | 24 - .../victoria-metrics-k8s-stack/Chart.yaml | 66 - .../victoria-metrics-k8s-stack/README.md | 2576 ----------------- .../README.md.gotmpl | 300 -- .../RELEASE_GUIDE.md | 40 - .../RELEASE_NOTES.md | 12 - .../victoria-metrics-k8s-stack/_changelog.md | 13 - .../victoria-metrics-k8s-stack/_index.md | 13 - .../files/rules/generated/etcd.yaml | 165 -- .../files/rules/generated/general.rules.yaml | 53 - ...les.container_cpu_usage_seconds_total.yaml | 11 - .../k8s.rules.container_memory_cache.yaml | 10 - .../k8s.rules.container_memory_rss.yaml | 10 - .../k8s.rules.container_memory_swap.yaml | 10 - ...es.container_memory_working_set_bytes.yaml | 10 - .../k8s.rules.container_resource.yaml | 79 - .../rules/generated/k8s.rules.pod_owner.yaml | 54 - .../kube-apiserver-availability.rules.yaml | 128 - .../kube-apiserver-burnrate.rules.yaml | 
318 -- .../kube-apiserver-histogram.rules.yaml | 15 - .../rules/generated/kube-apiserver-slos.yaml | 63 - .../kube-prometheus-general.rules.yaml | 9 - .../kube-prometheus-node-recording.rules.yaml | 21 - .../rules/generated/kube-scheduler.rules.yaml | 48 - .../rules/generated/kube-state-metrics.yaml | 55 - .../files/rules/generated/kubelet.rules.yaml | 18 - .../rules/generated/kubernetes-apps.yaml | 257 -- .../rules/generated/kubernetes-resources.yaml | 113 - .../rules/generated/kubernetes-storage.yaml | 101 - .../kubernetes-system-apiserver.yaml | 62 - .../kubernetes-system-controller-manager.yaml | 13 - .../generated/kubernetes-system-kubelet.yaml | 136 - .../kubernetes-system-scheduler.yaml | 13 - .../rules/generated/kubernetes-system.yaml | 27 - .../rules/generated/node-exporter.rules.yaml | 76 - .../files/rules/generated/node-exporter.yaml | 336 --- .../files/rules/generated/node-network.yaml | 13 - .../files/rules/generated/node.rules.yaml | 44 - .../templates/_helpers.tpl | 458 --- .../templates/extra-objects.yaml | 4 - .../templates/rules/rule.yaml | 121 - .../charts/victoria-metrics-k8s-stack/todo.md | 26 - .../values.minikube.yaml | 38 - .../victoria-metrics-k8s-stack/values.yaml | 1233 -------- .../system/monitoring/templates/alerts.yaml | 7 + packages/system/monitoring/values.yaml | 272 ++ 80 files changed, 3191 insertions(+), 7929 deletions(-) create mode 100644 packages/system/monitoring/alerts/etcd.yaml create mode 100644 packages/system/monitoring/alerts/flux.yaml create mode 100644 packages/system/monitoring/alerts/general.rules.yaml create mode 100644 packages/system/monitoring/alerts/k8s.rules.container_cpu_usage_seconds_total.yaml create mode 100644 packages/system/monitoring/alerts/k8s.rules.container_memory_cache.yaml create mode 100644 packages/system/monitoring/alerts/k8s.rules.container_memory_rss.yaml create mode 100644 packages/system/monitoring/alerts/k8s.rules.container_memory_swap.yaml create mode 100644 
packages/system/monitoring/alerts/k8s.rules.container_memory_working_set_bytes.yaml create mode 100644 packages/system/monitoring/alerts/k8s.rules.container_resource.yaml create mode 100644 packages/system/monitoring/alerts/k8s.rules.pod_owner.yaml create mode 100644 packages/system/monitoring/alerts/kube-apiserver-availability.rules.yaml create mode 100644 packages/system/monitoring/alerts/kube-apiserver-burnrate.rules.yaml create mode 100644 packages/system/monitoring/alerts/kube-apiserver-histogram.rules.yaml create mode 100644 packages/system/monitoring/alerts/kube-apiserver-slos.yaml create mode 100644 packages/system/monitoring/alerts/kube-prometheus-general.rules.yaml create mode 100644 packages/system/monitoring/alerts/kube-prometheus-node-recording.rules.yaml create mode 100644 packages/system/monitoring/alerts/kube-scheduler.rules.yaml create mode 100644 packages/system/monitoring/alerts/kube-state-metrics.yaml create mode 100644 packages/system/monitoring/alerts/kubelet.rules.yaml create mode 100644 packages/system/monitoring/alerts/kubernetes-apps.yaml create mode 100644 packages/system/monitoring/alerts/kubernetes-resources.yaml create mode 100644 packages/system/monitoring/alerts/kubernetes-storage.yaml create mode 100644 packages/system/monitoring/alerts/kubernetes-system-apiserver.yaml create mode 100644 packages/system/monitoring/alerts/kubernetes-system-controller-manager.yaml create mode 100644 packages/system/monitoring/alerts/kubernetes-system-kubelet.yaml create mode 100644 packages/system/monitoring/alerts/kubernetes-system-scheduler.yaml create mode 100644 packages/system/monitoring/alerts/kubernetes-system.yaml create mode 100644 packages/system/monitoring/alerts/node-exporter.rules.yaml create mode 100644 packages/system/monitoring/alerts/node-exporter.yaml create mode 100644 packages/system/monitoring/alerts/node-network.yaml create mode 100644 packages/system/monitoring/alerts/node.rules.yaml delete mode 100644 
packages/system/monitoring/charts/victoria-metrics-k8s-stack/.helmignore delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/CHANGELOG.md delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.lock delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md.gotmpl delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_GUIDE.md delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_NOTES.md delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/_changelog.md delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/_index.md delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/etcd.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_usage_seconds_total.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_cache.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_rss.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_swap.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_working_set_bytes.yaml delete mode 100644 
packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_resource.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.pod_owner.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-availability.rules.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-burnrate.rules.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-histogram.rules.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-slos.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-general.rules.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-node-recording.rules.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-scheduler.rules.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-state-metrics.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubelet.rules.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-apps.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-resources.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-storage.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-apiserver.yaml delete mode 100644 
packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-controller-manager.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-kubelet.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-scheduler.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.rules.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-network.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node.rules.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/_helpers.tpl delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/extra-objects.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/rules/rule.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/todo.md delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.minikube.yaml delete mode 100644 packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.yaml create mode 100644 packages/system/monitoring/templates/alerts.yaml diff --git a/packages/system/monitoring/Makefile b/packages/system/monitoring/Makefile index 61780ca7..b0425ded 100644 --- a/packages/system/monitoring/Makefile +++ b/packages/system/monitoring/Makefile @@ -19,26 +19,3 @@ update: helm repo add fluent https://fluent.github.io/helm-charts helm repo update fluent helm pull 
fluent/fluent-bit --untar --untardir charts - # alerts from victoria-metrics-k8s-stack - helm repo add vm https://victoriametrics.github.io/helm-charts/ - helm repo update vm - helm pull vm/victoria-metrics-k8s-stack --untar --untardir charts - rm -rf charts/victoria-metrics-k8s-stack/charts - rm -rf charts/victoria-metrics-k8s-stack/hack - rm -rf charts/victoria-metrics-k8s-stack/templates/victoria-metrics-operator - rm -rf charts/victoria-metrics-k8s-stack/templates/grafana - rm -rf charts/victoria-metrics-k8s-stack/templates/ingress.yaml - rm -rf charts/victoria-metrics-k8s-stack/files/dashboards - rm -f charts/victoria-metrics-k8s-stack/templates/servicemonitors.yaml - rm -f charts/victoria-metrics-k8s-stack/templates/serviceaccount.yaml - rm -f charts/victoria-metrics-k8s-stack/templates/rules/additionalVictoriaMetricsRules.yml - sed -i '/ namespace:/d' charts/victoria-metrics-k8s-stack/templates/rules/rule.yaml - sed -i 's|job="apiserver"|job="kube-apiserver"|g' `grep -rl 'job="apiserver"' charts/victoria-metrics-k8s-stack/files/rules/generated` - sed -i 's|severity: info|severity: informational|g' `grep -rl 'severity: info' ./charts/victoria-metrics-k8s-stack/files/rules/generated` - sed -i 's|severity: none|severity: ok|g' ./charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml - sed -i ./charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml \ - -e '/Watchdog/,/severity:/s/severity: none/severity: ok/' \ - -e '/InfoInhibitor/,/severity:/s/severity: none/severity: major/' - # TODO - rm -f charts/victoria-metrics-k8s-stack/files/rules/generated/alertmanager.rules.yaml - rm -f charts/victoria-metrics-k8s-stack/files/rules/generated/vm*.yaml diff --git a/packages/system/monitoring/alerts/etcd.yaml b/packages/system/monitoring/alerts/etcd.yaml new file mode 100644 index 00000000..72c5ef7b --- /dev/null +++ b/packages/system/monitoring/alerts/etcd.yaml @@ -0,0 +1,221 @@ +apiVersion: operator.victoriametrics.com/v1beta1 
+kind: VMRule +metadata: + name: alerts-etcd +spec: + groups: + - name: etcd + params: {} + rules: + - alert: etcdMembersDown + annotations: + description: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value + }}).' + summary: etcd cluster members are down. + expr: |- + max without (endpoint) ( + sum without (instance) (up{job=~".*etcd.*"} == bool 0) + or + count without (To) ( + sum without (instance) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[120s])) > 0.01 + ) + ) + > 0 + for: 10m + labels: + severity: critical + exported_instance: '{{ $labels.instance }}' + service: etcd + - alert: etcdInsufficientMembers + annotations: + description: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value + }}).' + summary: etcd cluster has insufficient number of members. + expr: sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"}) + without (instance) + 1) / 2) + for: 3m + labels: + severity: critical + exported_instance: '{{ $labels.instance }}' + service: etcd + - alert: etcdNoLeader + annotations: + description: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance + }} has no leader.' + summary: etcd cluster has no leader. + expr: etcd_server_has_leader{job=~".*etcd.*"} == 0 + for: 1m + labels: + severity: critical + exported_instance: '{{ $labels.instance }}' + service: etcd + - alert: etcdHighNumberOfLeaderChanges + annotations: + description: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes + within the last 15 minutes. Frequent elections may be a sign of insufficient + resources, high network latency, or disruptions by other components and + should be investigated.' + summary: etcd cluster has high number of leader changes. 
+ expr: increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) + or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) + >= 4 + for: 5m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}' + service: etcd + - alert: etcdHighNumberOfFailedGRPCRequests + annotations: + description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests + for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance + }}.' + summary: etcd cluster has high number of failed grpc requests. + expr: |- + 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) + / + sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) + > 1 + for: 10m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}/{{ $labels.grpc_method }}' + service: etcd + - alert: etcdHighNumberOfFailedGRPCRequests + annotations: + description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests + for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance + }}.' + summary: etcd cluster has high number of failed grpc requests. + expr: |- + 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) + / + sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) + > 5 + for: 5m + labels: + severity: critical + exported_instance: '{{ $labels.instance }}/{{ $labels.grpc_method }}' + service: etcd + - alert: etcdGRPCRequestsSlow + annotations: + description: 'etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests + is {{ $value }}s on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method + }} method.' 
+ summary: etcd grpc requests are slow + expr: |- + histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type)) + > 0.15 + for: 10m + labels: + severity: critical + exported_instance: '{{ $labels.instance }}/{{ $labels.grpc_method }}' + service: etcd + - alert: etcdMemberCommunicationSlow + annotations: + description: 'etcd cluster "{{ $labels.job }}": member communication with + {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance + }}.' + summary: etcd cluster member communication is slow. + expr: |- + histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m])) + > 0.15 + for: 10m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}/{{ $labels.To }}' + service: etcd + - alert: etcdHighNumberOfFailedProposals + annotations: + description: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures + within the last 30 minutes on etcd instance {{ $labels.instance }}.' + summary: etcd cluster has high number of proposal failures. + expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}' + service: etcd + - alert: etcdHighFsyncDurations + annotations: + description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations + are {{ $value }}s on etcd instance {{ $labels.instance }}.' + summary: etcd cluster 99th percentile fsync durations are too high. + expr: |- + histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) + > 0.5 + for: 10m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}' + service: etcd + - alert: etcdHighFsyncDurations + annotations: + description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations + are {{ $value }}s on etcd instance {{ $labels.instance }}.'
+ summary: etcd cluster 99th percentile fsync durations are too high. + expr: |- + histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) + > 1 + for: 10m + labels: + severity: critical + exported_instance: '{{ $labels.instance }}' + service: etcd + - alert: etcdHighCommitDurations + annotations: + description: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations + {{ $value }}s on etcd instance {{ $labels.instance }}.' + summary: etcd cluster 99th percentile commit durations are too high. + expr: |- + histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m])) + > 0.25 + for: 10m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}' + service: etcd + - alert: etcdDatabaseQuotaLowSpace + annotations: + description: 'etcd cluster "{{ $labels.job }}": database size exceeds the + defined quota on etcd instance {{ $labels.instance }}, please defrag or + increase the quota as the writes to etcd will be disabled when it is full.' + summary: etcd cluster database is running full. + expr: (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) + / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 + > 95 + for: 10m + labels: + severity: critical + exported_instance: '{{ $labels.instance }}' + service: etcd + - alert: etcdExcessiveDatabaseGrowth + annotations: + description: 'etcd cluster "{{ $labels.job }}": Predicting running out of + disk space in the next four hours, based on write observations within the + past four hours on etcd instance {{ $labels.instance }}, please check as + it might be disruptive.' + summary: etcd cluster database growing very fast. 
+ expr: predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], + 4*60*60) > etcd_server_quota_backend_bytes{job=~".*etcd.*"} + for: 10m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}' + service: etcd + - alert: etcdDatabaseHighFragmentationRatio + annotations: + description: 'etcd cluster "{{ $labels.job }}": database size in use on instance + {{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual + allocated disk space, please run defragmentation (e.g. etcdctl defrag) to + retrieve the unused fragmented disk space.' + runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation + summary: etcd database size in use is less than 50% of the actual allocated + storage. + expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m]) + / last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < + 0.5 and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600 + for: 10m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}' + service: etcd diff --git a/packages/system/monitoring/alerts/flux.yaml b/packages/system/monitoring/alerts/flux.yaml new file mode 100644 index 00000000..85b7dae9 --- /dev/null +++ b/packages/system/monitoring/alerts/flux.yaml @@ -0,0 +1,128 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + annotations: + meta.helm.sh/release-name: monitoring + meta.helm.sh/release-namespace: cozy-monitoring + labels: + app: victoria-metrics-k8s-stack + app.kubernetes.io/instance: monitoring + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: victoria-metrics-k8s-stack + app.kubernetes.io/version: v1.102.1 + helm.sh/chart: victoria-metrics-k8s-stack-0.25.17 + name: alerts-flux-resources + namespace: cozy-monitoring +spec: + groups: + - name: flux-resources-alerts + rules: + - alert: HelmReleaseNotReady + expr: gotk_resource_info{customresource_kind="HelmRelease", ready!="True"}
> 0 + for: 5m + labels: + severity: major + service: fluxcd + exported_instance: '{{ $labels.exported_namespace }}/{{ $labels.name }}' + annotations: + summary: "HelmRelease {{ $labels.name }} in namespace {{ $labels.exported_namespace }} is not ready" + description: "HelmRelease {{ $labels.name }} in namespace {{ $labels.exported_namespace }} is in an unready state for more than 5 minutes." + + - alert: GitRepositorySyncFailed + expr: gotk_resource_info{customresource_kind="GitRepository", ready!="True"} > 0 + for: 5m + labels: + severity: major + service: fluxcd + exported_instance: '{{ $labels.exported_namespace }}/{{ $labels.name }}' + annotations: + summary: "GitRepository {{ $labels.name }} in namespace {{ $labels.exported_namespace }} sync failed" + description: "GitRepository {{ $labels.name }} in namespace {{ $labels.exported_namespace }} has not been successfully synced for more than 5 minutes." + + - alert: KustomizationNotApplied + expr: gotk_resource_info{customresource_kind="Kustomization", ready!="True"} > 0 + for: 5m + labels: + severity: major + service: fluxcd + exported_instance: '{{ $labels.exported_namespace }}/{{ $labels.name }}' + annotations: + summary: "Kustomization {{ $labels.name }} in namespace {{ $labels.exported_namespace }} is not applied" + description: "Kustomization {{ $labels.name }} in namespace {{ $labels.exported_namespace }} is not successfully applied for more than 5 minutes." + + - alert: ImageRepositorySyncFailed + expr: gotk_resource_info{customresource_kind="ImageRepository", ready!="True"} > 0 + for: 5m + labels: + severity: major + service: fluxcd + exported_instance: '{{ $labels.exported_namespace }}/{{ $labels.name }}' + annotations: + summary: "ImageRepository {{ $labels.name }} in namespace {{ $labels.exported_namespace }} sync failed" + description: "ImageRepository {{ $labels.name }} in namespace {{ $labels.exported_namespace }} has not been successfully synced for more than 5 minutes."
+ + - alert: HelmChartFailed + expr: gotk_resource_info{customresource_kind="HelmChart", ready!="True"} > 0 + for: 5m + labels: + severity: major + service: fluxcd + exported_instance: '{{ $labels.exported_namespace }}/{{ $labels.name }}' + annotations: + summary: "HelmChart {{ $labels.name }} in namespace {{ $labels.exported_namespace }} has failed" + description: "HelmChart {{ $labels.name }} in namespace {{ $labels.exported_namespace }} is not ready for more than 5 minutes." + + - alert: HelmReleaseSuspended + expr: gotk_resource_info{customresource_kind="HelmRelease", suspended="true"} > 0 + for: 5m + labels: + severity: warning + service: fluxcd + exported_instance: '{{ $labels.exported_namespace }}/{{ $labels.name }}' + annotations: + summary: "HelmRelease {{ $labels.name }} in namespace {{ $labels.exported_namespace }} is suspended" + description: "HelmRelease {{ $labels.name }} in namespace {{ $labels.exported_namespace }} has been suspended." + + - alert: GitRepositorySuspended + expr: gotk_resource_info{customresource_kind="GitRepository", suspended="true"} > 0 + for: 5m + labels: + severity: warning + service: fluxcd + exported_instance: '{{ $labels.exported_namespace }}/{{ $labels.name }}' + annotations: + summary: "GitRepository {{ $labels.name }} in namespace {{ $labels.exported_namespace }} is suspended" + description: "GitRepository {{ $labels.name }} in namespace {{ $labels.exported_namespace }} has been suspended." + + - alert: KustomizationSuspended + expr: gotk_resource_info{customresource_kind="Kustomization", suspended="true"} > 0 + for: 5m + labels: + severity: warning + service: fluxcd + exported_instance: '{{ $labels.exported_namespace }}/{{ $labels.name }}' + annotations: + summary: "Kustomization {{ $labels.name }} in namespace {{ $labels.exported_namespace }} is suspended" + description: "Kustomization {{ $labels.name }} in namespace {{ $labels.exported_namespace }} has been suspended."
+ + - alert: ImageRepositorySuspended + expr: gotk_resource_info{customresource_kind="ImageRepository", suspended="true"} > 0 + for: 5m + labels: + severity: warning + service: fluxcd + exported_instance: '{{ $labels.exported_namespace }}/{{ $labels.name }}' + annotations: + summary: "ImageRepository {{ $labels.name }} in namespace {{ $labels.exported_namespace }} is suspended" + description: "ImageRepository {{ $labels.name }} in namespace {{ $labels.exported_namespace }} has been suspended." + + - alert: HelmChartSuspended + expr: gotk_resource_info{customresource_kind="HelmChart", suspended="true"} > 0 + for: 5m + labels: + severity: warning + service: fluxcd + exported_instance: '{{ $labels.exported_namespace }}/{{ $labels.name }}' + annotations: + summary: "HelmChart {{ $labels.name }} in namespace {{ $labels.exported_namespace }} is suspended" + description: "HelmChart {{ $labels.name }} in namespace {{ $labels.exported_namespace }} has been suspended." diff --git a/packages/system/monitoring/alerts/general.rules.yaml b/packages/system/monitoring/alerts/general.rules.yaml new file mode 100644 index 00000000..924870c4 --- /dev/null +++ b/packages/system/monitoring/alerts/general.rules.yaml @@ -0,0 +1,57 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-general.rules +spec: + groups: + - name: general.rules + params: {} + rules: + - alert: TargetDown + annotations: + description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service + }} targets in {{ $labels.namespace }} namespace are down.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown + summary: One or more targets are unreachable. 
+ expr: 100 * (count(up == 0) BY (job,namespace,service,cluster) / count(up) BY + (job,namespace,service,cluster)) > 10 + for: 10m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}' + service: general.rules + - alert: Watchdog + annotations: + description: | + This is an alert meant to ensure that the entire alerting pipeline is functional. + This alert is always firing, therefore it should always be firing in Alertmanager + and always fire against a receiver. There are integrations with various notification + mechanisms that send a notification when this alert is not firing. For example the + "DeadMansSnitch" integration in PagerDuty. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog + summary: An alert that should always be firing to certify that Alertmanager + is working properly. + expr: vector(1) + labels: + severity: ok + exported_instance: global + service: general.rules + event: Heartbeat + - alert: InfoInhibitor + annotations: + description: | + This is an alert that is used to inhibit info alerts. + By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with + other alerts. + This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a + severity of 'warning' or 'critical' starts firing on the same namespace. + This alert should be routed to a null receiver and configured to inhibit alerts with severity="info". + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor + summary: Info-level alert inhibition. 
+ expr: ALERTS{severity = "info"} == 1 unless on (namespace,cluster) ALERTS{alertname + != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == + 1 + labels: + severity: major + exported_instance: global + service: general.rules diff --git a/packages/system/monitoring/alerts/k8s.rules.container_cpu_usage_seconds_total.yaml b/packages/system/monitoring/alerts/k8s.rules.container_cpu_usage_seconds_total.yaml new file mode 100644 index 00000000..e0419d1b --- /dev/null +++ b/packages/system/monitoring/alerts/k8s.rules.container_cpu_usage_seconds_total.yaml @@ -0,0 +1,18 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-k8s.rules.containercpuusagesecondstotal +spec: + groups: + - name: k8s.rules.container_cpu_usage_seconds_total + params: {} + rules: + - annotations: {} + expr: |- + sum by (namespace,pod,container,cluster) ( + irate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m]) + ) * on (namespace,pod,cluster) group_left(node) topk by (namespace,pod,cluster) ( + 1, max by (namespace,pod,node,cluster) (kube_pod_info{node!=""}) + ) + labels: {} + record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate diff --git a/packages/system/monitoring/alerts/k8s.rules.container_memory_cache.yaml b/packages/system/monitoring/alerts/k8s.rules.container_memory_cache.yaml new file mode 100644 index 00000000..6e6d5e97 --- /dev/null +++ b/packages/system/monitoring/alerts/k8s.rules.container_memory_cache.yaml @@ -0,0 +1,17 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-k8s.rules.containermemorycache +spec: + groups: + - name: k8s.rules.container_memory_cache + params: {} + rules: + - annotations: {} + expr: |- + container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (namespace,pod,cluster) group_left(node) topk by (namespace,pod,cluster) (1, + max by 
(namespace,pod,node,cluster) (kube_pod_info{node!=""}) + ) + labels: {} + record: node_namespace_pod_container:container_memory_cache diff --git a/packages/system/monitoring/alerts/k8s.rules.container_memory_rss.yaml b/packages/system/monitoring/alerts/k8s.rules.container_memory_rss.yaml new file mode 100644 index 00000000..3dd45fc2 --- /dev/null +++ b/packages/system/monitoring/alerts/k8s.rules.container_memory_rss.yaml @@ -0,0 +1,17 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-k8s.rules.containermemoryrss +spec: + groups: + - name: k8s.rules.container_memory_rss + params: {} + rules: + - annotations: {} + expr: |- + container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (namespace,pod,cluster) group_left(node) topk by (namespace,pod,cluster) (1, + max by (namespace,pod,node,cluster) (kube_pod_info{node!=""}) + ) + labels: {} + record: node_namespace_pod_container:container_memory_rss diff --git a/packages/system/monitoring/alerts/k8s.rules.container_memory_swap.yaml b/packages/system/monitoring/alerts/k8s.rules.container_memory_swap.yaml new file mode 100644 index 00000000..4876d465 --- /dev/null +++ b/packages/system/monitoring/alerts/k8s.rules.container_memory_swap.yaml @@ -0,0 +1,17 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-k8s.rules.containermemoryswap +spec: + groups: + - name: k8s.rules.container_memory_swap + params: {} + rules: + - annotations: {} + expr: |- + container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (namespace,pod,cluster) group_left(node) topk by (namespace,pod,cluster) (1, + max by (namespace,pod,node,cluster) (kube_pod_info{node!=""}) + ) + labels: {} + record: node_namespace_pod_container:container_memory_swap diff --git a/packages/system/monitoring/alerts/k8s.rules.container_memory_working_set_bytes.yaml 
b/packages/system/monitoring/alerts/k8s.rules.container_memory_working_set_bytes.yaml new file mode 100644 index 00000000..c1b71af2 --- /dev/null +++ b/packages/system/monitoring/alerts/k8s.rules.container_memory_working_set_bytes.yaml @@ -0,0 +1,17 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-k8s.rules.containermemoryworkingsetbytes +spec: + groups: + - name: k8s.rules.container_memory_working_set_bytes + params: {} + rules: + - annotations: {} + expr: |- + container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} + * on (namespace,pod,cluster) group_left(node) topk by (namespace,pod,cluster) (1, + max by (namespace,pod,node,cluster) (kube_pod_info{node!=""}) + ) + labels: {} + record: node_namespace_pod_container:container_memory_working_set_bytes diff --git a/packages/system/monitoring/alerts/k8s.rules.container_resource.yaml b/packages/system/monitoring/alerts/k8s.rules.container_resource.yaml new file mode 100644 index 00000000..793267e0 --- /dev/null +++ b/packages/system/monitoring/alerts/k8s.rules.container_resource.yaml @@ -0,0 +1,93 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-k8s.rules.containerresource +spec: + groups: + - name: k8s.rules.container_resource + params: {} + rules: + - annotations: {} + expr: |- + kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} * on (namespace,pod,cluster) + group_left() max by (namespace,pod,cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + labels: {} + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests + - annotations: {} + expr: |- + sum by (namespace,cluster) ( + sum by (namespace,pod,cluster) ( + max by (namespace,pod,container,cluster) ( + kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} + ) * on (namespace,pod,cluster) group_left() max by (namespace,pod,cluster) ( + 
kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + labels: {} + record: namespace_memory:kube_pod_container_resource_requests:sum + - annotations: {} + expr: |- + kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} * on (namespace,pod,cluster) + group_left() max by (namespace,pod,cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + labels: {} + record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests + - annotations: {} + expr: |- + sum by (namespace,cluster) ( + sum by (namespace,pod,cluster) ( + max by (namespace,pod,container,cluster) ( + kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} + ) * on (namespace,pod,cluster) group_left() max by (namespace,pod,cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + labels: {} + record: namespace_cpu:kube_pod_container_resource_requests:sum + - annotations: {} + expr: |- + kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on (namespace,pod,cluster) + group_left() max by (namespace,pod,cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + labels: {} + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits + - annotations: {} + expr: |- + sum by (namespace,cluster) ( + sum by (namespace,pod,cluster) ( + max by (namespace,pod,container,cluster) ( + kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} + ) * on (namespace,pod,cluster) group_left() max by (namespace,pod,cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + labels: {} + record: namespace_memory:kube_pod_container_resource_limits:sum + - annotations: {} + expr: |- + kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on (namespace,pod,cluster) + group_left() max by (namespace,pod,cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + labels: {} + record: 
cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits + - annotations: {} + expr: |- + sum by (namespace,cluster) ( + sum by (namespace,pod,cluster) ( + max by (namespace,pod,container,cluster) ( + kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} + ) * on (namespace,pod,cluster) group_left() max by (namespace,pod,cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + labels: {} + record: namespace_cpu:kube_pod_container_resource_limits:sum diff --git a/packages/system/monitoring/alerts/k8s.rules.pod_owner.yaml b/packages/system/monitoring/alerts/k8s.rules.pod_owner.yaml new file mode 100644 index 00000000..ef0e5cf0 --- /dev/null +++ b/packages/system/monitoring/alerts/k8s.rules.pod_owner.yaml @@ -0,0 +1,60 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-k8s.rules.podowner +spec: + groups: + - name: k8s.rules.pod_owner + params: {} + rules: + - annotations: {} + expr: |- + max by (namespace,workload,pod,cluster) ( + label_replace( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"}, + "replicaset", "$1", "owner_name", "(.*)" + ) * on (replicaset,namespace,cluster) group_left(owner_name) topk by (replicaset,namespace,cluster) ( + 1, max by (replicaset,namespace,owner_name,cluster) ( + kube_replicaset_owner{job="kube-state-metrics"} + ) + ), + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: deployment + record: namespace_workload_pod:kube_pod_owner:relabel + - annotations: {} + expr: |- + max by (namespace,workload,pod,cluster) ( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: daemonset + record: namespace_workload_pod:kube_pod_owner:relabel + - annotations: {} + expr: |- + max by (namespace,workload,pod,cluster) ( + label_replace( + kube_pod_owner{job="kube-state-metrics", 
owner_kind="StatefulSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: statefulset + record: namespace_workload_pod:kube_pod_owner:relabel + - annotations: {} + expr: |- + max by (namespace,workload,pod,cluster) ( + label_replace( + kube_pod_owner{job="kube-state-metrics", owner_kind="Job"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: job + record: namespace_workload_pod:kube_pod_owner:relabel diff --git a/packages/system/monitoring/alerts/kube-apiserver-availability.rules.yaml b/packages/system/monitoring/alerts/kube-apiserver-availability.rules.yaml new file mode 100644 index 00000000..a1847131 --- /dev/null +++ b/packages/system/monitoring/alerts/kube-apiserver-availability.rules.yaml @@ -0,0 +1,146 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kube-apiserver-availability.rules +spec: + groups: + - interval: 3m + name: kube-apiserver-availability.rules + params: {} + rules: + - annotations: {} + expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 + * 30 + labels: {} + record: code_verb:apiserver_request_total:increase30d + - annotations: {} + expr: sum by (code,cluster) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) + labels: + verb: read + record: code:apiserver_request_total:increase30d + - annotations: {} + expr: sum by (code,cluster) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + labels: + verb: write + record: code:apiserver_request_total:increase30d + - annotations: {} + expr: sum by (verb,scope,cluster) (increase(apiserver_request_sli_duration_seconds_count{job="kube-apiserver"}[1h])) + labels: {} + record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h + - annotations: {} + expr: sum by (verb,scope,cluster) (avg_over_time(cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h[30d]) + * 24 * 30) + labels: {} + record: 
cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d + - annotations: {} + expr: sum by (verb,scope,le,cluster) (increase(apiserver_request_sli_duration_seconds_bucket[1h])) + labels: {} + record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h + - annotations: {} + expr: sum by (verb,scope,le,cluster) (avg_over_time(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h[30d]) + * 24 * 30) + labels: {} + record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d + - annotations: {} + expr: |- + 1 - ( + ( + # write too slow + sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + - + sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) + ) + + ( + # read too slow + sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"}) + - + ( + ( + sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) + or + vector(0) + ) + + + sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) + + + sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) + ) + ) + + # errors + sum by (cluster) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) + ) + / + sum by (cluster) (code:apiserver_request_total:increase30d) + labels: + verb: all + record: apiserver_request:availability30d + - annotations: {} + expr: |- + 1 - ( + sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"}) + - + ( + # too slow + ( + sum by (cluster) 
(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) + or + vector(0) + ) + + + sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) + + + sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) + ) + + + # errors + sum by (cluster) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) + ) + / + sum by (cluster) (code:apiserver_request_total:increase30d{verb="read"}) + labels: + verb: read + record: apiserver_request:availability30d + - annotations: {} + expr: |- + 1 - ( + ( + # too slow + sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + - + sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) + ) + + + # errors + sum by (cluster) (code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) + ) + / + sum by (cluster) (code:apiserver_request_total:increase30d{verb="write"}) + labels: + verb: write + record: apiserver_request:availability30d + - annotations: {} + expr: sum by (code,resource,cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[5m])) + labels: + verb: read + record: code_resource:apiserver_request_total:rate5m + - annotations: {} + expr: sum by (code,resource,cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + labels: + verb: write + record: code_resource:apiserver_request_total:rate5m + - annotations: {} + expr: sum by (code,verb,cluster) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h])) + labels: {} + record: code_verb:apiserver_request_total:increase1h + - 
annotations: {} + expr: sum by (code,verb,cluster) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h])) + labels: {} + record: code_verb:apiserver_request_total:increase1h + - annotations: {} + expr: sum by (code,verb,cluster) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h])) + labels: {} + record: code_verb:apiserver_request_total:increase1h + - annotations: {} + expr: sum by (code,verb,cluster) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) + labels: {} + record: code_verb:apiserver_request_total:increase1h diff --git a/packages/system/monitoring/alerts/kube-apiserver-burnrate.rules.yaml b/packages/system/monitoring/alerts/kube-apiserver-burnrate.rules.yaml new file mode 100644 index 00000000..92435ff7 --- /dev/null +++ b/packages/system/monitoring/alerts/kube-apiserver-burnrate.rules.yaml @@ -0,0 +1,324 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kube-apiserver-burnrate.rules +spec: + groups: + - name: kube-apiserver-burnrate.rules + params: {} + rules: + - annotations: {} + expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1d])) + - + ( + ( + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1d])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1d])) + + + sum by (cluster) 
(rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1d])) + ) + ) + + + # errors + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[1d])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[1d])) + labels: + verb: read + record: apiserver_request:burnrate1d + - annotations: {} + expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1h])) + - + ( + ( + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1h])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1h])) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1h])) + ) + ) + + + # errors + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[1h])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[1h])) + labels: + verb: read + record: apiserver_request:burnrate1h + - annotations: {} + expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[2h])) + - + ( + ( + sum by (cluster) 
(rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[2h])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[2h])) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[2h])) + ) + ) + + + # errors + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[2h])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[2h])) + labels: + verb: read + record: apiserver_request:burnrate2h + - annotations: {} + expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[30m])) + - + ( + ( + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[30m])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[30m])) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[30m])) + ) + ) + + + # errors + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[30m])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[30m])) + labels: + verb: read + record: apiserver_request:burnrate30m 
+ - annotations: {} + expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[3d])) + - + ( + ( + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[3d])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[3d])) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[3d])) + ) + ) + + + # errors + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[3d])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[3d])) + labels: + verb: read + record: apiserver_request:burnrate3d + - annotations: {} + expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m])) + - + ( + ( + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[5m])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[5m])) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[5m])) + ) + ) + + + # errors + sum by (cluster) 
(rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[5m])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[5m])) + labels: + verb: read + record: apiserver_request:burnrate5m + - annotations: {} + expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[6h])) + - + ( + ( + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[6h])) + or + vector(0) + ) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[6h])) + + + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[6h])) + ) + ) + + + # errors + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[6h])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[6h])) + labels: + verb: read + record: apiserver_request:burnrate6h + - annotations: {} + expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1d])) + - + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1d])) + ) + + + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) + ) + / + sum by (cluster) 
(rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) + labels: + verb: write + record: apiserver_request:burnrate1d + - annotations: {} + expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1h])) + - + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1h])) + ) + + + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) + labels: + verb: write + record: apiserver_request:burnrate1h + - annotations: {} + expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[2h])) + - + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[2h])) + ) + + + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) + labels: + verb: write + record: apiserver_request:burnrate2h + - annotations: {} + expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[30m])) + - + sum by (cluster) 
(rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[30m])) + ) + + + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) + labels: + verb: write + record: apiserver_request:burnrate30m + - annotations: {} + expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[3d])) + - + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[3d])) + ) + + + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) + labels: + verb: write + record: apiserver_request:burnrate3d + - annotations: {} + expr: |- + ( + ( + # too slow + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m])) + - + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[5m])) + ) + + + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + labels: + verb: write + record: apiserver_request:burnrate5m + - annotations: {} + expr: |- + ( + ( + # too slow + sum by (cluster) 
(rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[6h])) + - + sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[6h])) + ) + + + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) + ) + / + sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) + labels: + verb: write + record: apiserver_request:burnrate6h diff --git a/packages/system/monitoring/alerts/kube-apiserver-histogram.rules.yaml b/packages/system/monitoring/alerts/kube-apiserver-histogram.rules.yaml new file mode 100644 index 00000000..5fd4b7e5 --- /dev/null +++ b/packages/system/monitoring/alerts/kube-apiserver-histogram.rules.yaml @@ -0,0 +1,23 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kube-apiserver-histogram.rules +spec: + groups: + - name: kube-apiserver-histogram.rules + params: {} + rules: + - annotations: {} + expr: histogram_quantile(0.99, sum by (le,resource,cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) + > 0 + labels: + quantile: '0.99' + verb: read + record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile + - annotations: {} + expr: histogram_quantile(0.99, sum by (le,resource,cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) + > 0 + labels: + quantile: '0.99' + verb: write + record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile diff --git a/packages/system/monitoring/alerts/kube-apiserver-slos.yaml 
b/packages/system/monitoring/alerts/kube-apiserver-slos.yaml new file mode 100644 index 00000000..441d4958 --- /dev/null +++ b/packages/system/monitoring/alerts/kube-apiserver-slos.yaml @@ -0,0 +1,73 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kube-apiserver-slos +spec: + groups: + - name: kube-apiserver-slos + params: {} + rules: + - alert: KubeAPIErrorBudgetBurn + annotations: + description: The API server is burning too much error budget. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. + expr: |- + sum(apiserver_request:burnrate1h) > (14.40 * 0.01000) + and + sum(apiserver_request:burnrate5m) > (14.40 * 0.01000) + for: 2m + labels: + long: 1h + severity: critical + short: 5m + exported_instance: '{{ $labels.namespace }}/{{ $labels.apiserver }}' + service: kube-apiserver-slos + - alert: KubeAPIErrorBudgetBurn + annotations: + description: The API server is burning too much error budget. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. + expr: |- + sum(apiserver_request:burnrate6h) > (6.00 * 0.01000) + and + sum(apiserver_request:burnrate30m) > (6.00 * 0.01000) + for: 15m + labels: + long: 6h + severity: critical + short: 30m + exported_instance: '{{ $labels.namespace }}/{{ $labels.apiserver }}' + service: kube-apiserver-slos + - alert: KubeAPIErrorBudgetBurn + annotations: + description: The API server is burning too much error budget. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. 
+ expr: |- + sum(apiserver_request:burnrate1d) > (3.00 * 0.01000) + and + sum(apiserver_request:burnrate2h) > (3.00 * 0.01000) + for: 1h + labels: + long: 1d + severity: warning + short: 2h + exported_instance: '{{ $labels.namespace }}/{{ $labels.apiserver }}' + service: kube-apiserver-slos + - alert: KubeAPIErrorBudgetBurn + annotations: + description: The API server is burning too much error budget. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. + expr: |- + sum(apiserver_request:burnrate3d) > (1.00 * 0.01000) + and + sum(apiserver_request:burnrate6h) > (1.00 * 0.01000) + for: 3h + labels: + long: 3d + severity: warning + short: 6h + exported_instance: '{{ $labels.namespace }}/{{ $labels.apiserver }}' + service: kube-apiserver-slos diff --git a/packages/system/monitoring/alerts/kube-prometheus-general.rules.yaml b/packages/system/monitoring/alerts/kube-prometheus-general.rules.yaml new file mode 100644 index 00000000..c2080727 --- /dev/null +++ b/packages/system/monitoring/alerts/kube-prometheus-general.rules.yaml @@ -0,0 +1,17 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kube-prometheus-general.rules +spec: + groups: + - name: kube-prometheus-general.rules + params: {} + rules: + - annotations: {} + expr: count without(instance, pod, node) (up == 1) + labels: {} + record: count:up1 + - annotations: {} + expr: count without(instance, pod, node) (up == 0) + labels: {} + record: count:up0 diff --git a/packages/system/monitoring/alerts/kube-prometheus-node-recording.rules.yaml b/packages/system/monitoring/alerts/kube-prometheus-node-recording.rules.yaml new file mode 100644 index 00000000..80513b06 --- /dev/null +++ b/packages/system/monitoring/alerts/kube-prometheus-node-recording.rules.yaml @@ -0,0 +1,37 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: 
alerts-kube-prometheus-node-recording.rules +spec: + groups: + - name: kube-prometheus-node-recording.rules + params: {} + rules: + - annotations: {} + expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) + BY (instance) + labels: {} + record: instance:node_cpu:rate:sum + - annotations: {} + expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) + labels: {} + record: instance:node_network_receive_bytes:rate:sum + - annotations: {} + expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) + labels: {} + record: instance:node_network_transmit_bytes:rate:sum + - annotations: {} + expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) + WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) + BY (instance, cpu)) BY (instance) + labels: {} + record: instance:node_cpu:ratio + - annotations: {} + expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) + labels: {} + record: cluster:node_cpu:sum_rate5m + - annotations: {} + expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, + cpu)) + labels: {} + record: cluster:node_cpu:ratio diff --git a/packages/system/monitoring/alerts/kube-scheduler.rules.yaml b/packages/system/monitoring/alerts/kube-scheduler.rules.yaml new file mode 100644 index 00000000..adc0cc77 --- /dev/null +++ b/packages/system/monitoring/alerts/kube-scheduler.rules.yaml @@ -0,0 +1,63 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kube-scheduler.rules +spec: + groups: + - name: kube-scheduler.rules + params: {} + rules: + - annotations: {} + expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) + without(instance, pod)) + labels: + quantile: '0.99' + record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile + - annotations: {} + expr: 
histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) + without(instance, pod)) + labels: + quantile: '0.99' + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile + - annotations: {} + expr: histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) + without(instance, pod)) + labels: + quantile: '0.99' + record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile + - annotations: {} + expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) + without(instance, pod)) + labels: + quantile: '0.9' + record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile + - annotations: {} + expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) + without(instance, pod)) + labels: + quantile: '0.9' + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile + - annotations: {} + expr: histogram_quantile(0.9, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) + without(instance, pod)) + labels: + quantile: '0.9' + record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile + - annotations: {} + expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) + without(instance, pod)) + labels: + quantile: '0.5' + record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile + - annotations: {} + expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) + without(instance, pod)) + labels: + quantile: '0.5' + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile + - annotations: {} + expr: histogram_quantile(0.5, 
sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) + without(instance, pod)) + labels: + quantile: '0.5' + record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile diff --git a/packages/system/monitoring/alerts/kube-state-metrics.yaml b/packages/system/monitoring/alerts/kube-state-metrics.yaml new file mode 100644 index 00000000..3549cedf --- /dev/null +++ b/packages/system/monitoring/alerts/kube-state-metrics.yaml @@ -0,0 +1,73 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kube-state-metrics +spec: + groups: + - name: kube-state-metrics + params: {} + rules: + - alert: KubeStateMetricsListErrors + annotations: + description: kube-state-metrics is experiencing errors at an elevated rate + in list operations. This is likely causing it to not be able to expose metrics + about Kubernetes objects correctly or at all. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricslisterrors + summary: kube-state-metrics is experiencing errors in list operations. + expr: |- + (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) by (cluster) + / + sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])) by (cluster)) + > 0.01 + for: 15m + labels: + severity: critical + exported_instance: '{{ $labels.cluster }}/kube-state-metrics' + service: kube-state-metrics + - alert: KubeStateMetricsWatchErrors + annotations: + description: kube-state-metrics is experiencing errors at an elevated rate + in watch operations. This is likely causing it to not be able to expose + metrics about Kubernetes objects correctly or at all. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricswatcherrors + summary: kube-state-metrics is experiencing errors in watch operations. 
+ expr: |- + (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m])) by (cluster) + / + sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])) by (cluster)) + > 0.01 + for: 15m + labels: + severity: critical + exported_instance: '{{ $labels.cluster }}/kube-state-metrics' + service: kube-state-metrics + - alert: KubeStateMetricsShardingMismatch + annotations: + description: kube-state-metrics pods are running with different --total-shards + configuration, some Kubernetes objects may be exposed multiple times or + not exposed at all. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardingmismatch + summary: kube-state-metrics sharding is misconfigured. + expr: stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) by + (cluster) != 0 + for: 15m + labels: + severity: critical + exported_instance: '{{ $labels.cluster }}/kube-state-metrics' + service: kube-state-metrics + - alert: KubeStateMetricsShardsMissing + annotations: + description: kube-state-metrics shards are missing, some Kubernetes objects + are not being exposed. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing + summary: kube-state-metrics shards are missing. 
+ expr: |- + 2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) by (cluster) - 1 + - + sum( 2 ^ max by (shard_ordinal,cluster) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) ) by (cluster) + != 0 + for: 15m + labels: + severity: critical + exported_instance: '{{ $labels.cluster }}/kube-state-metrics' + service: kube-state-metrics diff --git a/packages/system/monitoring/alerts/kubelet.rules.yaml b/packages/system/monitoring/alerts/kubelet.rules.yaml new file mode 100644 index 00000000..e890e031 --- /dev/null +++ b/packages/system/monitoring/alerts/kubelet.rules.yaml @@ -0,0 +1,30 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kubelet.rules +spec: + groups: + - name: kubelet.rules + params: {} + rules: + - annotations: {} + expr: histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", + metrics_path="/metrics"}[5m])) by (instance,le,cluster) * on (instance,cluster) + group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) + labels: + quantile: '0.99' + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile + - annotations: {} + expr: histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", + metrics_path="/metrics"}[5m])) by (instance,le,cluster) * on (instance,cluster) + group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) + labels: + quantile: '0.9' + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile + - annotations: {} + expr: histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", + metrics_path="/metrics"}[5m])) by (instance,le,cluster) * on (instance,cluster) + group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) + labels: + quantile: '0.5' + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile diff --git 
a/packages/system/monitoring/alerts/kubernetes-apps.yaml b/packages/system/monitoring/alerts/kubernetes-apps.yaml new file mode 100644 index 00000000..dbd5c358 --- /dev/null +++ b/packages/system/monitoring/alerts/kubernetes-apps.yaml @@ -0,0 +1,304 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kubernetes-apps +spec: + groups: + - name: kubernetes-apps + params: {} + rules: + - alert: KubePodCrashLooping + annotations: + description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container + }}) is in waiting state (reason: "CrashLoopBackOff").' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping + summary: Pod is crash looping. + expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", + job="kube-state-metrics", namespace=~".*"}[5m]) >= 1 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.pod }}' + service: kubernetes-apps + - alert: KubePodNotReady + annotations: + description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready + state for longer than 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready + summary: Pod has been in a non-ready state for more than 15 minutes. 
+ expr: |- + sum by (namespace,pod,cluster) ( + max by (namespace,pod,cluster) ( + kube_pod_status_phase{job="kube-state-metrics", namespace=~".*", phase=~"Pending|Unknown|Failed"} + ) * on (namespace,pod,cluster) group_left(owner_kind) topk by (namespace,pod,cluster) ( + 1, max by (namespace,pod,owner_kind,cluster) (kube_pod_owner{owner_kind!="Job"}) + ) + ) > 0 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.pod }}' + service: kubernetes-apps + - alert: KubeDeploymentGenerationMismatch + annotations: + description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment + }} does not match, this indicates that the Deployment has failed but has + not been rolled back. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch + summary: Deployment generation mismatch due to possible roll-back + expr: |- + kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~".*"} + != + kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~".*"} + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.deployment }}' + service: kubernetes-apps + - alert: KubeDeploymentReplicasMismatch + annotations: + description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has + not matched the expected number of replicas for longer than 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch + summary: Deployment has not matched the expected number of replicas. 
+ expr: |- + ( + kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~".*"} + > + kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~".*"} + ) and ( + changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m]) + == + 0 + ) + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.deployment }}' + service: kubernetes-apps + - alert: KubeDeploymentRolloutStuck + annotations: + description: Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment + }} is not progressing for longer than 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck + summary: Deployment rollout is not progressing. + expr: |- + kube_deployment_status_condition{condition="Progressing", status="false",job="kube-state-metrics", namespace=~".*"} + != 0 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.deployment }}' + service: kubernetes-apps + - alert: KubeStatefulSetReplicasMismatch + annotations: + description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} + has not matched the expected number of replicas for longer than 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch + summary: StatefulSet has not matched the expected number of replicas. 
+ expr: |- + ( + kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~".*"} + != + kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~".*"} + ) and ( + changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m]) + == + 0 + ) + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.statefulset }}' + service: kubernetes-apps + - alert: KubeStatefulSetGenerationMismatch + annotations: + description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset + }} does not match, this indicates that the StatefulSet has failed but has + not been rolled back. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch + summary: StatefulSet generation mismatch due to possible roll-back + expr: |- + kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~".*"} + != + kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~".*"} + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.statefulset }}' + service: kubernetes-apps + - alert: KubeStatefulSetUpdateNotRolledOut + annotations: + description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} + update has not been rolled out. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout + summary: StatefulSet update has not been rolled out. 
+ expr: |- + ( + max by (namespace,statefulset,cluster) ( + kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~".*"} + unless + kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~".*"} + ) + * + ( + kube_statefulset_replicas{job="kube-state-metrics", namespace=~".*"} + != + kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"} + ) + ) and ( + changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[5m]) + == + 0 + ) + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.statefulset }}' + service: kubernetes-apps + - alert: KubeDaemonSetRolloutStuck + annotations: + description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has + not finished or progressed for at least 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck + summary: DaemonSet rollout is stuck. 
+ expr: |- + ( + ( + kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"} + != + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} + ) or ( + kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"} + != + 0 + ) or ( + kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"} + != + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} + ) or ( + kube_daemonset_status_number_available{job="kube-state-metrics", namespace=~".*"} + != + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} + ) + ) and ( + changes(kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"}[5m]) + == + 0 + ) + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.daemonset }}' + service: kubernetes-apps + - alert: KubeContainerWaiting + annotations: + description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on + container {{ $labels.container}} has been in waiting state for longer than + 1 hour. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting + summary: Pod container waiting longer than 1 hour + expr: sum by (namespace,pod,container,cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", + namespace=~".*"}) > 0 + for: 1h + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.pod }}/{{ $labels.container + }}' + service: kubernetes-apps + - alert: KubeDaemonSetNotScheduled + annotations: + description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset + }} are not scheduled.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled + summary: DaemonSet pods are not scheduled. 
+ expr: |- + kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"} + - + kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"} > 0 + for: 10m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.daemonset }}' + service: kubernetes-apps + - alert: KubeDaemonSetMisScheduled + annotations: + description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset + }} are running where they are not supposed to run.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled + summary: DaemonSet pods are misscheduled. + expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"} + > 0 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.daemonset }}' + service: kubernetes-apps + - alert: KubeJobNotCompleted + annotations: + description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking + more than {{ "43200" | humanizeDuration }} to complete. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted + summary: Job did not complete in time + expr: |- + time() - max by (namespace,job_name,cluster) (kube_job_status_start_time{job="kube-state-metrics", namespace=~".*"} + and + kube_job_status_active{job="kube-state-metrics", namespace=~".*"} > 0) > 43200 + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.job_name }}' + service: kubernetes-apps + - alert: KubeJobFailed + annotations: + description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to + complete. Removing failed job after investigation should clear this alert. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed + summary: Job failed to complete. 
+ expr: kube_job_failed{job="kube-state-metrics", namespace=~".*"} > 0 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.job_name }}' + service: kubernetes-apps + - alert: KubeHpaReplicasMismatch + annotations: + description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} + has not matched the desired number of replicas for longer than 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch + summary: HPA has not matched desired number of replicas. + expr: |- + (kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~".*"} + != + kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}) + and + (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"} + > + kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~".*"}) + and + (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"} + < + kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"}) + and + changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}[15m]) == 0 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler + }}' + service: kubernetes-apps + - alert: KubeHpaMaxedOut + annotations: + description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} + has been running at max replicas for longer than 15 minutes. 
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout + summary: HPA is running at max replicas + expr: |- + kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"} + == + kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"} + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler + }}' + service: kubernetes-apps diff --git a/packages/system/monitoring/alerts/kubernetes-resources.yaml b/packages/system/monitoring/alerts/kubernetes-resources.yaml new file mode 100644 index 00000000..48eabcd3 --- /dev/null +++ b/packages/system/monitoring/alerts/kubernetes-resources.yaml @@ -0,0 +1,138 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kubernetes-resources +spec: + groups: + - name: kubernetes-resources + params: {} + rules: + - alert: KubeCPUOvercommit + annotations: + description: Cluster {{ $labels.cluster }} has overcommitted CPU resource + requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit + summary: Cluster has overcommitted CPU resource requests. 
+ expr: |- + sum(namespace_cpu:kube_pod_container_resource_requests:sum{}) by (cluster) - (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0 + and + (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0 + for: 10m + labels: + severity: warning + exported_instance: '{{ $labels.cluster }}' + service: kubernetes-resources + - alert: KubeMemoryOvercommit + annotations: + description: Cluster {{ $labels.cluster }} has overcommitted memory resource + requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node + failure. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit + summary: Cluster has overcommitted memory resource requests. + expr: |- + sum(namespace_memory:kube_pod_container_resource_requests:sum{}) by (cluster) - (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0 + and + (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0 + for: 10m + labels: + severity: warning + exported_instance: '{{ $labels.cluster }}' + service: kubernetes-resources + - alert: KubeCPUQuotaOvercommit + annotations: + description: Cluster {{ $labels.cluster }} has overcommitted CPU resource + requests for Namespaces. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit + summary: Cluster has overcommitted CPU resource requests. 
+ expr: |- + sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(cpu|requests.cpu)"})) by (cluster) + / + sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"}) by (cluster) + > 1.5 + for: 5m + labels: + severity: warning + exported_instance: '{{ $labels.cluster }}' + service: kubernetes-resources + - alert: KubeMemoryQuotaOvercommit + annotations: + description: Cluster {{ $labels.cluster }} has overcommitted memory resource + requests for Namespaces. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit + summary: Cluster has overcommitted memory resource requests. + expr: |- + sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(memory|requests.memory)"})) by (cluster) + / + sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) + > 1.5 + for: 5m + labels: + severity: warning + exported_instance: '{{ $labels.cluster }}' + service: kubernetes-resources + - alert: KubeQuotaAlmostFull + annotations: + description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage + }} of its {{ $labels.resource }} quota. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull + summary: Namespace quota is going to be full. + expr: |- + kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + > 0.9 < 1 + for: 15m + labels: + severity: informational + exported_instance: '{{ $labels.namespace }}' + service: kubernetes-resources + - alert: KubeQuotaFullyUsed + annotations: + description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage + }} of its {{ $labels.resource }} quota. 
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused + summary: Namespace quota is fully used. + expr: |- + kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + == 1 + for: 15m + labels: + severity: informational + exported_instance: '{{ $labels.namespace }}' + service: kubernetes-resources + - alert: KubeQuotaExceeded + annotations: + description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage + }} of its {{ $labels.resource }} quota. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded + summary: Namespace quota has exceeded the limits. + expr: |- + kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + > 1 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}' + service: kubernetes-resources + - alert: CPUThrottlingHigh + annotations: + description: '{{ $value | humanizePercentage }} throttling of CPU in namespace + {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ + $labels.pod }}.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh + summary: Processes experience elevated CPU throttling. 
+ expr: |- + sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container,pod,namespace,cluster) + / + sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container,pod,namespace,cluster) + > ( 25 / 100 ) + for: 15m + labels: + severity: informational + exported_instance: '{{ $labels.namespace }}/{{ $labels.pod }}/{{ $labels.container + }}' + service: kubernetes-resources diff --git a/packages/system/monitoring/alerts/kubernetes-storage.yaml b/packages/system/monitoring/alerts/kubernetes-storage.yaml new file mode 100644 index 00000000..5fff4e9b --- /dev/null +++ b/packages/system/monitoring/alerts/kubernetes-storage.yaml @@ -0,0 +1,130 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kubernetes-storage +spec: + groups: + - name: kubernetes-storage + params: {} + rules: + - alert: KubePersistentVolumeFillingUp + annotations: + description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim + }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster + {{ . }} {{- end }} is only {{ $value | humanizePercentage }} free. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup + summary: PersistentVolume is filling up. 
+ expr: |- + ( + kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} + / + kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} + ) < 0.03 + and + kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 + unless on (namespace,persistentvolumeclaim,cluster) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on (namespace,persistentvolumeclaim,cluster) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: 1m + labels: + severity: critical + exported_instance: '{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim + }}' + service: kubernetes-storage + - alert: KubePersistentVolumeFillingUp + annotations: + description: Based on recent sampling, the PersistentVolume claimed by {{ + $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} {{ + with $labels.cluster -}} on Cluster {{ . }} {{- end }} is expected to fill + up within four days. Currently {{ $value | humanizePercentage }} is available. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup + summary: PersistentVolume is filling up. 
+ expr: |- + ( + kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} + / + kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} + ) < 0.15 + and + kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 + and + predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 + unless on (namespace,persistentvolumeclaim,cluster) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on (namespace,persistentvolumeclaim,cluster) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: 1h + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim + }}' + service: kubernetes-storage + - alert: KubePersistentVolumeInodesFillingUp + annotations: + description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim + }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster + {{ . }} {{- end }} only has {{ $value | humanizePercentage }} free inodes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup + summary: PersistentVolumeInodes are filling up. 
+ expr: |- + ( + kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"} + / + kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"} + ) < 0.03 + and + kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 + unless on (namespace,persistentvolumeclaim,cluster) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on (namespace,persistentvolumeclaim,cluster) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: 1m + labels: + severity: critical + exported_instance: '{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim + }}' + service: kubernetes-storage + - alert: KubePersistentVolumeInodesFillingUp + annotations: + description: Based on recent sampling, the PersistentVolume claimed by {{ + $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} {{ + with $labels.cluster -}} on Cluster {{ . }} {{- end }} is expected to run + out of inodes within four days. Currently {{ $value | humanizePercentage + }} of its inodes are free. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup + summary: PersistentVolumeInodes are filling up. 
+ expr: |- + ( + kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"} + / + kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"} + ) < 0.15 + and + kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0 + and + predict_linear(kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 + unless on (namespace,persistentvolumeclaim,cluster) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on (namespace,persistentvolumeclaim,cluster) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: 1h + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.persistentvolumeclaim + }}' + service: kubernetes-storage + - alert: KubePersistentVolumeErrors + annotations: + description: The persistent volume {{ $labels.persistentvolume }} {{ with + $labels.cluster -}} on Cluster {{ . }} {{- end }} has status {{ $labels.phase + }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors + summary: PersistentVolume is having issues with provisioning. 
+ expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} + > 0 + for: 5m + labels: + severity: critical + exported_instance: '{{ $labels.persistentvolume }}' + service: kubernetes-storage diff --git a/packages/system/monitoring/alerts/kubernetes-system-apiserver.yaml b/packages/system/monitoring/alerts/kubernetes-system-apiserver.yaml new file mode 100644 index 00000000..3acc123f --- /dev/null +++ b/packages/system/monitoring/alerts/kubernetes-system-apiserver.yaml @@ -0,0 +1,91 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kubernetes-system-apiserver +spec: + groups: + - name: kubernetes-system-apiserver + params: {} + rules: + - alert: KubeClientCertificateExpiration + annotations: + description: A client certificate used to authenticate to kubernetes apiserver + is expiring in less than 7.0 days. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration + summary: Client certificate is about to expire. + expr: apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} + > 0 and on (job,cluster) histogram_quantile(0.01, sum by (job,le,cluster) + (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) + < 604800 + for: 5m + labels: + severity: warning + exported_instance: '{{ $labels.namespace }}/{{ $labels.pod }}' + service: kubernetes-system-apiserver + - alert: KubeClientCertificateExpiration + annotations: + description: A client certificate used to authenticate to kubernetes apiserver + is expiring in less than 24.0 hours. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration + summary: Client certificate is about to expire. 
+ expr: apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} + > 0 and on (job,cluster) histogram_quantile(0.01, sum by (job,le,cluster) + (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) + < 86400 + for: 5m + labels: + severity: critical + exported_instance: '{{ $labels.namespace }}/{{ $labels.pod }}' + service: kubernetes-system-apiserver + - alert: KubeAggregatedAPIErrors + annotations: + description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace + }} has reported errors. It has appeared unavailable {{ $value | humanize + }} times averaged over the past 10m. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors + summary: Kubernetes aggregated API has reported errors. + expr: sum by (name,namespace,cluster)(increase(aggregator_unavailable_apiservice_total{job="kube-apiserver"}[10m])) + > 4 + labels: + severity: warning + exported_instance: '{{ $labels.name }}/{{ $labels.namespace }}' + service: kubernetes-system-apiserver + - alert: KubeAggregatedAPIDown + annotations: + description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace + }} has been only {{ $value | humanize }}% available over the last 10m. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown + summary: Kubernetes aggregated API is down. + expr: (1 - max by (name,namespace,cluster)(avg_over_time(aggregator_unavailable_apiservice{job="kube-apiserver"}[10m]))) + * 100 < 85 + for: 5m + labels: + severity: warning + exported_instance: '{{ $labels.name }}/{{ $labels.namespace }}' + service: kubernetes-system-apiserver + - alert: KubeAPIDown + annotations: + description: KubeAPI has disappeared from Prometheus target discovery. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapidown + summary: Target disappeared from Prometheus target discovery. 
+ expr: absent(up{job="kube-apiserver"} == 1) + for: 15m + labels: + severity: critical + exported_instance: '{{ $labels.cluster }}/apiserver' + service: kubernetes-system-apiserver + - alert: KubeAPITerminatedRequests + annotations: + description: The kubernetes apiserver has terminated {{ $value | humanizePercentage + }} of its incoming requests. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests + summary: The kubernetes apiserver has terminated {{ $value | humanizePercentage + }} of its incoming requests. + expr: sum by (cluster) (rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m])) / + ( sum by (cluster) (rate(apiserver_request_total{job="kube-apiserver"}[10m])) + sum by (cluster) (rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m])) + ) > 0.20 + for: 5m + labels: + severity: warning + exported_instance: '{{ $labels.cluster }}/apiserver' + service: kubernetes-system-apiserver diff --git a/packages/system/monitoring/alerts/kubernetes-system-controller-manager.yaml b/packages/system/monitoring/alerts/kubernetes-system-controller-manager.yaml new file mode 100644 index 00000000..9f05727c --- /dev/null +++ b/packages/system/monitoring/alerts/kubernetes-system-controller-manager.yaml @@ -0,0 +1,21 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kubernetes-system-controller-manager +spec: + groups: + - name: kubernetes-system-controller-manager + params: {} + rules: + - alert: KubeControllerManagerDown + annotations: + description: KubeControllerManager has disappeared from Prometheus target + discovery. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontrollermanagerdown + summary: Target disappeared from Prometheus target discovery.
+ expr: absent(up{job="kube-controller-manager"} == 1) + for: 15m + labels: + severity: critical + exported_instance: '{{ $labels.instance }}/controller-manager' + service: kubernetes-system-controller-manager diff --git a/packages/system/monitoring/alerts/kubernetes-system-kubelet.yaml b/packages/system/monitoring/alerts/kubernetes-system-kubelet.yaml new file mode 100644 index 00000000..831a9967 --- /dev/null +++ b/packages/system/monitoring/alerts/kubernetes-system-kubelet.yaml @@ -0,0 +1,175 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kubernetes-system-kubelet +spec: + groups: + - name: kubernetes-system-kubelet + params: {} + rules: + - alert: KubeNodeNotReady + annotations: + description: '{{ $labels.node }} has been unready for more than 15 minutes.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodenotready + summary: Node is not ready. + expr: kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} + == 0 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.node }}' + service: kubernetes-system-kubelet + - alert: KubeNodeUnreachable + annotations: + description: '{{ $labels.node }} is unreachable and some workloads may be + rescheduled.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodeunreachable + summary: Node is unreachable. 
+ expr: (kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} + unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) + == 1 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.node }}' + service: kubernetes-system-kubelet + - alert: KubeletTooManyPods + annotations: + description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage + }} of its Pod capacity. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods + summary: Kubelet is running at capacity. + expr: |- + count by (node,cluster) ( + (kube_pod_status_phase{job="kube-state-metrics",phase="Running"} == 1) * on (instance,pod,namespace,cluster) group_left(node) topk by (instance,pod,namespace,cluster) (1, kube_pod_info{job="kube-state-metrics"}) + ) + / + max by (node,cluster) ( + kube_node_status_capacity{job="kube-state-metrics",resource="pods"} != 1 + ) > 0.95 + for: 15m + labels: + severity: informational + exported_instance: '{{ $labels.node }}' + service: kubernetes-system-kubelet + - alert: KubeNodeReadinessFlapping + annotations: + description: The readiness status of node {{ $labels.node }} has changed {{ + $value }} times in the last 15 minutes. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping + summary: Node readiness status is flapping. 
+ expr: sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m])) + by (node,cluster) > 2 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.node }}' + service: kubernetes-system-kubelet + - alert: KubeletPlegDurationHigh + annotations: + description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile + duration of {{ $value }} seconds on node {{ $labels.node }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletplegdurationhigh + summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist. + expr: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} + >= 10 + for: 5m + labels: + severity: warning + exported_instance: '{{ $labels.node }}' + service: kubernetes-system-kubelet + - alert: KubeletPodStartUpLatencyHigh + annotations: + description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds + on node {{ $labels.node }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh + summary: Kubelet Pod startup latency is too high. + expr: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", + metrics_path="/metrics"}[5m])) by (instance,le,cluster)) * on (instance,cluster) + group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} + > 60 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.node }}' + service: kubernetes-system-kubelet + - alert: KubeletClientCertificateExpiration + annotations: + description: Client certificate for Kubelet on node {{ $labels.node }} expires + in {{ $value | humanizeDuration }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration + summary: Kubelet client certificate is about to expire. 
+ expr: kubelet_certificate_manager_client_ttl_seconds < 604800 + labels: + severity: warning + exported_instance: '{{ $labels.node }}' + service: kubernetes-system-kubelet + - alert: KubeletClientCertificateExpiration + annotations: + description: Client certificate for Kubelet on node {{ $labels.node }} expires + in {{ $value | humanizeDuration }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration + summary: Kubelet client certificate is about to expire. + expr: kubelet_certificate_manager_client_ttl_seconds < 86400 + labels: + severity: critical + exported_instance: '{{ $labels.node }}' + service: kubernetes-system-kubelet + - alert: KubeletServerCertificateExpiration + annotations: + description: Server certificate for Kubelet on node {{ $labels.node }} expires + in {{ $value | humanizeDuration }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration + summary: Kubelet server certificate is about to expire. + expr: kubelet_certificate_manager_server_ttl_seconds < 604800 + labels: + severity: warning + exported_instance: '{{ $labels.node }}' + service: kubernetes-system-kubelet + - alert: KubeletServerCertificateExpiration + annotations: + description: Server certificate for Kubelet on node {{ $labels.node }} expires + in {{ $value | humanizeDuration }}. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration + summary: Kubelet server certificate is about to expire. + expr: kubelet_certificate_manager_server_ttl_seconds < 86400 + labels: + severity: critical + exported_instance: '{{ $labels.node }}' + service: kubernetes-system-kubelet + - alert: KubeletClientCertificateRenewalErrors + annotations: + description: Kubelet on node {{ $labels.node }} has failed to renew its client + certificate ({{ $value | humanize }} errors in the last 5 minutes). 
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificaterenewalerrors + summary: Kubelet has failed to renew its client certificate. + expr: increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) + > 0 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.node }}' + service: kubernetes-system-kubelet + - alert: KubeletServerCertificateRenewalErrors + annotations: + description: Kubelet on node {{ $labels.node }} has failed to renew its server + certificate ({{ $value | humanize }} errors in the last 5 minutes). + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificaterenewalerrors + summary: Kubelet has failed to renew its server certificate. + expr: increase(kubelet_server_expiration_renew_errors[5m]) > 0 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.node }}' + service: kubernetes-system-kubelet + - alert: KubeletDown + annotations: + description: Kubelet has disappeared from Prometheus target discovery. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletdown + summary: Target disappeared from Prometheus target discovery. 
+ expr: absent(up{job="kubelet", metrics_path="/metrics"} == 1) + for: 15m + labels: + severity: critical + exported_instance: '{{ $labels.node }}' + service: kubernetes-system-kubelet diff --git a/packages/system/monitoring/alerts/kubernetes-system-scheduler.yaml b/packages/system/monitoring/alerts/kubernetes-system-scheduler.yaml new file mode 100644 index 00000000..6b6a425c --- /dev/null +++ b/packages/system/monitoring/alerts/kubernetes-system-scheduler.yaml @@ -0,0 +1,20 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kubernetes-system-scheduler +spec: + groups: + - name: kubernetes-system-scheduler + params: {} + rules: + - alert: KubeSchedulerDown + annotations: + description: KubeScheduler has disappeared from Prometheus target discovery. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeschedulerdown + summary: Target disappeared from Prometheus target discovery. + expr: absent(up{job="kube-scheduler"} == 1) + for: 15m + labels: + severity: critical + exported_instance: '{{ $labels.scheduler }}' + service: kubernetes-system-scheduler diff --git a/packages/system/monitoring/alerts/kubernetes-system.yaml b/packages/system/monitoring/alerts/kubernetes-system.yaml new file mode 100644 index 00000000..9c69213e --- /dev/null +++ b/packages/system/monitoring/alerts/kubernetes-system.yaml @@ -0,0 +1,37 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-kubernetes-system +spec: + groups: + - name: kubernetes-system + params: {} + rules: + - alert: KubeVersionMismatch + annotations: + description: There are {{ $value }} different semantic versions of Kubernetes + components running. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch + summary: Different semantic versions of Kubernetes components running. 
+ expr: count by (cluster) (count by (git_version,cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) + > 1 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.cluster }}' + service: kubernetes-system + - alert: KubeClientErrors + annotations: + description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance + }}' is experiencing {{ $value | humanizePercentage }} errors. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors + summary: Kubernetes API server client is experiencing errors. + expr: |- + (sum(rate(rest_client_requests_total{job="kube-apiserver",code=~"5.."}[5m])) by (instance,job,namespace,cluster) + / + sum(rate(rest_client_requests_total{job="kube-apiserver"}[5m])) by (instance,job,namespace,cluster)) + > 0.01 + for: 15m + labels: + severity: warning + service: kubernetes-system diff --git a/packages/system/monitoring/alerts/node-exporter.rules.yaml b/packages/system/monitoring/alerts/node-exporter.rules.yaml new file mode 100644 index 00000000..9667e134 --- /dev/null +++ b/packages/system/monitoring/alerts/node-exporter.rules.yaml @@ -0,0 +1,93 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-node-exporter.rules +spec: + groups: + - name: node-exporter.rules + params: {} + rules: + - annotations: {} + expr: |- + count without (cpu, mode) ( + node_cpu_seconds_total{job="node-exporter",mode="idle"} + ) + labels: {} + record: instance:node_num_cpu:sum + - annotations: {} + expr: |- + 1 - avg without (cpu) ( + sum without (mode) (rate(node_cpu_seconds_total{job="node-exporter", mode=~"idle|iowait|steal"}[5m])) + ) + labels: {} + record: instance:node_cpu_utilisation:rate5m + - annotations: {} + expr: |- + ( + node_load1{job="node-exporter"} + / + instance:node_num_cpu:sum{job="node-exporter"} + ) + labels: {} + record: instance:node_load1_per_cpu:ratio + - 
annotations: {} + expr: |- + 1 - ( + ( + node_memory_MemAvailable_bytes{job="node-exporter"} + or + ( + node_memory_Buffers_bytes{job="node-exporter"} + + + node_memory_Cached_bytes{job="node-exporter"} + + + node_memory_MemFree_bytes{job="node-exporter"} + + + node_memory_Slab_bytes{job="node-exporter"} + ) + ) + / + node_memory_MemTotal_bytes{job="node-exporter"} + ) + labels: {} + record: instance:node_memory_utilisation:ratio + - annotations: {} + expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) + labels: {} + record: instance:node_vmstat_pgmajfault:rate5m + - annotations: {} + expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) + labels: {} + record: instance_device:node_disk_io_time_seconds:rate5m + - annotations: {} + expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) + labels: {} + record: instance_device:node_disk_io_time_weighted_seconds:rate5m + - annotations: {} + expr: |- + sum without (device) ( + rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m]) + ) + labels: {} + record: instance:node_network_receive_bytes_excluding_lo:rate5m + - annotations: {} + expr: |- + sum without (device) ( + rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m]) + ) + labels: {} + record: instance:node_network_transmit_bytes_excluding_lo:rate5m + - annotations: {} + expr: |- + sum without (device) ( + rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m]) + ) + labels: {} + record: instance:node_network_receive_drop_excluding_lo:rate5m + - annotations: {} + expr: |- + sum without (device) ( + rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m]) + ) + labels: {} + record: instance:node_network_transmit_drop_excluding_lo:rate5m diff --git 
a/packages/system/monitoring/alerts/node-exporter.yaml b/packages/system/monitoring/alerts/node-exporter.yaml new file mode 100644 index 00000000..4c7fcc7d --- /dev/null +++ b/packages/system/monitoring/alerts/node-exporter.yaml @@ -0,0 +1,396 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-node-exporter +spec: + groups: + - name: node-exporter + params: {} + rules: + - alert: NodeFilesystemSpaceFillingUp + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint + }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + space left and is filling up. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup + summary: Filesystem is predicted to run out of space within the next 24 hours. + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15 + and + predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: warning + exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + service: node-exporter + - alert: NodeFilesystemSpaceFillingUp + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint + }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + space left and is filling up fast. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup + summary: Filesystem is predicted to run out of space within the next 4 hours. 
+ expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10 + and + predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: critical + exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + service: node-exporter + - alert: NodeFilesystemAlmostOutOfSpace + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint + }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + space left. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace + summary: Filesystem has less than 5% space left. + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 30m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + service: node-exporter + - alert: NodeFilesystemAlmostOutOfSpace + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint + }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + space left. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace + summary: Filesystem has less than 3% space left. 
+ expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 30m + labels: + severity: critical + exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + service: node-exporter + - alert: NodeFilesystemFilesFillingUp + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint + }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + inodes left and is filling up. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup + summary: Filesystem is predicted to run out of inodes within the next 24 hours. + expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40 + and + predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: warning + exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + service: node-exporter + - alert: NodeFilesystemFilesFillingUp + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint + }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + inodes left and is filling up fast. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup + summary: Filesystem is predicted to run out of inodes within the next 4 hours. 
+ expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20 + and + predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: critical + exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + service: node-exporter + - alert: NodeFilesystemAlmostOutOfFiles + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint + }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + inodes left. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles + summary: Filesystem has less than 5% inodes left. + expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: warning + exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + service: node-exporter + - alert: NodeFilesystemAlmostOutOfFiles + annotations: + description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint + }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + inodes left. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles + summary: Filesystem has less than 3% inodes left. 
+ expr: |- + ( + node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 + and + node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 + ) + for: 1h + labels: + severity: critical + exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + service: node-exporter + - alert: NodeNetworkReceiveErrs + annotations: + description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered + {{ printf "%.0f" $value }} receive errors in the last two minutes.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs + summary: Network interface is reporting many receive errors. + expr: rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m]) + > 0.01 + for: 1h + labels: + severity: warning + exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + service: node-exporter + - alert: NodeNetworkTransmitErrs + annotations: + description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered + {{ printf "%.0f" $value }} transmit errors in the last two minutes.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs + summary: Network interface is reporting many transmit errors. + expr: rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m]) + > 0.01 + for: 1h + labels: + severity: warning + exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + service: node-exporter + - alert: NodeHighNumberConntrackEntriesUsed + annotations: + description: '{{ $value | humanizePercentage }} of conntrack entries are used.' + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused + summary: Number of conntrack entries is getting close to the limit.
+ expr: (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit) + > 0.75 + labels: + severity: warning + exported_instance: '{{ $labels.instance }}' + service: node-exporter + - alert: NodeTextFileCollectorScrapeError + annotations: + description: Node Exporter text file collector on {{ $labels.instance }} failed + to scrape. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror + summary: Node Exporter text file collector failed to scrape. + expr: node_textfile_scrape_error{job="node-exporter"} == 1 + labels: + severity: warning + exported_instance: '{{ $labels.instance }}' + service: node-exporter + - alert: NodeClockSkewDetected + annotations: + description: Clock at {{ $labels.instance }} is out of sync by more than 0.05s. + Ensure NTP is configured correctly on this host. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected + summary: Clock skew detected. + expr: |- + ( + node_timex_offset_seconds{job="node-exporter"} > 0.05 + and + deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0 + ) + or + ( + node_timex_offset_seconds{job="node-exporter"} < -0.05 + and + deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0 + ) + for: 10m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}' + service: node-exporter + - alert: NodeClockNotSynchronising + annotations: + description: Clock at {{ $labels.instance }} is not synchronising. Ensure + NTP is configured on this host. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising + summary: Clock not synchronising. 
+ expr: |- + min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0 + and + node_timex_maxerror_seconds{job="node-exporter"} >= 16 + for: 10m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}' + service: node-exporter + - alert: NodeRAIDDegraded + annotations: + description: RAID array '{{ $labels.device }}' at {{ $labels.instance }} is + in degraded state due to one or more disk failures. Number of spare drives + is insufficient to fix issue automatically. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded + summary: RAID Array is degraded. + expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} + - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) + > 0 + for: 15m + labels: + severity: critical + exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + service: node-exporter + - alert: NodeRAIDDiskFailure + annotations: + description: At least one device in RAID array at {{ $labels.instance }} failed. + Array '{{ $labels.device }}' needs attention and possibly a disk swap. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure + summary: Failed device in RAID array. + expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} + > 0 + labels: + severity: warning + exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + service: node-exporter + - alert: NodeFileDescriptorLimit + annotations: + description: File descriptors limit at {{ $labels.instance }} is currently + at {{ printf "%.2f" $value }}%. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit + summary: Kernel is predicted to exhaust file descriptors limit soon.
+ expr: |- + ( + node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70 + ) + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}' + service: node-exporter + - alert: NodeFileDescriptorLimit + annotations: + description: File descriptors limit at {{ $labels.instance }} is currently + at {{ printf "%.2f" $value }}%. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit + summary: Kernel is predicted to exhaust file descriptors limit soon. + expr: |- + ( + node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90 + ) + for: 15m + labels: + severity: critical + exported_instance: '{{ $labels.instance }}' + service: node-exporter + - alert: NodeCPUHighUsage + annotations: + description: | + CPU usage at {{ $labels.instance }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage + summary: High CPU usage. + expr: sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter", + mode!="idle"}[2m]))) * 100 > 90 + for: 15m + labels: + severity: informational + exported_instance: '{{ $labels.instance }}' + service: node-exporter + - alert: NodeSystemSaturation + annotations: + description: | + System load per core at {{ $labels.instance }} has been above 2 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. + This might indicate this instance resources saturation and can cause it becoming unresponsive. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemsaturation + summary: System saturated, load per core is very high. 
+ expr: |- + node_load1{job="node-exporter"} + / count without (cpu, mode) (node_cpu_seconds_total{job="node-exporter", mode="idle"}) > 2 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}' + service: node-exporter + - alert: NodeMemoryMajorPagesFaults + annotations: + description: | + Memory major pages are occurring at very high rate at {{ $labels.instance }}, 500 major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. + Please check that there is enough memory available at this instance. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememorymajorpagesfaults + summary: Memory major page faults are occurring at very high rate. + expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) > 500 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}' + service: node-exporter + - alert: NodeMemoryHighUtilization + annotations: + description: | + Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememoryhighutilization + summary: Host is running out of memory. + expr: 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} + * 100) > 90 + for: 15m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}' + service: node-exporter + - alert: NodeDiskIOSaturation + annotations: + description: | + Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}. + This symptom might indicate disk saturation. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodediskiosaturation + summary: Disk IO queue is high. 
+ expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) + > 10 + for: 30m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + service: node-exporter + - alert: NodeSystemdServiceFailed + annotations: + description: Systemd service {{ $labels.name }} has entered failed state at + {{ $labels.instance }} + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicefailed + summary: Systemd service has entered failed state. + expr: node_systemd_unit_state{job="node-exporter", state="failed"} == 1 + for: 5m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}/{{ $labels.name }}' + service: node-exporter + - alert: NodeBondingDegraded + annotations: + description: Bonding interface {{ $labels.master }} on {{ $labels.instance + }} is in degraded state due to one or more slave failures. + runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded + summary: Bonding interface is degraded + expr: (node_bonding_slaves - node_bonding_active) != 0 + for: 5m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}/{{ $labels.master }}' + service: node-exporter diff --git a/packages/system/monitoring/alerts/node-network.yaml b/packages/system/monitoring/alerts/node-network.yaml new file mode 100644 index 00000000..956bb686 --- /dev/null +++ b/packages/system/monitoring/alerts/node-network.yaml @@ -0,0 +1,21 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-node-network +spec: + groups: + - name: node-network + params: {} + rules: + - alert: NodeNetworkInterfaceFlapping + annotations: + description: Network interface "{{ $labels.device }}" changing its up status + often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }} + runbook_url: 
https://runbooks.prometheus-operator.dev/runbooks/general/nodenetworkinterfaceflapping + summary: Network interface is often changing its status + expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 + for: 2m + labels: + severity: warning + exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + service: node-network diff --git a/packages/system/monitoring/alerts/node.rules.yaml b/packages/system/monitoring/alerts/node.rules.yaml new file mode 100644 index 00000000..a64d78a1 --- /dev/null +++ b/packages/system/monitoring/alerts/node.rules.yaml @@ -0,0 +1,55 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: alerts-node.rules +spec: + groups: + - name: node.rules + params: {} + rules: + - annotations: {} + expr: |- + topk by (namespace,pod,cluster) (1, + max by (node,namespace,pod,cluster) ( + label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)") + )) + labels: {} + record: 'node_namespace_pod:kube_pod_info:' + - annotations: {} + expr: |- + count by (node,cluster) ( + node_cpu_seconds_total{mode="idle",job="node-exporter"} + * on (namespace,pod,cluster) group_left(node) + topk by (namespace,pod,cluster) (1, node_namespace_pod:kube_pod_info:) + ) + labels: {} + record: node:node_num_cpu:sum + - annotations: {} + expr: |- + sum( + node_memory_MemAvailable_bytes{job="node-exporter"} or + ( + node_memory_Buffers_bytes{job="node-exporter"} + + node_memory_Cached_bytes{job="node-exporter"} + + node_memory_MemFree_bytes{job="node-exporter"} + + node_memory_Slab_bytes{job="node-exporter"} + ) + ) by (cluster) + labels: {} + record: :node_memory_MemAvailable_bytes:sum + - annotations: {} + expr: |- + avg by (node,cluster) ( + sum without (mode) ( + rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",job="node-exporter"}[5m]) + ) + ) + labels: {} + record: node:node_cpu_utilization:ratio_rate5m + - annotations: {} + expr: |- + avg by (cluster) ( + 
node:node_cpu_utilization:ratio_rate5m + ) + labels: {} + record: cluster:node_cpu:ratio_rate5m diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/.helmignore b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/.helmignore deleted file mode 100644 index 8148a4ff..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/.helmignore +++ /dev/null @@ -1,26 +0,0 @@ -# Patterns to ignore when building packages. -# This supports shell glob matching, relative path matching, and -# negation (prefixed with !). Only one pattern per line. -.DS_Store -# Common VCS dirs -.git/ -.gitignore -.bzr/ -.bzrignore -.hg/ -.hgignore -.svn/ -# Common backup files -*.swp -*.bak -*.tmp -*.orig -*~ -# Various IDEs -.project -.idea/ -*.tmproj -.vscode/ - -# Ignore img folder used for documentation -img/ diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/CHANGELOG.md b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/CHANGELOG.md deleted file mode 100644 index d044b526..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/CHANGELOG.md +++ /dev/null @@ -1,688 +0,0 @@ -## Next release - -- TODO - -## 0.25.17 - -**Release date:** 2024-09-20 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Added VMAuth to k8s stack. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/829) -- Fixed ETCD dashboard -- Use path prefix from args as a default path prefix for ingress. Related [issue](https://github.com/VictoriaMetrics/helm-charts/issues/1260) -- Allow using vmalert without notifiers configuration. Note that it is required to use `.vmalert.spec.extraArgs["notifiers.blackhole"]: true` in order to start vmalert with a blackhole configuration. 
- -## 0.25.16 - -**Release date:** 2024-09-10 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Do not truncate servicemonitor, datasources, rules, dashboard, alertmanager & vmalert templates names -- Use service label for node-exporter instead of podLabel. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1458) -- Added common chart to a k8s-stack. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1456) -- Fixed value of custom alertmanager configSecret. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1461) - -## 0.25.15 - -**Release date:** 2024-09-05 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Drop empty endpoints param from scrape configuration -- Fixed proto when TLS is enabled. 
See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1449) - -## 0.25.14 - -**Release date:** 2024-09-04 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- fixed alertmanager templates - -## 0.25.13 - -**Release date:** 2024-09-04 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Use operator's own service monitor - -## 0.25.12 - -**Release date:** 2024-09-03 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Fixed dashboards rendering. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1414) -- Fixed service monitor label name. - -## 0.25.11 - -**Release date:** 2024-09-03 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Merged ingress templates -- Removed custom VMServiceScrape for operator -- Added ability to override default Prometheus-compatible datatasources with all available parameters. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/860). -- Do not use `grafana.dashboards` and `grafana.dashboardProviders`. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1312). -- Migrated Node Exporter dashboard into chart -- Deprecated `grafana.sidecar.jsonData`, `grafana.provisionDefaultDatasource` in a favour of `grafana.sidecar.datasources.default` slice of datasources. 
-- Fail if no notifiers are set, do not set `notifiers` to null if empty - -## 0.25.10 - -**Release date:** 2024-08-31 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- fixed ingress extraPaths and externalVM urls rendering - -## 0.25.9 - -**Release date:** 2024-08-31 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- fixed vmalert ingress name typo -- Added ability to override default Prometheus-compatible datatasources with all available parameters. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/860). -- Do not use `grafana.dashboards` and `grafana.dashboardProviders`. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1312). - -## 0.25.8 - -**Release date:** 2024-08-30 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- fixed external notifiers rendering, when alertmanager is disabled. 
See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1378) - -## 0.25.7 - -**Release date:** 2024-08-30 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- fixed extra rules template context - -## 0.25.6 - -**Release date:** 2024-08-29 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -**Update note**: Update `kubeProxy.spec` to `kubeProxy.vmScrape.spec` - -**Update note**: Update `kubeScheduler.spec` to `kubeScheduler.vmScrape.spec` - -**Update note**: Update `kubeEtcd.spec` to `kubeEtcd.vmScrape.spec` - -**Update note**: Update `coreDns.spec` to `coreDns.vmScrape.spec` - -**Update note**: Update `kubeDns.spec` to `kubeDns.vmScrape.spec` - -**Update note**: Update `kubeProxy.spec` to `kubeProxy.vmScrape.spec` - -**Update note**: Update `kubeControllerManager.spec` to `kubeControllerManager.vmScrape.spec` - -**Update note**: Update `kubeApiServer.spec` to `kubeApiServer.vmScrape.spec` - -**Update note**: Update `kubelet.spec` to `kubelet.vmScrape.spec` - -**Update note**: Update `kube-state-metrics.spec` to `kube-state-metrics.vmScrape.spec` - -**Update note**: Update `prometheus-node-exporter.spec` to `prometheus-node-exporter.vmScrape.spec` - -**Update note**: Update `grafana.spec` to `grafana.vmScrape.spec` - -- bump version of VM components to [v1.103.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.103.0) -- Added `dashboards.` bool flag to enable dashboard even if component it is for is not installed. 
-- Allow extra `vmalert.notifiers` without dropping default notifier if `alertmanager.enabled: true` -- Do not drop default notifier, when vmalert.additionalNotifierConfigs is set -- Replaced static url proto with a template, which selects proto depending on a present tls configuration -- Moved kubernetes components monitoring config from `spec` config to `vmScrape.spec` -- Merged servicemonitor templates - -## 0.25.5 - -**Release date:** 2024-08-26 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- TODO - -## 0.25.4 - -**Release date:** 2024-08-26 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- updates operator to [v0.47.2](https://github.com/VictoriaMetrics/operator/releases/tag/v0.47.2) -- kube-state-metrics - 5.16.4 -> 5.25.1 -- prometheus-node-exporter - 4.27.0 -> 4.29.0 -- grafana - 8.3.8 -> 8.4.7 -- added configurable `.Values.global.clusterLabel` to all alerting and recording rules `by` and `on` expressions - -## 0.25.3 - -**Release date:** 2024-08-23 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- updated operator to v0.47.1 release -- Build `app.kubernetes.io/instance` label consistently. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1282) - -## 0.25.2 - -**Release date:** 2024-08-21 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- fixed vmalert ingress name. 
See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1271) -- fixed alertmanager ingress host template rendering. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1270) - -## 0.25.1 - -**Release date:** 2024-08-21 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Added `.Values.global.license` configuration -- Fixed extraLabels rendering. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1248) -- Fixed vmalert url to alertmanager by including its path prefix -- Removed `networking.k8s.io/v1beta1/Ingress` and `extensions/v1beta1/Ingress` support -- Fixed kubedns servicemonitor template. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1255) - -## 0.25.0 - -**Release date:** 2024-08-16 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -**Update note**: it requires to update CRD dependency manually before upgrade - -**Update note**: requires Helm 3.14+ - -- Moved dashboards templating logic out of sync script to Helm template -- Allow to disable default grafana datasource -- Synchronize Etcd dashboards and rules with mixin provided by Etcd -- Add alerting rules for VictoriaMetrics operator. -- Updated alerting rules for VictoriaMetrics components. -- Fixed exact rule annotations propagation to other rules. 
-- Set minimal kubernetes version to 1.25 -- updates operator to v0.47.0 version - -## 0.24.5 - -**Release date:** 2024-08-01 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- bump version of VM components to [v1.102.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.102.1) - -## 0.24.4 - -**Release date:** 2024-08-01 - -![AppVersion: v1.102.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Update dependencies: grafana -> 8.3.6. -- Added `.Values.defaultRules.alerting` and `.Values.defaultRules.recording` to setup common properties for all alerting and recording rules - -## 0.24.3 - -**Release date:** 2024-07-23 - -![AppVersion: v1.102.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- bump version of VM components to [v1.102.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.102.0) - -## 0.24.2 - -**Release date:** 2024-07-15 - -![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- fix vmalertmanager configuration when using `.VMAlertmanagerSpec.ConfigRawYaml`. See [this pull request](https://github.com/VictoriaMetrics/helm-charts/pull/1136).
- -## 0.24.1 - -**Release date:** 2024-07-10 - -![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- updates operator to v0.46.4 - -## 0.24.0 - -**Release date:** 2024-07-10 - -![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- added ability to override alerting rules labels and annotations: -- globally - `.Values.defaultRules.rule.spec.labels` (before it was `.Values.defaultRules.additionalRuleLabels`) and `.Values.defaultRules.rule.spec.annotations` -- for all rules in a group - `.Values.defaultRules.groups..rules.spec.labels` and `.Values.defaultRules.groups..rules.spec.annotations` -- for each rule individually - `.Values.defaultRules.rules..spec.labels` and `.Values.defaultRules.rules..spec.annotations` -- changed `.Values.defaultRules.rules.` to `.Values.defaultRules.groups..create` -- changed `.Values.defaultRules.appNamespacesTarget` to `.Values.defaultRules.groups..targetNamespace` -- changed `.Values.defaultRules.params` to `.Values.defaultRules.group.spec.params` with ability to override it at `.Values.defaultRules.groups..spec.params` - -## 0.23.6 - -**Release date:** 2024-07-08 - -![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- added ability to override alerting rules labels and annotations: -- globally - `.Values.defaultRules.rule.spec.labels` (before it was `.Values.defaultRules.additionalRuleLabels`) and `.Values.defaultRules.rule.spec.annotations` -- for all rules in a group - `.Values.defaultRules.groups..rules.spec.labels` and
`.Values.defaultRules.groups..rules.spec.annotations` -- for each rule individually - `.Values.defaultRules.rules..spec.labels` and `.Values.defaultRules.rules..spec.annotations` -- changed `.Values.defaultRules.rules.` to `.Values.defaultRules.groups..create` -- changed `.Values.defaultRules.appNamespacesTarget` to `.Values.defaultRules.groups..targetNamespace` -- changed `.Values.defaultRules.params` to `.Values.defaultRules.group.spec.params` with ability to override it at `.Values.defaultRules.groups..spec.params` - -## 0.23.5 - -**Release date:** 2024-07-04 - -![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Support configuring vmalert `-notifier.config` with `.Values.vmalert.additionalNotifierConfigs`. - -## 0.23.4 - -**Release date:** 2024-07-02 - -![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Add `extraObjects` to allow deploying additional resources with the chart release. - -## 0.23.3 - -**Release date:** 2024-06-26 - -![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Enable [conversion of Prometheus CRDs](https://docs.victoriametrics.com/operator/migration/#objects-conversion) by default. See [this](https://github.com/VictoriaMetrics/helm-charts/pull/1069) pull request for details.
-- use bitnami/kubectl image for cleanup instead of deprecated gcr.io/google_containers/hyperkube - -## 0.23.2 - -**Release date:** 2024-06-14 - -![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Do not add `cluster` external label at VMAgent by default. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/774) for the details. - -## 0.23.1 - -**Release date:** 2024-06-10 - -![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- updates operator to v0.45.0 release -- sync latest vm alerts and dashboards. - -## 0.23.0 - -**Release date:** 2024-05-30 - -![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- sync latest etcd v3.5.x rules from [upstream](https://github.com/etcd-io/etcd/blob/release-3.5/contrib/mixin/mixin.libsonnet). -- add Prometheus operator CRDs as an optional dependency. See [this PR](https://github.com/VictoriaMetrics/helm-charts/pull/1022) and [related issue](https://github.com/VictoriaMetrics/helm-charts/issues/341) for the details. - -## 0.22.1 - -**Release date:** 2024-05-14 - -![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- fix missing serviceaccounts patch permission in VM operator, see [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/1012) for details. 
- -## 0.22.0 - -**Release date:** 2024-05-10 - -![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- bump version of VM operator to [0.44.0](https://github.com/VictoriaMetrics/operator/releases/tag/v0.44.0) - -## 0.21.3 - -**Release date:** 2024-04-26 - -![AppVersion: v1.101.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.101.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- bump version of VM components to [v1.101.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.101.0) - -## 0.21.2 - -**Release date:** 2024-04-23 - -![AppVersion: v1.100.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.100.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- bump version of VM operator to [0.43.3](https://github.com/VictoriaMetrics/operator/releases/tag/v0.43.3) - -## 0.21.1 - -**Release date:** 2024-04-18 - -![AppVersion: v1.100.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.100.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -## 0.21.0 - -**Release date:** 2024-04-18 - -![AppVersion: v1.100.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.100.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- TODO - -- bump version of VM operator to [0.43.0](https://github.com/VictoriaMetrics/operator/releases/tag/v0.43.0) -- updates CRDs definitions. 
- -## 0.20.1 - -**Release date:** 2024-04-16 - -![AppVersion: v1.100.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.100.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- upgraded dashboards and alerting rules, added values file for local (Minikube) setup -- bump version of VM components to [v1.100.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.100.1) - -## 0.20.0 - -**Release date:** 2024-04-02 - -![AppVersion: v1.99.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.99.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- bump version of VM operator to [0.42.3](https://github.com/VictoriaMetrics/operator/releases/tag/v0.42.3) - -## 0.19.4 - -**Release date:** 2024-03-05 - -![AppVersion: v1.99.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.99.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- bump version of VM components to [v1.99.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.99.0) - -## 0.19.3 - -**Release date:** 2024-03-05 - -![AppVersion: v1.98.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.98.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Commented default configuration for alertmanager. It simplifies configuration and makes it more explicit. See this [issue](https://github.com/VictoriaMetrics/helm-charts/issues/473) for details. -- Allow enabling/disabling default k8s rules when installing. See [#904](https://github.com/VictoriaMetrics/helm-charts/pull/904) by @passie. 
- -## 0.19.2 - -**Release date:** 2024-02-26 - -![AppVersion: v1.98.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.98.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Fix templating of VMAgent `remoteWrite` in case both `VMSingle` and `VMCluster` are disabled. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/865) for details. - -## 0.19.1 - -**Release date:** 2024-02-21 - -![AppVersion: v1.98.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.98.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Update dependencies: victoria-metrics-operator -> 0.28.1, grafana -> 7.3.1. -- Update victoriametrics CRD resources yaml. - -## 0.19.0 - -**Release date:** 2024-02-09 - -![AppVersion: v1.97.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.97.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Do not store original labels in `vmagent`'s memory by default. This reduces memory usage of `vmagent` but makes `vmagent`'s debugging UI less informative. See [this docs](https://docs.victoriametrics.com/vmagent/#relabel-debug) for details on relabeling debug. -- Update dependencies: kube-state-metrics -> 5.16.0, prometheus-node-exporter -> 4.27.0, grafana -> 7.3.0. -- Update victoriametrics CRD resources yaml. -- Update builtin dashboards and rules. 
- -## 0.18.12 - -**Release date:** 2024-02-01 - -![AppVersion: v1.97.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.97.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- bump version of VM components to [v1.97.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.97.1) -- Fix helm lint when ingress resources enabled - split templates of resources per kind. See [#820](https://github.com/VictoriaMetrics/helm-charts/pull/820) by @MemberIT. - -## 0.18.11 - -**Release date:** 2023-12-15 - -![AppVersion: v1.96.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.96.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Fix missing `.Values.defaultRules.rules.vmcluster` value. See [#801](https://github.com/VictoriaMetrics/helm-charts/pull/801) by @MemberIT. - -## 0.18.10 - -**Release date:** 2023-12-12 - -![AppVersion: v1.96.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.96.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- bump version of VM components to [v1.96.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.96.0) -- Add optional allowCrossNamespaceImport to GrafanaDashboard(s) (#788) - -## 0.18.9 - -**Release date:** 2023-12-08 - -![AppVersion: v1.95.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.95.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Properly use variable from values file for Grafana datasource type. (#769) -- Update dashboards from upstream sources. 
(#780) - -## 0.18.8 - -**Release date:** 2023-11-16 - -![AppVersion: v1.95.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.95.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- bump version of VM components to [v1.95.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.95.1) - -## 0.18.7 - -**Release date:** 2023-11-15 - -![AppVersion: v1.95.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.95.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- bump version of VM components to [v1.95.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.95.0) -- Support adding extra group parameters for default vmrules. (#752) - -## 0.18.6 - -**Release date:** 2023-11-01 - -![AppVersion: v1.94.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.94.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Fix kube scheduler default scraping port from 10251 to 10259, Kubernetes changed it since 1.23.0. See [this pr](https://github.com/VictoriaMetrics/helm-charts/pull/736) for details. 
-- Bump version of operator chart to [0.27.4](https://github.com/VictoriaMetrics/helm-charts/releases/tag/victoria-metrics-operator-0.27.4) - -## 0.18.5 - -**Release date:** 2023-10-08 - -![AppVersion: v1.94.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.94.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Update operator chart to [v0.27.3](https://github.com/VictoriaMetrics/helm-charts/releases/tag/victoria-metrics-operator-0.27.3) for fixing [#708](https://github.com/VictoriaMetrics/helm-charts/issues/708) - -## 0.18.4 - -**Release date:** 2023-10-04 - -![AppVersion: v1.94.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.94.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Update dependencies: [victoria-metrics-operator -> 0.27.2](https://github.com/VictoriaMetrics/helm-charts/releases/tag/victoria-metrics-operator-0.27.2), prometheus-node-exporter -> 4.23.2, grafana -> 6.59.5. 
- -## 0.18.3 - -**Release date:** 2023-10-04 - -![AppVersion: v1.94.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.94.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- bump version of VM components to [v1.94.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.94.0) - -## 0.18.2 - -**Release date:** 2023-09-28 - -![AppVersion: v1.93.5](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.5&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Fix behavior of `vmalert.remoteWriteVMAgent` - remoteWrite.url for VMAlert is correctly generated considering endpoint, name, port and http.pathPrefix of VMAgent - -## 0.18.1 - -**Release date:** 2023-09-21 - -![AppVersion: v1.93.5](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.5&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Bump version of VM components to [v1.93.5](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.93.5) - -## 0.18.0 - -**Release date:** 2023-09-12 - -![AppVersion: v1.93.4](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.4&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Bump version of `grafana` helm-chart to `6.59.*` -- Bump version of `prometheus-node-exporter` helm-chart to `4.23.*` -- Bump version of `kube-state-metrics` helm-chart to `0.59.*` -- Update alerting rules -- Update grafana dashboards -- Add `make` commands `sync-rules` and `sync-dashboards` -- Add support of VictoriaMetrics datasource - -## 0.17.8 - -**Release date:** 2023-09-11 - -![AppVersion: v1.93.4](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.4&color=success&logo=) -![Helm: 
v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Bump version of VM components to [v1.93.4](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.93.4) -- Bump version of operator chart to [0.27.0](https://github.com/VictoriaMetrics/helm-charts/releases/tag/victoria-metrics-operator-0.27.0) - -## 0.17.7 - -**Release date:** 2023-09-07 - -![AppVersion: v1.93.3](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.3&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Bump version of operator helm-chart to `0.26.2` - -## 0.17.6 - -**Release date:** 2023-09-04 - -![AppVersion: v1.93.3](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.3&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Move `cleanupCRD` option to victoria-metrics-operator chart (#593) -- Disable `honorTimestamps` for cadvisor scrape job by default (#617) -- For vmalert all replicas of alertmanager are added to notifiers (only if alertmanager is enabled) (#619) -- Add `grafanaOperatorDashboardsFormat` option (#615) -- Fix query expression for memory calculation in `k8s-views-global` dashboard (#636) -- Bump version of Victoria Metrics components to `v1.93.3` -- Bump version of operator helm-chart to `0.26.0` - -## 0.17.5 - -**Release date:** 2023-08-23 - -![AppVersion: v1.93.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Update VictoriaMetrics components from v1.93.0 to v1.93.1 - -## 0.17.4 - -**Release date:** 2023-08-12 - -![AppVersion: v1.93.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.93.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - 
-- Update VictoriaMetrics components from v1.92.1 to v1.93.0 -- delete an obsolete parameter remaining by mistake (see ) (#602) - -## 0.17.3 - -**Release date:** 2023-07-28 - -![AppVersion: v1.92.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.92.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Update VictoriaMetrics components from v1.92.0 to v1.92.1 (#599) - -## 0.17.2 - -**Release date:** 2023-07-27 - -![AppVersion: v1.92.0](https://img.shields.io/static/v1?label=AppVersion&message=v1.92.0&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Update VictoriaMetrics components from v1.91.3 to v1.92.0 diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.lock b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.lock deleted file mode 100644 index c059a99c..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.lock +++ /dev/null @@ -1,24 +0,0 @@ -dependencies: -- name: victoria-metrics-common - repository: https://victoriametrics.github.io/helm-charts - version: 0.0.11 -- name: victoria-metrics-operator - repository: https://victoriametrics.github.io/helm-charts - version: 0.34.8 -- name: kube-state-metrics - repository: https://prometheus-community.github.io/helm-charts - version: 5.25.1 -- name: prometheus-node-exporter - repository: https://prometheus-community.github.io/helm-charts - version: 4.39.0 -- name: grafana - repository: https://grafana.github.io/helm-charts - version: 8.4.9 -- name: crds - repository: "" - version: 0.0.0 -- name: prometheus-operator-crds - repository: https://prometheus-community.github.io/helm-charts - version: 11.0.0 -digest: sha256:11b119ebabf4ff0ea2951e7c72f51d0223dc3f50fb061a43b01fe7856491b836 -generated: "2024-09-12T11:50:51.935071545Z" diff --git 
a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.yaml deleted file mode 100644 index 90e1012d..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/Chart.yaml +++ /dev/null @@ -1,66 +0,0 @@ -annotations: - artifacthub.io/category: monitoring-logging - artifacthub.io/changes: | - - Added VMAuth to k8s stack. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/829) - - Fixed ETCD dashboard - - Use path prefix from args as a default path prefix for ingress. Related [issue](https://github.com/VictoriaMetrics/helm-charts/issues/1260) - - 'Allow using vmalert without notifiers configuration. Note that it is required to use `.vmalert.spec.extraArgs["notifiers.blackhole"]: true` in order to start vmalert with a blackhole configuration.' - artifacthub.io/license: Apache-2.0 - artifacthub.io/links: | - - name: Sources - url: https://docs.victoriametrics.com/vmgateway - - name: Charts repo - url: https://victoriametrics.github.io/helm-charts/ - - name: Docs - url: https://docs.victoriametrics.com - artifacthub.io/operator: "true" -apiVersion: v2 -appVersion: v1.102.1 -dependencies: -- name: victoria-metrics-common - repository: https://victoriametrics.github.io/helm-charts - version: 0.0.* -- condition: victoria-metrics-operator.enabled - name: victoria-metrics-operator - repository: https://victoriametrics.github.io/helm-charts - version: 0.34.* -- condition: kube-state-metrics.enabled - name: kube-state-metrics - repository: https://prometheus-community.github.io/helm-charts - version: 5.25.* -- condition: prometheus-node-exporter.enabled - name: prometheus-node-exporter - repository: https://prometheus-community.github.io/helm-charts - version: 4.39.* -- condition: grafana.enabled - name: grafana - repository: https://grafana.github.io/helm-charts - version: 8.4.* -- condition: crds.enabled - name: crds - repository: "" - version: 0.0.0 -- 
condition: prometheus-operator-crds.enabled - name: prometheus-operator-crds - repository: https://prometheus-community.github.io/helm-charts - version: 11.0.* -description: Kubernetes monitoring on VictoriaMetrics stack. Includes VictoriaMetrics - Operator, Grafana dashboards, ServiceScrapes and VMRules -home: https://github.com/VictoriaMetrics/helm-charts -icon: https://avatars.githubusercontent.com/u/43720803?s=200&v=4 -keywords: -- victoriametrics -- operator -- monitoring -- kubernetes -- observability -- tsdb -- metrics -- metricsql -- timeseries -kubeVersion: '>=1.25.0-0' -name: victoria-metrics-k8s-stack -sources: -- https://github.com/VictoriaMetrics/helm-charts -type: application -version: 0.25.17 diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md deleted file mode 100644 index d95202e0..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md +++ /dev/null @@ -1,2576 +0,0 @@ -![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![Version: 0.25.17](https://img.shields.io/badge/Version-0.25.17-informational?style=flat-square) -[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/victoriametrics)](https://artifacthub.io/packages/helm/victoriametrics/victoria-metrics-k8s-stack) - -Kubernetes monitoring on VictoriaMetrics stack. Includes VictoriaMetrics Operator, Grafana dashboards, ServiceScrapes and VMRules - -* [Overview](#Overview) -* [Configuration](#Configuration) -* [Prerequisites](#Prerequisites) -* [Dependencies](#Dependencies) -* [Quick Start](#How-to-install) -* [Uninstall](#How-to-uninstall) -* [Version Upgrade](#Upgrade-guide) -* [Troubleshooting](#Troubleshooting) -* [Values](#Parameters) - -## Overview -This chart is an All-in-one solution to start monitoring kubernetes cluster. 
-It installs multiple dependency charts like [grafana](https://github.com/grafana/helm-charts/tree/main/charts/grafana), [node-exporter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter), [kube-state-metrics](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics) and [victoria-metrics-operator](https://github.com/VictoriaMetrics/helm-charts/tree/master/charts/victoria-metrics-operator). -Also it installs Custom Resources like [VMSingle](https://docs.victoriametrics.com/operator/quick-start#vmsingle), [VMCluster](https://docs.victoriametrics.com/operator/quick-start#vmcluster), [VMAgent](https://docs.victoriametrics.com/operator/quick-start#vmagent), [VMAlert](https://docs.victoriametrics.com/operator/quick-start#vmalert). - -By default, the operator [converts all existing prometheus-operator API objects](https://docs.victoriametrics.com/operator/quick-start#migration-from-prometheus-operator-objects) into corresponding VictoriaMetrics Operator objects. - -To enable metrics collection for kubernetes this chart installs multiple scrape configurations for kubernetes components like kubelet and kube-proxy, etc. Metrics collection is done by [VMAgent](https://docs.victoriametrics.com/operator/quick-start#vmagent). So if you want to ship metrics to an external VictoriaMetrics database you can disable VMSingle installation by setting `vmsingle.enabled` to `false` and setting `vmagent.spec.remoteWrite.url` to your external VictoriaMetrics database. - -This chart also installs a bunch of dashboards and recording rules from the [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus) project. - -![Overview](img/k8s-stack-overview.png) - -## Configuration - -Configuration of this chart is done through helm values. - -### Dependencies - -Dependencies can be enabled or disabled by setting `enabled` to `true` or `false` in `values.yaml` file.
- -**!Important:** for dependency charts anything that you can find in values.yaml of dependency chart can be configured in this chart under key for that dependency. For example if you want to configure `grafana` you can find all possible configuration options in [values.yaml](https://github.com/grafana/helm-charts/blob/main/charts/grafana/values.yaml) and you should set them in values for this chart under grafana: key. For example if you want to configure `grafana.persistence.enabled` you should set it in values.yaml like this: -```yaml -################################################# -### dependencies ##### -################################################# -# Grafana dependency chart configuration. For possible values refer to https://github.com/grafana/helm-charts/tree/main/charts/grafana#configuration -grafana: - enabled: true - persistence: - type: pvc - enabled: false -``` - -### VictoriaMetrics components - -This chart installs multiple VictoriaMetrics components using Custom Resources that are managed by [victoria-metrics-operator](https://docs.victoriametrics.com/operator/design) -Each resource can be configured using `spec` of that resource from API docs of [victoria-metrics-operator](https://docs.victoriametrics.com/operator/api). For example if you want to configure `VMAgent` you can find all possible configuration options in [API docs](https://docs.victoriametrics.com/operator/api#vmagent) and you should set them in values for this chart under `vmagent.spec` key. 
For example if you want to configure `remoteWrite.url` you should set it in values.yaml like this: -```yaml -vmagent: - spec: - remoteWrite: - - url: "https://insert.vmcluster.domain.com/insert/0/prometheus/api/v1/write" -``` - -### ArgoCD issues - -#### Operator self signed certificates -When deploying K8s stack using ArgoCD without Cert Manager (`.Values.victoria-metrics-operator.admissionWebhooks.certManager.enabled: false`) -it will rerender operator's webhook certificates on each sync since Helm `lookup` function is not respected by ArgoCD. -To prevent this please update your K8s stack Application `spec.syncPolicy` and `spec.ignoreDifferences` with the following: - -```yaml -apiVersion: argoproj.io/v1alpha1 -kind: Application -... -spec: - ... - syncPolicy: - syncOptions: - # https://argo-cd.readthedocs.io/en/stable/user-guide/sync-options/#respect-ignore-difference-configs - # argocd must also ignore difference during apply stage - # otherwise it will silently override changes and cause a problem - - RespectIgnoreDifferences=true - ignoreDifferences: - - group: "" - kind: Secret - name: <fullname>-validation - namespace: kube-system - jsonPointers: - - /data - - group: admissionregistration.k8s.io - kind: ValidatingWebhookConfiguration - name: <fullname>-admission - jqPathExpressions: - - '.webhooks[]?.clientConfig.caBundle' -``` -where `<fullname>` is output of `{{ include "vm-operator.fullname" }}` for your setup - -#### `metadata.annotations: Too long: must have at most 262144 bytes` on dashboards - -If one of the dashboard ConfigMaps is failing with error `Too long: must have at most 262144 bytes`, please make sure you've added `argocd.argoproj.io/sync-options: ServerSideApply=true` annotation to your dashboards: - -```yaml -grafana: - sidecar: - dashboards: - additionalDashboardAnnotations: - argocd.argoproj.io/sync-options: ServerSideApply=true -``` - -argocd.argoproj.io/sync-options: ServerSideApply=true - -### Rules and dashboards - -This chart by default installs multiple dashboards and
recording rules from [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus) -you can disable dashboards with `defaultDashboardsEnabled: false` and `experimentalDashboardsEnabled: false` -and rules can be configured under `defaultRules` - -### Prometheus scrape configs -This chart installs multiple scrape configurations for kubernetes monitoring. They are configured under `#ServiceMonitors` section in `values.yaml` file. For example if you want to configure scrape config for `kubelet` you should set it in values.yaml like this: -```yaml -kubelet: - enabled: true - # spec for VMNodeScrape crd - # https://docs.victoriametrics.com/operator/api#vmnodescrapespec - spec: - interval: "30s" -``` - -### Using externally managed Grafana - -If you want to use an externally managed Grafana instance but still want to use the dashboards provided by this chart you can set - `grafana.enabled` to `false` and set `defaultDashboardsEnabled` to `true`. This will install the dashboards - but will not install Grafana. - -For example: -```yaml -defaultDashboardsEnabled: true - -grafana: - enabled: false -``` - -This will create ConfigMaps with dashboards to be imported into Grafana. - -If additional configuration for labels or annotations is needed in order to import dashboard to an existing Grafana you can -set `.grafana.sidecar.dashboards.additionalDashboardLabels` or `.grafana.sidecar.dashboards.additionalDashboardAnnotations` in `values.yaml`: - -For example: -```yaml -defaultDashboardsEnabled: true - -grafana: - enabled: false - sidecar: - dashboards: - additionalDashboardLabels: - key: value - additionalDashboardAnnotations: - key: value -``` - -## Prerequisites - -* Install the follow packages: ``git``, ``kubectl``, ``helm``, ``helm-docs``. See this [tutorial](../../REQUIREMENTS.md). 
- -* Add dependency chart repositories - -```console -helm repo add grafana https://grafana.github.io/helm-charts -helm repo add prometheus-community https://prometheus-community.github.io/helm-charts -helm repo update -``` - -* PV support on underlying infrastructure. - -## How to install - -Access a Kubernetes cluster. - -### Setup chart repository (can be omitted for OCI repositories) - -Add a chart helm repository with the following commands: - -```console -helm repo add vm https://victoriametrics.github.io/helm-charts/ - -helm repo update -``` -List versions of `vm/victoria-metrics-k8s-stack` chart available for installation: - -```console -helm search repo vm/victoria-metrics-k8s-stack -l -``` - -### Install `victoria-metrics-k8s-stack` chart - -Export default values of `victoria-metrics-k8s-stack` chart to file `values.yaml`: - - - For HTTPS repository - - ```console - helm show values vm/victoria-metrics-k8s-stack > values.yaml - ``` - - For OCI repository - - ```console - helm show values oci://ghcr.io/victoriametrics/helm-charts/victoria-metrics-k8s-stack > values.yaml - ``` - -Change the values according to the need of the environment in ``values.yaml`` file. 
- -Test the installation with command: - - - For HTTPS repository - - ```console - helm install vmks vm/victoria-metrics-k8s-stack -f values.yaml -n NAMESPACE --debug --dry-run - ``` - - - For OCI repository - - ```console - helm install vmks oci://ghcr.io/victoriametrics/helm-charts/victoria-metrics-k8s-stack -f values.yaml -n NAMESPACE --debug --dry-run - ``` - -Install chart with command: - - - For HTTPS repository - - ```console - helm install vmks vm/victoria-metrics-k8s-stack -f values.yaml -n NAMESPACE - ``` - - - For OCI repository - - ```console - helm install vmks oci://ghcr.io/victoriametrics/helm-charts/victoria-metrics-k8s-stack -f values.yaml -n NAMESPACE - ``` - -Get the pods lists by running this commands: - -```console -kubectl get pods -A | grep 'vmks' -``` - -Get the application by running this command: - -```console -helm list -f vmks -n NAMESPACE -``` - -See the history of versions of `vmks` application with command. - -```console -helm history vmks -n NAMESPACE -``` - -### Install locally (Minikube) - -To run VictoriaMetrics stack locally it's possible to use [Minikube](https://github.com/kubernetes/minikube). To avoid dashboards and alert rules issues please follow the steps below: - -Run Minikube cluster - -``` -minikube start --container-runtime=containerd --extra-config=scheduler.bind-address=0.0.0.0 --extra-config=controller-manager.bind-address=0.0.0.0 -``` - -Install helm chart - -``` -helm install [RELEASE_NAME] vm/victoria-metrics-k8s-stack -f values.yaml -f values.minikube.yaml -n NAMESPACE --debug --dry-run -``` - -## How to uninstall - -Remove application with command. - -```console -helm uninstall vmks -n NAMESPACE -``` - -CRDs created by this chart are not removed by default and should be manually cleaned up: - -```console -kubectl get crd | grep victoriametrics.com | awk '{print $1 }' | xargs -i kubectl delete crd {} -``` - -## Troubleshooting - -- If you cannot install helm chart with error `configmap already exist`. 
It could happen because of name collisions, if the release name you set is too long. - Kubernetes by default allows only 63 symbols in resource names and all resource names are trimmed by helm to 63 symbols. - To mitigate it, use a shorter name for helm chart release name, like: -```bash -# stack - is short enough -helm upgrade -i stack vm/victoria-metrics-k8s-stack -``` - Or use override for helm chart release name: -```bash -helm upgrade -i some-very-long-name vm/victoria-metrics-k8s-stack --set fullnameOverride=stack -``` - -## Upgrade guide - -Usually, helm upgrade doesn't require manual actions. Just execute the command: - -```console -$ helm upgrade [RELEASE_NAME] vm/victoria-metrics-k8s-stack -``` - -But release with CRD update can only be patched manually with kubectl. -Since helm does not perform a CRD update, we recommend that you always perform this when updating the helm-charts version: - -```console -# 1. check the changes in CRD -$ helm show crds vm/victoria-metrics-k8s-stack --version [YOUR_CHART_VERSION] | kubectl diff -f - - -# 2. apply the changes (update CRD) -$ helm show crds vm/victoria-metrics-k8s-stack --version [YOUR_CHART_VERSION] | kubectl apply -f - --server-side -``` - -All other manual upgrade actions are listed below: - -### Upgrade to 0.13.0 - -- node-exporter starting from version 4.0.0 is using the Kubernetes recommended labels. Therefore you have to delete the daemonset before you upgrade. - -```bash -kubectl delete daemonset -l app=prometheus-node-exporter -``` -- scrape configuration for kubernetes components was moved from `vmServiceScrape.spec` section to `spec` section. If you previously modified scrape configuration you need to update your `values.yaml` - -- `grafana.defaultDashboardsEnabled` was renamed to `defaultDashboardsEnabled` (moved to top level). 
You may need to update it in your `values.yaml` - -### Upgrade to 0.6.0 - - All `CRD` must be updated to the latest version with the command: - -```bash -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/helm-charts/master/charts/victoria-metrics-k8s-stack/crds/crd.yaml - -``` - -### Upgrade to 0.4.0 - - All `CRD` must be updated to `v1` version with the command: - -```bash -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/helm-charts/master/charts/victoria-metrics-k8s-stack/crds/crd.yaml - -``` - -### Upgrade from 0.2.8 to 0.2.9 - - Update `VMAgent` crd - -command: -```bash -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.16.0/config/crd/bases/operator.victoriametrics.com_vmagents.yaml -``` - - ### Upgrade from 0.2.5 to 0.2.6 - -New CRDs added to operator - `VMUser` and `VMAuth`, new fields added to existing CRDs. -Manual commands: -```bash -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmusers.yaml -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmauths.yaml -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmalerts.yaml -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmagents.yaml -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmsingles.yaml -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmclusters.yaml -``` - -## Documentation of Helm Chart - -Install ``helm-docs`` following the instructions on this [tutorial](../../REQUIREMENTS.md). - -Generate docs with ``helm-docs`` command. 
- -```bash -cd charts/victoria-metrics-k8s-stack - -helm-docs -``` - -The markdown generation is entirely go template driven. The tool parses metadata from charts and generates a number of sub-templates that can be referenced in a template file (by default ``README.md.gotmpl``). If no template file is provided, the tool has a default internal template that will generate a reasonably formatted README. - -## Parameters - -The following tables lists the configurable parameters of the chart and their default values. - -Change the values according to the need of the environment in ``victoria-metrics-k8s-stack/values.yaml`` file. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
KeyTypeDefaultDescription
additionalVictoriaMetricsMapstring
-null
-
-
alertmanager.annotationsobject
-{}
-
-
alertmanager.configobject
-receivers:
-    - name: blackhole
-route:
-    receiver: blackhole
-templates:
-    - /etc/vm/configs/**/*.tmpl
-
-

alertmanager configuration

-
alertmanager.enabledbool
-true
-
-
alertmanager.ingressobject
-annotations: {}
-enabled: false
-extraPaths: []
-hosts:
-    - alertmanager.domain.com
-labels: {}
-path: '{{ .Values.alertmanager.spec.routePrefix | default "/" }}'
-pathType: Prefix
-tls: []
-
-

alertmanager ingress configuration

-
alertmanager.monzoTemplate.enabledbool
-true
-
-
alertmanager.specobject
-configSecret: ""
-externalURL: ""
-image:
-    tag: v0.25.0
-port: "9093"
-routePrefix: /
-selectAllByDefault: true
-
-

full spec for VMAlertmanager CRD. Allowed values described here

-
alertmanager.spec.configSecretstring
-""
-
-

If this is defined, it will be used for the alertmanager configuration and the config parameter will be ignored

-
alertmanager.templateFilesobject
-{}
-
-

extra alert templates

-
argocdReleaseOverridestring
-""
-
-

For correct operation, set the value ‘argocdReleaseOverride=$ARGOCD_APP_NAME’

-
coreDns.enabledbool
-true
-
-
coreDns.service.enabledbool
-true
-
-
coreDns.service.portint
-9153
-
-
coreDns.service.selector.k8s-appstring
-kube-dns
-
-
coreDns.service.targetPortint
-9153
-
-
coreDns.vmScrapeobject
-spec:
-    endpoints:
-        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
-          port: http-metrics
-    jobLabel: jobLabel
-    namespaceSelector:
-        matchNames:
-            - kube-system
-
-

spec for VMServiceScrape crd https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec

-
crds.enabledbool
-true
-
-
dashboardsobject
-node-exporter-full: true
-operator: false
-vmalert: false
-
-

Enable dashboards even if their dependency is not installed

-
dashboards.node-exporter-fullbool
-true
-
-

In ArgoCD, when using client-side apply, this dashboard reaches the annotations size limit and causes k8s issues without server-side apply. See this issue

-
defaultDashboardsEnabledbool
-true
-
-

Create default dashboards

-
defaultRulesobject
-alerting:
-    spec:
-        annotations: {}
-        labels: {}
-annotations: {}
-create: true
-group:
-    spec:
-        params: {}
-groups:
-    alertmanager:
-        create: true
-        rules: {}
-    etcd:
-        create: true
-        rules: {}
-    general:
-        create: true
-        rules: {}
-    k8sContainerCpuUsageSecondsTotal:
-        create: true
-        rules: {}
-    k8sContainerMemoryCache:
-        create: true
-        rules: {}
-    k8sContainerMemoryRss:
-        create: true
-        rules: {}
-    k8sContainerMemorySwap:
-        create: true
-        rules: {}
-    k8sContainerMemoryWorkingSetBytes:
-        create: true
-        rules: {}
-    k8sContainerResource:
-        create: true
-        rules: {}
-    k8sPodOwner:
-        create: true
-        rules: {}
-    kubeApiserver:
-        create: true
-        rules: {}
-    kubeApiserverAvailability:
-        create: true
-        rules: {}
-    kubeApiserverBurnrate:
-        create: true
-        rules: {}
-    kubeApiserverHistogram:
-        create: true
-        rules: {}
-    kubeApiserverSlos:
-        create: true
-        rules: {}
-    kubePrometheusGeneral:
-        create: true
-        rules: {}
-    kubePrometheusNodeRecording:
-        create: true
-        rules: {}
-    kubeScheduler:
-        create: true
-        rules: {}
-    kubeStateMetrics:
-        create: true
-        rules: {}
-    kubelet:
-        create: true
-        rules: {}
-    kubernetesApps:
-        create: true
-        rules: {}
-        targetNamespace: .*
-    kubernetesResources:
-        create: true
-        rules: {}
-    kubernetesStorage:
-        create: true
-        rules: {}
-        targetNamespace: .*
-    kubernetesSystem:
-        create: true
-        rules: {}
-    kubernetesSystemApiserver:
-        create: true
-        rules: {}
-    kubernetesSystemControllerManager:
-        create: true
-        rules: {}
-    kubernetesSystemKubelet:
-        create: true
-        rules: {}
-    kubernetesSystemScheduler:
-        create: true
-        rules: {}
-    node:
-        create: true
-        rules: {}
-    nodeNetwork:
-        create: true
-        rules: {}
-    vmHealth:
-        create: true
-        rules: {}
-    vmagent:
-        create: true
-        rules: {}
-    vmcluster:
-        create: true
-        rules: {}
-    vmoperator:
-        create: true
-        rules: {}
-    vmsingle:
-        create: true
-        rules: {}
-labels: {}
-recording:
-    spec:
-        annotations: {}
-        labels: {}
-rule:
-    spec:
-        annotations: {}
-        labels: {}
-rules: {}
-runbookUrl: https://runbooks.prometheus-operator.dev/runbooks
-
-

Create default rules for monitoring the cluster

-
defaultRules.alertingobject
-spec:
-    annotations: {}
-    labels: {}
-
-

Common properties for VMRules alerts

-
defaultRules.alerting.spec.annotationsobject
-{}
-
-

Additional annotations for VMRule alerts

-
defaultRules.alerting.spec.labelsobject
-{}
-
-

Additional labels for VMRule alerts

-
defaultRules.annotationsobject
-{}
-
-

Annotations for default rules

-
defaultRules.groupobject
-spec:
-    params: {}
-
-

Common properties for VMRule groups

-
defaultRules.group.spec.paramsobject
-{}
-
-

Optional HTTP URL parameters added to each rule request

-
defaultRules.groups.etcd.rulesobject
-{}
-
-

Common properties for all rules in a group

-
defaultRules.labelsobject
-{}
-
-

Labels for default rules

-
defaultRules.recordingobject
-spec:
-    annotations: {}
-    labels: {}
-
-

Common properties for VMRules recording rules

-
defaultRules.recording.spec.annotationsobject
-{}
-
-

Additional annotations for VMRule recording rules

-
defaultRules.recording.spec.labelsobject
-{}
-
-

Additional labels for VMRule recording rules

-
defaultRules.ruleobject
-spec:
-    annotations: {}
-    labels: {}
-
-

Common properties for all VMRules

-
defaultRules.rule.spec.annotationsobject
-{}
-
-

Additional annotations for all VMRules

-
defaultRules.rule.spec.labelsobject
-{}
-
-

Additional labels for all VMRules

-
defaultRules.rulesobject
-{}
-
-

Per rule properties

-
defaultRules.runbookUrlstring
-https://runbooks.prometheus-operator.dev/runbooks
-
-

Runbook url prefix for default rules

-
experimentalDashboardsEnabledbool
-true
-
-

Create experimental dashboards

-
externalVM.read.urlstring
-""
-
-
externalVM.write.urlstring
-""
-
-
extraObjectslist
-[]
-
-

Add extra objects dynamically to this chart

-
fullnameOverridestring
-""
-
-
global.clusterLabelstring
-cluster
-
-
global.license.keystring
-""
-
-
global.license.keyRefobject
-{}
-
-
grafana.additionalDataSourceslist
-[]
-
-
grafana.defaultDashboardsTimezonestring
-utc
-
-
grafana.defaultDatasourceTypestring
-prometheus
-
-
grafana.enabledbool
-true
-
-
grafana.forceDeployDatasourcebool
-false
-
-
grafana.ingress.annotationsobject
-{}
-
-
grafana.ingress.enabledbool
-false
-
-
grafana.ingress.extraPathslist
-[]
-
-
grafana.ingress.hosts[0]string
-grafana.domain.com
-
-
grafana.ingress.labelsobject
-{}
-
-
grafana.ingress.pathstring
-/
-
-
grafana.ingress.pathTypestring
-Prefix
-
-
grafana.ingress.tlslist
-[]
-
-
grafana.sidecar.dashboards.additionalDashboardAnnotationsobject
-{}
-
-
grafana.sidecar.dashboards.additionalDashboardLabelsobject
-{}
-
-
grafana.sidecar.dashboards.defaultFolderNamestring
-default
-
-
grafana.sidecar.dashboards.enabledbool
-true
-
-
grafana.sidecar.dashboards.folderstring
-/var/lib/grafana/dashboards
-
-
grafana.sidecar.dashboards.multiclusterbool
-false
-
-
grafana.sidecar.dashboards.provider.namestring
-default
-
-
grafana.sidecar.dashboards.provider.orgidint
-1
-
-
grafana.sidecar.datasources.createVMReplicasDatasourcesbool
-false
-
-
grafana.sidecar.datasources.defaultlist
-- isDefault: true
-  name: VictoriaMetrics
-- isDefault: false
-  name: VictoriaMetrics (DS)
-  type: victoriametrics-datasource
-
-

list of default prometheus compatible datasource configurations. VM url will be added to each of them in templates and type will be set to defaultDatasourceType if not defined

-
grafana.sidecar.datasources.enabledbool
-true
-
-
grafana.sidecar.datasources.initDatasourcesbool
-true
-
-
grafana.vmScrapeobject
-enabled: true
-spec:
-    endpoints:
-        - port: '{{ .Values.grafana.service.portName }}'
-    selector:
-        matchLabels:
-            app.kubernetes.io/name: '{{ include "grafana.name" .Subcharts.grafana }}'
-
-

grafana VM scrape config

-
grafana.vmScrape.specobject
-endpoints:
-    - port: '{{ .Values.grafana.service.portName }}'
-selector:
-    matchLabels:
-        app.kubernetes.io/name: '{{ include "grafana.name" .Subcharts.grafana }}'
-
-

Scrape configuration for Grafana

-
grafanaOperatorDashboardsFormatobject
-allowCrossNamespaceImport: false
-enabled: false
-instanceSelector:
-    matchLabels:
-        dashboards: grafana
-
-

Create dashboards as CRDs (requires grafana-operator to be installed)

-
kube-state-metrics.enabledbool
-true
-
-
kube-state-metrics.vmScrapeobject
-enabled: true
-spec:
-    endpoints:
-        - honorLabels: true
-          metricRelabelConfigs:
-            - action: labeldrop
-              regex: (uid|container_id|image_id)
-          port: http
-    jobLabel: app.kubernetes.io/name
-    selector:
-        matchLabels:
-            app.kubernetes.io/instance: '{{ include "vm.release" . }}'
-            app.kubernetes.io/name: '{{ include "kube-state-metrics.name" (index .Subcharts "kube-state-metrics") }}'
-
-

Scrape configuration for Kube State Metrics

-
kubeApiServer.enabledbool
-true
-
-
kubeApiServer.vmScrapeobject
-spec:
-    endpoints:
-        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
-          port: https
-          scheme: https
-          tlsConfig:
-            caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-            serverName: kubernetes
-    jobLabel: component
-    namespaceSelector:
-        matchNames:
-            - default
-    selector:
-        matchLabels:
-            component: apiserver
-            provider: kubernetes
-
-

spec for VMServiceScrape crd https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec

-
kubeControllerManager.enabledbool
-true
-
-
kubeControllerManager.endpointslist
-[]
-
-
kubeControllerManager.service.enabledbool
-true
-
-
kubeControllerManager.service.portint
-10257
-
-
kubeControllerManager.service.selector.componentstring
-kube-controller-manager
-
-
kubeControllerManager.service.targetPortint
-10257
-
-
kubeControllerManager.vmScrapeobject
-spec:
-    endpoints:
-        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
-          port: http-metrics
-          scheme: https
-          tlsConfig:
-            caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-            serverName: kubernetes
-    jobLabel: jobLabel
-    namespaceSelector:
-        matchNames:
-            - kube-system
-
-

spec for VMServiceScrape crd https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec

-
kubeDns.enabledbool
-false
-
-
kubeDns.service.enabledbool
-false
-
-
kubeDns.service.ports.dnsmasq.portint
-10054
-
-
kubeDns.service.ports.dnsmasq.targetPortint
-10054
-
-
kubeDns.service.ports.skydns.portint
-10055
-
-
kubeDns.service.ports.skydns.targetPortint
-10055
-
-
kubeDns.service.selector.k8s-appstring
-kube-dns
-
-
kubeDns.vmScrapeobject
-spec:
-    endpoints:
-        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
-          port: http-metrics-dnsmasq
-        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
-          port: http-metrics-skydns
-    jobLabel: jobLabel
-    namespaceSelector:
-        matchNames:
-            - kube-system
-
-

spec for VMServiceScrape crd https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec

-
kubeEtcd.enabledbool
-true
-
-
kubeEtcd.endpointslist
-[]
-
-
kubeEtcd.service.enabledbool
-true
-
-
kubeEtcd.service.portint
-2379
-
-
kubeEtcd.service.selector.componentstring
-etcd
-
-
kubeEtcd.service.targetPortint
-2379
-
-
kubeEtcd.vmScrapeobject
-spec:
-    endpoints:
-        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
-          port: http-metrics
-          scheme: https
-          tlsConfig:
-            caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-    jobLabel: jobLabel
-    namespaceSelector:
-        matchNames:
-            - kube-system
-
-

spec for VMServiceScrape crd https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec

-
kubeProxy.enabledbool
-false
-
-
kubeProxy.endpointslist
-[]
-
-
kubeProxy.service.enabledbool
-true
-
-
kubeProxy.service.portint
-10249
-
-
kubeProxy.service.selector.k8s-appstring
-kube-proxy
-
-
kubeProxy.service.targetPortint
-10249
-
-
kubeProxy.vmScrapeobject
-spec:
-    endpoints:
-        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
-          port: http-metrics
-          scheme: https
-          tlsConfig:
-            caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-    jobLabel: jobLabel
-    namespaceSelector:
-        matchNames:
-            - kube-system
-
-

spec for VMServiceScrape crd https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec

-
kubeScheduler.enabledbool
-true
-
-
kubeScheduler.endpointslist
-[]
-
-
kubeScheduler.service.enabledbool
-true
-
-
kubeScheduler.service.portint
-10259
-
-
kubeScheduler.service.selector.componentstring
-kube-scheduler
-
-
kubeScheduler.service.targetPortint
-10259
-
-
kubeScheduler.vmScrapeobject
-spec:
-    endpoints:
-        - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
-          port: http-metrics
-          scheme: https
-          tlsConfig:
-            caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-    jobLabel: jobLabel
-    namespaceSelector:
-        matchNames:
-            - kube-system
-
-

spec for VMServiceScrape crd https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec

-
kubelet.enabledbool
-true
-
-
kubelet.vmScrapeobject
-kind: VMNodeScrape
-spec:
-    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
-    honorLabels: true
-    honorTimestamps: false
-    interval: 30s
-    metricRelabelConfigs:
-        - action: labeldrop
-          regex: (uid)
-        - action: labeldrop
-          regex: (id|name)
-        - action: drop
-          regex: (rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count)
-          source_labels:
-            - __name__
-    relabelConfigs:
-        - action: labelmap
-          regex: __meta_kubernetes_node_label_(.+)
-        - sourceLabels:
-            - __metrics_path__
-          targetLabel: metrics_path
-        - replacement: kubelet
-          targetLabel: job
-    scheme: https
-    scrapeTimeout: 5s
-    tlsConfig:
-        caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-        insecureSkipVerify: true
-
-

spec for VMNodeScrape crd https://docs.victoriametrics.com/operator/api.html#vmnodescrapespec

-
kubelet.vmScrapes.cadvisorobject
-enabled: true
-spec:
-    path: /metrics/cadvisor
-
-

Enable scraping /metrics/cadvisor from kubelet’s service

-
kubelet.vmScrapes.kubelet.specobject
-{}
-
-
kubelet.vmScrapes.probesobject
-enabled: true
-spec:
-    path: /metrics/probes
-
-

Enable scraping /metrics/probes from kubelet’s service

-
nameOverridestring
-""
-
-
prometheus-node-exporter.enabledbool
-true
-
-
prometheus-node-exporter.extraArgs[0]string
---collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
-
-
prometheus-node-exporter.extraArgs[1]string
---collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
-
-
prometheus-node-exporter.service.labels.jobLabelstring
-node-exporter
-
-
prometheus-node-exporter.vmScrapeobject
-enabled: true
-spec:
-    endpoints:
-        - metricRelabelConfigs:
-            - action: drop
-              regex: /var/lib/kubelet/pods.+
-              source_labels:
-                - mountpoint
-          port: metrics
-    jobLabel: jobLabel
-    selector:
-        matchLabels:
-            app.kubernetes.io/name: '{{ include "prometheus-node-exporter.name" (index .Subcharts "prometheus-node-exporter") }}'
-
-

node exporter VM scrape config

-
prometheus-node-exporter.vmScrape.specobject
-endpoints:
-    - metricRelabelConfigs:
-        - action: drop
-          regex: /var/lib/kubelet/pods.+
-          source_labels:
-            - mountpoint
-      port: metrics
-jobLabel: jobLabel
-selector:
-    matchLabels:
-        app.kubernetes.io/name: '{{ include "prometheus-node-exporter.name" (index .Subcharts "prometheus-node-exporter") }}'
-
-

Scrape configuration for Node Exporter

-
prometheus-operator-crds.enabledbool
-false
-
-
serviceAccount.annotationsobject
-{}
-
-

Annotations to add to the service account

-
serviceAccount.createbool
-true
-
-

Specifies whether a service account should be created

-
serviceAccount.namestring
-""
-
-

If not set and create is true, a name is generated using the fullname template

-
tenantstring
-"0"
-
-
victoria-metrics-operatorobject
-crd:
-    cleanup:
-        enabled: true
-        image:
-            pullPolicy: IfNotPresent
-            repository: bitnami/kubectl
-    create: false
-enabled: true
-operator:
-    disable_prometheus_converter: false
-serviceMonitor:
-    enabled: true
-
-

See also the possible ENV variables to configure operator behaviour: https://docs.victoriametrics.com/operator/vars

-
victoria-metrics-operator.crd.cleanupobject
-enabled: true
-image:
-    pullPolicy: IfNotPresent
-    repository: bitnami/kubectl
-
-

tells helm to clean up vm cr resources when uninstalling

-
victoria-metrics-operator.crd.createbool
-false
-
-

we disable crd creation by operator chart as we create them in this chart

-
victoria-metrics-operator.operator.disable_prometheus_converterbool
-false
-
-

By default, operator converts prometheus-operator objects.

-
vmagent.additionalRemoteWriteslist
-[]
-
-

remoteWrite configuration of VMAgent, allowed parameters defined in a spec

-
vmagent.annotationsobject
-{}
-
-
vmagent.enabledbool
-true
-
-
vmagent.ingressobject
-annotations: {}
-enabled: false
-extraPaths: []
-hosts:
-    - vmagent.domain.com
-labels: {}
-path: ""
-pathType: Prefix
-tls: []
-
-

vmagent ingress configuration

-
vmagent.ingress.extraPathslist
-[]
-
-

Extra paths to prepend to every host configuration. This is useful when working with annotation based services.

-
vmagent.specobject
-externalLabels: {}
-extraArgs:
-    promscrape.dropOriginalLabels: "true"
-    promscrape.streamParse: "true"
-image:
-    tag: v1.103.0
-port: "8429"
-scrapeInterval: 20s
-selectAllByDefault: true
-
-

full spec for VMAgent CRD. Allowed values described here

-
vmalert.additionalNotifierConfigsobject
-{}
-
-
vmalert.annotationsobject
-{}
-
-
vmalert.enabledbool
-true
-
-
vmalert.ingressobject
-annotations: {}
-enabled: false
-extraPaths: []
-hosts:
-    - vmalert.domain.com
-labels: {}
-path: ""
-pathType: Prefix
-tls: []
-
-

vmalert ingress config

-
vmalert.remoteWriteVMAgentbool
-false
-
-
vmalert.specobject
-evaluationInterval: 15s
-externalLabels: {}
-extraArgs:
-    http.pathPrefix: /
-image:
-    tag: v1.103.0
-port: "8080"
-selectAllByDefault: true
-
-

full spec for VMAlert CRD. Allowed values described here

-
vmalert.templateFilesobject
-{}
-
-

extra vmalert annotation templates

-
vmauth.annotationsobject
-{}
-
-
vmauth.enabledbool
-false
-
-
vmauth.specobject
-discover_backend_ips: true
-port: "8427"
-
-

full spec for VMAuth CRD. Allowed values described here

-
vmcluster.annotationsobject
-{}
-
-
vmcluster.enabledbool
-false
-
-
vmcluster.ingress.insert.annotationsobject
-{}
-
-
vmcluster.ingress.insert.enabledbool
-false
-
-
vmcluster.ingress.insert.extraPathslist
-[]
-
-
vmcluster.ingress.insert.hosts[0]string
-vminsert.domain.com
-
-
vmcluster.ingress.insert.labelsobject
-{}
-
-
vmcluster.ingress.insert.pathstring
-'{{ dig "extraArgs" "http.pathPrefix" "/" .Values.vmcluster.spec.vminsert }}'
-
-
vmcluster.ingress.insert.pathTypestring
-Prefix
-
-
vmcluster.ingress.insert.tlslist
-[]
-
-
vmcluster.ingress.select.annotationsobject
-{}
-
-
vmcluster.ingress.select.enabledbool
-false
-
-
vmcluster.ingress.select.extraPathslist
-[]
-
-
vmcluster.ingress.select.hosts[0]string
-vmselect.domain.com
-
-
vmcluster.ingress.select.labelsobject
-{}
-
-
vmcluster.ingress.select.pathstring
-'{{ dig "extraArgs" "http.pathPrefix" "/" .Values.vmcluster.spec.vmselect }}'
-
-
vmcluster.ingress.select.pathTypestring
-Prefix
-
-
vmcluster.ingress.select.tlslist
-[]
-
-
vmcluster.ingress.storage.annotationsobject
-{}
-
-
vmcluster.ingress.storage.enabledbool
-false
-
-
vmcluster.ingress.storage.extraPathslist
-[]
-
-
vmcluster.ingress.storage.hosts[0]string
-vmstorage.domain.com
-
-
vmcluster.ingress.storage.labelsobject
-{}
-
-
vmcluster.ingress.storage.pathstring
-""
-
-
vmcluster.ingress.storage.pathTypestring
-Prefix
-
-
vmcluster.ingress.storage.tlslist
-[]
-
-
vmcluster.specobject
-replicationFactor: 2
-retentionPeriod: "1"
-vminsert:
-    extraArgs: {}
-    image:
-        tag: v1.103.0-cluster
-    port: "8480"
-    replicaCount: 2
-    resources: {}
-vmselect:
-    cacheMountPath: /select-cache
-    extraArgs: {}
-    image:
-        tag: v1.103.0-cluster
-    port: "8481"
-    replicaCount: 2
-    resources: {}
-    storage:
-        volumeClaimTemplate:
-            spec:
-                resources:
-                    requests:
-                        storage: 2Gi
-vmstorage:
-    image:
-        tag: v1.103.0-cluster
-    replicaCount: 2
-    resources: {}
-    storage:
-        volumeClaimTemplate:
-            spec:
-                resources:
-                    requests:
-                        storage: 10Gi
-    storageDataPath: /vm-data
-
-

full spec for VMCluster CRD. Allowed values described here

-
vmcluster.spec.retentionPeriodstring
-"1"
-
-

Data retention period. Possible units character: h(ours), d(ays), w(eeks), y(ears), if no unit character specified - month. The minimum retention period is 24h. See these docs

-
vmsingle.annotationsobject
-{}
-
-
vmsingle.enabledbool
-true
-
-
vmsingle.ingress.annotationsobject
-{}
-
-
vmsingle.ingress.enabledbool
-false
-
-
vmsingle.ingress.extraPathslist
-[]
-
-
vmsingle.ingress.hosts[0]string
-vmsingle.domain.com
-
-
vmsingle.ingress.labelsobject
-{}
-
-
vmsingle.ingress.pathstring
-""
-
-
vmsingle.ingress.pathTypestring
-Prefix
-
-
vmsingle.ingress.tlslist
-[]
-
-
vmsingle.specobject
-extraArgs: {}
-image:
-    tag: v1.103.0
-port: "8429"
-replicaCount: 1
-retentionPeriod: "1"
-storage:
-    accessModes:
-        - ReadWriteOnce
-    resources:
-        requests:
-            storage: 20Gi
-
-

full spec for VMSingle CRD. Allowed values describe here

-
vmsingle.spec.retentionPeriodstring
-"1"
-
-

Data retention period. Possible units character: h(ours), d(ays), w(eeks), y(ears), if no unit character specified - month. The minimum retention period is 24h. See these docs

-
- diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md.gotmpl b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md.gotmpl deleted file mode 100644 index 7ac63b6e..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/README.md.gotmpl +++ /dev/null @@ -1,300 +0,0 @@ -{{ template "chart.typeBadge" . }} {{ template "chart.versionBadge" . }} -[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/victoriametrics)](https://artifacthub.io/packages/helm/victoriametrics/victoria-metrics-k8s-stack) - -{{ template "chart.description" . }} - -* [Overview](#Overview) -* [Configuration](#Configuration) -* [Prerequisites](#Prerequisites) -* [Dependencies](#Dependencies) -* [Quick Start](#How-to-install) -* [Uninstall](#How-to-uninstall) -* [Version Upgrade](#Upgrade-guide) -* [Troubleshooting](#Troubleshooting) -* [Values](#Parameters) - - -## Overview -This chart is an All-in-one solution to start monitoring kubernetes cluster. -It installs multiple dependency charts like [grafana](https://github.com/grafana/helm-charts/tree/main/charts/grafana), [node-exporter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter), [kube-state-metrics](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics) and [victoria-metrics-operator](https://github.com/VictoriaMetrics/helm-charts/tree/master/charts/victoria-metrics-operator). -Also it installs Custom Resources like [VMSingle](https://docs.victoriametrics.com/operator/quick-start#vmsingle), [VMCluster](https://docs.victoriametrics.com/operator/quick-start#vmcluster), [VMAgent](https://docs.victoriametrics.com/operator/quick-start#vmagent), [VMAlert](https://docs.victoriametrics.com/operator/quick-start#vmalert). 
- -By default, the operator [converts all existing prometheus-operator API objects](https://docs.victoriametrics.com/operator/quick-start#migration-from-prometheus-operator-objects) into corresponding VictoriaMetrics Operator objects. - -To enable metrics collection for kubernetes this chart installs multiple scrape configurations for kuberenetes components like kubelet and kube-proxy, etc. Metrics collection is done by [VMAgent](https://docs.victoriametrics.com/operator/quick-start#vmagent). So if want to ship metrics to external VictoriaMetrics database you can disable VMSingle installation by setting `vmsingle.enabled` to `false` and setting `vmagent.vmagentSpec.remoteWrite.url` to your external VictoriaMetrics database. - -This chart also installs bunch of dashboards and recording rules from [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus) project. - -![Overview](img/k8s-stack-overview.png) - - -## Configuration - -Configuration of this chart is done through helm values. - -### Dependencies - -Dependencies can be enabled or disabled by setting `enabled` to `true` or `false` in `values.yaml` file. - -**!Important:** for dependency charts anything that you can find in values.yaml of dependency chart can be configured in this chart under key for that dependency. For example if you want to configure `grafana` you can find all possible configuration options in [values.yaml](https://github.com/grafana/helm-charts/blob/main/charts/grafana/values.yaml) and you should set them in values for this chart under grafana: key. For example if you want to configure `grafana.persistence.enabled` you should set it in values.yaml like this: -```yaml -################################################# -### dependencies ##### -################################################# -# Grafana dependency chart configuration. 
For possible values refer to https://github.com/grafana/helm-charts/tree/main/charts/grafana#configuration -grafana: - enabled: true - persistence: - type: pvc - enabled: false -``` - -### VictoriaMetrics components - -This chart installs multiple VictoriaMetrics components using Custom Resources that are managed by [victoria-metrics-operator](https://docs.victoriametrics.com/operator/design) -Each resource can be configured using `spec` of that resource from API docs of [victoria-metrics-operator](https://docs.victoriametrics.com/operator/api). For example if you want to configure `VMAgent` you can find all possible configuration options in [API docs](https://docs.victoriametrics.com/operator/api#vmagent) and you should set them in values for this chart under `vmagent.spec` key. For example if you want to configure `remoteWrite.url` you should set it in values.yaml like this: -```yaml -vmagent: - spec: - remoteWrite: - - url: "https://insert.vmcluster.domain.com/insert/0/prometheus/api/v1/write" -``` - -### ArgoCD issues - -#### Operator self signed certificates -When deploying K8s stack using ArgoCD without Cert Manager (`.Values.victoria-metrics-operator.admissionWebhooks.certManager.enabled: false`) -it will rerender operator's webhook certificates on each sync since Helm `lookup` function is not respected by ArgoCD. -To prevent this please update you K8s stack Application `spec.syncPolicy` and `spec.ignoreDifferences` with a following: - -```yaml -apiVersion: argoproj.io/v1alpha1 -kind: Application -... -spec: - ... 
- syncPolicy: - syncOptions: - # https://argo-cd.readthedocs.io/en/stable/user-guide/sync-options/#respect-ignore-difference-configs - # argocd must also ignore difference during apply stage - # otherwise it ll silently override changes and cause a problem - - RespectIgnoreDifferences=true - ignoreDifferences: - - group: "" - kind: Secret - name: -validation - namespace: kube-system - jsonPointers: - - /data - - group: admissionregistration.k8s.io - kind: ValidatingWebhookConfiguration - name: -admission - jqPathExpressions: - - '.webhooks[]?.clientConfig.caBundle' -``` -where `` is output of `{{"{{"}} include "vm-operator.fullname" {{"}}"}}` for your setup - -#### `metadata.annotations: Too long: must have at most 262144 bytes` on dashboards - -If one of dashboards ConfigMap is failing with error `Too long: must have at most 262144 bytes`, please make sure you've added `argocd.argoproj.io/sync-options: ServerSideApply=true` annotation to your dashboards: - -```yaml -grafana: - sidecar: - dashboards: - additionalDashboardAnnotations - argocd.argoproj.io/sync-options: ServerSideApply=true -``` - -argocd.argoproj.io/sync-options: ServerSideApply=true - -### Rules and dashboards - -This chart by default install multiple dashboards and recording rules from [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus) -you can disable dashboards with `defaultDashboardsEnabled: false` and `experimentalDashboardsEnabled: false` -and rules can be configured under `defaultRules` - -### Prometheus scrape configs -This chart installs multiple scrape configurations for kubernetes monitoring. They are configured under `#ServiceMonitors` section in `values.yaml` file. 
For example if you want to configure scrape config for `kubelet` you should set it in values.yaml like this: -```yaml -kubelet: - enabled: true - # spec for VMNodeScrape crd - # https://docs.victoriametrics.com/operator/api#vmnodescrapespec - spec: - interval: "30s" -``` - -### Using externally managed Grafana - -If you want to use an externally managed Grafana instance but still want to use the dashboards provided by this chart you can set - `grafana.enabled` to `false` and set `defaultDashboardsEnabled` to `true`. This will install the dashboards - but will not install Grafana. - -For example: -```yaml -defaultDashboardsEnabled: true - -grafana: - enabled: false -``` - -This will create ConfigMaps with dashboards to be imported into Grafana. - -If additional configuration for labels or annotations is needed in order to import dashboard to an existing Grafana you can -set `.grafana.sidecar.dashboards.additionalDashboardLabels` or `.grafana.sidecar.dashboards.additionalDashboardAnnotations` in `values.yaml`: - -For example: -```yaml -defaultDashboardsEnabled: true - -grafana: - enabled: false - sidecar: - dashboards: - additionalDashboardLabels: - key: value - additionalDashboardAnnotations: - key: value -``` - -## Prerequisites - -* Install the follow packages: ``git``, ``kubectl``, ``helm``, ``helm-docs``. See this [tutorial](../../REQUIREMENTS.md). - -* Add dependency chart repositories - -```console -helm repo add grafana https://grafana.github.io/helm-charts -helm repo add prometheus-community https://prometheus-community.github.io/helm-charts -helm repo update -``` - -* PV support on underlying infrastructure. - -{{ include "chart.installSection" . }} - -### Install locally (Minikube) - -To run VictoriaMetrics stack locally it's possible to use [Minikube](https://github.com/kubernetes/minikube). 
To avoid dashboards and alert rules issues please follow the steps below: - -Run Minikube cluster - -``` -minikube start --container-runtime=containerd --extra-config=scheduler.bind-address=0.0.0.0 --extra-config=controller-manager.bind-address=0.0.0.0 -``` - -Install helm chart - -``` -helm install [RELEASE_NAME] vm/victoria-metrics-k8s-stack -f values.yaml -f values.minikube.yaml -n NAMESPACE --debug --dry-run -``` - -{{ include "chart.uninstallSection" . }} - -CRDs created by this chart are not removed by default and should be manually cleaned up: - -```console -kubectl get crd | grep victoriametrics.com | awk '{print $1 }' | xargs -i kubectl delete crd {} -``` - -## Troubleshooting - -- If you cannot install helm chart with error `configmap already exist`. It could happen because of name collisions, if you set too long release name. - Kubernetes by default, allows only 63 symbols at resource names and all resource names are trimmed by helm to 63 symbols. - To mitigate it, use shorter name for helm chart release name, like: -```bash -# stack - is short enough -helm upgrade -i stack vm/victoria-metrics-k8s-stack -``` - Or use override for helm chart release name: -```bash -helm upgrade -i some-very-long-name vm/victoria-metrics-k8s-stack --set fullnameOverride=stack -``` - - -## Upgrade guide - -Usually, helm upgrade doesn't requires manual actions. Just execute command: - -```console -$ helm upgrade [RELEASE_NAME] vm/victoria-metrics-k8s-stack -``` - -But release with CRD update can only be patched manually with kubectl. -Since helm does not perform a CRD update, we recommend that you always perform this when updating the helm-charts version: - -```console -# 1. check the changes in CRD -$ helm show crds vm/victoria-metrics-k8s-stack --version [YOUR_CHART_VERSION] | kubectl diff -f - - -# 2. 
apply the changes (update CRD) -$ helm show crds vm/victoria-metrics-k8s-stack --version [YOUR_CHART_VERSION] | kubectl apply -f - --server-side -``` - -All other manual actions upgrades listed below: - - -### Upgrade to 0.13.0 - - -- node-exporter starting from version 4.0.0 is using the Kubernetes recommended labels. Therefore you have to delete the daemonset before you upgrade. - -```bash -kubectl delete daemonset -l app=prometheus-node-exporter -``` -- scrape configuration for kubernetes components was moved from `vmServiceScrape.spec` section to `spec` section. If you previously modified scrape configuration you need to update your `values.yaml` - -- `grafana.defaultDashboardsEnabled` was renamed to `defaultDashboardsEnabled` (moved to top level). You may need to update it in your `values.yaml` - - -### Upgrade to 0.6.0 - - - All `CRD` must be update to the lastest version with command: - -```bash -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/helm-charts/master/charts/victoria-metrics-k8s-stack/crds/crd.yaml - -``` - -### Upgrade to 0.4.0 - - All `CRD` must be update to `v1` version with command: - -```bash -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/helm-charts/master/charts/victoria-metrics-k8s-stack/crds/crd.yaml - -``` - -### Upgrade from 0.2.8 to 0.2.9 - - Update `VMAgent` crd - -command: -```bash -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.16.0/config/crd/bases/operator.victoriametrics.com_vmagents.yaml -``` - - ### Upgrade from 0.2.5 to 0.2.6 - -New CRD added to operator - `VMUser` and `VMAuth`, new fields added to exist crd. 
-Manual commands: -```bash -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmusers.yaml -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmauths.yaml -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmalerts.yaml -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmagents.yaml -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmsingles.yaml -kubectl apply -f https://raw.githubusercontent.com/VictoriaMetrics/operator/v0.15.0/config/crd/bases/operator.victoriametrics.com_vmclusters.yaml -``` - -{{ include "chart.helmDocs" . }} - -## Parameters - -The following tables lists the configurable parameters of the chart and their default values. - -Change the values according to the need of the environment in ``victoria-metrics-k8s-stack/values.yaml`` file. - -{{ template "chart.valuesTableHtml" . }} diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_GUIDE.md b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_GUIDE.md deleted file mode 100644 index 12e01eb5..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_GUIDE.md +++ /dev/null @@ -1,40 +0,0 @@ -# Release process guidance - -## Update version for VictoriaMetrics kubernetes monitoring stack - -1. Update dependency requirements in [Chart.yml](https://github.com/VictoriaMetrics/helm-charts/blob/master/charts/victoria-metrics-k8s-stack/Chart.yaml) -2. Apply changes via `helm dependency update` -3. Update image tag in chart values: - -
- - ```console - make sync-rules - make sync-dashboards - ``` -
-4. Bump version of the victoria-metrics-k8s-stack [Chart.yml](https://github.com/VictoriaMetrics/helm-charts/blob/master/charts/victoria-metrics-k8s-stack/Chart.yaml) -5. Run linter: - -
- - ```console - make lint - ``` - -
-6. Render templates locally to check for errors: - -
- - ```console - helm template vm-k8s-stack ./charts/victoria-metrics-k8s-stack --output-dir out --values ./charts/victoria-metrics-k8s-stack/values.yaml --debug - ``` - -
-7. Test updated chart by installing it to your kubernetes cluster. -8. Update docs with - ```console - helm-docs - ``` -9. Commit the changes and send a [PR](https://github.com/VictoriaMetrics/helm-charts/pulls) diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_NOTES.md b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_NOTES.md deleted file mode 100644 index 0a413896..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/RELEASE_NOTES.md +++ /dev/null @@ -1,12 +0,0 @@ -# Release notes for version 0.25.17 - -**Release date:** 2024-09-20 - -![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion&message=v1.102.1&color=success&logo=) -![Helm: v3](https://img.shields.io/static/v1?label=Helm&message=v3&color=informational&logo=helm) - -- Added VMAuth to k8s stack. See [this issue](https://github.com/VictoriaMetrics/helm-charts/issues/829) -- Fixed ETCD dashboard -- Use path prefix from args as a default path prefix for ingress. Related [issue](https://github.com/VictoriaMetrics/helm-charts/issues/1260) -- Allow using vmalert without notifiers configuration. Note that it is required to use `.vmalert.spec.extraArgs["notifiers.blackhole"]: true` in order to start vmalert with a blackhole configuration. 
- diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/_changelog.md b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/_changelog.md deleted file mode 100644 index 79e80b88..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/_changelog.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -weight: 1 -title: CHANGELOG -menu: - docs: - weight: 1 - identifier: helm-victoriametrics-k8s-stack-changelog - parent: helm-victoriametrics-k8s-stack -url: /helm/victoriametrics-k8s-stack/changelog -aliases: - - /helm/victoriametrics-k8s-stack/changelog/index.html ---- -{{% content "CHANGELOG.md" %}} diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/_index.md b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/_index.md deleted file mode 100644 index d23dc833..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/_index.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -weight: 9 -title: VictoriaMetrics K8s Stack -menu: - docs: - parent: helm - weight: 9 - identifier: helm-victoriametrics-k8s-stack -url: /helm/victoriametrics-k8s-stack -aliases: - - /helm/victoriametrics-k8s-stack/index.html ---- -{{% content "README.md" %}} diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/etcd.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/etcd.yaml deleted file mode 100644 index 9484dd86..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/etcd.yaml +++ /dev/null @@ -1,165 +0,0 @@ -condition: '{{ .Values.kubeEtcd.enabled }}' -name: etcd -rules: -- alert: etcdMembersDown - annotations: - description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": members are down ({{`{{`}} $value {{`}}`}}).' - summary: 'etcd cluster members are down.' 
- condition: '{{ true }}' - expr: |- - max without (endpoint) ( - sum without (instance) (up{job=~".*etcd.*"} == bool 0) - or - count without (To) ( - sum without (instance) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[120s])) > 0.01 - ) - ) - > 0 - for: 10m - labels: - severity: critical -- alert: etcdInsufficientMembers - annotations: - description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).' - summary: 'etcd cluster has insufficient number of members.' - condition: '{{ true }}' - expr: sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"}) without (instance) + 1) / 2) - for: 3m - labels: - severity: critical -- alert: etcdNoLeader - annotations: - description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member {{`{{`}} $labels.instance {{`}}`}} has no leader.' - summary: 'etcd cluster has no leader.' - condition: '{{ true }}' - expr: etcd_server_has_leader{job=~".*etcd.*"} == 0 - for: 1m - labels: - severity: critical -- alert: etcdHighNumberOfLeaderChanges - annotations: - description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.' - summary: 'etcd cluster has high number of leader changes.' - condition: '{{ true }}' - expr: increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 4 - for: 5m - labels: - severity: warning -- alert: etcdHighNumberOfFailedGRPCRequests - annotations: - description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' 
- summary: 'etcd cluster has high number of failed grpc requests.' - condition: '{{ true }}' - expr: |- - 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) - / - sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) - > 1 - for: 10m - labels: - severity: warning -- alert: etcdHighNumberOfFailedGRPCRequests - annotations: - description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' - summary: 'etcd cluster has high number of failed grpc requests.' - condition: '{{ true }}' - expr: |- - 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) - / - sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) - > 5 - for: 5m - labels: - severity: critical -- alert: etcdGRPCRequestsSlow - annotations: - description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile of gRPC requests is {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}} for {{`{{`}} $labels.grpc_method {{`}}`}} method.' - summary: 'etcd grpc requests are slow' - condition: '{{ true }}' - expr: |- - histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type)) - > 0.15 - for: 10m - labels: - severity: critical -- alert: etcdMemberCommunicationSlow - annotations: - description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member communication with {{`{{`}} $labels.To {{`}}`}} is taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' 
- summary: 'etcd cluster member communication is slow.' - condition: '{{ true }}' - expr: |- - histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m])) - > 0.15 - for: 10m - labels: - severity: warning -- alert: etcdHighNumberOfFailedProposals - annotations: - description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last 30 minutes on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' - summary: 'etcd cluster has high number of proposal failures.' - condition: '{{ true }}' - expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5 - for: 15m - labels: - severity: warning -- alert: etcdHighFsyncDurations - annotations: - description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fsync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' - summary: 'etcd cluster 99th percentile fsync durations are too high.' - condition: '{{ true }}' - expr: |- - histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) - > 0.5 - for: 10m - labels: - severity: warning -- alert: etcdHighFsyncDurations - annotations: - description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fsync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' - summary: 'etcd cluster 99th percentile fsync durations are too high.' - condition: '{{ true }}' - expr: |- - histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) - > 1 - for: 10m - labels: - severity: critical -- alert: etcdHighCommitDurations - annotations: - description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile commit durations {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' - summary: 'etcd cluster 99th percentile commit durations are too high.' 
- condition: '{{ true }}' - expr: |- - histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m])) - > 0.25 - for: 10m - labels: - severity: warning -- alert: etcdDatabaseQuotaLowSpace - annotations: - description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": database size exceeds the defined quota on etcd instance {{`{{`}} $labels.instance {{`}}`}}, please defrag or increase the quota as the writes to etcd will be disabled when it is full.' - summary: 'etcd cluster database is running full.' - condition: '{{ true }}' - expr: (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > 95 - for: 10m - labels: - severity: critical -- alert: etcdExcessiveDatabaseGrowth - annotations: - description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": Predicting running out of disk space in the next four hours, based on write observations within the past four hours on etcd instance {{`{{`}} $labels.instance {{`}}`}}, please check as it might be disruptive.' - summary: 'etcd cluster database growing very fast.' - condition: '{{ true }}' - expr: predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60) > etcd_server_quota_backend_bytes{job=~".*etcd.*"} - for: 10m - labels: - severity: warning -- alert: etcdDatabaseHighFragmentationRatio - annotations: - description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": database size in use on instance {{`{{`}} $labels.instance {{`}}`}} is {{`{{`}} $value | humanizePercentage {{`}}`}} of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.' - runbook_url: 'https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation' - summary: 'etcd database size in use is less than 50% of the actual allocated storage.' 
- condition: '{{ true }}' - expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5 and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600 - for: 10m - labels: - severity: warning diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml deleted file mode 100644 index ae0fa110..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/general.rules.yaml +++ /dev/null @@ -1,53 +0,0 @@ -condition: '{{ true }}' -name: general.rules -rules: -- alert: TargetDown - annotations: - description: '{{`{{`}} printf "%.4g" $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.service {{`}}`}} targets in {{`{{`}} $labels.namespace {{`}}`}} namespace are down.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/general/targetdown' - summary: 'One or more targets are unreachable.' - condition: '{{ true }}' - expr: 100 * (count(up == 0) BY (job,namespace,service,{{ .Values.global.clusterLabel }}) / count(up) BY (job,namespace,service,{{ .Values.global.clusterLabel }})) > 10 - for: 10m - labels: - severity: warning -- alert: Watchdog - annotations: - description: 'This is an alert meant to ensure that the entire alerting pipeline is functional. - - This alert is always firing, therefore it should always be firing in Alertmanager - - and always fire against a receiver. There are integrations with various notification - - mechanisms that send a notification when this alert is not firing. For example the - - "DeadMansSnitch" integration in PagerDuty. - - ' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/general/watchdog' - summary: 'An alert that should always be firing to certify that Alertmanager is working properly.' 
- condition: '{{ true }}' - expr: vector(1) - labels: - severity: ok -- alert: InfoInhibitor - annotations: - description: 'This is an alert that is used to inhibit info alerts. - - By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with - - other alerts. - - This alert fires whenever there''s a severity="info" alert, and stops firing when another alert with a - - severity of ''warning'' or ''critical'' starts firing on the same namespace. - - This alert should be routed to a null receiver and configured to inhibit alerts with severity="info". - - ' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/general/infoinhibitor' - summary: 'Info-level alert inhibition.' - condition: '{{ true }}' - expr: ALERTS{severity = "info"} == 1 unless on (namespace,{{ .Values.global.clusterLabel }}) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1 - labels: - severity: major diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_usage_seconds_total.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_usage_seconds_total.yaml deleted file mode 100644 index 85ed9d09..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_cpu_usage_seconds_total.yaml +++ /dev/null @@ -1,11 +0,0 @@ -condition: '{{ true }}' -name: k8s.rules.container_cpu_usage_seconds_total -rules: -- condition: '{{ true }}' - expr: |- - sum by (namespace,pod,container,{{ .Values.global.clusterLabel }}) ( - irate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m]) - ) * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left(node) topk by (namespace,pod,{{ .Values.global.clusterLabel }}) ( - 1, max by (namespace,pod,node,{{ .Values.global.clusterLabel }}) (kube_pod_info{node!=""}) - ) - 
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_cache.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_cache.yaml deleted file mode 100644 index 3b22ddd6..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_cache.yaml +++ /dev/null @@ -1,10 +0,0 @@ -condition: '{{ true }}' -name: k8s.rules.container_memory_cache -rules: -- condition: '{{ true }}' - expr: |- - container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} - * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left(node) topk by (namespace,pod,{{ .Values.global.clusterLabel }}) (1, - max by (namespace,pod,node,{{ .Values.global.clusterLabel }}) (kube_pod_info{node!=""}) - ) - record: node_namespace_pod_container:container_memory_cache diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_rss.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_rss.yaml deleted file mode 100644 index 6a5f6df3..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_rss.yaml +++ /dev/null @@ -1,10 +0,0 @@ -condition: '{{ true }}' -name: k8s.rules.container_memory_rss -rules: -- condition: '{{ true }}' - expr: |- - container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} - * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left(node) topk by (namespace,pod,{{ .Values.global.clusterLabel }}) (1, - max by (namespace,pod,node,{{ .Values.global.clusterLabel }}) (kube_pod_info{node!=""}) - ) - record: node_namespace_pod_container:container_memory_rss diff --git 
a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_swap.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_swap.yaml deleted file mode 100644 index 481e7711..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_swap.yaml +++ /dev/null @@ -1,10 +0,0 @@ -condition: '{{ true }}' -name: k8s.rules.container_memory_swap -rules: -- condition: '{{ true }}' - expr: |- - container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} - * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left(node) topk by (namespace,pod,{{ .Values.global.clusterLabel }}) (1, - max by (namespace,pod,node,{{ .Values.global.clusterLabel }}) (kube_pod_info{node!=""}) - ) - record: node_namespace_pod_container:container_memory_swap diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_working_set_bytes.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_working_set_bytes.yaml deleted file mode 100644 index f5ca9504..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_memory_working_set_bytes.yaml +++ /dev/null @@ -1,10 +0,0 @@ -condition: '{{ true }}' -name: k8s.rules.container_memory_working_set_bytes -rules: -- condition: '{{ true }}' - expr: |- - container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""} - * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left(node) topk by (namespace,pod,{{ .Values.global.clusterLabel }}) (1, - max by (namespace,pod,node,{{ .Values.global.clusterLabel }}) (kube_pod_info{node!=""}) - ) - record: node_namespace_pod_container:container_memory_working_set_bytes diff --git 
a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_resource.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_resource.yaml deleted file mode 100644 index 260a20e2..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.container_resource.yaml +++ /dev/null @@ -1,79 +0,0 @@ -condition: '{{ true }}' -name: k8s.rules.container_resource -rules: -- condition: '{{ true }}' - expr: |- - kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} * on (namespace,pod,{{ .Values.global.clusterLabel }}) - group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( - (kube_pod_status_phase{phase=~"Pending|Running"} == 1) - ) - record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests -- condition: '{{ true }}' - expr: |- - sum by (namespace,{{ .Values.global.clusterLabel }}) ( - sum by (namespace,pod,{{ .Values.global.clusterLabel }}) ( - max by (namespace,pod,container,{{ .Values.global.clusterLabel }}) ( - kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} - ) * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 - ) - ) - ) - record: namespace_memory:kube_pod_container_resource_requests:sum -- condition: '{{ true }}' - expr: |- - kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} * on (namespace,pod,{{ .Values.global.clusterLabel }}) - group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( - (kube_pod_status_phase{phase=~"Pending|Running"} == 1) - ) - record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests -- condition: '{{ true }}' - expr: |- - sum by (namespace,{{ .Values.global.clusterLabel }}) ( - sum by (namespace,pod,{{ 
.Values.global.clusterLabel }}) ( - max by (namespace,pod,container,{{ .Values.global.clusterLabel }}) ( - kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} - ) * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 - ) - ) - ) - record: namespace_cpu:kube_pod_container_resource_requests:sum -- condition: '{{ true }}' - expr: |- - kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on (namespace,pod,{{ .Values.global.clusterLabel }}) - group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( - (kube_pod_status_phase{phase=~"Pending|Running"} == 1) - ) - record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits -- condition: '{{ true }}' - expr: |- - sum by (namespace,{{ .Values.global.clusterLabel }}) ( - sum by (namespace,pod,{{ .Values.global.clusterLabel }}) ( - max by (namespace,pod,container,{{ .Values.global.clusterLabel }}) ( - kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} - ) * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 - ) - ) - ) - record: namespace_memory:kube_pod_container_resource_limits:sum -- condition: '{{ true }}' - expr: |- - kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on (namespace,pod,{{ .Values.global.clusterLabel }}) - group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( - (kube_pod_status_phase{phase=~"Pending|Running"} == 1) - ) - record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits -- condition: '{{ true }}' - expr: |- - sum by (namespace,{{ .Values.global.clusterLabel }}) ( - sum by (namespace,pod,{{ .Values.global.clusterLabel }}) ( - max by (namespace,pod,container,{{ 
.Values.global.clusterLabel }}) ( - kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} - ) * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left() max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 - ) - ) - ) - record: namespace_cpu:kube_pod_container_resource_limits:sum diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.pod_owner.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.pod_owner.yaml deleted file mode 100644 index 3cefefe0..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/k8s.rules.pod_owner.yaml +++ /dev/null @@ -1,54 +0,0 @@ -condition: '{{ true }}' -name: k8s.rules.pod_owner -rules: -- condition: '{{ true }}' - expr: |- - max by (namespace,workload,pod,{{ .Values.global.clusterLabel }}) ( - label_replace( - label_replace( - kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"}, - "replicaset", "$1", "owner_name", "(.*)" - ) * on (replicaset,namespace,{{ .Values.global.clusterLabel }}) group_left(owner_name) topk by (replicaset,namespace,{{ .Values.global.clusterLabel }}) ( - 1, max by (replicaset,namespace,owner_name,{{ .Values.global.clusterLabel }}) ( - kube_replicaset_owner{job="kube-state-metrics"} - ) - ), - "workload", "$1", "owner_name", "(.*)" - ) - ) - labels: - workload_type: deployment - record: namespace_workload_pod:kube_pod_owner:relabel -- condition: '{{ true }}' - expr: |- - max by (namespace,workload,pod,{{ .Values.global.clusterLabel }}) ( - label_replace( - kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"}, - "workload", "$1", "owner_name", "(.*)" - ) - ) - labels: - workload_type: daemonset - record: namespace_workload_pod:kube_pod_owner:relabel -- condition: '{{ true }}' - expr: |- - max by (namespace,workload,pod,{{ .Values.global.clusterLabel }}) ( - 
label_replace( - kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"}, - "workload", "$1", "owner_name", "(.*)" - ) - ) - labels: - workload_type: statefulset - record: namespace_workload_pod:kube_pod_owner:relabel -- condition: '{{ true }}' - expr: |- - max by (namespace,workload,pod,{{ .Values.global.clusterLabel }}) ( - label_replace( - kube_pod_owner{job="kube-state-metrics", owner_kind="Job"}, - "workload", "$1", "owner_name", "(.*)" - ) - ) - labels: - workload_type: job - record: namespace_workload_pod:kube_pod_owner:relabel diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-availability.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-availability.rules.yaml deleted file mode 100644 index aab98ce1..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-availability.rules.yaml +++ /dev/null @@ -1,128 +0,0 @@ -condition: '{{ .Values.kubeApiServer.enabled }}' -interval: 3m -name: kube-apiserver-availability.rules -rules: -- condition: '{{ true }}' - expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30 - record: code_verb:apiserver_request_total:increase30d -- condition: '{{ true }}' - expr: sum by (code,{{ .Values.global.clusterLabel }}) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) - labels: - verb: read - record: code:apiserver_request_total:increase30d -- condition: '{{ true }}' - expr: sum by (code,{{ .Values.global.clusterLabel }}) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - labels: - verb: write - record: code:apiserver_request_total:increase30d -- condition: '{{ true }}' - expr: sum by (verb,scope,{{ .Values.global.clusterLabel }}) (increase(apiserver_request_sli_duration_seconds_count{job="kube-apiserver"}[1h])) - record: 
cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h -- condition: '{{ true }}' - expr: sum by (verb,scope,{{ .Values.global.clusterLabel }}) (avg_over_time(cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h[30d]) * 24 * 30) - record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d -- condition: '{{ true }}' - expr: sum by (verb,scope,le,{{ .Values.global.clusterLabel }}) (increase(apiserver_request_sli_duration_seconds_bucket[1h])) - record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h -- condition: '{{ true }}' - expr: sum by (verb,scope,le,{{ .Values.global.clusterLabel }}) (avg_over_time(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h[30d]) * 24 * 30) - record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d -- condition: '{{ true }}' - expr: |- - 1 - ( - ( - # write too slow - sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - - - sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) - ) + - ( - # read too slow - sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"}) - - - ( - ( - sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) - or - vector(0) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) - + - sum by ({{ .Values.global.clusterLabel }}) 
(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) - ) - ) + - # errors - sum by ({{ .Values.global.clusterLabel }}) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (code:apiserver_request_total:increase30d) - labels: - verb: all - record: apiserver_request:availability30d -- condition: '{{ true }}' - expr: |- - 1 - ( - sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"}) - - - ( - # too slow - ( - sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le="1"}) - or - vector(0) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le="5"}) - + - sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le="30"}) - ) - + - # errors - sum by ({{ .Values.global.clusterLabel }}) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (code:apiserver_request_total:increase30d{verb="read"}) - labels: - verb: read - record: apiserver_request:availability30d -- condition: '{{ true }}' - expr: |- - 1 - ( - ( - # too slow - sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) - - - sum by ({{ .Values.global.clusterLabel }}) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le="1"}) - ) - + - # errors - sum by ({{ .Values.global.clusterLabel }}) 
(code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (code:apiserver_request_total:increase30d{verb="write"}) - labels: - verb: write - record: apiserver_request:availability30d -- condition: '{{ true }}' - expr: sum by (code,resource,{{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[5m])) - labels: - verb: read - record: code_resource:apiserver_request_total:rate5m -- condition: '{{ true }}' - expr: sum by (code,resource,{{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) - labels: - verb: write - record: code_resource:apiserver_request_total:rate5m -- condition: '{{ true }}' - expr: sum by (code,verb,{{ .Values.global.clusterLabel }}) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h])) - record: code_verb:apiserver_request_total:increase1h -- condition: '{{ true }}' - expr: sum by (code,verb,{{ .Values.global.clusterLabel }}) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h])) - record: code_verb:apiserver_request_total:increase1h -- condition: '{{ true }}' - expr: sum by (code,verb,{{ .Values.global.clusterLabel }}) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h])) - record: code_verb:apiserver_request_total:increase1h -- condition: '{{ true }}' - expr: sum by (code,verb,{{ .Values.global.clusterLabel }}) (increase(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) - record: code_verb:apiserver_request_total:increase1h diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-burnrate.rules.yaml 
b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-burnrate.rules.yaml deleted file mode 100644 index 6a87f5a7..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-burnrate.rules.yaml +++ /dev/null @@ -1,318 +0,0 @@ -condition: '{{ .Values.kubeApiServer.enabled }}' -name: kube-apiserver-burnrate.rules -rules: -- condition: '{{ true }}' - expr: |- - ( - ( - # too slow - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1d])) - - - ( - ( - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1d])) - or - vector(0) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1d])) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1d])) - ) - ) - + - # errors - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[1d])) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[1d])) - labels: - verb: read - record: apiserver_request:burnrate1d -- condition: '{{ true }}' - expr: |- - ( - ( - # too slow - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1h])) - - - ( - ( - sum by ({{ 
.Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[1h])) - or - vector(0) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[1h])) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[1h])) - ) - ) - + - # errors - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[1h])) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[1h])) - labels: - verb: read - record: apiserver_request:burnrate1h -- condition: '{{ true }}' - expr: |- - ( - ( - # too slow - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[2h])) - - - ( - ( - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[2h])) - or - vector(0) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[2h])) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[2h])) - ) - ) - + - # errors - sum by ({{ 
.Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[2h])) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[2h])) - labels: - verb: read - record: apiserver_request:burnrate2h -- condition: '{{ true }}' - expr: |- - ( - ( - # too slow - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[30m])) - - - ( - ( - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[30m])) - or - vector(0) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[30m])) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[30m])) - ) - ) - + - # errors - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[30m])) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[30m])) - labels: - verb: read - record: apiserver_request:burnrate30m -- condition: '{{ true }}' - expr: |- - ( - ( - # too slow - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[3d])) - - - ( - ( - sum by ({{ .Values.global.clusterLabel }}) 
(rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[3d])) - or - vector(0) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[3d])) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[3d])) - ) - ) - + - # errors - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[3d])) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[3d])) - labels: - verb: read - record: apiserver_request:burnrate3d -- condition: '{{ true }}' - expr: |- - ( - ( - # too slow - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m])) - - - ( - ( - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[5m])) - or - vector(0) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[5m])) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[5m])) - ) - ) - + - # errors - sum by ({{ .Values.global.clusterLabel }}) 
(rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[5m])) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[5m])) - labels: - verb: read - record: apiserver_request:burnrate5m -- condition: '{{ true }}' - expr: |- - ( - ( - # too slow - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[6h])) - - - ( - ( - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le="1"}[6h])) - or - vector(0) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le="5"}[6h])) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le="30"}[6h])) - ) - ) - + - # errors - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET",code=~"5.."}[6h])) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"LIST|GET"}[6h])) - labels: - verb: read - record: apiserver_request:burnrate6h -- condition: '{{ true }}' - expr: |- - ( - ( - # too slow - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1d])) - - - sum by ({{ .Values.global.clusterLabel }}) 
(rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1d])) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) - labels: - verb: write - record: apiserver_request:burnrate1d -- condition: '{{ true }}' - expr: |- - ( - ( - # too slow - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1h])) - - - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[1h])) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) - labels: - verb: write - record: apiserver_request:burnrate1h -- condition: '{{ true }}' - expr: |- - ( - ( - # too slow - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[2h])) - - - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[2h])) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) - ) - / - 
sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) - labels: - verb: write - record: apiserver_request:burnrate2h -- condition: '{{ true }}' - expr: |- - ( - ( - # too slow - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[30m])) - - - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[30m])) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) - labels: - verb: write - record: apiserver_request:burnrate30m -- condition: '{{ true }}' - expr: |- - ( - ( - # too slow - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[3d])) - - - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[3d])) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) - labels: - verb: write - record: apiserver_request:burnrate3d -- condition: '{{ true }}' - expr: |- - ( - ( - # too slow - sum by ({{ .Values.global.clusterLabel }}) 
(rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m])) - - - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[5m])) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) - labels: - verb: write - record: apiserver_request:burnrate5m -- condition: '{{ true }}' - expr: |- - ( - ( - # too slow - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_count{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[6h])) - - - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le="1"}[6h])) - ) - + - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) - ) - / - sum by ({{ .Values.global.clusterLabel }}) (rate(apiserver_request_total{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) - labels: - verb: write - record: apiserver_request:burnrate6h diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-histogram.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-histogram.rules.yaml deleted file mode 100644 index 4ba999e2..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-histogram.rules.yaml +++ 
/dev/null @@ -1,15 +0,0 @@ -condition: '{{ .Values.kubeApiServer.enabled }}' -name: kube-apiserver-histogram.rules -rules: -- condition: '{{ true }}' - expr: histogram_quantile(0.99, sum by (le,resource,{{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 - labels: - quantile: '0.99' - verb: read - record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile -- condition: '{{ true }}' - expr: histogram_quantile(0.99, sum by (le,resource,{{ .Values.global.clusterLabel }}) (rate(apiserver_request_sli_duration_seconds_bucket{job="kube-apiserver",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 - labels: - quantile: '0.99' - verb: write - record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-slos.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-slos.yaml deleted file mode 100644 index ed75cccd..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-apiserver-slos.yaml +++ /dev/null @@ -1,63 +0,0 @@ -condition: '{{ .Values.kubeApiServer.enabled }}' -name: kube-apiserver-slos -rules: -- alert: KubeAPIErrorBudgetBurn - annotations: - description: 'The API server is burning too much error budget.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn' - summary: 'The API server is burning too much error budget.' 
- condition: '{{ true }}' - expr: |- - sum(apiserver_request:burnrate1h) > (14.40 * 0.01000) - and - sum(apiserver_request:burnrate5m) > (14.40 * 0.01000) - for: 2m - labels: - long: 1h - severity: critical - short: 5m -- alert: KubeAPIErrorBudgetBurn - annotations: - description: 'The API server is burning too much error budget.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn' - summary: 'The API server is burning too much error budget.' - condition: '{{ true }}' - expr: |- - sum(apiserver_request:burnrate6h) > (6.00 * 0.01000) - and - sum(apiserver_request:burnrate30m) > (6.00 * 0.01000) - for: 15m - labels: - long: 6h - severity: critical - short: 30m -- alert: KubeAPIErrorBudgetBurn - annotations: - description: 'The API server is burning too much error budget.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn' - summary: 'The API server is burning too much error budget.' - condition: '{{ true }}' - expr: |- - sum(apiserver_request:burnrate1d) > (3.00 * 0.01000) - and - sum(apiserver_request:burnrate2h) > (3.00 * 0.01000) - for: 1h - labels: - long: 1d - severity: warning - short: 2h -- alert: KubeAPIErrorBudgetBurn - annotations: - description: 'The API server is burning too much error budget.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn' - summary: 'The API server is burning too much error budget.' 
- condition: '{{ true }}' - expr: |- - sum(apiserver_request:burnrate3d) > (1.00 * 0.01000) - and - sum(apiserver_request:burnrate6h) > (1.00 * 0.01000) - for: 3h - labels: - long: 3d - severity: warning - short: 6h diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-general.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-general.rules.yaml deleted file mode 100644 index 68295151..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-general.rules.yaml +++ /dev/null @@ -1,9 +0,0 @@ -condition: '{{ true }}' -name: kube-prometheus-general.rules -rules: -- condition: '{{ true }}' - expr: count without(instance, pod, node) (up == 1) - record: count:up1 -- condition: '{{ true }}' - expr: count without(instance, pod, node) (up == 0) - record: count:up0 diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-node-recording.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-node-recording.rules.yaml deleted file mode 100644 index ee7e514e..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-prometheus-node-recording.rules.yaml +++ /dev/null @@ -1,21 +0,0 @@ -condition: '{{ true }}' -name: kube-prometheus-node-recording.rules -rules: -- condition: '{{ true }}' - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance) - record: instance:node_cpu:rate:sum -- condition: '{{ true }}' - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) - record: instance:node_network_receive_bytes:rate:sum -- condition: '{{ true }}' - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) - record: instance:node_network_transmit_bytes:rate:sum -- condition: '{{ true }}' - 
expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance) - record: instance:node_cpu:ratio -- condition: '{{ true }}' - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) - record: cluster:node_cpu:sum_rate5m -- condition: '{{ true }}' - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu)) - record: cluster:node_cpu:ratio diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-scheduler.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-scheduler.rules.yaml deleted file mode 100644 index 51cdb0e4..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-scheduler.rules.yaml +++ /dev/null @@ -1,48 +0,0 @@ -condition: '{{ .Values.kubeScheduler.enabled }}' -name: kube-scheduler.rules -rules: -- condition: '{{ true }}' - expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.99' - record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile -- condition: '{{ true }}' - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.99' - record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile -- condition: '{{ true }}' - expr: histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.99' - record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile -- condition: '{{ true }}' - expr: histogram_quantile(0.9, 
sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.9' - record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile -- condition: '{{ true }}' - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.9' - record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile -- condition: '{{ true }}' - expr: histogram_quantile(0.9, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.9' - record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile -- condition: '{{ true }}' - expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.5' - record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile -- condition: '{{ true }}' - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.5' - record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile -- condition: '{{ true }}' - expr: histogram_quantile(0.5, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) - labels: - quantile: '0.5' - record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-state-metrics.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-state-metrics.yaml deleted file mode 100644 index e392b7a0..00000000 --- 
a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kube-state-metrics.yaml +++ /dev/null @@ -1,55 +0,0 @@ -condition: '{{ true }}' -name: kube-state-metrics -rules: -- alert: KubeStateMetricsListErrors - annotations: - description: 'kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricslisterrors' - summary: 'kube-state-metrics is experiencing errors in list operations.' - condition: '{{ true }}' - expr: |- - (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m])) by ({{ .Values.global.clusterLabel }}) - / - sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])) by ({{ .Values.global.clusterLabel }})) - > 0.01 - for: 15m - labels: - severity: critical -- alert: KubeStateMetricsWatchErrors - annotations: - description: 'kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricswatcherrors' - summary: 'kube-state-metrics is experiencing errors in watch operations.' - condition: '{{ true }}' - expr: |- - (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m])) by ({{ .Values.global.clusterLabel }}) - / - sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])) by ({{ .Values.global.clusterLabel }})) - > 0.01 - for: 15m - labels: - severity: critical -- alert: KubeStateMetricsShardingMismatch - annotations: - description: 'kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all.' 
- runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricsshardingmismatch' - summary: 'kube-state-metrics sharding is misconfigured.' - condition: '{{ true }}' - expr: stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }}) != 0 - for: 15m - labels: - severity: critical -- alert: KubeStateMetricsShardsMissing - annotations: - description: 'kube-state-metrics shards are missing, some Kubernetes objects are not being exposed.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricsshardsmissing' - summary: 'kube-state-metrics shards are missing.' - condition: '{{ true }}' - expr: |- - 2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }}) - 1 - - - sum( 2 ^ max by (shard_ordinal,{{ .Values.global.clusterLabel }}) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) ) by ({{ .Values.global.clusterLabel }}) - != 0 - for: 15m - labels: - severity: critical diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubelet.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubelet.rules.yaml deleted file mode 100644 index 98ea1a57..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubelet.rules.yaml +++ /dev/null @@ -1,18 +0,0 @@ -condition: '{{ .Values.kubelet.enabled }}' -name: kubelet.rules -rules: -- condition: '{{ true }}' - expr: histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance,le,{{ .Values.global.clusterLabel }}) * on (instance,{{ .Values.global.clusterLabel }}) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) - labels: - quantile: '0.99' - record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile -- condition: '{{ true }}' - expr: 
histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance,le,{{ .Values.global.clusterLabel }}) * on (instance,{{ .Values.global.clusterLabel }}) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) - labels: - quantile: '0.9' - record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile -- condition: '{{ true }}' - expr: histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance,le,{{ .Values.global.clusterLabel }}) * on (instance,{{ .Values.global.clusterLabel }}) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) - labels: - quantile: '0.5' - record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-apps.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-apps.yaml deleted file mode 100644 index 4e398e37..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-apps.yaml +++ /dev/null @@ -1,257 +0,0 @@ -condition: '{{ true }}' -name: kubernetes-apps -rules: -- alert: KubePodCrashLooping - annotations: - description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: "CrashLoopBackOff").' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepodcrashlooping' - summary: 'Pod is crash looping.' 
- condition: '{{ true }}' - expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}[5m]) >= 1 - for: 15m - labels: - severity: warning -- alert: KubePodNotReady - annotations: - description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepodnotready' - summary: 'Pod has been in a non-ready state for more than 15 minutes.' - condition: '{{ true }}' - expr: |- - sum by (namespace,pod,{{ .Values.global.clusterLabel }}) ( - max by (namespace,pod,{{ .Values.global.clusterLabel }}) ( - kube_pod_status_phase{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}", phase=~"Pending|Unknown|Failed"} - ) * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left(owner_kind) topk by (namespace,pod,{{ .Values.global.clusterLabel }}) ( - 1, max by (namespace,pod,owner_kind,{{ .Values.global.clusterLabel }}) (kube_pod_owner{owner_kind!="Job"}) - ) - ) > 0 - for: 15m - labels: - severity: warning -- alert: KubeDeploymentGenerationMismatch - annotations: - description: 'Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.' 
- runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentgenerationmismatch' - summary: 'Deployment generation mismatch due to possible roll-back' - condition: '{{ true }}' - expr: |- - kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - != - kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - for: 15m - labels: - severity: warning -- alert: KubeDeploymentReplicasMismatch - annotations: - description: 'Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentreplicasmismatch' - summary: 'Deployment has not matched the expected number of replicas.' - condition: '{{ true }}' - expr: |- - ( - kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - > - kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - ) and ( - changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}[10m]) - == - 0 - ) - for: 15m - labels: - severity: warning -- alert: KubeDeploymentRolloutStuck - annotations: - description: 'Rollout of deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} is not progressing for longer than 15 minutes.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentrolloutstuck' - summary: 'Deployment rollout is not progressing.' 
- condition: '{{ true }}' - expr: |- - kube_deployment_status_condition{condition="Progressing", status="false",job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - != 0 - for: 15m - labels: - severity: warning -- alert: KubeStatefulSetReplicasMismatch - annotations: - description: 'StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetreplicasmismatch' - summary: 'StatefulSet has not matched the expected number of replicas.' - condition: '{{ true }}' - expr: |- - ( - kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - != - kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - ) and ( - changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}[10m]) - == - 0 - ) - for: 15m - labels: - severity: warning -- alert: KubeStatefulSetGenerationMismatch - annotations: - description: 'StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetgenerationmismatch' - summary: 'StatefulSet generation mismatch due to possible roll-back' - condition: '{{ true }}' - expr: |- - kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - != - kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - for: 15m - labels: - severity: warning -- alert: KubeStatefulSetUpdateNotRolledOut - annotations: - description: 'StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.' 
- runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetupdatenotrolledout' - summary: 'StatefulSet update has not been rolled out.' - condition: '{{ true }}' - expr: |- - ( - max by (namespace,statefulset,{{ .Values.global.clusterLabel }}) ( - kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - unless - kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - ) - * - ( - kube_statefulset_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - != - kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - ) - ) and ( - changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}[5m]) - == - 0 - ) - for: 15m - labels: - severity: warning -- alert: KubeDaemonSetRolloutStuck - annotations: - description: 'DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15 minutes.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetrolloutstuck' - summary: 'DaemonSet rollout is stuck.' 
- condition: '{{ true }}' - expr: |- - ( - ( - kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - != - kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - ) or ( - kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - != - 0 - ) or ( - kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - != - kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - ) or ( - kube_daemonset_status_number_available{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - != - kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - ) - ) and ( - changes(kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}[5m]) - == - 0 - ) - for: 15m - labels: - severity: warning -- alert: KubeContainerWaiting - annotations: - description: 'pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecontainerwaiting' - summary: 'Pod container waiting longer than 1 hour' - condition: '{{ true }}' - expr: sum by (namespace,pod,container,{{ .Values.global.clusterLabel }}) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}) > 0 - for: 1h - labels: - severity: warning -- alert: KubeDaemonSetNotScheduled - annotations: - description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.' 
- runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetnotscheduled' - summary: 'DaemonSet pods are not scheduled.' - condition: '{{ true }}' - expr: |- - kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - - - kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} > 0 - for: 10m - labels: - severity: warning -- alert: KubeDaemonSetMisScheduled - annotations: - description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetmisscheduled' - summary: 'DaemonSet pods are misscheduled.' - condition: '{{ true }}' - expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} > 0 - for: 15m - labels: - severity: warning -- alert: KubeJobNotCompleted - annotations: - description: 'Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubejobnotcompleted' - summary: 'Job did not complete in time' - condition: '{{ true }}' - expr: |- - time() - max by (namespace,job_name,{{ .Values.global.clusterLabel }}) (kube_job_status_start_time{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - and - kube_job_status_active{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} > 0) > 43200 - labels: - severity: warning -- alert: KubeJobFailed - annotations: - description: 'Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert.' 
- runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubejobfailed' - summary: 'Job failed to complete.' - condition: '{{ true }}' - expr: kube_job_failed{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} > 0 - for: 15m - labels: - severity: warning -- alert: KubeHpaReplicasMismatch - annotations: - description: 'HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubehpareplicasmismatch' - summary: 'HPA has not matched desired number of replicas.' - condition: '{{ true }}' - expr: |- - (kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - != - kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}) - and - (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - > - kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}) - and - (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - < - kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}) - and - changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"}[15m]) == 0 - for: 15m - labels: - severity: warning -- alert: KubeHpaMaxedOut - annotations: - description: 'HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes.' 
- runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubehpamaxedout' - summary: 'HPA is running at max replicas' - condition: '{{ true }}' - expr: |- - kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - == - kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~"{{ .targetNamespace }}"} - for: 15m - labels: - severity: warning diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-resources.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-resources.yaml deleted file mode 100644 index cf32b91d..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-resources.yaml +++ /dev/null @@ -1,113 +0,0 @@ -condition: '{{ true }}' -name: kubernetes-resources -rules: -- alert: KubeCPUOvercommit - annotations: - description: 'Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted CPU resource requests for Pods by {{`{{`}} $value {{`}}`}} CPU shares and cannot tolerate node failure.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecpuovercommit' - summary: 'Cluster has overcommitted CPU resource requests.' 
- condition: '{{ true }}' - expr: |- - sum(namespace_cpu:kube_pod_container_resource_requests:sum{}) by ({{ .Values.global.clusterLabel }}) - (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by ({{ .Values.global.clusterLabel }}) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by ({{ .Values.global.clusterLabel }})) > 0 - and - (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by ({{ .Values.global.clusterLabel }}) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by ({{ .Values.global.clusterLabel }})) > 0 - for: 10m - labels: - severity: warning -- alert: KubeMemoryOvercommit - annotations: - description: 'Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted memory resource requests for Pods by {{`{{`}} $value | humanize {{`}}`}} bytes and cannot tolerate node failure.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubememoryovercommit' - summary: 'Cluster has overcommitted memory resource requests.' - condition: '{{ true }}' - expr: |- - sum(namespace_memory:kube_pod_container_resource_requests:sum{}) by ({{ .Values.global.clusterLabel }}) - (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }}) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }})) > 0 - and - (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }}) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }})) > 0 - for: 10m - labels: - severity: warning -- alert: KubeCPUQuotaOvercommit - annotations: - description: 'Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted CPU resource requests for Namespaces.' 
- runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecpuquotaovercommit' - summary: 'Cluster has overcommitted CPU resource requests.' - condition: '{{ true }}' - expr: |- - sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(cpu|requests.cpu)"})) by ({{ .Values.global.clusterLabel }}) - / - sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }}) - > 1.5 - for: 5m - labels: - severity: warning -- alert: KubeMemoryQuotaOvercommit - annotations: - description: 'Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted memory resource requests for Namespaces.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubememoryquotaovercommit' - summary: 'Cluster has overcommitted memory resource requests.' - condition: '{{ true }}' - expr: |- - sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(memory|requests.memory)"})) by ({{ .Values.global.clusterLabel }}) - / - sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by ({{ .Values.global.clusterLabel }}) - > 1.5 - for: 5m - labels: - severity: warning -- alert: KubeQuotaAlmostFull - annotations: - description: 'Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubequotaalmostfull' - summary: 'Namespace quota is going to be full.' 
- condition: '{{ true }}' - expr: |- - kube_resourcequota{job="kube-state-metrics", type="used"} - / ignoring(instance, job, type) - (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) - > 0.9 < 1 - for: 15m - labels: - severity: informational -- alert: KubeQuotaFullyUsed - annotations: - description: 'Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubequotafullyused' - summary: 'Namespace quota is fully used.' - condition: '{{ true }}' - expr: |- - kube_resourcequota{job="kube-state-metrics", type="used"} - / ignoring(instance, job, type) - (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) - == 1 - for: 15m - labels: - severity: informational -- alert: KubeQuotaExceeded - annotations: - description: 'Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubequotaexceeded' - summary: 'Namespace quota has exceeded the limits.' - condition: '{{ true }}' - expr: |- - kube_resourcequota{job="kube-state-metrics", type="used"} - / ignoring(instance, job, type) - (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) - > 1 - for: 15m - labels: - severity: warning -- alert: CPUThrottlingHigh - annotations: - description: '{{`{{`}} $value | humanizePercentage {{`}}`}} throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container {{`}}`}} in pod {{`{{`}} $labels.pod {{`}}`}}.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/cputhrottlinghigh' - summary: 'Processes experience elevated CPU throttling.' 
- condition: '{{ true }}' - expr: |- - sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container,pod,namespace,{{ .Values.global.clusterLabel }}) - / - sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container,pod,namespace,{{ .Values.global.clusterLabel }}) - > ( 25 / 100 ) - for: 15m - labels: - severity: informational diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-storage.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-storage.yaml deleted file mode 100644 index 1e6703c0..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-storage.yaml +++ /dev/null @@ -1,101 +0,0 @@ -condition: '{{ true }}' -name: kubernetes-storage -rules: -- alert: KubePersistentVolumeFillingUp - annotations: - description: 'The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} is only {{`{{`}} $value | humanizePercentage {{`}}`}} free.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumefillingup' - summary: 'PersistentVolume is filling up.' 
- condition: '{{ true }}' - expr: |- - ( - kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} - / - kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} - ) < 0.03 - and - kubelet_volume_stats_used_bytes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} > 0 - unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) - kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 - unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) - kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 - for: 1m - labels: - severity: critical -- alert: KubePersistentVolumeFillingUp - annotations: - description: 'Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} is available.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumefillingup' - summary: 'PersistentVolume is filling up.' 
- condition: '{{ true }}' - expr: |- - ( - kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} - / - kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} - ) < 0.15 - and - kubelet_volume_stats_used_bytes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} > 0 - and - predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) - kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 - unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) - kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 - for: 1h - labels: - severity: warning -- alert: KubePersistentVolumeInodesFillingUp - annotations: - description: 'The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} only has {{`{{`}} $value | humanizePercentage {{`}}`}} free inodes.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumeinodesfillingup' - summary: 'PersistentVolumeInodes are filling up.' 
- condition: '{{ true }}' - expr: |- - ( - kubelet_volume_stats_inodes_free{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} - / - kubelet_volume_stats_inodes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} - ) < 0.03 - and - kubelet_volume_stats_inodes_used{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} > 0 - unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) - kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 - unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) - kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 - for: 1m - labels: - severity: critical -- alert: KubePersistentVolumeInodesFillingUp - annotations: - description: 'Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} is expected to run out of inodes within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} of its inodes are free.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumeinodesfillingup' - summary: 'PersistentVolumeInodes are filling up.' 
- condition: '{{ true }}' - expr: |- - ( - kubelet_volume_stats_inodes_free{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} - / - kubelet_volume_stats_inodes{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} - ) < 0.15 - and - kubelet_volume_stats_inodes_used{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"} > 0 - and - predict_linear(kubelet_volume_stats_inodes_free{job="kubelet", namespace=~"{{ .targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) - kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 - unless on (namespace,persistentvolumeclaim,{{ .Values.global.clusterLabel }}) - kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 - for: 1h - labels: - severity: warning -- alert: KubePersistentVolumeErrors - annotations: - description: 'The persistent volume {{`{{`}} $labels.persistentvolume {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} has status {{`{{`}} $labels.phase {{`}}`}}.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumeerrors' - summary: 'PersistentVolume is having issues with provisioning.' 
- condition: '{{ true }}' - expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0 - for: 5m - labels: - severity: critical diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-apiserver.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-apiserver.yaml deleted file mode 100644 index 6621da32..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-apiserver.yaml +++ /dev/null @@ -1,62 +0,0 @@ -condition: '{{ true }}' -name: kubernetes-system-apiserver -rules: -- alert: KubeClientCertificateExpiration - annotations: - description: 'A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeclientcertificateexpiration' - summary: 'Client certificate is about to expire.' - condition: '{{ true }}' - expr: apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on (job,{{ .Values.global.clusterLabel }}) histogram_quantile(0.01, sum by (job,le,{{ .Values.global.clusterLabel }}) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 604800 - for: 5m - labels: - severity: warning -- alert: KubeClientCertificateExpiration - annotations: - description: 'A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeclientcertificateexpiration' - summary: 'Client certificate is about to expire.' 
- condition: '{{ true }}' - expr: apiserver_client_certificate_expiration_seconds_count{job="kube-apiserver"} > 0 and on (job,{{ .Values.global.clusterLabel }}) histogram_quantile(0.01, sum by (job,le,{{ .Values.global.clusterLabel }}) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="kube-apiserver"}[5m]))) < 86400 - for: 5m - labels: - severity: critical -- alert: KubeAggregatedAPIErrors - annotations: - description: 'Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has reported errors. It has appeared unavailable {{`{{`}} $value | humanize {{`}}`}} times averaged over the past 10m.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeaggregatedapierrors' - summary: 'Kubernetes aggregated API has reported errors.' - condition: '{{ true }}' - expr: sum by (name,namespace,{{ .Values.global.clusterLabel }})(increase(aggregator_unavailable_apiservice_total{job="kube-apiserver"}[10m])) > 4 - labels: - severity: warning -- alert: KubeAggregatedAPIDown - annotations: - description: 'Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeaggregatedapidown' - summary: 'Kubernetes aggregated API is down.' - condition: '{{ true }}' - expr: (1 - max by (name,namespace,{{ .Values.global.clusterLabel }})(avg_over_time(aggregator_unavailable_apiservice{job="kube-apiserver"}[10m]))) * 100 < 85 - for: 5m - labels: - severity: warning -- alert: KubeAPIDown - annotations: - description: 'KubeAPI has disappeared from Prometheus target discovery.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapidown' - summary: 'Target disappeared from Prometheus target discovery.' 
- condition: '{{ .Values.kubeApiServer.enabled }}' - expr: absent(up{job="kube-apiserver"} == 1) - for: 15m - labels: - severity: critical -- alert: KubeAPITerminatedRequests - annotations: - description: 'The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapiterminatedrequests' - summary: 'The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests.' - condition: '{{ true }}' - expr: sum(rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="kube-apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="kube-apiserver"}[10m])) ) > 0.20 - for: 5m - labels: - severity: warning diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-controller-manager.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-controller-manager.yaml deleted file mode 100644 index e53aebed..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-controller-manager.yaml +++ /dev/null @@ -1,13 +0,0 @@ -condition: '{{ .Values.kubeControllerManager.enabled }}' -name: kubernetes-system-controller-manager -rules: -- alert: KubeControllerManagerDown - annotations: - description: 'KubeControllerManager has disappeared from Prometheus target discovery.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecontrollermanagerdown' - summary: 'Target disappeared from Prometheus target discovery.' 
- condition: '{{ .Values.kubeControllerManager.enabled }}' - expr: absent(up{job="kube-controller-manager"} == 1) - for: 15m - labels: - severity: critical diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-kubelet.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-kubelet.yaml deleted file mode 100644 index 77af3f58..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-kubelet.yaml +++ /dev/null @@ -1,136 +0,0 @@ -condition: '{{ true }}' -name: kubernetes-system-kubelet -rules: -- alert: KubeNodeNotReady - annotations: - description: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than 15 minutes.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubenodenotready' - summary: 'Node is not ready.' - condition: '{{ true }}' - expr: kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 - for: 15m - labels: - severity: warning -- alert: KubeNodeUnreachable - annotations: - description: '{{`{{`}} $labels.node {{`}}`}} is unreachable and some workloads may be rescheduled.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubenodeunreachable' - summary: 'Node is unreachable.' - condition: '{{ true }}' - expr: (kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) == 1 - for: 15m - labels: - severity: warning -- alert: KubeletTooManyPods - annotations: - description: 'Kubelet ''{{`{{`}} $labels.node {{`}}`}}'' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity.' 
- runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubelettoomanypods' - summary: 'Kubelet is running at capacity.' - condition: '{{ true }}' - expr: |- - count by (node,{{ .Values.global.clusterLabel }}) ( - (kube_pod_status_phase{job="kube-state-metrics",phase="Running"} == 1) * on (instance,pod,namespace,{{ .Values.global.clusterLabel }}) group_left(node) topk by (instance,pod,namespace,{{ .Values.global.clusterLabel }}) (1, kube_pod_info{job="kube-state-metrics"}) - ) - / - max by (node,{{ .Values.global.clusterLabel }}) ( - kube_node_status_capacity{job="kube-state-metrics",resource="pods"} != 1 - ) > 0.95 - for: 15m - labels: - severity: informational -- alert: KubeNodeReadinessFlapping - annotations: - description: 'The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubenodereadinessflapping' - summary: 'Node readiness status is flapping.' - condition: '{{ true }}' - expr: sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m])) by (node,{{ .Values.global.clusterLabel }}) > 2 - for: 15m - labels: - severity: warning -- alert: KubeletPlegDurationHigh - annotations: - description: 'The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletplegdurationhigh' - summary: 'Kubelet Pod Lifecycle Event Generator is taking too long to relist.' - condition: '{{ true }}' - expr: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 - for: 5m - labels: - severity: warning -- alert: KubeletPodStartUpLatencyHigh - annotations: - description: 'Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}}.' 
- runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletpodstartuplatencyhigh' - summary: 'Kubelet Pod startup latency is too high.' - condition: '{{ true }}' - expr: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance,le,{{ .Values.global.clusterLabel }})) * on (instance,{{ .Values.global.clusterLabel }}) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60 - for: 15m - labels: - severity: warning -- alert: KubeletClientCertificateExpiration - annotations: - description: 'Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletclientcertificateexpiration' - summary: 'Kubelet client certificate is about to expire.' - condition: '{{ true }}' - expr: kubelet_certificate_manager_client_ttl_seconds < 604800 - labels: - severity: warning -- alert: KubeletClientCertificateExpiration - annotations: - description: 'Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletclientcertificateexpiration' - summary: 'Kubelet client certificate is about to expire.' - condition: '{{ true }}' - expr: kubelet_certificate_manager_client_ttl_seconds < 86400 - labels: - severity: critical -- alert: KubeletServerCertificateExpiration - annotations: - description: 'Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletservercertificateexpiration' - summary: 'Kubelet server certificate is about to expire.' 
- condition: '{{ true }}' - expr: kubelet_certificate_manager_server_ttl_seconds < 604800 - labels: - severity: warning -- alert: KubeletServerCertificateExpiration - annotations: - description: 'Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}}.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletservercertificateexpiration' - summary: 'Kubelet server certificate is about to expire.' - condition: '{{ true }}' - expr: kubelet_certificate_manager_server_ttl_seconds < 86400 - labels: - severity: critical -- alert: KubeletClientCertificateRenewalErrors - annotations: - description: 'Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its client certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes).' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletclientcertificaterenewalerrors' - summary: 'Kubelet has failed to renew its client certificate.' - condition: '{{ true }}' - expr: increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0 - for: 15m - labels: - severity: warning -- alert: KubeletServerCertificateRenewalErrors - annotations: - description: 'Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its server certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes).' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletservercertificaterenewalerrors' - summary: 'Kubelet has failed to renew its server certificate.' - condition: '{{ true }}' - expr: increase(kubelet_server_expiration_renew_errors[5m]) > 0 - for: 15m - labels: - severity: warning -- alert: KubeletDown - annotations: - description: 'Kubelet has disappeared from Prometheus target discovery.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletdown' - summary: 'Target disappeared from Prometheus target discovery.' 
- condition: '{{ .Values.kubelet.enabled }}' - expr: absent(up{job="kubelet", metrics_path="/metrics"} == 1) - for: 15m - labels: - severity: critical diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-scheduler.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-scheduler.yaml deleted file mode 100644 index 1a0983f2..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system-scheduler.yaml +++ /dev/null @@ -1,13 +0,0 @@ -condition: '{{ .Values.kubeScheduler.enabled }}' -name: kubernetes-system-scheduler -rules: -- alert: KubeSchedulerDown - annotations: - description: 'KubeScheduler has disappeared from Prometheus target discovery.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeschedulerdown' - summary: 'Target disappeared from Prometheus target discovery.' - condition: '{{ .Values.kubeScheduler.enabled }}' - expr: absent(up{job="kube-scheduler"} == 1) - for: 15m - labels: - severity: critical diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system.yaml deleted file mode 100644 index 677f4929..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/kubernetes-system.yaml +++ /dev/null @@ -1,27 +0,0 @@ -condition: '{{ true }}' -name: kubernetes-system -rules: -- alert: KubeVersionMismatch - annotations: - description: 'There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeversionmismatch' - summary: 'Different semantic versions of Kubernetes components running.' 
- condition: '{{ true }}' - expr: count by ({{ .Values.global.clusterLabel }}) (count by (git_version,{{ .Values.global.clusterLabel }}) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) > 1 - for: 15m - labels: - severity: warning -- alert: KubeClientErrors - annotations: - description: 'Kubernetes API server client ''{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}'' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors.''' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeclienterrors' - summary: 'Kubernetes API server client is experiencing errors.' - condition: '{{ true }}' - expr: |- - (sum(rate(rest_client_requests_total{job="kube-apiserver",code=~"5.."}[5m])) by (instance,job,namespace,{{ .Values.global.clusterLabel }}) - / - sum(rate(rest_client_requests_total{job="kube-apiserver"}[5m])) by (instance,job,namespace,{{ .Values.global.clusterLabel }})) - > 0.01 - for: 15m - labels: - severity: warning diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.rules.yaml deleted file mode 100644 index 77cfa5a1..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.rules.yaml +++ /dev/null @@ -1,76 +0,0 @@ -condition: '{{ true }}' -name: node-exporter.rules -rules: -- condition: '{{ true }}' - expr: |- - count without (cpu, mode) ( - node_cpu_seconds_total{job="node-exporter",mode="idle"} - ) - record: instance:node_num_cpu:sum -- condition: '{{ true }}' - expr: |- - 1 - avg without (cpu) ( - sum without (mode) (rate(node_cpu_seconds_total{job="node-exporter", mode=~"idle|iowait|steal"}[5m])) - ) - record: instance:node_cpu_utilisation:rate5m -- condition: '{{ true }}' - expr: |- - ( - node_load1{job="node-exporter"} - / - 
instance:node_num_cpu:sum{job="node-exporter"} - ) - record: instance:node_load1_per_cpu:ratio -- condition: '{{ true }}' - expr: |- - 1 - ( - ( - node_memory_MemAvailable_bytes{job="node-exporter"} - or - ( - node_memory_Buffers_bytes{job="node-exporter"} - + - node_memory_Cached_bytes{job="node-exporter"} - + - node_memory_MemFree_bytes{job="node-exporter"} - + - node_memory_Slab_bytes{job="node-exporter"} - ) - ) - / - node_memory_MemTotal_bytes{job="node-exporter"} - ) - record: instance:node_memory_utilisation:ratio -- condition: '{{ true }}' - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) - record: instance:node_vmstat_pgmajfault:rate5m -- condition: '{{ true }}' - expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) - record: instance_device:node_disk_io_time_seconds:rate5m -- condition: '{{ true }}' - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) - record: instance_device:node_disk_io_time_weighted_seconds:rate5m -- condition: '{{ true }}' - expr: |- - sum without (device) ( - rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m]) - ) - record: instance:node_network_receive_bytes_excluding_lo:rate5m -- condition: '{{ true }}' - expr: |- - sum without (device) ( - rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m]) - ) - record: instance:node_network_transmit_bytes_excluding_lo:rate5m -- condition: '{{ true }}' - expr: |- - sum without (device) ( - rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m]) - ) - record: instance:node_network_receive_drop_excluding_lo:rate5m -- condition: '{{ true }}' - expr: |- - sum without (device) ( - rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m]) - ) - record: 
instance:node_network_transmit_drop_excluding_lo:rate5m diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.yaml deleted file mode 100644 index 3df8ed27..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-exporter.yaml +++ /dev/null @@ -1,336 +0,0 @@ -condition: '{{ true }}' -name: node-exporter -rules: -- alert: NodeFilesystemSpaceFillingUp - annotations: - description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemspacefillingup' - summary: 'Filesystem is predicted to run out of space within the next 24 hours.' - condition: '{{ true }}' - expr: |- - ( - node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15 - and - predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 1h - labels: - severity: warning -- alert: NodeFilesystemSpaceFillingUp - annotations: - description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up fast.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemspacefillingup' - summary: 'Filesystem is predicted to run out of space within the next 4 hours.' 
- condition: '{{ true }}' - expr: |- - ( - node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10 - and - predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 1h - labels: - severity: critical -- alert: NodeFilesystemAlmostOutOfSpace - annotations: - description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutofspace' - summary: 'Filesystem has less than 5% space left.' - condition: '{{ true }}' - expr: |- - ( - node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 30m - labels: - severity: warning -- alert: NodeFilesystemAlmostOutOfSpace - annotations: - description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutofspace' - summary: 'Filesystem has less than 3% space left.' 
- condition: '{{ true }}' - expr: |- - ( - node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 30m - labels: - severity: critical -- alert: NodeFilesystemFilesFillingUp - annotations: - description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemfilesfillingup' - summary: 'Filesystem is predicted to run out of inodes within the next 24 hours.' - condition: '{{ true }}' - expr: |- - ( - node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40 - and - predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 1h - labels: - severity: warning -- alert: NodeFilesystemFilesFillingUp - annotations: - description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up fast.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemfilesfillingup' - summary: 'Filesystem is predicted to run out of inodes within the next 4 hours.' 
- condition: '{{ true }}' - expr: |- - ( - node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20 - and - predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 1h - labels: - severity: critical -- alert: NodeFilesystemAlmostOutOfFiles - annotations: - description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutoffiles' - summary: 'Filesystem has less than 5% inodes left.' - condition: '{{ true }}' - expr: |- - ( - node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 1h - labels: - severity: warning -- alert: NodeFilesystemAlmostOutOfFiles - annotations: - description: 'Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutoffiles' - summary: 'Filesystem has less than 3% inodes left.' 
- condition: '{{ true }}' - expr: |- - ( - node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3 - and - node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0 - ) - for: 1h - labels: - severity: critical -- alert: NodeNetworkReceiveErrs - annotations: - description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} receive errors in the last two minutes.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodenetworkreceiveerrs' - summary: 'Network interface is reporting many receive errors.' - condition: '{{ true }}' - expr: rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m]) > 0.01 - for: 1h - labels: - severity: warning -- alert: NodeNetworkTransmitErrs - annotations: - description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} transmit errors in the last two minutes.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodenetworktransmiterrs' - summary: 'Network interface is reporting many transmit errors.' - condition: '{{ true }}' - expr: rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m]) > 0.01 - for: 1h - labels: - severity: warning -- alert: NodeHighNumberConntrackEntriesUsed - annotations: - description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of conntrack entries are used.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodehighnumberconntrackentriesused' - summary: 'Number of conntrack are getting close to the limit.' 
- condition: '{{ true }}' - expr: (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit) > 0.75 - labels: - severity: warning -- alert: NodeTextFileCollectorScrapeError - annotations: - description: 'Node Exporter text file collector on {{`{{`}} $labels.instance {{`}}`}} failed to scrape.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodetextfilecollectorscrapeerror' - summary: 'Node Exporter text file collector failed to scrape.' - condition: '{{ true }}' - expr: node_textfile_scrape_error{job="node-exporter"} == 1 - labels: - severity: warning -- alert: NodeClockSkewDetected - annotations: - description: 'Clock at {{`{{`}} $labels.instance {{`}}`}} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodeclockskewdetected' - summary: 'Clock skew detected.' - condition: '{{ true }}' - expr: |- - ( - node_timex_offset_seconds{job="node-exporter"} > 0.05 - and - deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0 - ) - or - ( - node_timex_offset_seconds{job="node-exporter"} < -0.05 - and - deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0 - ) - for: 10m - labels: - severity: warning -- alert: NodeClockNotSynchronising - annotations: - description: 'Clock at {{`{{`}} $labels.instance {{`}}`}} is not synchronising. Ensure NTP is configured on this host.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodeclocknotsynchronising' - summary: 'Clock not synchronising.' - condition: '{{ true }}' - expr: |- - min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0 - and - node_timex_maxerror_seconds{job="node-exporter"} >= 16 - for: 10m - labels: - severity: warning -- alert: NodeRAIDDegraded - annotations: - description: 'RAID array ''{{`{{`}} $labels.device {{`}}`}}'' at {{`{{`}} $labels.instance {{`}}`}} is in degraded state due to one or more disks failures. 
Number of spare drives is insufficient to fix issue automatically.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/noderaiddegraded' - summary: 'RAID Array is degraded.' - condition: '{{ true }}' - expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) > 0 - for: 15m - labels: - severity: critical -- alert: NodeRAIDDiskFailure - annotations: - description: 'At least one device in RAID array at {{`{{`}} $labels.instance {{`}}`}} failed. Array ''{{`{{`}} $labels.device {{`}}`}}'' needs attention and possibly a disk swap.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/noderaiddiskfailure' - summary: 'Failed device in RAID array.' - condition: '{{ true }}' - expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} > 0 - labels: - severity: warning -- alert: NodeFileDescriptorLimit - annotations: - description: 'File descriptors limit at {{`{{`}} $labels.instance {{`}}`}} is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefiledescriptorlimit' - summary: 'Kernel is predicted to exhaust file descriptors limit soon.' - condition: '{{ true }}' - expr: |- - ( - node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70 - ) - for: 15m - labels: - severity: warning -- alert: NodeFileDescriptorLimit - annotations: - description: 'File descriptors limit at {{`{{`}} $labels.instance {{`}}`}} is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodefiledescriptorlimit' - summary: 'Kernel is predicted to exhaust file descriptors limit soon.' 
- condition: '{{ true }}' - expr: |- - ( - node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90 - ) - for: 15m - labels: - severity: critical -- alert: NodeCPUHighUsage - annotations: - description: 'CPU usage at {{`{{`}} $labels.instance {{`}}`}} has been above 90% for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%. - - ' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodecpuhighusage' - summary: 'High CPU usage.' - condition: '{{ true }}' - expr: sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter", mode!="idle"}[2m]))) * 100 > 90 - for: 15m - labels: - severity: informational -- alert: NodeSystemSaturation - annotations: - description: 'System load per core at {{`{{`}} $labels.instance {{`}}`}} has been above 2 for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}. - - This might indicate this instance resources saturation and can cause it becoming unresponsive. - - ' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodesystemsaturation' - summary: 'System saturated, load per core is very high.' - condition: '{{ true }}' - expr: |- - node_load1{job="node-exporter"} - / count without (cpu, mode) (node_cpu_seconds_total{job="node-exporter", mode="idle"}) > 2 - for: 15m - labels: - severity: warning -- alert: NodeMemoryMajorPagesFaults - annotations: - description: 'Memory major pages are occurring at very high rate at {{`{{`}} $labels.instance {{`}}`}}, 500 major page faults per second for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}. - - Please check that there is enough memory available at this instance. - - ' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodememorymajorpagesfaults' - summary: 'Memory major page faults are occurring at very high rate.' 
- condition: '{{ true }}' - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) > 500 - for: 15m - labels: - severity: warning -- alert: NodeMemoryHighUtilization - annotations: - description: 'Memory is filling up at {{`{{`}} $labels.instance {{`}}`}}, has been above 90% for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%. - - ' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodememoryhighutilization' - summary: 'Host is running out of memory.' - condition: '{{ true }}' - expr: 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} * 100) > 90 - for: 15m - labels: - severity: warning -- alert: NodeDiskIOSaturation - annotations: - description: 'Disk IO queue (aqu-sq) is high on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}}, has been above 10 for the last 30 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}. - - This symptom might indicate disk saturation. - - ' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodediskiosaturation' - summary: 'Disk IO queue is high.' - condition: '{{ true }}' - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) > 10 - for: 30m - labels: - severity: warning -- alert: NodeSystemdServiceFailed - annotations: - description: 'Systemd service {{`{{`}} $labels.name {{`}}`}} has entered failed state at {{`{{`}} $labels.instance {{`}}`}}' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodesystemdservicefailed' - summary: 'Systemd service has entered failed state.' 
- condition: '{{ true }}' - expr: node_systemd_unit_state{job="node-exporter", state="failed"} == 1 - for: 5m - labels: - severity: warning -- alert: NodeBondingDegraded - annotations: - description: 'Bonding interface {{`{{`}} $labels.master {{`}}`}} on {{`{{`}} $labels.instance {{`}}`}} is in degraded state due to one or more slave failures.' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/node/nodebondingdegraded' - summary: 'Bonding interface is degraded' - condition: '{{ true }}' - expr: (node_bonding_slaves - node_bonding_active) != 0 - for: 5m - labels: - severity: warning diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-network.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-network.yaml deleted file mode 100644 index d785e205..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node-network.yaml +++ /dev/null @@ -1,13 +0,0 @@ -condition: '{{ true }}' -name: node-network -rules: -- alert: NodeNetworkInterfaceFlapping - annotations: - description: 'Network interface "{{`{{`}} $labels.device {{`}}`}}" changing its up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}' - runbook_url: '{{ .Values.defaultRules.runbookUrl }}/general/nodenetworkinterfaceflapping' - summary: 'Network interface is often changing its status' - condition: '{{ true }}' - expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 - for: 2m - labels: - severity: warning diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node.rules.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node.rules.yaml deleted file mode 100644 index 80e9fb18..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/files/rules/generated/node.rules.yaml +++ /dev/null @@ -1,44 +0,0 @@ -condition: '{{ 
true }}' -name: node.rules -rules: -- condition: '{{ true }}' - expr: |- - topk by (namespace,pod,{{ .Values.global.clusterLabel }}) (1, - max by (node,namespace,pod,{{ .Values.global.clusterLabel }}) ( - label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)") - )) - record: 'node_namespace_pod:kube_pod_info:' -- condition: '{{ true }}' - expr: |- - count by (node,{{ .Values.global.clusterLabel }}) ( - node_cpu_seconds_total{mode="idle",job="node-exporter"} - * on (namespace,pod,{{ .Values.global.clusterLabel }}) group_left(node) - topk by (namespace,pod,{{ .Values.global.clusterLabel }}) (1, node_namespace_pod:kube_pod_info:) - ) - record: node:node_num_cpu:sum -- condition: '{{ true }}' - expr: |- - sum( - node_memory_MemAvailable_bytes{job="node-exporter"} or - ( - node_memory_Buffers_bytes{job="node-exporter"} + - node_memory_Cached_bytes{job="node-exporter"} + - node_memory_MemFree_bytes{job="node-exporter"} + - node_memory_Slab_bytes{job="node-exporter"} - ) - ) by ({{ .Values.global.clusterLabel }}) - record: :node_memory_MemAvailable_bytes:sum -- condition: '{{ true }}' - expr: |- - avg by (node,{{ .Values.global.clusterLabel }}) ( - sum without (mode) ( - rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",job="node-exporter"}[5m]) - ) - ) - record: node:node_cpu_utilization:ratio_rate5m -- condition: '{{ true }}' - expr: |- - avg by ({{ .Values.global.clusterLabel }}) ( - node:node_cpu_utilization:ratio_rate5m - ) - record: cluster:node_cpu:ratio_rate5m diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/_helpers.tpl b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/_helpers.tpl deleted file mode 100644 index 4429e725..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/_helpers.tpl +++ /dev/null @@ -1,458 +0,0 @@ -{{- /* Expand the name of the chart. 
*/ -}} -{{- define "victoria-metrics-k8s-stack.name" -}} - {{- $Chart := (.helm).Chart | default .Chart -}} - {{- $Values := (.helm).Values | default .Values -}} - {{- default $Chart.Name $Values.nameOverride | trunc 63 | trimSuffix "-" -}} -{{- end }} - -{{- /* -Create a default fully qualified app name. -We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -If release name contains chart name it will be used as a full name. -*/ -}} -{{- define "victoria-metrics-k8s-stack.fullname" -}} - {{- $Values := (.helm).Values | default .Values -}} - {{- $Chart := (.helm).Chart | default .Chart -}} - {{- $Release := (.helm).Release | default .Release -}} - {{- $fullname := "" -}} - {{- if .appKey -}} - {{- $appKey := ternary (list .appKey) .appKey (kindIs "string" .appKey) -}} - {{- $values := $Values -}} - {{- $global := (index $Values.global $Chart.Name) | default dict -}} - {{- range $ak := $appKey }} - {{- $values = (index $values $ak) | default dict -}} - {{- $global = (index $global $ak) | default dict -}} - {{- if $values.name -}} - {{- $fullname = $values.name -}} - {{- else if $global.name -}} - {{- $fullname = $global.name -}} - {{- end -}} - {{- end }} - {{- end -}} - {{- if empty $fullname -}} - {{- if $Values.fullnameOverride -}} - {{- $fullname = $Values.fullnameOverride -}} - {{- else if (dig $Chart.Name "fullnameOverride" "" ($Values.global)) -}} - {{- $fullname = (dig $Chart.Name "fullnameOverride" "" ($Values.global)) -}} - {{- else -}} - {{- $name := default $Chart.Name $Values.nameOverride -}} - {{- if contains $name $Release.Name -}} - {{- $fullname = $Release.Name -}} - {{- else -}} - {{- $fullname = (printf "%s-%s" $Release.Name $name) }} - {{- end -}} - {{- end }} - {{- end -}} - {{- $fullname | trunc 63 | trimSuffix "-" -}} -{{- end -}} - -{{- /* Create chart name and version as used by the chart label. 
*/ -}} -{{- define "victoria-metrics-k8s-stack.chart" -}} - {{- $Chart := (.helm).Chart | default .Chart -}} - {{- printf "%s-%s" $Chart.Name $Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} -{{- end }} - -{{- /* Create the name of the service account to use */ -}} -{{- define "victoria-metrics-k8s-stack.serviceAccountName" -}} - {{- $Values := (.helm).Values | default .Values -}} - {{- if $Values.serviceAccount.create -}} - {{- default (include "victoria-metrics-k8s-stack.fullname" .) $Values.serviceAccount.name -}} - {{- else -}} - {{- default "default" $Values.serviceAccount.name -}} - {{- end }} -{{- end }} - -{{- /* Common labels */ -}} -{{- define "victoria-metrics-k8s-stack.labels" -}} - {{- $Release := (.helm).Release | default .Release -}} - {{- $Chart := (.helm).Chart | default .Chart -}} - {{- $labels := (fromYaml (include "victoria-metrics-k8s-stack.selectorLabels" .)) -}} - {{- $_ := set $labels "helm.sh/chart" (include "victoria-metrics-k8s-stack.chart" .) -}} - {{- $_ := set $labels "app.kubernetes.io/managed-by" $Release.Service -}} - {{- with $Chart.AppVersion }} - {{- $_ := set $labels "app.kubernetes.io/version" . -}} - {{- end -}} - {{- toYaml $labels -}} -{{- end }} - -{{- define "vm.release" -}} - {{- $Release := (.helm).Release | default .Release -}} - {{- $Values := (.helm).Values | default .Values -}} - {{- default $Release.Name $Values.argocdReleaseOverride | trunc 63 | trimSuffix "-" -}} -{{- end -}} - -{{- /* Selector labels */ -}} -{{- define "victoria-metrics-k8s-stack.selectorLabels" -}} - {{- $labels := .extraLabels | default dict -}} - {{- $_ := set $labels "app.kubernetes.io/name" (include "victoria-metrics-k8s-stack.name" .) -}} - {{- $_ := set $labels "app.kubernetes.io/instance" (include "vm.release" .) 
-}} - {{- toYaml $labels -}} -{{- end }} - -{{- /* Create the name for VM service */ -}} -{{- define "vm.service" -}} - {{- $Values := (.helm).Values | default .Values -}} - {{- $name := (include "victoria-metrics-k8s-stack.fullname" .) -}} - {{- with .appKey -}} - {{- $prefix := . -}} - {{- if kindIs "slice" $prefix }} - {{- $prefix = last $prefix -}} - {{- end -}} - {{- $prefix = ternary $prefix (printf "vm%s" $prefix) (hasPrefix "vm" $prefix) -}} - {{- $name = printf "%s-%s" $prefix $name -}} - {{- end -}} - {{- if hasKey . "appIdx" -}} - {{- $name = (printf "%s-%d.%s" $name .appIdx $name) -}} - {{- end -}} - {{- $name -}} -{{- end }} - -{{- define "vm.url" -}} - {{- $name := (include "vm.service" .) -}} - {{- $Release := (.helm).Release | default .Release -}} - {{- $Values := (.helm).Values | default .Values -}} - {{- $ns := include "vm.namespace" . -}} - {{- $proto := "http" -}} - {{- $port := 80 -}} - {{- $path := .appRoute | default "/" -}} - {{- $isSecure := false -}} - {{- if .appSecure -}} - {{- $isSecure = .appSecure -}} - {{- end -}} - {{- if .appKey -}} - {{- $appKey := ternary (list .appKey) .appKey (kindIs "string" .appKey) -}} - {{- $spec := $Values -}} - {{- range $ak := $appKey -}} - {{- if hasKey $spec $ak -}} - {{- $spec = (index $spec $ak) -}} - {{- end -}} - {{- if hasKey $spec "spec" -}} - {{- $spec = $spec.spec -}} - {{- end -}} - {{- end -}} - {{- $isSecure = (eq ($spec.extraArgs).tls "true") | default $isSecure -}} - {{- $proto = (ternary "https" "http" $isSecure) -}} - {{- $port = (ternary 443 80 $isSecure) -}} - {{- $port = $spec.port | default $port -}} - {{- $path = dig "http.pathPrefix" $path ($spec.extraArgs | default dict) -}} - {{- end -}} - {{- printf "%s://%s.%s.svc:%d%s" $proto $name $ns (int $port) $path -}} -{{- end -}} - -{{- define "vm.read.endpoint" -}} - {{- $ctx := . 
-}} - {{- $Values := (.helm).Values | default .Values -}} - {{- $endpoint := default dict -}} - {{- if $Values.vmsingle.enabled -}} - {{- $_ := set $ctx "appKey" "vmsingle" -}} - {{- $_ := set $endpoint "url" (include "vm.url" $ctx) -}} - {{- else if $Values.vmcluster.enabled -}} - {{- $_ := set $ctx "appKey" (list "vmcluster" "vmselect") -}} - {{- $baseURL := (trimSuffix "/" (include "vm.url" $ctx)) -}} - {{- $tenant := ($Values.tenant | default 0) -}} - {{- $_ := set $endpoint "url" (printf "%s/select/%d/prometheus" $baseURL (int $tenant)) -}} - {{- else if $Values.externalVM.read.url -}} - {{- $endpoint = $Values.externalVM.read -}} - {{- end -}} - {{- toYaml $endpoint -}} -{{- end }} - -{{- define "vm.write.endpoint" -}} - {{- $ctx := . -}} - {{- $Values := (.helm).Values | default .Values -}} - {{- $endpoint := default dict -}} - {{- if $Values.vmsingle.enabled -}} - {{- $_ := set $ctx "appKey" "vmsingle" -}} - {{- $baseURL := (trimSuffix "/" (include "vm.url" $ctx)) -}} - {{- $_ := set $endpoint "url" (printf "%s/api/v1/write" $baseURL) -}} - {{- else if $Values.vmcluster.enabled -}} - {{- $_ := set $ctx "appKey" (list "vmcluster" "vminsert") -}} - {{- $baseURL := (trimSuffix "/" (include "vm.url" $ctx)) -}} - {{- $tenant := ($Values.tenant | default 0) -}} - {{- $_ := set $endpoint "url" (printf "%s/insert/%d/prometheus/api/v1/write" $baseURL (int $tenant)) -}} - {{- else if $Values.externalVM.write.url -}} - {{- $endpoint = $Values.externalVM.write -}} - {{- end -}} - {{- toYaml $endpoint -}} -{{- end -}} - -{{- /* VMAlert remotes */ -}} -{{- define "vm.alert.remotes" -}} - {{- $Values := (.helm).Values | default .Values -}} - {{- $remotes := default dict -}} - {{- $fullname := (include "victoria-metrics-k8s-stack.fullname" .) -}} - {{- $ctx := dict "helm" . -}} - {{- $remoteWrite := (include "vm.write.endpoint" $ctx | fromYaml) -}} - {{- if $Values.vmalert.remoteWriteVMAgent -}} - {{- $ctx := dict "helm" . 
"appKey" "vmagent" -}} - {{- $remoteWrite = dict "url" (printf "%s/api/v1/write" (include "vm.url" $ctx)) -}} - {{- end -}} - {{- $ctx := dict "helm" . -}} - {{- $remoteRead := (fromYaml (include "vm.read.endpoint" $ctx)) -}} - {{- $_ := set $remotes "remoteWrite" $remoteWrite -}} - {{- $_ := set $remotes "remoteRead" $remoteRead -}} - {{- $_ := set $remotes "datasource" $remoteRead -}} - {{- if $Values.vmalert.additionalNotifierConfigs }} - {{- $configName := printf "%s-vmalert-additional-notifier" $fullname -}} - {{- $notifierConfigRef := dict "name" $configName "key" "notifier-configs.yaml" -}} - {{- $_ := set $remotes "notifierConfigRef" $notifierConfigRef -}} - {{- else if $Values.alertmanager.enabled -}} - {{- $notifiers := default list -}} - {{- $appSecure := (not (empty (((.Values.alertmanager).spec).webConfig).tls_server_config)) -}} - {{- $ctx := dict "helm" . "appKey" "alertmanager" "appSecure" $appSecure "appRoute" ((.Values.alertmanager).spec).routePrefix -}} - {{- $alertManagerReplicas := (.Values.alertmanager.spec.replicaCount | default 1 | int) -}} - {{- range until $alertManagerReplicas -}} - {{- $_ := set $ctx "appIdx" . -}} - {{- $notifiers = append $notifiers (dict "url" (include "vm.url" $ctx)) -}} - {{- end }} - {{- $_ := set $remotes "notifiers" $notifiers -}} - {{- end -}} - {{- toYaml $remotes -}} -{{- end -}} - -{{- /* VMAlert templates */ -}} -{{- define "vm.alert.templates" -}} - {{- $Values := (.helm).Values | default .Values}} - {{- $cms := ($Values.vmalert.spec.configMaps | default list) -}} - {{- if $Values.vmalert.templateFiles -}} - {{- $fullname := (include "victoria-metrics-k8s-stack.fullname" .) 
-}} - {{- $cms = append $cms (printf "%s-vmalert-extra-tpl" $fullname) -}} - {{- end -}} - {{- $output := dict "configMaps" (compact $cms) -}} - {{- toYaml $output -}} -{{- end -}} - -{{- define "vm.license.global" -}} - {{- $license := (deepCopy (.Values.global).license) | default dict -}} - {{- if $license.key -}} - {{- if hasKey $license "keyRef" -}} - {{- $_ := unset $license "keyRef" -}} - {{- end -}} - {{- else if $license.keyRef.name -}} - {{- if hasKey $license "key" -}} - {{- $_ := unset $license "key" -}} - {{- end -}} - {{- else -}} - {{- $license = default dict -}} - {{- end -}} - {{- toYaml $license -}} -{{- end -}} - -{{- /* VMAlert spec */ -}} -{{- define "vm.alert.spec" -}} - {{- $Values := (.helm).Values | default .Values }} - {{- $extraArgs := dict "remoteWrite.disablePathAppend" "true" -}} - {{- if $Values.vmalert.templateFiles -}} - {{- $ruleTmpl := (printf "/etc/vm/configs/%s-vmalert-extra-tpl/*.tmpl" (include "victoria-metrics-k8s-stack.fullname" .)) -}} - {{- $_ := set $extraArgs "rule.templates" $ruleTmpl -}} - {{- end -}} - {{- $vmAlertRemotes := (include "vm.alert.remotes" . | fromYaml) -}} - {{- $vmAlertTemplates := (include "vm.alert.templates" . | fromYaml) -}} - {{- $spec := dict "extraArgs" $extraArgs -}} - {{- with (include "vm.license.global" .) -}} - {{- $_ := set $spec "license" (fromYaml .) -}} - {{- end -}} - {{- with concat ($vmAlertRemotes.notifiers | default list) (.Values.vmalert.spec.notifiers | default list) }} - {{- $_ := set $vmAlertRemotes "notifiers" . }} - {{- end }} - {{- $spec := deepCopy (omit $Values.vmalert.spec "notifiers") | mergeOverwrite $vmAlertRemotes | mergeOverwrite $vmAlertTemplates | mergeOverwrite $spec }} - {{- if not (or (hasKey $spec "notifier") (hasKey $spec "notifiers") (hasKey $spec "notifierConfigRef") (hasKey $spec.extraArgs "notifier.blackhole")) }} - {{- fail "Neither `notifier`, `notifiers` nor `notifierConfigRef` is set for vmalert. 
If it's intentionally please consider setting `.vmalert.spec.extraArgs.['notifier.blackhole']` to `'true'`"}} - {{- end }} - {{- tpl (deepCopy (omit $Values.vmalert.spec "notifiers") | mergeOverwrite $vmAlertRemotes | mergeOverwrite $vmAlertTemplates | mergeOverwrite $spec | toYaml) . -}} -{{- end }} - -{{- /* VM Agent remoteWrites */ -}} -{{- define "vm.agent.remote.write" -}} - {{- $Values := (.helm).Values | default .Values }} - {{- $remoteWrites := $Values.vmagent.additionalRemoteWrites | default list -}} - {{- if or $Values.vmsingle.enabled $Values.vmcluster.enabled $Values.externalVM.write.url -}} - {{- $ctx := dict "helm" . -}} - {{- $remoteWrites = append $remoteWrites (fromYaml (include "vm.write.endpoint" $ctx)) -}} - {{- end -}} - {{- toYaml (dict "remoteWrite" $remoteWrites) -}} -{{- end -}} - -{{- /* VMAgent spec */ -}} -{{- define "vm.agent.spec" -}} - {{- $Values := (.helm).Values | default .Values }} - {{- $spec := (include "vm.agent.remote.write" . | fromYaml) -}} - {{- with (include "vm.license.global" .) -}} - {{- $_ := set $spec "license" (fromYaml .) -}} - {{- end -}} - {{- tpl (deepCopy $Values.vmagent.spec | mergeOverwrite $spec | toYaml) . -}} -{{- end }} - -{{- /* VMAuth spec */ -}} -{{- define "vm.auth.spec" -}} - {{- $ctx := . 
-}} - {{- $Values := (.helm).Values | default .Values }} - {{- $unauthorizedAccessConfig := default list }} - {{- if $Values.vmsingle.enabled -}} - {{- $_ := set $ctx "appKey" (list "vmsingle") -}} - {{- $url := (include "vm.url" $ctx) }} - {{- $srcPath := clean (printf "%s/.*" (urlParse $url).path) }} - {{- $unauthorizedAccessConfig = append $unauthorizedAccessConfig (dict "src_paths" (list $srcPath) "url_prefix" (list $url)) }} - {{- else if $Values.vmcluster.enabled -}} - {{- $_ := set $ctx "appKey" (list "vmcluster" "vminsert") -}} - {{- $writeUrl := (include "vm.url" $ctx) }} - {{- $writeSrcPath := clean (printf "%s/insert/.*" (urlParse $writeUrl).path) }} - {{- $unauthorizedAccessConfig = append $unauthorizedAccessConfig (dict "src_paths" (list $writeSrcPath) "url_prefix" (list $writeUrl)) }} - {{- $_ := set $ctx "appKey" (list "vmcluster" "vmselect") -}} - {{- $readUrl := (include "vm.url" $ctx) }} - {{- $readSrcPath := clean (printf "%s/select/.*" (urlParse $readUrl).path) }} - {{- $unauthorizedAccessConfig = append $unauthorizedAccessConfig (dict "src_paths" (list $readSrcPath) "url_prefix" (list $readUrl)) }} - {{- else if or $Values.externalVM.read.url $Values.externalVM.write.url }} - {{- with $Values.externalVM.read.url }} - {{- $srcPath := regexReplaceAll "(.*)/api/.*" (clean (printf "%s/.*" (urlParse .).path)) "${1}" }} - {{- $unauthorizedAccessConfig = append $unauthorizedAccessConfig (dict "src_paths" (list $srcPath) "url_prefix" (list .)) }} - {{- end -}} - {{- with $Values.externalVM.write.url }} - {{- $srcPath := regexReplaceAll "(.*)/api/.*" (clean (printf "%s/.*" (urlParse .).path)) "${1}" }} - {{- $unauthorizedAccessConfig = append $unauthorizedAccessConfig (dict "src_paths" (list $srcPath) "url_prefix" (list .)) }} - {{- end -}} - {{- end -}} - {{- $spec := $Values.vmauth.spec }} - {{- $_ := set $spec "unauthorizedAccessConfig" (concat $unauthorizedAccessConfig ($spec.unauthorizedAccessConfig | default list)) }} - {{- with (include 
"vm.license.global" .) -}} - {{- $_ := set $spec "license" (fromYaml .) -}} - {{- end -}} - {{- tpl (toYaml $spec) . -}} -{{- end -}} - -{{- /* Alermanager spec */ -}} -{{- define "vm.alertmanager.spec" -}} - {{- $Values := (.helm).Values | default .Values }} - {{- $fullname := (include "victoria-metrics-k8s-stack.fullname" .) -}} - {{- $spec := $Values.alertmanager.spec -}} - {{- if and (not $Values.alertmanager.spec.configRawYaml) (not $Values.alertmanager.spec.configSecret) -}} - {{- $_ := set $spec "configSecret" (printf "%s-alertmanager" $fullname) -}} - {{- end -}} - {{- $templates := default list -}} - {{- if $Values.alertmanager.monzoTemplate.enabled -}} - {{- $configMap := (printf "%s-alertmanager-monzo-tpl" $fullname) -}} - {{- $templates = append $templates (dict "name" $configMap "key" "monzo.tmpl") -}} - {{- end -}} - {{- $configMap := (printf "%s-alertmanager-extra-tpl" $fullname) -}} - {{- range $key, $value := (.Values.alertmanager.templateFiles | default dict) -}} - {{- $templates = append $templates (dict "name" $configMap "key" $key) -}} - {{- end -}} - {{- $_ := set $spec "templates" $templates -}} - {{- toYaml $spec -}} -{{- end -}} - -{{- /* Single spec */ -}} -{{- define "vm.single.spec" -}} - {{- $Values := (.helm).Values | default .Values }} - {{- $extraArgs := default dict -}} - {{- if $Values.vmalert.enabled }} - {{- $ctx := dict "helm" . "appKey" "vmalert" -}} - {{- $_ := set $extraArgs "vmalert.proxyURL" (include "vm.url" $ctx) -}} - {{- end -}} - {{- $spec := dict "extraArgs" $extraArgs -}} - {{- with (include "vm.license.global" .) -}} - {{- $_ := set $spec "license" (fromYaml .) -}} - {{- end -}} - {{- tpl (deepCopy $Values.vmsingle.spec | mergeOverwrite $spec | toYaml) . -}} -{{- end }} - -{{- /* Cluster spec */ -}} -{{- define "vm.select.spec" -}} - {{- $Values := (.helm).Values | default .Values }} - {{- $extraArgs := default dict -}} - {{- if $Values.vmalert.enabled -}} - {{- $ctx := dict "helm" . 
"appKey" "vmalert" -}} - {{- $_ := set $extraArgs "vmalert.proxyURL" (include "vm.url" $ctx) -}} - {{- end -}} - {{- $spec := dict "extraArgs" $extraArgs -}} - {{- toYaml $spec -}} -{{- end -}} - -{{- define "vm.cluster.spec" -}} - {{- $Values := (.helm).Values | default .Values }} - {{- $spec := (include "vm.select.spec" . | fromYaml) -}} - {{- $clusterSpec := (deepCopy $Values.vmcluster.spec) -}} - {{- with (include "vm.license.global" .) -}} - {{- $_ := set $clusterSpec "license" (fromYaml .) -}} - {{- end -}} - {{- tpl ($clusterSpec | mergeOverwrite (dict "vmselect" $spec) | toYaml) . -}} -{{- end -}} - -{{- define "vm.data.source.enabled" -}} - {{- $Values := (.helm).Values | default .Values -}} - {{- $grafana := $Values.grafana -}} - {{- $isEnabled := false -}} - {{- if $grafana.plugins -}} - {{- range $value := $grafana.plugins -}} - {{- if contains "victoriametrics-datasource" $value -}} - {{- $isEnabled = true -}} - {{- end }} - {{- end }} - {{- end }} - {{- $unsignedPlugins := ((index $grafana "grafana.ini").plugins).allow_loading_unsigned_plugins | default "" -}} - {{- $allowUnsigned := contains "victoriametrics-datasource" $unsignedPlugins -}} - {{- ternary "true" "" (and $isEnabled $allowUnsigned) -}} -{{- end -}} - -{{- /* Datasources */ -}} -{{- define "vm.data.sources" -}} - {{- $Values := (.helm).Values | default .Values }} - {{- $grafana := $Values.grafana -}} - {{- $datasources := $Values.grafana.additionalDataSources | default list -}} - {{- $vmDatasource := "victoriametrics-datasource" -}} - {{- $allowVMDatasource := (ternary false true (empty (include "vm.data.source.enabled" .))) -}} - {{- if or $Values.vmsingle.enabled $Values.vmcluster.enabled -}} - {{- $ctx := dict "helm" . 
-}} - {{- $readEndpoint:= (include "vm.read.endpoint" $ctx | fromYaml) -}} - {{- $defaultDatasources := default list -}} - {{- range $ds := $grafana.sidecar.datasources.default }} - {{- if not $ds.type -}} - {{- $_ := set $ds "type" $Values.grafana.defaultDatasourceType }} - {{- end -}} - {{- if or (ne $ds.type $vmDatasource) $allowVMDatasource -}} - {{- $_ := set $ds "url" $readEndpoint.url -}} - {{- $defaultDatasources = append $defaultDatasources $ds -}} - {{- end -}} - {{- end }} - {{- $datasources = concat $datasources $defaultDatasources -}} - {{- if and $grafana.sidecar.datasources.createVMReplicasDatasources $defaultDatasources -}} - {{- range $id := until (int $Values.vmsingle.spec.replicaCount) -}} - {{- $_ := set $ctx "appIdx" $id -}} - {{- $readEndpoint := (include "vm.read.endpoint" $ctx | fromYaml) -}} - {{- range $ds := $defaultDatasources -}} - {{- $ds = (deepCopy $ds) -}} - {{- $_ := set $ds "url" $readEndpoint.url -}} - {{- $_ := set $ds "name" (printf "%s-%d" $ds.name $id) -}} - {{- $_ := set $ds "isDefault" false -}} - {{- $datasources = append $datasources $ds -}} - {{- end -}} - {{- end -}} - {{- end -}} - {{- end -}} - {{- toYaml $datasources -}} -{{- end }} - -{{- /* VMRule name */ -}} -{{- define "victoria-metrics-k8s-stack.rulegroup.name" -}} - {{- printf "%s-%s" (include "victoria-metrics-k8s-stack.fullname" .) (.name | replace "_" "") -}} -{{- end -}} - -{{- /* VMRule labels */ -}} -{{- define "victoria-metrics-k8s-stack.rulegroup.labels" -}} - {{- $Values := (.helm).Values | default .Values }} - {{- $labels := (fromYaml (include "victoria-metrics-k8s-stack.labels" .)) -}} - {{- $_ := set $labels "app" (include "victoria-metrics-k8s-stack.name" .) 
-}} - {{- $labels = mergeOverwrite $labels (deepCopy $Values.defaultRules.labels) -}} - {{- toYaml $labels -}} -{{- end }} - -{{- /* VMRule key */ -}} -{{- define "victoria-metrics-k8s-stack.rulegroup.key" -}} - {{- without (regexSplit "[-_.]" .name -1) "exporter" "rules" | join "-" | camelcase | untitle -}} -{{- end -}} - -{{- /* VMAlertmanager name */ -}} -{{- define "victoria-metrics-k8s-stack.alertmanager.name" -}} - {{- $Values := (.helm).Values | default .Values }} - {{- $Values.alertmanager.name | default (printf "%s-%s" "vmalertmanager" (include "victoria-metrics-k8s-stack.fullname" .) | trunc 63 | trimSuffix "-") -}} -{{- end -}} diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/extra-objects.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/extra-objects.yaml deleted file mode 100644 index a9bb3b6b..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/extra-objects.yaml +++ /dev/null @@ -1,4 +0,0 @@ -{{ range .Values.extraObjects }} ---- -{{ tpl (toYaml .) 
$ }} -{{ end }} diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/rules/rule.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/rules/rule.yaml deleted file mode 100644 index afa2900c..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/templates/rules/rule.yaml +++ /dev/null @@ -1,121 +0,0 @@ -{{- if .Values.defaultRules.create }} - -{{- /* -Default rules alias -*/}} -{{- $defaultRules := .Values.defaultRules -}} - -{{- /* -Exact rules overrides -*/}} -{{- $exactRules := index $defaultRules "rules" | default dict }} - -{{- /* -Create custom template context -*/}} -{{- $ctx := (dict "Values" (deepCopy .Values) "Release" (deepCopy .Release) "Chart" (deepCopy .Chart) "Template" (deepCopy .Template)) -}} - -{{- /* -Loop through all rules files, that were crafted with hack/sync_rules.py -*/}} -{{- range $groupFile, $_ := .Files.Glob "files/rules/**.yaml" -}} - -{{- /* -Get group name from file -*/}} -{{- $groupBase := base $groupFile -}} -{{- $groupNameRaw := trimSuffix (ext $groupBase) $groupBase -}} - -{{- /* -Create context for templating -*/}} -{{- $_ := set $ctx "name" $groupNameRaw -}} - -{{- /* -Create sanitized group name retrieved from file -*/}} -{{- $groupName := include "victoria-metrics-k8s-stack.rulegroup.key" $ctx -}} - -{{- /* -Merge common group with a group data of a current iteration -*/}} - -{{- $group := mergeOverwrite (deepCopy (dig "group" (default dict) $defaultRules)) (dig "groups" $groupName (default dict) $defaultRules) -}} - -{{- /* -Get group data from file -*/}} -{{- $groupCtx := mergeOverwrite (deepCopy $ctx) $group }} -{{- $groupData := fromYaml (tpl ($.Files.Get $groupFile) $groupCtx) -}} - -{{- /* -Save rules spec from file -*/}} -{{- $rulesSpec := $groupData.rules -}} - -{{- /* -Delete rules from group -*/}} -{{- $_ := unset $groupData "rules" -}} - -{{- /* -Save condition for group from file -*/}} -{{- $groupCondition := (eq $groupData.condition 
"true") -}} - -{{- /* -Delete condition from group -*/}} -{{- $_ := unset $groupData "condition" -}} - -{{- /* -Merge group spec -*/}} -{{- $groupSpec := mergeOverwrite (deepCopy $groupData) (dig "spec" (default dict) $group) -}} - -{{- /* -Filter out ignore rules -*/}} -{{- $commonRule := dig "rule" (default dict) $defaultRules }} -{{- $commonInGroupRule := dig "rules" (default dict) $group }} -{{- $filteredRulesSpec := default list }} -{{- range $_, $ruleSpec := $rulesSpec }} - {{- $ruleName := $ruleSpec.alert | default "" }} - {{- $ruleKey := (hasKey $ruleSpec "record" | ternary "recording" "alerting") -}} - {{- $ruleCondition := (eq $ruleSpec.condition "true") }} - {{- $_ := unset $ruleSpec "condition" }} - {{- $exactRule := index $exactRules $ruleName | default dict }} - {{- $defaultRule := deepCopy (index $defaultRules $ruleKey) }} - {{- $resultRule := mergeOverwrite (deepCopy $commonRule) $defaultRule $commonInGroupRule $exactRule }} - {{- if (and (dig "create" true $resultRule) $ruleCondition) }} - {{- $ruleSpec := mergeOverwrite (deepCopy $ruleSpec) (dig "spec" (default dict) $resultRule) }} - {{- $filteredRulesSpec = append $filteredRulesSpec $ruleSpec }} - {{- end }} -{{- end -}} -{{- $rulesSpec = $filteredRulesSpec }} - -{{- /* -Check if group is enabled -*/}} -{{- if (and $rulesSpec (dig "create" true $group) $groupCondition) }} ---- -apiVersion: operator.victoriametrics.com/v1beta1 -kind: VMRule -metadata: - name: {{ include "victoria-metrics-k8s-stack.rulegroup.name" $ctx }} - {{- with (include "victoria-metrics-k8s-stack.rulegroup.labels" $ctx) }} - labels: {{ . | nindent 4 }} - {{- end }} - {{- with $.Values.defaultRules.annotations }} - annotations: {{ toYaml . 
| nindent 4 }} - {{- end }} -spec: - groups: - - {{ toYaml $groupSpec | indent 4 | trim }} - rules: - {{- range $ruleSpec := $rulesSpec }} - - {{ toYaml $ruleSpec | indent 6 | trim }} - {{- end }} -{{- end }} -{{- end }} -{{- end }} diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/todo.md b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/todo.md deleted file mode 100644 index 33f0a470..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/todo.md +++ /dev/null @@ -1,26 +0,0 @@ -### plans - -* [x] VMCluster -* [x] VMSingle -* [x] VMAgent -* [x] VMAlert -* [x] AlertManager -* [x] Annotations -* [x] ServiceScrapes - * [x] Nodeexporter - * [x] Grafana - * [x] kube-state-metrics - * [x] kube-mixin - * [x] core-dns -* [x] Grafana DS -* [x] Dashboards - * [x] Nodeexporter - * [x] kube-state-metrics - * [x] kube-mixin -* [x] Rules - * [x] kube-mixin - * [x] kube-prometheus - * [x] victoria-metrics -* [ ] ServiceAccounts stuff -* [ ] SelectorOvverride for ServiceScrapes -* [ ] helm hook for uninstall crd objects before chart remove diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.minikube.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.minikube.yaml deleted file mode 100644 index f4438de3..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.minikube.yaml +++ /dev/null @@ -1,38 +0,0 @@ -vmagent: - spec: - volumes: - - hostPath: - path: /var/lib/minikube/certs/etcd - type: DirectoryOrCreate - name: etcd-certs - volumeMounts: - - mountPath: /var/lib/minikube/certs/etcd - name: etcd-certs -kubeScheduler: - spec: - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - port: http-metrics - scheme: https - tlsConfig: - caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecureSkipVerify: true -kubeControllerManager: - spec: - endpoints: - - bearerTokenFile: 
/var/run/secrets/kubernetes.io/serviceaccount/token - port: http-metrics - scheme: https - tlsConfig: - caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - insecureSkipVerify: true -kubeEtcd: - spec: - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - port: http-metrics - scheme: https - tlsConfig: - caFile: /var/lib/minikube/certs/etcd/ca.crt - certFile: /var/lib/minikube/certs/etcd/peer.crt - keyFile: /var/lib/minikube/certs/etcd/peer.key diff --git a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.yaml b/packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.yaml deleted file mode 100644 index 778f7e00..00000000 --- a/packages/system/monitoring/charts/victoria-metrics-k8s-stack/values.yaml +++ /dev/null @@ -1,1233 +0,0 @@ -global: - clusterLabel: cluster - license: - key: "" - keyRef: {} - # name: secret-license - # key: license - -nameOverride: "" -fullnameOverride: "" -tenant: "0" -# -- If this chart is used in "Argocd" with "releaseName" field then -# -- VMServiceScrapes couldn't select the proper services. -# -- For correct working need set value 'argocdReleaseOverride=$ARGOCD_APP_NAME' -argocdReleaseOverride: "" - -# -- victoria-metrics-operator dependency chart configuration. 
-# -- For possible values refer to https://github.com/VictoriaMetrics/helm-charts/tree/master/charts/victoria-metrics-operator#parameters -# -- also checkout here possible ENV variables to configure operator behaviour https://docs.victoriametrics.com/operator/vars -victoria-metrics-operator: - enabled: true - serviceMonitor: - enabled: true - crd: - # -- we disable crd creation by operator chart as we create them in this chart - create: false - - # -- tells helm to clean up vm cr resources when uninstalling - cleanup: - enabled: true - image: - repository: bitnami/kubectl - # use image tag that matches k8s API version by default - # tag: 1.29.6 - pullPolicy: IfNotPresent - operator: - # -- By default, operator converts prometheus-operator objects. - disable_prometheus_converter: false - -serviceAccount: - # -- Specifies whether a service account should be created - create: true - # -- Annotations to add to the service account - annotations: {} - # -- The name of the service account to use. 
- # -- If not set and create is true, a name is generated using the fullname template - name: "" - -# -- Enable dashboards despite it's dependency is not installed -dashboards: - vmalert: false - operator: false - # -- in ArgoCD using client-side apply this dashboard reaches annotations size limit and causes k8s issues without server side apply - # See [this issue](https://github.com/VictoriaMetrics/helm-charts/tree/disable-node-exporter-dashboard-by-default/charts/victoria-metrics-k8s-stack#metadataannotations-too-long-must-have-at-most-262144-bytes-on-dashboards) - node-exporter-full: true - -# -- Create default rules for monitoring the cluster -defaultRules: - create: true - - # -- Common properties for VMRule groups - group: - spec: - # -- Optional HTTP URL parameters added to each rule request - params: {} - - # -- Common properties for all VMRules - rule: - spec: - # -- Additional labels for all VMRules - labels: {} - # -- Additional annotations for all VMRules - annotations: {} - - # -- Common properties for VMRules alerts - alerting: - spec: - # -- Additional labels for VMRule alerts - labels: {} - # -- Additional annotations for VMRule alerts - annotations: {} - - # -- Common properties for VMRules recording rules - recording: - spec: - # -- Additional labels for VMRule recording rules - labels: {} - # -- Additional annotations for VMRule recording rules - annotations: {} - - # -- Per rule properties - rules: {} - # CPUThrottlingHigh: - # create: true - # spec: - # for: 15m - # labels: - # severity: critical - groups: - etcd: - create: true - # -- Common properties for all rules in a group - rules: {} - # spec: - # annotations: - # dashboard: https://example.com/dashboard/1 - general: - create: true - rules: {} - k8sContainerMemoryRss: - create: true - rules: {} - k8sContainerMemoryCache: - create: true - rules: {} - k8sContainerCpuUsageSecondsTotal: - create: true - rules: {} - k8sPodOwner: - create: true - rules: {} - k8sContainerResource: - create: true 
- rules: {} - k8sContainerMemoryWorkingSetBytes: - create: true - rules: {} - k8sContainerMemorySwap: - create: true - rules: {} - kubeApiserver: - create: true - rules: {} - kubeApiserverAvailability: - create: true - rules: {} - kubeApiserverBurnrate: - create: true - rules: {} - kubeApiserverHistogram: - create: true - rules: {} - kubeApiserverSlos: - create: true - rules: {} - kubelet: - create: true - rules: {} - kubePrometheusGeneral: - create: true - rules: {} - kubePrometheusNodeRecording: - create: true - rules: {} - kubernetesApps: - create: true - rules: {} - targetNamespace: ".*" - kubernetesResources: - create: true - rules: {} - kubernetesStorage: - create: true - rules: {} - targetNamespace: ".*" - kubernetesSystem: - create: true - rules: {} - kubernetesSystemKubelet: - create: true - rules: {} - kubernetesSystemApiserver: - create: true - rules: {} - kubernetesSystemControllerManager: - create: true - rules: {} - kubeScheduler: - create: true - rules: {} - kubernetesSystemScheduler: - create: true - rules: {} - kubeStateMetrics: - create: true - rules: {} - nodeNetwork: - create: true - rules: {} - node: - create: true - rules: {} - vmagent: - create: true - rules: {} - vmsingle: - create: true - rules: {} - vmcluster: - create: true - rules: {} - vmHealth: - create: true - rules: {} - vmoperator: - create: true - rules: {} - alertmanager: - create: true - rules: {} - - # -- Runbook url prefix for default rules - runbookUrl: https://runbooks.prometheus-operator.dev/runbooks - - # -- Labels for default rules - labels: {} - # -- Annotations for default rules - annotations: {} - -# -- Create default dashboards -defaultDashboardsEnabled: true - -# -- Create experimental dashboards -experimentalDashboardsEnabled: true - -# -- Create dashboards as CRDs (reuqires grafana-operator to be installed) -grafanaOperatorDashboardsFormat: - enabled: false - instanceSelector: - matchLabels: - dashboards: "grafana" - allowCrossNamespaceImport: false - -## -- Provide 
custom recording or alerting rules to be deployed into the cluster. -additionalVictoriaMetricsMap: -# rule-name: -# groups: -# - name: my_group -# rules: -# - record: my_record -# expr: 100 * my_record - -externalVM: - read: - url: "" - # bearerTokenSecret: - # name: dbaas-read-access-token - # key: bearerToken - write: - url: "" - # bearerTokenSecret: - # name: dbaas-read-access-token - # key: bearerToken - -############## - -# Configures vmsingle params -vmsingle: - annotations: {} - enabled: true - # -- full spec for VMSingle CRD. Allowed values describe [here](https://docs.victoriametrics.com/operator/api#vmsinglespec) - spec: - port: "8429" - image: - tag: v1.103.0 - # -- Data retention period. Possible units character: h(ours), d(ays), w(eeks), y(ears), if no unit character specified - month. The minimum retention period is 24h. See these [docs](https://docs.victoriametrics.com/single-server-victoriametrics/#retention) - retentionPeriod: "1" - replicaCount: 1 - extraArgs: {} - storage: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 20Gi - ingress: - enabled: false - # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName - # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress - # ingressClassName: nginx - # Values can be templated - annotations: - {} - # kubernetes.io/ingress.class: nginx - # kubernetes.io/tls-acme: "true" - labels: {} - path: "" - # pathType is only for k8s > 1.19 - pathType: Prefix - - hosts: - - vmsingle.domain.com - ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. 
- extraPaths: [] - # - path: /* - # backend: - # serviceName: ssl-redirect - # servicePort: use-annotation - ## Or for k8s > 1.19 - # - path: /* - # pathType: Prefix - # backend: - # service: - # name: ssl-redirect - # port: - # name: service - tls: [] - # - secretName: vmsingle-ingress-tls - # hosts: - # - vmsingle.domain.com - -vmcluster: - enabled: false - annotations: {} - # -- full spec for VMCluster CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmclusterspec) - spec: - # -- Data retention period. Possible units character: h(ours), d(ays), w(eeks), y(ears), if no unit character specified - month. The minimum retention period is 24h. See these [docs](https://docs.victoriametrics.com/single-server-victoriametrics/#retention) - retentionPeriod: "1" - replicationFactor: 2 - vmstorage: - image: - tag: v1.103.0-cluster - replicaCount: 2 - storageDataPath: "/vm-data" - storage: - volumeClaimTemplate: - spec: - resources: - requests: - storage: 10Gi - resources: - {} - # limits: - # cpu: "1" - # memory: 1500Mi - vmselect: - port: "8481" - image: - tag: v1.103.0-cluster - replicaCount: 2 - cacheMountPath: "/select-cache" - extraArgs: {} - storage: - volumeClaimTemplate: - spec: - resources: - requests: - storage: 2Gi - resources: - {} - # limits: - # cpu: "1" - # memory: "1000Mi" - # requests: - # cpu: "0.5" - # memory: "500Mi" - vminsert: - port: "8480" - image: - tag: v1.103.0-cluster - replicaCount: 2 - extraArgs: {} - resources: - {} - # limits: - # cpu: "1" - # memory: 1000Mi - # requests: - # cpu: "0.5" - # memory: "500Mi" - - ingress: - storage: - enabled: false - # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName - # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress - # ingressClassName: nginx - # Values can be templated - annotations: - {} - # kubernetes.io/ingress.class: nginx - # 
kubernetes.io/tls-acme: "true" - labels: {} - path: "" - # pathType is only for k8s > 1.19 - pathType: Prefix - - hosts: - - vmstorage.domain.com - ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. - extraPaths: [] - # - path: /* - # backend: - # serviceName: ssl-redirect - # servicePort: use-annotation - ## Or for k8s > 1.19 - # - path: /* - # pathType: Prefix - # backend: - # service: - # name: ssl-redirect - # port: - # name: service - tls: [] - # - secretName: vmstorage-ingress-tls - # hosts: - # - vmstorage.domain.com - select: - enabled: false - # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName - # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress - # ingressClassName: nginx - # Values can be templated - annotations: - {} - # kubernetes.io/ingress.class: nginx - # kubernetes.io/tls-acme: "true" - labels: {} - path: '{{ dig "extraArgs" "http.pathPrefix" "/" .Values.vmcluster.spec.vmselect }}' - # pathType is only for k8s > 1.19 - pathType: Prefix - - hosts: - - vmselect.domain.com - ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. 
- extraPaths: [] - # - path: /* - # backend: - # serviceName: ssl-redirect - # servicePort: use-annotation - ## Or for k8s > 1.19 - # - path: /* - # pathType: Prefix - # backend: - # service: - # name: ssl-redirect - # port: - # name: service - tls: [] - # - secretName: vmselect-ingress-tls - # hosts: - # - vmselect.domain.com - insert: - enabled: false - # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName - # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress - # ingressClassName: nginx - # Values can be templated - annotations: - {} - # kubernetes.io/ingress.class: nginx - # kubernetes.io/tls-acme: "true" - labels: {} - path: '{{ dig "extraArgs" "http.pathPrefix" "/" .Values.vmcluster.spec.vminsert }}' - # pathType is only for k8s > 1.19 - pathType: Prefix - - hosts: - - vminsert.domain.com - ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. - extraPaths: [] - # - path: /* - # backend: - # serviceName: ssl-redirect - # servicePort: use-annotation - ## Or for k8s > 1.19 - # - path: /* - # pathType: Prefix - # backend: - # service: - # name: ssl-redirect - # port: - # name: service - tls: [] - # - secretName: vminsert-ingress-tls - # hosts: - # - vminsert.domain.com - -alertmanager: - enabled: true - annotations: {} - # -- (object) full spec for VMAlertmanager CRD. 
Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmalertmanagerspec) - spec: - port: "9093" - selectAllByDefault: true - image: - tag: v0.25.0 - externalURL: "" - routePrefix: / - - # -- (string) if this one defined, it will be used for alertmanager configuration and config parameter will be ignored - configSecret: "" - # -- (object) alertmanager configuration - config: - templates: - - "/etc/vm/configs/**/*.tmpl" - route: - # group_by: ["alertgroup", "job"] - # group_wait: 30s - # group_interval: 5m - # repeat_interval: 12h - receiver: "blackhole" - ## routes: - ################################################### - ## Duplicate code_owner routes to teams - ## These will send alerts to team channels but continue - ## processing through the rest of the tree to handled by on-call - # - matchers: - # - code_owner_channel!="" - # - severity=~"info|warning|critical" - # group_by: ["code_owner_channel", "alertgroup", "job"] - # receiver: slack-code-owners - # ################################################### - # ## Standard on-call routes - # - matchers: - # - severity=~"info|warning|critical" - # receiver: slack-monitoring - # continue: true - - # inhibit_rules: - # - target_matchers: - # - severity=~"warning|info" - # source_matchers: - # - severity=critical - # equal: - # - cluster - # - namespace - # - alertname - # - target_matchers: - # - severity=info - # source_matchers: - # - severity=warning - # equal: - # - cluster - # - namespace - # - alertname - # - target_matchers: - # - severity=info - # source_matchers: - # - alertname=InfoInhibitor - # equal: - # - cluster - # - namespace - - receivers: - - name: blackhole - # - name: "slack-monitoring" - # slack_configs: - # - channel: "#channel" - # send_resolved: true - # title: '{{ template "slack.monzo.title" . }}' - # icon_emoji: '{{ template "slack.monzo.icon_emoji" . }}' - # color: '{{ template "slack.monzo.color" . }}' - # text: '{{ template "slack.monzo.text" . 
}}' - # actions: - # - type: button - # text: "Runbook :green_book:" - # url: "{{ (index .Alerts 0).Annotations.runbook_url }}" - # - type: button - # text: "Query :mag:" - # url: "{{ (index .Alerts 0).GeneratorURL }}" - # - type: button - # text: "Dashboard :grafana:" - # url: "{{ (index .Alerts 0).Annotations.dashboard }}" - # - type: button - # text: "Silence :no_bell:" - # url: '{{ template "__alert_silence_link" . }}' - # - type: button - # text: '{{ template "slack.monzo.link_button_text" . }}' - # url: "{{ .CommonAnnotations.link_url }}" - # - name: slack-code-owners - # slack_configs: - # - channel: "#{{ .CommonLabels.code_owner_channel }}" - # send_resolved: true - # title: '{{ template "slack.monzo.title" . }}' - # icon_emoji: '{{ template "slack.monzo.icon_emoji" . }}' - # color: '{{ template "slack.monzo.color" . }}' - # text: '{{ template "slack.monzo.text" . }}' - # actions: - # - type: button - # text: "Runbook :green_book:" - # url: "{{ (index .Alerts 0).Annotations.runbook }}" - # - type: button - # text: "Query :mag:" - # url: "{{ (index .Alerts 0).GeneratorURL }}" - # - type: button - # text: "Dashboard :grafana:" - # url: "{{ (index .Alerts 0).Annotations.dashboard }}" - # - type: button - # text: "Silence :no_bell:" - # url: '{{ template "__alert_silence_link" . }}' - # - type: button - # text: '{{ template "slack.monzo.link_button_text" . }}' - # url: "{{ .CommonAnnotations.link_url }}" - # - # better alert templates for slack - # source https://gist.github.com/milesbxf/e2744fc90e9c41b47aa47925f8ff6512 - monzoTemplate: - enabled: true - - # -- (object) extra alert templates - templateFiles: - {} - # template_1.tmpl: |- - # {{ define "hello" -}} - # hello, Victoria! 
- # {{- end }} - # template_2.tmpl: "" - - # -- (object) alertmanager ingress configuration - ingress: - enabled: false - # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName - # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress - # ingressClassName: nginx - # Values can be templated - annotations: - {} - # kubernetes.io/ingress.class: nginx - # kubernetes.io/tls-acme: "true" - labels: {} - path: '{{ .Values.alertmanager.spec.routePrefix | default "/" }}' - # pathType is only for k8s > 1.19 - pathType: Prefix - - hosts: - - alertmanager.domain.com - ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. - extraPaths: [] - # - path: /* - # backend: - # serviceName: ssl-redirect - # servicePort: use-annotation - ## Or for k8s > 1.19 - # - path: /* - # pathType: Prefix - # backend: - # service: - # name: ssl-redirect - # port: - # name: service - tls: [] - # - secretName: alertmanager-ingress-tls - # hosts: - # - alertmanager.domain.com - -vmalert: - annotations: {} - enabled: true - - # Controls whether VMAlert should use VMAgent or VMInsert as a target for remotewrite - remoteWriteVMAgent: false - # -- (object) full spec for VMAlert CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmalertspec) - spec: - port: "8080" - selectAllByDefault: true - image: - tag: v1.103.0 - evaluationInterval: 15s - extraArgs: - http.pathPrefix: "/" - - # External labels to add to all generated recording rules and alerts - externalLabels: {} - - # -- (object) extra vmalert annotation templates - templateFiles: - {} - # template_1.tmpl: |- - # {{ define "hello" -}} - # hello, Victoria! 
- # {{- end }} - # template_2.tmpl: "" - - ## additionalNotifierConfigs allows to configure static notifiers, discover notifiers via Consul and DNS, - ## see specification in https://docs.victoriametrics.com/vmalert/#notifier-configuration-file. - ## This configuration will be created as separate secret and mounted to vmalert pod. - additionalNotifierConfigs: {} - # dns_sd_configs: - # - names: - # - my.domain.com - # type: 'A' - # port: 9093 - # -- (object) vmalert ingress config - ingress: - enabled: false - # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName - # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress - # ingressClassName: nginx - # Values can be templated - annotations: - {} - # kubernetes.io/ingress.class: nginx - # kubernetes.io/tls-acme: "true" - labels: {} - path: "" - # pathType is only for k8s > 1.19 - pathType: Prefix - - hosts: - - vmalert.domain.com - ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. - extraPaths: [] - # - path: /* - # backend: - # serviceName: ssl-redirect - # servicePort: use-annotation - ## Or for k8s > 1.19 - # - path: /* - # pathType: Prefix - # backend: - # service: - # name: ssl-redirect - # port: - # name: service - tls: [] - # - secretName: vmalert-ingress-tls - # hosts: - # - vmalert.domain.com - -vmauth: - enabled: false - annotations: {} - # -- (object) full spec for VMAuth CRD. 
Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmauthspec) - spec: - discover_backend_ips: true - port: "8427" - -vmagent: - enabled: true - annotations: {} - # -- remoteWrite configuration of VMAgent, allowed parameters defined in a [spec](https://docs.victoriametrics.com/operator/api#vmagentremotewritespec) - additionalRemoteWrites: - [] - #- url: http://some-remote-write/api/v1/write - # -- (object) full spec for VMAgent CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmagentspec) - spec: - port: "8429" - selectAllByDefault: true - image: - tag: v1.103.0 - scrapeInterval: 20s - externalLabels: {} - # For multi-cluster setups it is useful to use "cluster" label to identify the metrics source. - # For example: - # cluster: cluster-name - extraArgs: - promscrape.streamParse: "true" - # Do not store original labels in vmagent's memory by default. This reduces the amount of memory used by vmagent - # but makes vmagent debugging UI less informative. See: https://docs.victoriametrics.com/vmagent/#relabel-debug - promscrape.dropOriginalLabels: "true" - # -- (object) vmagent ingress configuration - ingress: - enabled: false - # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName - # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress - # ingressClassName: nginx - # Values can be templated - annotations: - {} - # kubernetes.io/ingress.class: nginx - # kubernetes.io/tls-acme: "true" - labels: {} - path: "" - # pathType is only for k8s > 1.19 - pathType: Prefix - - hosts: - - vmagent.domain.com - # -- Extra paths to prepend to every host configuration. This is useful when working with annotation based services. 
- extraPaths: [] - # - path: /* - # backend: - # serviceName: ssl-redirect - # servicePort: use-annotation - ## Or for k8s > 1.19 - # - path: /* - # pathType: Prefix - # backend: - # service: - # name: ssl-redirect - # port: - # name: service - tls: [] - # - secretName: vmagent-ingress-tls - # hosts: - # - vmagent.domain.com - -################################################# -### dependencies ##### -################################################# -# Grafana dependency chart configuration. For possible values refer to https://github.com/grafana/helm-charts/tree/main/charts/grafana#configuration -grafana: - enabled: true - ## all values for grafana helm chart can be specified here - sidecar: - datasources: - enabled: true - # -- list of default prometheus compatible datasource configurations. - # VM `url` will be added to each of them in templates and `type` will be set to defaultDatasourceType if not defined - default: - - name: VictoriaMetrics - isDefault: true - - name: VictoriaMetrics (DS) - isDefault: false - type: victoriametrics-datasource - initDatasources: true - createVMReplicasDatasources: false - dashboards: - provider: - name: default - orgid: 1 - additionalDashboardLabels: {} - folder: /var/lib/grafana/dashboards - defaultFolderName: default - additionalDashboardAnnotations: {} - enabled: true - multicluster: false - - ## ForceDeployDatasource Create datasource configmap even if grafana deployment has been disabled - forceDeployDatasource: false - - ## Configure additional grafana datasources (passed through tpl) - ## ref: http://docs.grafana.org/administration/provisioning/#datasources - additionalDataSources: [] - # - name: prometheus-sample - # access: proxy - # basicAuth: true - # basicAuthPassword: pass - # basicAuthUser: daco - # editable: false - # jsonData: - # tlsSkipVerify: true - # orgId: 1 - # type: prometheus - # url: https://{{ printf "%s-prometheus.svc" .Release.Name }}:9090 - # version: 1 - - defaultDashboardsTimezone: utc - - # 
Enabling VictoriaMetrics Datasource in Grafana. See more details here: https://github.com/VictoriaMetrics/grafana-datasource/blob/main/README.md#victoriametrics-datasource-for-grafana - # Note that Grafana will need internet access to install the datasource plugin. - # Uncomment the block below, if you want to enable VictoriaMetrics Datasource in Grafana: - #plugins: - # - "https://github.com/VictoriaMetrics/grafana-datasource/releases/download/v0.5.0/victoriametrics-datasource-v0.5.0.zip;victoriametrics-datasource" - #grafana.ini: - # plugins: - # # Why VictoriaMetrics datasource is unsigned: https://github.com/VictoriaMetrics/grafana-datasource/blob/main/README.md#why-victoriametrics-datasource-is-unsigned - # allow_loading_unsigned_plugins: victoriametrics-datasource - - # Change datasource type in dashboards from Prometheus to VictoriaMetrics. - # you can use `victoriametrics-datasource` instead of `prometheus` if enabled VictoriaMetrics Datasource above - defaultDatasourceType: "prometheus" - - ingress: - enabled: false - # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName - # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress - # ingressClassName: nginx - # Values can be templated - annotations: - {} - # kubernetes.io/ingress.class: nginx - # kubernetes.io/tls-acme: "true" - labels: {} - path: / - # pathType is only for k8s > 1.19 - pathType: Prefix - - hosts: - - grafana.domain.com - ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. 
- extraPaths: [] - # - path: /* - # backend: - # serviceName: ssl-redirect - # servicePort: use-annotation - ## Or for k8s > 1.19 - # - path: /* - # pathType: Prefix - # backend: - # service: - # name: ssl-redirect - # port: - # name: service - tls: [] - # - secretName: grafana-ingress-tls - # hosts: - # - grafana.domain.com - - # -- grafana VM scrape config - vmScrape: - # whether we should create a service scrape resource for grafana - enabled: true - - # -- [Scrape configuration](https://docs.victoriametrics.com/operator/api#vmservicescrapespec) for Grafana - spec: - selector: - matchLabels: - app.kubernetes.io/name: '{{ include "grafana.name" .Subcharts.grafana }}' - endpoints: - - port: '{{ .Values.grafana.service.portName }}' - -# prometheus-node-exporter dependency chart configuration. For possible values refer to https://github.com/prometheus-community/helm-charts/blob/main/charts/prometheus-node-exporter/values.yaml -prometheus-node-exporter: - enabled: true - - # all values for prometheus-node-exporter helm chart can be specified here - service: - ## Add the 'node-exporter' label to be used by serviceMonitor to match standard common usage in rules and grafana dashboards - ## - labels: - jobLabel: node-exporter - extraArgs: - - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/) - - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$ - # -- node exporter VM scrape config - vmScrape: - # whether we should create a service scrape resource for node-exporter - enabled: true - - # -- [Scrape configuration](https://docs.victoriametrics.com/operator/api#vmservicescrapespec) for Node Exporter - spec: - jobLabel: jobLabel - selector: - matchLabels: - app.kubernetes.io/name: '{{ include "prometheus-node-exporter.name" (index .Subcharts 
"prometheus-node-exporter") }}' - endpoints: - - port: metrics - metricRelabelConfigs: - - action: drop - source_labels: [mountpoint] - regex: "/var/lib/kubelet/pods.+" -# kube-state-metrics dependency chart configuration. For possible values refer to https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-state-metrics/values.yaml -kube-state-metrics: - enabled: true - ## all values for kube-state-metrics helm chart can be specified here - - # -- [Scrape configuration](https://docs.victoriametrics.com/operator/api#vmservicescrapespec) for Kube State Metrics - vmScrape: - enabled: true - spec: - selector: - matchLabels: - app.kubernetes.io/name: '{{ include "kube-state-metrics.name" (index .Subcharts "kube-state-metrics") }}' - app.kubernetes.io/instance: '{{ include "vm.release" . }}' - endpoints: - - port: http - honorLabels: true - metricRelabelConfigs: - - action: labeldrop - regex: (uid|container_id|image_id) - jobLabel: app.kubernetes.io/name - -################################################# -### Service Monitors ##### -################################################# -# Component scraping the kubelets -kubelet: - enabled: true - vmScrapes: - # -- Enable scraping /metrics/cadvisor from kubelet's service - cadvisor: - enabled: true - spec: - path: /metrics/cadvisor - # -- Enable scraping /metrics/probes from kubelet's service - probes: - enabled: true - spec: - path: /metrics/probes - kubelet: - spec: {} - # -- spec for VMNodeScrape crd - # https://docs.victoriametrics.com/operator/api.html#vmnodescrapespec - vmScrape: - kind: VMNodeScrape - spec: - scheme: "https" - honorLabels: true - interval: "30s" - scrapeTimeout: "5s" - tlsConfig: - insecureSkipVerify: true - caFile: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" - bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token" - # drop high cardinality label and useless metrics for cadvisor and kubelet - metricRelabelConfigs: - - action: labeldrop - regex: (uid) - - 
action: labeldrop - regex: (id|name) - - action: drop - source_labels: [__name__] - regex: (rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count) - relabelConfigs: - - action: labelmap - regex: __meta_kubernetes_node_label_(.+) - - sourceLabels: [__metrics_path__] - targetLabel: metrics_path - - targetLabel: "job" - replacement: "kubelet" - # ignore timestamps of cadvisor's metrics by default - # more info here https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4697#issuecomment-1656540535 - honorTimestamps: false -# Component scraping the kube api server -kubeApiServer: - enabled: true - # -- spec for VMServiceScrape crd - # https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec - vmScrape: - spec: - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - # bearerTokenSecret: - # key: "" - port: https - scheme: https - tlsConfig: - caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - serverName: kubernetes - jobLabel: component - namespaceSelector: - matchNames: - - default - selector: - matchLabels: - component: apiserver - provider: kubernetes - -# Component scraping the kube controller manager -kubeControllerManager: - enabled: true - - ## If your kube controller manager is not deployed as a pod, specify IPs it can be found on - ## - endpoints: [] - # - 10.141.4.22 - # - 10.141.4.23 - # - 10.141.4.24 - - ## If using kubeControllerManager.endpoints only the port and targetPort are used - ## - service: - enabled: true - port: 10257 - targetPort: 10257 - selector: - component: kube-controller-manager - - # -- spec for VMServiceScrape crd - # https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec - vmScrape: - spec: - jobLabel: jobLabel - namespaceSelector: - matchNames: - - "kube-system" - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - # bearerTokenSecret: - # key: "" - 
port: http-metrics - scheme: https - tlsConfig: - caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - serverName: kubernetes - -# Component scraping kubeDns. Use either this or coreDns -kubeDns: - enabled: false - # -- - service: - enabled: false - ports: - dnsmasq: - port: 10054 - targetPort: 10054 - skydns: - port: 10055 - targetPort: 10055 - selector: - k8s-app: kube-dns - # -- spec for VMServiceScrape crd - # https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec - vmScrape: - spec: - jobLabel: jobLabel - namespaceSelector: - matchNames: [kube-system] - endpoints: - - port: http-metrics-dnsmasq - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - - port: http-metrics-skydns - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - -# Component scraping coreDns. Use either this or kubeDns -coreDns: - enabled: true - service: - enabled: true - port: 9153 - targetPort: 9153 - selector: - k8s-app: kube-dns - - # -- spec for VMServiceScrape crd - # https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec - vmScrape: - spec: - jobLabel: jobLabel - namespaceSelector: - matchNames: [kube-system] - endpoints: - - port: http-metrics - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - -# Component scraping etcd -kubeEtcd: - enabled: true - - ## If your etcd is not deployed as a pod, specify IPs it can be found on - ## - endpoints: [] - # - 10.141.4.22 - # - 10.141.4.23 - # - 10.141.4.24 - - ## Etcd service. 
If using kubeEtcd.endpoints only the port and targetPort are used - ## - service: - enabled: true - port: 2379 - targetPort: 2379 - selector: - component: etcd - - # -- spec for VMServiceScrape crd - # https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec - vmScrape: - spec: - jobLabel: jobLabel - namespaceSelector: - matchNames: [kube-system] - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - # bearerTokenSecret: - # key: "" - port: http-metrics - scheme: https - tlsConfig: - caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - -# Component scraping kube scheduler -kubeScheduler: - enabled: true - - ## If your kube scheduler is not deployed as a pod, specify IPs it can be found on - ## - endpoints: [] - # - 10.141.4.22 - # - 10.141.4.23 - # - 10.141.4.24 - - ## If using kubeScheduler.endpoints only the port and targetPort are used - ## - service: - enabled: true - port: 10259 - targetPort: 10259 - selector: - component: kube-scheduler - - # -- spec for VMServiceScrape crd - # https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec - vmScrape: - spec: - jobLabel: jobLabel - namespaceSelector: - matchNames: [kube-system] - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - # bearerTokenSecret: - # key: "" - port: http-metrics - scheme: https - tlsConfig: - caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - -# Component scraping kube proxy -kubeProxy: - enabled: false - - ## If your kube proxy is not deployed as a pod, specify IPs it can be found on - ## - endpoints: [] - # - 10.141.4.22 - # - 10.141.4.23 - # - 10.141.4.24 - - service: - enabled: true - port: 10249 - targetPort: 10249 - selector: - k8s-app: kube-proxy - - # -- spec for VMServiceScrape crd - # https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec - vmScrape: - spec: - jobLabel: jobLabel - namespaceSelector: - matchNames: [kube-system] - endpoints: - - 
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - # bearerTokenSecret: - # key: "" - port: http-metrics - scheme: https - tlsConfig: - caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - -## install vm operator crds -crds: - enabled: true - -## install prometheus operator crds -prometheus-operator-crds: - enabled: false - -# -- Add extra objects dynamically to this chart -extraObjects: [] diff --git a/packages/system/monitoring/templates/alerts.yaml b/packages/system/monitoring/templates/alerts.yaml new file mode 100644 index 00000000..70d47014 --- /dev/null +++ b/packages/system/monitoring/templates/alerts.yaml @@ -0,0 +1,7 @@ +{{- $files := .Files.Glob "alerts/*.yaml" -}} +{{- range $path, $file := $files }} +--- +# from: {{ $path }} +{{ toString $file }} + +{{- end -}} diff --git a/packages/system/monitoring/values.yaml b/packages/system/monitoring/values.yaml index 1b505397..a96e5bb1 100644 --- a/packages/system/monitoring/values.yaml +++ b/packages/system/monitoring/values.yaml @@ -82,3 +82,275 @@ victoria-metrics-k8s-stack: enabled: false alertmanager: name: vmalertmanager-alertmanager + +kube-state-metrics: + rbac: + extraRules: + - apiGroups: + - source.toolkit.fluxcd.io + - kustomize.toolkit.fluxcd.io + - helm.toolkit.fluxcd.io + - notification.toolkit.fluxcd.io + - image.toolkit.fluxcd.io + resources: + - gitrepositories + - buckets + - helmrepositories + - helmcharts + - ocirepositories + - kustomizations + - helmreleases + - alerts + - providers + - receivers + - imagerepositories + - imagepolicies + - imageupdateautomations + verbs: [ "list", "watch" ] + customResourceState: + enabled: true + config: + spec: + resources: + - groupVersionKind: + group: kustomize.toolkit.fluxcd.io + version: v1 + kind: Kustomization + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a Flux Kustomization resource." 
+ each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + revision: [ status, lastAppliedRevision ] + source_name: [ spec, sourceRef, name ] + - groupVersionKind: + group: helm.toolkit.fluxcd.io + version: v2 + kind: HelmRelease + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a Flux HelmRelease resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + revision: [ status, history, "0", chartVersion ] + chart_name: [ status, history, "0", chartName ] + chart_app_version: [ status, history, "0", appVersion ] + chart_ref_name: [ spec, chartRef, name ] + chart_source_name: [ spec, chart, spec, sourceRef, name ] + - groupVersionKind: + group: source.toolkit.fluxcd.io + version: v1 + kind: GitRepository + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a Flux GitRepository resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + revision: [ status, artifact, revision ] + url: [ spec, url ] + - groupVersionKind: + group: source.toolkit.fluxcd.io + version: v1beta2 + kind: Bucket + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a Flux Bucket resource." 
+ each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + revision: [ status, artifact, revision ] + endpoint: [ spec, endpoint ] + bucket_name: [ spec, bucketName ] + - groupVersionKind: + group: source.toolkit.fluxcd.io + version: v1 + kind: HelmRepository + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a Flux HelmRepository resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + revision: [ status, artifact, revision ] + url: [ spec, url ] + - groupVersionKind: + group: source.toolkit.fluxcd.io + version: v1 + kind: HelmChart + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a Flux HelmChart resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + revision: [ status, artifact, revision ] + chart_name: [ spec, chart ] + chart_version: [ spec, version ] + - groupVersionKind: + group: source.toolkit.fluxcd.io + version: v1beta2 + kind: OCIRepository + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a Flux OCIRepository resource." 
+ each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + revision: [ status, artifact, revision ] + url: [ spec, url ] + - groupVersionKind: + group: notification.toolkit.fluxcd.io + version: v1beta3 + kind: Alert + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a Flux Alert resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + suspended: [ spec, suspend ] + - groupVersionKind: + group: notification.toolkit.fluxcd.io + version: v1beta3 + kind: Provider + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a Flux Provider resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + suspended: [ spec, suspend ] + - groupVersionKind: + group: notification.toolkit.fluxcd.io + version: v1 + kind: Receiver + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a Flux Receiver resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + webhook_path: [ status, webhookPath ] + - groupVersionKind: + group: image.toolkit.fluxcd.io + version: v1beta2 + kind: ImageRepository + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a Flux ImageRepository resource." 
+ each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + image: [ spec, image ] + - groupVersionKind: + group: image.toolkit.fluxcd.io + version: v1beta2 + kind: ImagePolicy + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a Flux ImagePolicy resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + source_name: [ spec, imageRepositoryRef, name ] + - groupVersionKind: + group: image.toolkit.fluxcd.io + version: v1beta2 + kind: ImageUpdateAutomation + metricNamePrefix: gotk + metrics: + - name: "resource_info" + help: "The current state of a Flux ImageUpdateAutomation resource." + each: + type: Info + info: + labelsFromPath: + name: [ metadata, name ] + labelsFromPath: + exported_namespace: [ metadata, namespace ] + ready: [ status, conditions, "[type=Ready]", status ] + suspended: [ spec, suspend ] + source_name: [ spec, sourceRef, name ] From 4eaca42ce9a97d272bd833988c6e0f46d1858b40 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Thu, 3 Oct 2024 16:14:42 +0200 Subject: [PATCH 22/41] fix node-exporter alerts (#389) to show node hostname instead of ip address Signed-off-by: Andrei Kvapil Signed-off-by: Andrei Kvapil --- .../monitoring/alerts/node-exporter.yaml | 98 +++++++++---------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/packages/system/monitoring/alerts/node-exporter.yaml b/packages/system/monitoring/alerts/node-exporter.yaml index 4c7fcc7d..12845766 100644 --- a/packages/system/monitoring/alerts/node-exporter.yaml +++ b/packages/system/monitoring/alerts/node-exporter.yaml @@ -10,7 +10,7 @@ spec: - alert: NodeFilesystemSpaceFillingUp annotations: 
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint - }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + }}, at {{ $labels.node }} has only {{ printf "%.2f" $value }}% available space left and is filling up. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup summary: Filesystem is predicted to run out of space within the next 24 hours. @@ -25,12 +25,12 @@ spec: for: 1h labels: severity: warning - exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + exported_instance: '{{ $labels.node }}/{{ $labels.device }}' service: node-exporter - alert: NodeFilesystemSpaceFillingUp annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint - }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + }}, at {{ $labels.node }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup summary: Filesystem is predicted to run out of space within the next 4 hours. @@ -45,12 +45,12 @@ spec: for: 1h labels: severity: critical - exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + exported_instance: '{{ $labels.node }}/{{ $labels.device }}' service: node-exporter - alert: NodeFilesystemAlmostOutOfSpace annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint - }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + }}, at {{ $labels.node }} has only {{ printf "%.2f" $value }}% available space left. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace summary: Filesystem has less than 5% space left. 
@@ -63,12 +63,12 @@ spec: for: 30m labels: severity: warning - exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + exported_instance: '{{ $labels.node }}/{{ $labels.device }}' service: node-exporter - alert: NodeFilesystemAlmostOutOfSpace annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint - }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + }}, at {{ $labels.node }} has only {{ printf "%.2f" $value }}% available space left. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace summary: Filesystem has less than 3% space left. @@ -81,12 +81,12 @@ spec: for: 30m labels: severity: critical - exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + exported_instance: '{{ $labels.node }}/{{ $labels.device }}' service: node-exporter - alert: NodeFilesystemFilesFillingUp annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint - }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + }}, at {{ $labels.node }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup summary: Filesystem is predicted to run out of inodes within the next 24 hours. @@ -101,12 +101,12 @@ spec: for: 1h labels: severity: warning - exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + exported_instance: '{{ $labels.node }}/{{ $labels.device }}' service: node-exporter - alert: NodeFilesystemFilesFillingUp annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint - }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + }}, at {{ $labels.node }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast. 
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup summary: Filesystem is predicted to run out of inodes within the next 4 hours. @@ -121,12 +121,12 @@ spec: for: 1h labels: severity: critical - exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + exported_instance: '{{ $labels.node }}/{{ $labels.device }}' service: node-exporter - alert: NodeFilesystemAlmostOutOfFiles annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint - }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + }}, at {{ $labels.node }} has only {{ printf "%.2f" $value }}% available inodes left. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles summary: Filesystem has less than 5% inodes left. @@ -139,12 +139,12 @@ spec: for: 1h labels: severity: warning - exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + exported_instance: '{{ $labels.node }}/{{ $labels.device }}' service: node-exporter - alert: NodeFilesystemAlmostOutOfFiles annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint - }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available + }}, at {{ $labels.node }} has only {{ printf "%.2f" $value }}% available inodes left. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles summary: Filesystem has less than 3% inodes left. 
@@ -157,11 +157,11 @@ spec: for: 1h labels: severity: critical - exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + exported_instance: '{{ $labels.node }}/{{ $labels.device }}' service: node-exporter - alert: NodeNetworkReceiveErrs annotations: - description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered + description: '{{ $labels.node }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs summary: Network interface is reporting many receive errors. @@ -170,11 +170,11 @@ spec: for: 1h labels: severity: warning - exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + exported_instance: '{{ $labels.node }}/{{ $labels.device }}' service: node-exporter - alert: NodeNetworkTransmitErrs annotations: - description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered + description: '{{ $labels.node }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.' runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs summary: Network interface is reporting many transmit errors. @@ -183,7 +183,7 @@ spec: for: 1h labels: severity: warning - exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + exported_instance: '{{ $labels.node }}/{{ $labels.device }}' service: node-exporter - alert: NodeHighNumberConntrackEntriesUsed annotations: @@ -194,22 +194,22 @@ spec: > 0.75 labels: severity: warning - exported_instance: '{{ $labels.instance }}' + exported_instance: '{{ $labels.node }}' service: node-exporter - alert: NodeTextFileCollectorScrapeError annotations: - description: Node Exporter text file collector on {{ $labels.instance }} failed + description: Node Exporter text file collector on {{ $labels.node }} failed to scrape. 
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror summary: Node Exporter text file collector failed to scrape. expr: node_textfile_scrape_error{job="node-exporter"} == 1 labels: severity: warning - exported_instance: '{{ $labels.instance }}' + exported_instance: '{{ $labels.node }}' service: node-exporter - alert: NodeClockSkewDetected annotations: - description: Clock at {{ $labels.instance }} is out of sync by more than 0.05s. + description: Clock at {{ $labels.node }} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected summary: Clock skew detected. @@ -228,11 +228,11 @@ spec: for: 10m labels: severity: warning - exported_instance: '{{ $labels.instance }}' + exported_instance: '{{ $labels.node }}' service: node-exporter - alert: NodeClockNotSynchronising annotations: - description: Clock at {{ $labels.instance }} is not synchronising. Ensure + description: Clock at {{ $labels.node }} is not synchronising. Ensure NTP is configured on this host. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising summary: Clock not synchronising. @@ -243,11 +243,11 @@ spec: for: 10m labels: severity: warning - exported_instance: '{{ $labels.instance }}' + exported_instance: '{{ $labels.node }}' service: node-exporter - alert: NodeRAIDDegraded annotations: - description: RAID array '{{ $labels.device }}' at {{ $labels.instance }} is + description: RAID array '{{ $labels.device }}' at {{ $labels.node }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically. 
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded @@ -258,11 +258,11 @@ spec: for: 15m labels: severity: critical - exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + exported_instance: '{{ $labels.node }}/{{ $labels.device }}' service: node-exporter - alert: NodeRAIDDiskFailure annotations: - description: At least one device in RAID array at {{ $labels.instance }} failed. + description: At least one device in RAID array at {{ $labels.node }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure summary: Failed device in RAID array. @@ -270,11 +270,11 @@ spec: > 0 labels: severity: warning - exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + exported_instance: '{{ $labels.node }}/{{ $labels.device }}' service: node-exporter - alert: NodeFileDescriptorLimit annotations: - description: File descriptors limit at {{ $labels.instance }} is currently + description: File descriptors limit at {{ $labels.node }} is currently at {{ printf "%.2f" $value }}%. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit summary: Kernel is predicted to exhaust file descriptors limit soon. @@ -285,11 +285,11 @@ spec: for: 15m labels: severity: warning - exported_instance: '{{ $labels.instance }}' + exported_instance: '{{ $labels.node }}' service: node-exporter - alert: NodeFileDescriptorLimit annotations: - description: File descriptors limit at {{ $labels.instance }} is currently + description: File descriptors limit at {{ $labels.node }} is currently at {{ printf "%.2f" $value }}%. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit summary: Kernel is predicted to exhaust file descriptors limit soon. 
@@ -300,12 +300,12 @@ spec: for: 15m labels: severity: critical - exported_instance: '{{ $labels.instance }}' + exported_instance: '{{ $labels.node }}' service: node-exporter - alert: NodeCPUHighUsage annotations: description: | - CPU usage at {{ $labels.instance }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. + CPU usage at {{ $labels.node }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage summary: High CPU usage. expr: sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter", @@ -313,12 +313,12 @@ spec: for: 15m labels: severity: informational - exported_instance: '{{ $labels.instance }}' + exported_instance: '{{ $labels.node }}' service: node-exporter - alert: NodeSystemSaturation annotations: description: | - System load per core at {{ $labels.instance }} has been above 2 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. + System load per core at {{ $labels.node }} has been above 2 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. This might indicate this instance resources saturation and can cause it becoming unresponsive. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemsaturation summary: System saturated, load per core is very high. @@ -328,12 +328,12 @@ spec: for: 15m labels: severity: warning - exported_instance: '{{ $labels.instance }}' + exported_instance: '{{ $labels.node }}' service: node-exporter - alert: NodeMemoryMajorPagesFaults annotations: description: | - Memory major pages are occurring at very high rate at {{ $labels.instance }}, 500 major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. 
+ Memory major pages are occurring at very high rate at {{ $labels.node }}, 500 major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}. Please check that there is enough memory available at this instance. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememorymajorpagesfaults summary: Memory major page faults are occurring at very high rate. @@ -341,12 +341,12 @@ spec: for: 15m labels: severity: warning - exported_instance: '{{ $labels.instance }}' + exported_instance: '{{ $labels.node }}' service: node-exporter - alert: NodeMemoryHighUtilization annotations: description: | - Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. + Memory is filling up at {{ $labels.node }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememoryhighutilization summary: Host is running out of memory. expr: 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} @@ -354,12 +354,12 @@ spec: for: 15m labels: severity: warning - exported_instance: '{{ $labels.instance }}' + exported_instance: '{{ $labels.node }}' service: node-exporter - alert: NodeDiskIOSaturation annotations: description: | - Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}. + Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.node }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}. This symptom might indicate disk saturation. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodediskiosaturation summary: Disk IO queue is high. 
@@ -368,23 +368,23 @@ spec: for: 30m labels: severity: warning - exported_instance: '{{ $labels.instance }}/{{ $labels.device }}' + exported_instance: '{{ $labels.node }}/{{ $labels.device }}' service: node-exporter - alert: NodeSystemdServiceFailed annotations: description: Systemd service {{ $labels.name }} has entered failed state at - {{ $labels.instance }} + {{ $labels.node }} runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicefailed summary: Systemd service has entered failed state. expr: node_systemd_unit_state{job="node-exporter", state="failed"} == 1 for: 5m labels: severity: warning - exported_instance: '{{ $labels.instance }}/{{ $labels.name }}' + exported_instance: '{{ $labels.node }}/{{ $labels.name }}' service: node-exporter - alert: NodeBondingDegraded annotations: - description: Bonding interface {{ $labels.master }} on {{ $labels.instance + description: Bonding interface {{ $labels.master }} on {{ $labels.node }} is in degraded state due to one or more slave failures. runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded summary: Bonding interface is degraded @@ -392,5 +392,5 @@ spec: for: 5m labels: severity: warning - exported_instance: '{{ $labels.instance }}/{{ $labels.master }}' + exported_instance: '{{ $labels.node }}/{{ $labels.master }}' service: node-exporter From 0daa7605afd0e6696e454124a68901389816da98 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Fri, 4 Oct 2024 12:34:40 +0200 Subject: [PATCH 23/41] Prepare release v0.16.1 (#390) Signed-off-by: Andrei Kvapil ## Summary by CodeRabbit - **New Features** - Introduced the `cozystack` application with necessary Kubernetes resources, including a new namespace, service account, and deployment. - Updated container images for `cozystack` and associated services to version `v0.16.1`. - **Bug Fixes** - Resolved issues related to image versioning across various components, ensuring consistency and reliability. 
- **Documentation** - Updated configuration files to reflect new image tags and versions for multiple components, enhancing clarity for users. Signed-off-by: Andrei Kvapil --- manifests/cozystack-installer.yaml | 4 ++-- packages/apps/kubernetes/images/kubevirt-cloud-provider.tag | 2 +- packages/core/installer/values.yaml | 2 +- packages/core/testing/values.yaml | 2 +- packages/system/dashboard/values.yaml | 4 ++-- packages/system/kamaji/values.yaml | 2 +- packages/system/kubeovn/values.yaml | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/manifests/cozystack-installer.yaml b/manifests/cozystack-installer.yaml index 860ca3e0..d54bd748 100644 --- a/manifests/cozystack-installer.yaml +++ b/manifests/cozystack-installer.yaml @@ -68,7 +68,7 @@ spec: serviceAccountName: cozystack containers: - name: cozystack - image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.0" + image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.1" env: - name: KUBERNETES_SERVICE_HOST value: localhost @@ -87,7 +87,7 @@ spec: fieldRef: fieldPath: metadata.name - name: darkhttpd - image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.0" + image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.1" command: - /usr/bin/darkhttpd - /cozystack/assets diff --git a/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag b/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag index 8117b8c6..bb0cf834 100644 --- a/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag +++ b/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag @@ -1 +1 @@ -ghcr.io/aenix-io/cozystack/kubevirt-cloud-provider:0.11.0@sha256:ba567212f9fe5e7c32af9857edd79eb012f3eb39c2eae0fc831b14d5b7879427 +ghcr.io/aenix-io/cozystack/kubevirt-cloud-provider:0.11.0@sha256:91e6843afa704ba7c513842bc3a612f2c0b295ce95aebe60fbb6be09709a1947 diff --git a/packages/core/installer/values.yaml b/packages/core/installer/values.yaml index 05385327..65cbc04c 100644 --- a/packages/core/installer/values.yaml +++ 
b/packages/core/installer/values.yaml @@ -1,2 +1,2 @@ cozystack: - image: ghcr.io/aenix-io/cozystack/cozystack:v0.16.0@sha256:1119f30a50b3fea1ac7d8068009ca233df6214d709c7861f7ce8fbf0402cdc72 + image: ghcr.io/aenix-io/cozystack/cozystack:v0.16.1@sha256:f27695d23d449f10888295bd2ba6c084c8fa4b81f109d4836ec9db528b943b62 diff --git a/packages/core/testing/values.yaml b/packages/core/testing/values.yaml index 61e3ddbc..bb8aa2f2 100644 --- a/packages/core/testing/values.yaml +++ b/packages/core/testing/values.yaml @@ -1,2 +1,2 @@ e2e: - image: ghcr.io/aenix-io/cozystack/e2e-sandbox:v0.16.0@sha256:25b298d621ec79431d106184d59849bbae634588742583d111628126ad8615c5 + image: ghcr.io/aenix-io/cozystack/e2e-sandbox:v0.16.1@sha256:25b298d621ec79431d106184d59849bbae634588742583d111628126ad8615c5 diff --git a/packages/system/dashboard/values.yaml b/packages/system/dashboard/values.yaml index c4c35bf7..03a1c4ff 100644 --- a/packages/system/dashboard/values.yaml +++ b/packages/system/dashboard/values.yaml @@ -33,11 +33,11 @@ kubeapps: image: registry: ghcr.io/aenix-io/cozystack repository: dashboard - tag: v0.16.0 + tag: v0.16.1 digest: "sha256:4818712e9fc9c57cc321512760c3226af564a04e69d4b3ec9229ab91fd39abeb" kubeappsapis: image: registry: ghcr.io/aenix-io/cozystack repository: kubeapps-apis - tag: v0.16.0 + tag: v0.16.1 digest: "sha256:55bc8e2495933112c7cb4bb9e3b1fcb8df46aa14e27fa007f78388a9757e3238" diff --git a/packages/system/kamaji/values.yaml b/packages/system/kamaji/values.yaml index c9fc799f..b3ccffe5 100644 --- a/packages/system/kamaji/values.yaml +++ b/packages/system/kamaji/values.yaml @@ -3,7 +3,7 @@ kamaji: deploy: false image: pullPolicy: IfNotPresent - tag: v0.16.0@sha256:241e6cdf60905e53f0cb47aadcab69dd8ffa97d316faac6bdeb704c1b13c24db + tag: v0.16.1@sha256:95a9658cbbe1cbfbc42b9ab1df4f2a39342d7a8f1ff10a10b81b8656f3744c39 repository: ghcr.io/aenix-io/cozystack/kamaji resources: limits: diff --git a/packages/system/kubeovn/values.yaml b/packages/system/kubeovn/values.yaml 
index 4d7d705e..70816d53 100644 --- a/packages/system/kubeovn/values.yaml +++ b/packages/system/kubeovn/values.yaml @@ -22,4 +22,4 @@ global: images: kubeovn: repository: kubeovn - tag: v1.13.0@sha256:6676da12917c3645a08fb2adb7def5fcb68995338126b257e0a0a9570516cd5c + tag: v1.13.0@sha256:d13ac4f916cd88d33d1d64c949978165272998d6594441a9dd4be5e6892caf4e From a2cc212a400c5cbc6f5a0be2bdb4a803aedb056e Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Fri, 4 Oct 2024 06:37:02 -0400 Subject: [PATCH 24/41] Upgrade Flux to 2.4.x (#386) Now that Cozystack 0.16 is out with flux-operator 0.9.0, users that need it can easily upgrade to Flux 2.4.0 ## Summary by CodeRabbit - **New Features** - Upgraded FluxCD version from 2.3.x to 2.4.x. - Enhanced configuration for several controllers with new command-line arguments for improved performance and functionality. - **Bug Fixes** - Updated resource limits for containers to ensure optimal resource allocation. Signed-off-by: Kingdon Barrett --- packages/system/fluxcd/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/system/fluxcd/values.yaml b/packages/system/fluxcd/values.yaml index c766a685..2d0580d0 100644 --- a/packages/system/fluxcd/values.yaml +++ b/packages/system/fluxcd/values.yaml @@ -4,7 +4,7 @@ flux-instance: networkPolicy: true domain: cozy.local # -- default value is overriden in patches distribution: - version: 2.3.x + version: 2.4.x registry: ghcr.io/fluxcd components: - source-controller From 0908ee63f0f924b76ac7ae384e02086d0292aa83 Mon Sep 17 00:00:00 2001 From: Kingdon Barrett Date: Fri, 4 Oct 2024 06:37:29 -0400 Subject: [PATCH 25/41] Upgrade Flux operator to 0.10 (#387) ## Summary by CodeRabbit - **New Features** - Updated Flux Operator Helm chart to version 0.10.0, introducing new configuration options: `extraArgs` for additional command-line arguments and `logLevel` for logging verbosity. - **Bug Fixes** - Corrected formatting in the annotations section of the Helm chart. 
- **Documentation** - Updated README to reflect new version and configuration options. - **Chores** - Updated FluxCD instance configuration to version 2.4.x with refined deployment specifications for improved functionality. --------- Signed-off-by: Kingdon Barrett --- .../charts/flux-operator/Chart.yaml | 4 ++-- .../charts/flux-operator/README.md | 4 +++- .../flux-operator/templates/deployment.yaml | 5 +++++ .../charts/flux-operator/values.schema.json | 15 +++++++++++++++ .../charts/flux-operator/values.yaml | 6 ++++++ 5 files changed, 31 insertions(+), 3 deletions(-) diff --git a/packages/system/fluxcd-operator/charts/flux-operator/Chart.yaml b/packages/system/fluxcd-operator/charts/flux-operator/Chart.yaml index 4cfa8643..c0093bd1 100644 --- a/packages/system/fluxcd-operator/charts/flux-operator/Chart.yaml +++ b/packages/system/fluxcd-operator/charts/flux-operator/Chart.yaml @@ -8,7 +8,7 @@ annotations: - name: Upstream Project url: https://github.com/controlplaneio-fluxcd/flux-operator apiVersion: v2 -appVersion: v0.9.0 +appVersion: v0.10.0 description: 'A Helm chart for deploying the Flux Operator. 
' home: https://github.com/controlplaneio-fluxcd icon: https://raw.githubusercontent.com/cncf/artwork/main/projects/flux/icon/color/flux-icon-color.png @@ -25,4 +25,4 @@ sources: - https://github.com/controlplaneio-fluxcd/flux-operator - https://github.com/controlplaneio-fluxcd/charts type: application -version: 0.9.0 +version: 0.10.0 diff --git a/packages/system/fluxcd-operator/charts/flux-operator/README.md b/packages/system/fluxcd-operator/charts/flux-operator/README.md index c377eb54..52f04f8b 100644 --- a/packages/system/fluxcd-operator/charts/flux-operator/README.md +++ b/packages/system/fluxcd-operator/charts/flux-operator/README.md @@ -1,6 +1,6 @@ # flux-operator -![Version: 0.9.0](https://img.shields.io/badge/Version-0.9.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v0.9.0](https://img.shields.io/badge/AppVersion-v0.9.0-informational?style=flat-square) +![Version: 0.10.0](https://img.shields.io/badge/Version-0.10.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v0.10.0](https://img.shields.io/badge/AppVersion-v0.10.0-informational?style=flat-square) The [Flux Operator](https://github.com/controlplaneio-fluxcd/flux-operator) provides a declarative API for the installation and upgrade of CNCF [Flux](https://fluxcd.io) and the @@ -35,12 +35,14 @@ see the Flux Operator [documentation](https://fluxcd.control-plane.io/operator/) | affinity | object | `{"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"kubernetes.io/os","operator":"In","values":["linux"]}]}]}}}` | Pod affinity and anti-affinity settings. | | commonAnnotations | object | `{}` | Common annotations to add to all deployed objects including pods. 
| | commonLabels | object | `{}` | Common labels to add to all deployed objects including pods. | +| extraArgs | list | `[]` | Container extra arguments. | | extraEnvs | list | `[]` | Container extra environment variables. | | fullnameOverride | string | `""` | | | hostNetwork | bool | `false` | If `true`, the container ports (`8080` and `8081`) are exposed on the host network. | | image | object | `{"pullSecrets":[],"repository":"ghcr.io/controlplaneio-fluxcd/flux-operator","tag":""}` | Container image settings. The image tag defaults to the chart appVersion. | | installCRDs | bool | `true` | Install and upgrade the custom resource definitions. | | livenessProbe | object | `{"httpGet":{"path":"/healthz","port":8081},"initialDelaySeconds":15,"periodSeconds":20}` | Container liveness probe settings. | +| logLevel | string | `"info"` | Container logging level flag. | | marketplace | object | `{"account":"","license":"","type":""}` | Marketplace settings. | | nameOverride | string | `""` | | | podSecurityContext | object | `{}` | Pod security context settings. | diff --git a/packages/system/fluxcd-operator/charts/flux-operator/templates/deployment.yaml b/packages/system/fluxcd-operator/charts/flux-operator/templates/deployment.yaml index 31c712d2..dbda7888 100644 --- a/packages/system/fluxcd-operator/charts/flux-operator/templates/deployment.yaml +++ b/packages/system/fluxcd-operator/charts/flux-operator/templates/deployment.yaml @@ -48,6 +48,11 @@ spec: {{- end }} containers: - name: manager + args: + - --log-level={{ .Values.logLevel }} + {{- range .Values.extraArgs }} + - {{ . 
}} + {{- end }} env: - name: RUNTIME_NAMESPACE valueFrom: diff --git a/packages/system/fluxcd-operator/charts/flux-operator/values.schema.json b/packages/system/fluxcd-operator/charts/flux-operator/values.schema.json index 59c16f24..09a28d0c 100644 --- a/packages/system/fluxcd-operator/charts/flux-operator/values.schema.json +++ b/packages/system/fluxcd-operator/charts/flux-operator/values.schema.json @@ -71,6 +71,13 @@ "properties": {}, "type": "object" }, + "extraArgs": { + "items": { + "type": "string" + }, + "type": "array", + "uniqueItems": true + }, "extraEnvs": { "items": { "type": "object" @@ -140,6 +147,14 @@ }, "type": "object" }, + "logLevel": { + "enum": [ + "debug", + "info", + "error" + ], + "type": "string" + }, "marketplace": { "properties": { "account": { diff --git a/packages/system/fluxcd-operator/charts/flux-operator/values.yaml b/packages/system/fluxcd-operator/charts/flux-operator/values.yaml index 2d28fec2..46ec9ac1 100644 --- a/packages/system/fluxcd-operator/charts/flux-operator/values.yaml +++ b/packages/system/fluxcd-operator/charts/flux-operator/values.yaml @@ -90,6 +90,12 @@ hostNetwork: false # @schema default: false # -- Container extra environment variables. extraEnvs: [ ] # @schema item: object ; uniqueItems: true +# -- Container extra arguments. +extraArgs: [ ] # @schema item: string ; uniqueItems: true + +# -- Container logging level flag. +logLevel: "info" # @schema enum:[debug,info,error] + # -- Prometheus Operator scraping settings. serviceMonitor: # @schema default: {"create":false,"interval":"60s","scrapeTimeout":"30s"} create: false From 992d0404b47471d2c9f0c8d7b5e76513bd78de0b Mon Sep 17 00:00:00 2001 From: Mr Khachaturov <105451445+mrkhachaturov@users.noreply.github.com> Date: Fri, 4 Oct 2024 13:43:48 +0300 Subject: [PATCH 26/41] Added external-secrets-operator (#370) This pull request adds the external-secrets-operator to our main bundles. 
By integrating the external-secrets-operator, we enable seamless connectivity to externally hosted secret management services such as HashiCorp Vault, 1Password, AWS Secrets Manager, and more. Benefits: Unified Secret Management: Allows the application to securely fetch secrets from external providers without hardcoding them into configurations. Flexibility: Supports multiple external secret stores, giving users the freedom to choose their preferred secret management solution. Enhanced Security: Reduces the risk of exposing sensitive information by leveraging established secret management platforms. ## Summary by CodeRabbit - **New Features** - Introduced the `external-secrets-operator` for managing external secrets in Kubernetes. - Added a Helm chart for the `external-secrets` application, including configuration options and dependencies. - Implemented a certificate controller within the external-secrets-operator. - **Documentation** - Added README.md with installation instructions and configuration options for the External Secrets Operator. - Included success message and setup instructions in NOTES.txt for the external-secrets deployment. - **Chores** - Created .helmignore to streamline Helm packaging by excluding unnecessary files.
--------- Signed-off-by: Andrei Kvapil Co-authored-by: Andrei Kvapil --- .../core/platform/bundles/distro-full.yaml | 6 + .../core/platform/bundles/distro-hosted.yaml | 6 + packages/core/platform/bundles/paas-full.yaml | 6 + .../core/platform/bundles/paas-hosted.yaml | 6 + .../external-secrets-operator/.helmignore | 3 + .../external-secrets-operator/Chart.yaml | 3 + .../system/external-secrets-operator/Makefile | 11 + .../charts/external-secrets/Chart.lock | 6 + .../charts/external-secrets/Chart.yaml | 20 + .../charts/external-secrets/README.md | 225 + .../external-secrets/templates/NOTES.txt | 7 + .../external-secrets/templates/_helpers.tpl | 198 + .../templates/cert-controller-deployment.yaml | 124 + .../cert-controller-poddisruptionbudget.yaml | 19 + .../templates/cert-controller-rbac.yaml | 86 + .../templates/cert-controller-service.yaml | 28 + .../cert-controller-serviceaccount.yaml | 16 + .../templates/crds/acraccesstoken.yaml | 204 + .../templates/crds/clusterexternalsecret.yaml | 666 +++ .../templates/crds/clustersecretstore.yaml | 4640 +++++++++++++++++ .../templates/crds/ecrauthorizationtoken.yaml | 178 + .../templates/crds/externalsecret.yaml | 820 +++ .../external-secrets/templates/crds/fake.yaml | 87 + .../templates/crds/gcraccesstoken.yaml | 139 + .../templates/crds/githubaccesstoken.yaml | 113 + .../templates/crds/password.yaml | 109 + .../templates/crds/pushsecret.yaml | 388 ++ .../templates/crds/secretstore.yaml | 4640 +++++++++++++++++ .../external-secrets/templates/crds/uuid.yaml | 72 + .../templates/crds/vaultdynamicsecret.yaml | 708 +++ .../templates/crds/webhook.yaml | 158 + .../templates/deployment.yaml | 146 + .../templates/extra-manifests.yaml | 4 + .../templates/poddisruptionbudget.yaml | 19 + .../external-secrets/templates/rbac.yaml | 301 ++ .../external-secrets/templates/service.yaml | 28 + .../templates/serviceaccount.yaml | 16 + .../templates/servicemonitor.yaml | 164 + .../templates/validatingwebhook.yaml | 78 + 
.../templates/webhook-certificate.yaml | 30 + .../templates/webhook-deployment.yaml | 128 + .../webhook-poddisruptionbudget.yaml | 20 + .../templates/webhook-secret.yaml | 14 + .../templates/webhook-service.yaml | 37 + .../templates/webhook-serviceaccount.yaml | 16 + .../external-secrets/values.schema.json | 905 ++++ .../charts/external-secrets/values.yaml | 532 ++ .../external-secrets-operator/values.yaml | 4 + 48 files changed, 16134 insertions(+) create mode 100644 packages/system/external-secrets-operator/.helmignore create mode 100644 packages/system/external-secrets-operator/Chart.yaml create mode 100644 packages/system/external-secrets-operator/Makefile create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/Chart.lock create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/Chart.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/README.md create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/NOTES.txt create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/_helpers.tpl create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-deployment.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-poddisruptionbudget.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-rbac.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-service.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-serviceaccount.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/crds/acraccesstoken.yaml create mode 100644 
packages/system/external-secrets-operator/charts/external-secrets/templates/crds/clusterexternalsecret.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/crds/clustersecretstore.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/crds/ecrauthorizationtoken.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/crds/externalsecret.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/crds/fake.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/crds/gcraccesstoken.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/crds/githubaccesstoken.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/crds/password.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/crds/pushsecret.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/crds/secretstore.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/crds/uuid.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/crds/vaultdynamicsecret.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/crds/webhook.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/deployment.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/extra-manifests.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/poddisruptionbudget.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/rbac.yaml create mode 100644 
packages/system/external-secrets-operator/charts/external-secrets/templates/service.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/serviceaccount.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/servicemonitor.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/validatingwebhook.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-certificate.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-deployment.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-poddisruptionbudget.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-secret.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-service.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-serviceaccount.yaml create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/values.schema.json create mode 100644 packages/system/external-secrets-operator/charts/external-secrets/values.yaml create mode 100644 packages/system/external-secrets-operator/values.yaml diff --git a/packages/core/platform/bundles/distro-full.yaml b/packages/core/platform/bundles/distro-full.yaml index 5ac57835..061e27b9 100644 --- a/packages/core/platform/bundles/distro-full.yaml +++ b/packages/core/platform/bundles/distro-full.yaml @@ -141,3 +141,9 @@ releases: chart: cozy-telepresence namespace: cozy-telepresence dependsOn: [] + +- name: external-secrets-operator + releaseName: external-secrets-operator + chart: cozy-external-secrets-operator + namespace: cozy-external-secrets-operator + dependsOn: [cilium] \ No newline at end of file diff 
--git a/packages/core/platform/bundles/distro-hosted.yaml b/packages/core/platform/bundles/distro-hosted.yaml index 6923c1fb..5be68fbb 100644 --- a/packages/core/platform/bundles/distro-hosted.yaml +++ b/packages/core/platform/bundles/distro-hosted.yaml @@ -92,3 +92,9 @@ releases: chart: cozy-telepresence namespace: cozy-telepresence dependsOn: [] + +- name: external-secrets-operator + releaseName: external-secrets-operator + chart: cozy-external-secrets-operator + namespace: cozy-external-secrets-operator + dependsOn: [] diff --git a/packages/core/platform/bundles/paas-full.yaml b/packages/core/platform/bundles/paas-full.yaml index 988024c7..ed9a81c5 100644 --- a/packages/core/platform/bundles/paas-full.yaml +++ b/packages/core/platform/bundles/paas-full.yaml @@ -216,3 +216,9 @@ releases: namespace: cozy-cluster-api privileged: true dependsOn: [cilium,kubeovn,capi-operator] + +- name: external-secrets-operator + releaseName: external-secrets-operator + chart: cozy-external-secrets-operator + namespace: cozy-external-secrets-operator + dependsOn: [cilium,kubeovn] \ No newline at end of file diff --git a/packages/core/platform/bundles/paas-hosted.yaml b/packages/core/platform/bundles/paas-hosted.yaml index 60194d84..2f63f870 100644 --- a/packages/core/platform/bundles/paas-hosted.yaml +++ b/packages/core/platform/bundles/paas-hosted.yaml @@ -99,6 +99,12 @@ releases: namespace: cozy-telepresence dependsOn: [] +- name: external-secrets-operator + releaseName: external-secrets-operator + chart: cozy-external-secrets-operator + namespace: cozy-external-secrets-operator + dependsOn: [] + - name: dashboard releaseName: dashboard chart: cozy-dashboard diff --git a/packages/system/external-secrets-operator/.helmignore b/packages/system/external-secrets-operator/.helmignore new file mode 100644 index 00000000..6fbae5fc --- /dev/null +++ b/packages/system/external-secrets-operator/.helmignore @@ -0,0 +1,3 @@ +images +hack +.gitkeep \ No newline at end of file diff --git 
a/packages/system/external-secrets-operator/Chart.yaml b/packages/system/external-secrets-operator/Chart.yaml new file mode 100644 index 00000000..29a07f53 --- /dev/null +++ b/packages/system/external-secrets-operator/Chart.yaml @@ -0,0 +1,3 @@ +apiVersion: v2 +name: cozy-external-secrets-operator +version: 0.0.0 # Placeholder, the actual version will be automatically set during the build process \ No newline at end of file diff --git a/packages/system/external-secrets-operator/Makefile b/packages/system/external-secrets-operator/Makefile new file mode 100644 index 00000000..f4d9215d --- /dev/null +++ b/packages/system/external-secrets-operator/Makefile @@ -0,0 +1,11 @@ +export NAME=external-secrets-operator +export NAMESPACE=cozy-$(NAME) + +include ../../../scripts/package.mk + +update: + rm -rf charts + helm repo add external-secrets https://charts.external-secrets.io + helm repo update external-secrets + helm pull external-secrets/external-secrets --untar --untardir charts + rm -rf charts/external-secrets/charts \ No newline at end of file diff --git a/packages/system/external-secrets-operator/charts/external-secrets/Chart.lock b/packages/system/external-secrets-operator/charts/external-secrets/Chart.lock new file mode 100644 index 00000000..f9abae8c --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: bitwarden-sdk-server + repository: oci://ghcr.io/external-secrets/charts + version: v0.3.1 +digest: sha256:2d01e9083fc32c18dca4f9614625e0172e338a663138c2670e5b911645b6b8ee +generated: "2024-09-20T12:57:07.63511+02:00" diff --git a/packages/system/external-secrets-operator/charts/external-secrets/Chart.yaml b/packages/system/external-secrets-operator/charts/external-secrets/Chart.yaml new file mode 100644 index 00000000..dd8161a8 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/Chart.yaml @@ -0,0 +1,20 @@ +apiVersion: v2 +appVersion: v0.10.4 
+dependencies: +- condition: bitwarden-sdk-server.enabled + name: bitwarden-sdk-server + repository: oci://ghcr.io/external-secrets/charts + version: v0.3.1 +description: External secret management for Kubernetes +home: https://github.com/external-secrets/external-secrets +icon: https://raw.githubusercontent.com/external-secrets/external-secrets/main/assets/eso-logo-large.png +keywords: +- kubernetes-external-secrets +- secrets +kubeVersion: '>= 1.19.0-0' +maintainers: +- email: kellinmcavoy@gmail.com + name: mcavoyk +name: external-secrets +type: application +version: 0.10.4 diff --git a/packages/system/external-secrets-operator/charts/external-secrets/README.md b/packages/system/external-secrets-operator/charts/external-secrets/README.md new file mode 100644 index 00000000..89f7b57b --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/README.md @@ -0,0 +1,225 @@ +# External Secrets + +

external-secrets

+ +[//]: # (README.md generated by gotmpl. DO NOT EDIT.) + +![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![Version: 0.10.4](https://img.shields.io/badge/Version-0.10.4-informational?style=flat-square) + +External secret management for Kubernetes + +## TL;DR +```bash +helm repo add external-secrets https://charts.external-secrets.io +helm install external-secrets external-secrets/external-secrets +``` + +## Installing the Chart +To install the chart with the release name `external-secrets`: +```bash +helm install external-secrets external-secrets/external-secrets +``` + +### Custom Resources +By default, the chart will install external-secrets CRDs, this can be controlled with `installCRDs` value. + +## Uninstalling the Chart +To uninstall the `external-secrets` deployment: +```bash +helm uninstall external-secrets +``` +The command removes all the Kubernetes components associated with the chart and deletes the release. + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | | +| bitwarden-sdk-server.enabled | bool | `false` | | +| certController.affinity | object | `{}` | | +| certController.create | bool | `true` | Specifies whether a certificate controller deployment be created. 
| +| certController.deploymentAnnotations | object | `{}` | Annotations to add to Deployment | +| certController.extraArgs | object | `{}` | | +| certController.extraEnv | list | `[]` | | +| certController.extraVolumeMounts | list | `[]` | | +| certController.extraVolumes | list | `[]` | | +| certController.fullnameOverride | string | `""` | | +| certController.hostNetwork | bool | `false` | Run the certController on the host network | +| certController.image.flavour | string | `""` | | +| certController.image.pullPolicy | string | `"IfNotPresent"` | | +| certController.image.repository | string | `"oci.external-secrets.io/external-secrets/external-secrets"` | | +| certController.image.tag | string | `""` | | +| certController.imagePullSecrets | list | `[]` | | +| certController.log | object | `{"level":"info","timeEncoding":"epoch"}` | Specifies Log Params to the certController | +| certController.metrics.listen.port | int | `8080` | | +| certController.metrics.service.annotations | object | `{}` | Additional service annotations | +| certController.metrics.service.enabled | bool | `false` | Enable if you use another monitoring tool than Prometheus to scrape the metrics | +| certController.metrics.service.port | int | `8080` | Metrics service port to scrape | +| certController.nameOverride | string | `""` | | +| certController.nodeSelector | object | `{}` | | +| certController.podAnnotations | object | `{}` | Annotations to add to Pod | +| certController.podDisruptionBudget | object | `{"enabled":false,"minAvailable":1}` | Pod disruption budget - for more details see https://kubernetes.io/docs/concepts/workloads/pods/disruptions/ | +| certController.podLabels | object | `{}` | | +| certController.podSecurityContext.enabled | bool | `true` | | +| certController.priorityClassName | string | `""` | Pod priority class name. | +| certController.rbac.create | bool | `true` | Specifies whether role and rolebinding resources should be created.
| +| certController.readinessProbe.address | string | `""` | Address for readiness probe | +| certController.readinessProbe.port | int | `8081` | ReadinessProbe port for kubelet | +| certController.replicaCount | int | `1` | | +| certController.requeueInterval | string | `"5m"` | | +| certController.resources | object | `{}` | | +| certController.revisionHistoryLimit | int | `10` | Specifies the amount of historic ReplicaSets k8s should keep (see https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#clean-up-policy) | +| certController.securityContext.allowPrivilegeEscalation | bool | `false` | | +| certController.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| certController.securityContext.enabled | bool | `true` | | +| certController.securityContext.readOnlyRootFilesystem | bool | `true` | | +| certController.securityContext.runAsNonRoot | bool | `true` | | +| certController.securityContext.runAsUser | int | `1000` | | +| certController.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | +| certController.serviceAccount.annotations | object | `{}` | Annotations to add to the service account. | +| certController.serviceAccount.automount | bool | `true` | Automounts the service account token in all containers of the pod | +| certController.serviceAccount.create | bool | `true` | Specifies whether a service account should be created. | +| certController.serviceAccount.extraLabels | object | `{}` | Extra Labels to add to the service account. | +| certController.serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template. | +| certController.tolerations | list | `[]` | | +| certController.topologySpreadConstraints | list | `[]` | | +| commonLabels | object | `{}` | Additional labels added to all helm chart resources. 
| +| concurrent | int | `1` | Specifies the number of concurrent ExternalSecret Reconciles external-secret executes at a time. | +| controllerClass | string | `""` | If set external secrets will filter matching Secret Stores with the appropriate controller values. | +| crds.annotations | object | `{}` | | +| crds.conversion.enabled | bool | `true` | | +| crds.createClusterExternalSecret | bool | `true` | If true, create CRDs for Cluster External Secret. | +| crds.createClusterSecretStore | bool | `true` | If true, create CRDs for Cluster Secret Store. | +| crds.createPushSecret | bool | `true` | If true, create CRDs for Push Secret. | +| createOperator | bool | `true` | Specifies whether an external secret operator deployment be created. | +| deploymentAnnotations | object | `{}` | Annotations to add to Deployment | +| dnsConfig | object | `{}` | Specifies `dnsOptions` to deployment | +| dnsPolicy | string | `"ClusterFirst"` | Specifies `dnsPolicy` to deployment | +| extendedMetricLabels | bool | `false` | If true external secrets will use recommended kubernetes annotations as prometheus metric labels. | +| extraArgs | object | `{}` | | +| extraContainers | list | `[]` | | +| extraEnv | list | `[]` | | +| extraObjects | list | `[]` | | +| extraVolumeMounts | list | `[]` | | +| extraVolumes | list | `[]` | | +| fullnameOverride | string | `""` | | +| global.affinity | object | `{}` | | +| global.compatibility.openshift.adaptSecurityContext | string | `"auto"` | Manages the securityContext properties to make them compatible with OpenShift. Possible values: auto - Apply configurations if it is detected that OpenShift is the target platform. force - Always apply configurations. disabled - No modification applied. 
| +| global.nodeSelector | object | `{}` | | +| global.tolerations | list | `[]` | | +| global.topologySpreadConstraints | list | `[]` | | +| hostNetwork | bool | `false` | Run the controller on the host network | +| image.flavour | string | `""` | The flavour of tag you want to use There are different image flavours available, like distroless and ubi. Please see GitHub release notes for image tags for these flavors. By default, the distroless image is used. | +| image.pullPolicy | string | `"IfNotPresent"` | | +| image.repository | string | `"oci.external-secrets.io/external-secrets/external-secrets"` | | +| image.tag | string | `""` | The image tag to use. The default is the chart appVersion. | +| imagePullSecrets | list | `[]` | | +| installCRDs | bool | `true` | If set, install and upgrade CRDs through helm chart. | +| leaderElect | bool | `false` | If true, external-secrets will perform leader election between instances to ensure no more than one instance of external-secrets operates at a time. 
| +| log | object | `{"level":"info","timeEncoding":"epoch"}` | Specifies Log Params to the Webhook | +| metrics.listen.port | int | `8080` | | +| metrics.service.annotations | object | `{}` | Additional service annotations | +| metrics.service.enabled | bool | `false` | Enable if you use another monitoring tool than Prometheus to scrape the metrics | +| metrics.service.port | int | `8080` | Metrics service port to scrape | +| nameOverride | string | `""` | | +| namespaceOverride | string | `""` | | +| nodeSelector | object | `{}` | | +| podAnnotations | object | `{}` | Annotations to add to Pod | +| podDisruptionBudget | object | `{"enabled":false,"minAvailable":1}` | Pod disruption budget - for more details see https://kubernetes.io/docs/concepts/workloads/pods/disruptions/ | +| podLabels | object | `{}` | | +| podSecurityContext.enabled | bool | `true` | | +| podSpecExtra | object | `{}` | Any extra pod spec on the deployment | +| priorityClassName | string | `""` | Pod priority class name. | +| processClusterExternalSecret | bool | `true` | if true, the operator will process cluster external secret. Else, it will ignore them. | +| processClusterStore | bool | `true` | if true, the operator will process cluster store. Else, it will ignore them. | +| processPushSecret | bool | `true` | if true, the operator will process push secret. Else, it will ignore them. | +| rbac.create | bool | `true` | Specifies whether role and rolebinding resources should be created. | +| rbac.servicebindings.create | bool | `true` | Specifies whether a clusterrole to give servicebindings read access should be created.
| +| replicaCount | int | `1` | | +| resources | object | `{}` | | +| revisionHistoryLimit | int | `10` | Specifies the amount of historic ReplicaSets k8s should keep (see https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#clean-up-policy) | +| scopedNamespace | string | `""` | If set external secrets are only reconciled in the provided namespace | +| scopedRBAC | bool | `false` | Must be used with scopedNamespace. If true, create scoped RBAC roles under the scoped namespace and implicitly disable cluster stores and cluster external secrets | +| securityContext.allowPrivilegeEscalation | bool | `false` | | +| securityContext.capabilities.drop[0] | string | `"ALL"` | | +| securityContext.enabled | bool | `true` | | +| securityContext.readOnlyRootFilesystem | bool | `true` | | +| securityContext.runAsNonRoot | bool | `true` | | +| securityContext.runAsUser | int | `1000` | | +| securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | +| service.ipFamilies | list | `[]` | Sets the families that should be supported and the order in which they should be applied to ClusterIP as well. Can be IPv4 and/or IPv6. | +| service.ipFamilyPolicy | string | `""` | Set the ip family policy to configure dual-stack see [Configure dual-stack](https://kubernetes.io/docs/concepts/services-networking/dual-stack/#services) | +| serviceAccount.annotations | object | `{}` | Annotations to add to the service account. | +| serviceAccount.automount | bool | `true` | Automounts the service account token in all containers of the pod | +| serviceAccount.create | bool | `true` | Specifies whether a service account should be created. | +| serviceAccount.extraLabels | object | `{}` | Extra Labels to add to the service account. | +| serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template. 
| +| serviceMonitor.additionalLabels | object | `{}` | Additional labels | +| serviceMonitor.enabled | bool | `false` | Specifies whether to create a ServiceMonitor resource for collecting Prometheus metrics | +| serviceMonitor.honorLabels | bool | `false` | Let prometheus add an exported_ prefix to conflicting labels | +| serviceMonitor.interval | string | `"30s"` | Interval to scrape metrics | +| serviceMonitor.metricRelabelings | list | `[]` | Metric relabel configs to apply to samples before ingestion. [Metric Relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs) | +| serviceMonitor.namespace | string | `""` | namespace where you want to install ServiceMonitors | +| serviceMonitor.relabelings | list | `[]` | Relabel configs to apply to samples before ingestion. [Relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) | +| serviceMonitor.scrapeTimeout | string | `"25s"` | Timeout if metrics can't be retrieved in given time interval | +| tolerations | list | `[]` | | +| topologySpreadConstraints | list | `[]` | | +| webhook.affinity | object | `{}` | | +| webhook.certCheckInterval | string | `"5m"` | Specifies the time to check if the cert is valid | +| webhook.certDir | string | `"/tmp/certs"` | | +| webhook.certManager.addInjectorAnnotations | bool | `true` | Automatically add the cert-manager.io/inject-ca-from annotation to the webhooks and CRDs. As long as you have the cert-manager CA Injector enabled, this will automatically setup your webhook's CA to the one used by cert-manager. See https://cert-manager.io/docs/concepts/ca-injector | +| webhook.certManager.cert.annotations | object | `{}` | Add extra annotations to the Certificate resource. | +| webhook.certManager.cert.create | bool | `true` | Create a certificate resource within this chart.
See https://cert-manager.io/docs/usage/certificate/ | +| webhook.certManager.cert.duration | string | `"8760h"` | Set the requested duration (i.e. lifetime) of the Certificate. See https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec One year by default. | +| webhook.certManager.cert.issuerRef | object | `{"group":"cert-manager.io","kind":"Issuer","name":"my-issuer"}` | For the Certificate created by this chart, setup the issuer. See https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.IssuerSpec | +| webhook.certManager.cert.renewBefore | string | `""` | How long before the currently issued certificate’s expiry cert-manager should renew the certificate. See https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec Note that renewBefore should be greater than .webhook.lookaheadInterval since the webhook will check this far in advance that the certificate is valid. | +| webhook.certManager.enabled | bool | `false` | Enabling cert-manager support will disable the built in secret and switch to using cert-manager (installed separately) to automatically issue and renew the webhook certificate. This chart does not install cert-manager for you, See https://cert-manager.io/docs/ | +| webhook.create | bool | `true` | Specifies whether a webhook deployment be created. | +| webhook.deploymentAnnotations | object | `{}` | Annotations to add to Deployment | +| webhook.extraArgs | object | `{}` | | +| webhook.extraEnv | list | `[]` | | +| webhook.extraVolumeMounts | list | `[]` | | +| webhook.extraVolumes | list | `[]` | | +| webhook.failurePolicy | string | `"Fail"` | Specifies whether validating webhooks should be created with failurePolicy: Fail or Ignore | +| webhook.fullnameOverride | string | `""` | | +| webhook.hostNetwork | bool | `false` | Specifies if webhook pod should use hostNetwork or not. 
| +| webhook.image.flavour | string | `""` | The flavour of tag you want to use | +| webhook.image.pullPolicy | string | `"IfNotPresent"` | | +| webhook.image.repository | string | `"oci.external-secrets.io/external-secrets/external-secrets"` | | +| webhook.image.tag | string | `""` | The image tag to use. The default is the chart appVersion. | +| webhook.imagePullSecrets | list | `[]` | | +| webhook.log | object | `{"level":"info","timeEncoding":"epoch"}` | Specifies Log Params to the Webhook | +| webhook.lookaheadInterval | string | `""` | Specifies the lookaheadInterval for certificate validity | +| webhook.metrics.listen.port | int | `8080` | | +| webhook.metrics.service.annotations | object | `{}` | Additional service annotations | +| webhook.metrics.service.enabled | bool | `false` | Enable if you use another monitoring tool than Prometheus to scrape the metrics | +| webhook.metrics.service.port | int | `8080` | Metrics service port to scrape | +| webhook.nameOverride | string | `""` | | +| webhook.nodeSelector | object | `{}` | | +| webhook.podAnnotations | object | `{}` | Annotations to add to Pod | +| webhook.podDisruptionBudget | object | `{"enabled":false,"minAvailable":1}` | Pod disruption budget - for more details see https://kubernetes.io/docs/concepts/workloads/pods/disruptions/ | +| webhook.podLabels | object | `{}` | | +| webhook.podSecurityContext.enabled | bool | `true` | | +| webhook.port | int | `10250` | The port the webhook will listen to | +| webhook.priorityClassName | string | `""` | Pod priority class name. | +| webhook.rbac.create | bool | `true` | Specifies whether role and rolebinding resources should be created.
| +| webhook.readinessProbe.address | string | `""` | Address for readiness probe | +| webhook.readinessProbe.port | int | `8081` | ReadinessProbe port for kubelet | +| webhook.replicaCount | int | `1` | | +| webhook.resources | object | `{}` | | +| webhook.revisionHistoryLimit | int | `10` | Specifies the amount of historic ReplicaSets k8s should keep (see https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#clean-up-policy) | +| webhook.secretAnnotations | object | `{}` | Annotations to add to Secret | +| webhook.securityContext.allowPrivilegeEscalation | bool | `false` | | +| webhook.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| webhook.securityContext.enabled | bool | `true` | | +| webhook.securityContext.readOnlyRootFilesystem | bool | `true` | | +| webhook.securityContext.runAsNonRoot | bool | `true` | | +| webhook.securityContext.runAsUser | int | `1000` | | +| webhook.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | +| webhook.serviceAccount.annotations | object | `{}` | Annotations to add to the service account. | +| webhook.serviceAccount.automount | bool | `true` | Automounts the service account token in all containers of the pod | +| webhook.serviceAccount.create | bool | `true` | Specifies whether a service account should be created. | +| webhook.serviceAccount.extraLabels | object | `{}` | Extra Labels to add to the service account. | +| webhook.serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template. 
| +| webhook.tolerations | list | `[]` | | +| webhook.topologySpreadConstraints | list | `[]` | | diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/NOTES.txt b/packages/system/external-secrets-operator/charts/external-secrets/templates/NOTES.txt new file mode 100644 index 00000000..ffa0fc7e --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/NOTES.txt @@ -0,0 +1,7 @@ +external-secrets has been deployed successfully in namespace {{ template "external-secrets.namespace" . }}! + +In order to begin using ExternalSecrets, you will need to set up a SecretStore +or ClusterSecretStore resource (for example, by creating a 'vault' SecretStore). + +More information on the different types of SecretStores and how to configure them +can be found in our Github: {{ .Chart.Home }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/_helpers.tpl b/packages/system/external-secrets-operator/charts/external-secrets/templates/_helpers.tpl new file mode 100644 index 00000000..d5eea075 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/_helpers.tpl @@ -0,0 +1,198 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "external-secrets.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "external-secrets.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Define namespace of chart, useful for multi-namespace deployments +*/}} +{{- define "external-secrets.namespace" -}} +{{- if .Values.namespaceOverride }} +{{- .Values.namespaceOverride }} +{{- else }} +{{- .Release.Namespace }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "external-secrets.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "external-secrets.labels" -}} +helm.sh/chart: {{ include "external-secrets.chart" . }} +{{ include "external-secrets.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- with .Values.commonLabels }} +{{ toYaml . }} +{{- end }} +{{- end }} + +{{- define "external-secrets-webhook.labels" -}} +helm.sh/chart: {{ include "external-secrets.chart" . }} +{{ include "external-secrets-webhook.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- with .Values.commonLabels }} +{{ toYaml . }} +{{- end }} +{{- end }} + +{{- define "external-secrets-webhook-metrics.labels" -}} +{{ include "external-secrets-webhook.selectorLabels" . }} +app.kubernetes.io/metrics: "webhook" +{{- with .Values.commonLabels }} +{{ toYaml . 
}} +{{- end }} +{{- end }} + +{{- define "external-secrets-cert-controller.labels" -}} +helm.sh/chart: {{ include "external-secrets.chart" . }} +{{ include "external-secrets-cert-controller.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- with .Values.commonLabels }} +{{ toYaml . }} +{{- end }} +{{- end }} + +{{- define "external-secrets-cert-controller-metrics.labels" -}} +{{ include "external-secrets-cert-controller.selectorLabels" . }} +app.kubernetes.io/metrics: "cert-controller" +{{- with .Values.commonLabels }} +{{ toYaml . }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "external-secrets.selectorLabels" -}} +app.kubernetes.io/name: {{ include "external-secrets.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} +{{- define "external-secrets-webhook.selectorLabels" -}} +app.kubernetes.io/name: {{ include "external-secrets.name" . }}-webhook +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} +{{- define "external-secrets-cert-controller.selectorLabels" -}} +app.kubernetes.io/name: {{ include "external-secrets.name" . }}-cert-controller +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} +{{/* +Create the name of the service account to use +*/}} +{{- define "external-secrets.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "external-secrets.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "external-secrets-webhook.serviceAccountName" -}} +{{- if .Values.webhook.serviceAccount.create }} +{{- default "external-secrets-webhook" .Values.webhook.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.webhook.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "external-secrets-cert-controller.serviceAccountName" -}} +{{- if .Values.certController.serviceAccount.create }} +{{- default "external-secrets-cert-controller" .Values.certController.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.certController.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Determine the image to use, including if using a flavour. +*/}} +{{- define "external-secrets.image" -}} +{{- if .image.flavour -}} +{{ printf "%s:%s-%s" .image.repository (.image.tag | default .chartAppVersion) .image.flavour }} +{{- else }} +{{ printf "%s:%s" .image.repository (.image.tag | default .chartAppVersion) }} +{{- end }} +{{- end }} + +{{/* +Renders a complete tree, even values that contains template. +*/}} +{{- define "external-secrets.render" -}} + {{- if typeIs "string" .value }} + {{- tpl .value .context }} + {{ else }} + {{- tpl (.value | toYaml) .context }} + {{- end }} +{{- end -}} + +{{/* +Return true if the OpenShift is the detected platform +Usage: +{{- include "external-secrets.isOpenShift" . 
-}} +*/}} +{{- define "external-secrets.isOpenShift" -}} +{{- if .Capabilities.APIVersions.Has "security.openshift.io/v1" -}} +{{- true -}} +{{- end -}} +{{- end -}} + +{{/* +Render the securityContext based on the provided securityContext + {{- include "external-secrets.renderSecurityContext" (dict "securityContext" .Values.securityContext "context" $) -}} +*/}} +{{- define "external-secrets.renderSecurityContext" -}} +{{- $adaptedContext := .securityContext -}} +{{- if .context.Values.global.compatibility -}} + {{- if .context.Values.global.compatibility.openshift -}} + {{- if or (eq .context.Values.global.compatibility.openshift.adaptSecurityContext "force") (and (eq .context.Values.global.compatibility.openshift.adaptSecurityContext "auto") (include "external-secrets.isOpenShift" .context)) -}} + {{/* Remove OpenShift managed fields */}} + {{- $adaptedContext = omit $adaptedContext "fsGroup" "runAsUser" "runAsGroup" -}} + {{- if not .securityContext.seLinuxOptions -}} + {{- $adaptedContext = omit $adaptedContext "seLinuxOptions" -}} + {{- end -}} + {{- end -}} + {{- end -}} +{{- end -}} +{{- omit $adaptedContext "enabled" | toYaml -}} +{{- end -}} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-deployment.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-deployment.yaml new file mode 100644 index 00000000..a843f045 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-deployment.yaml @@ -0,0 +1,124 @@ +{{- if and .Values.certController.create (not .Values.webhook.certManager.enabled) }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "external-secrets.fullname" . }}-cert-controller + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets-cert-controller.labels" . 
| nindent 4 }} + {{- with .Values.certController.deploymentAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + replicas: {{ .Values.certController.replicaCount }} + revisionHistoryLimit: {{ .Values.certController.revisionHistoryLimit }} + selector: + matchLabels: + {{- include "external-secrets-cert-controller.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.certController.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "external-secrets-cert-controller.labels" . | nindent 8 }} + {{- with .Values.certController.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.certController.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "external-secrets-cert-controller.serviceAccountName" . }} + automountServiceAccountToken: {{ .Values.certController.serviceAccount.automount }} + {{- with .Values.certController.podSecurityContext }} + {{- if and (.enabled) (gt (keys . | len) 1) }} + securityContext: + {{- include "external-secrets.renderSecurityContext" (dict "securityContext" . "context" $) | nindent 8 }} + {{- end }} + {{- end }} + hostNetwork: {{ .Values.certController.hostNetwork }} + containers: + - name: cert-controller + {{- with .Values.certController.securityContext }} + {{- if and (.enabled) (gt (keys . | len) 1) }} + securityContext: + {{- include "external-secrets.renderSecurityContext" (dict "securityContext" . "context" $) | nindent 12 }} + {{- end }} + {{- end }} + image: {{ include "external-secrets.image" (dict "chartAppVersion" .Chart.AppVersion "image" .Values.certController.image) | trim }} + imagePullPolicy: {{ .Values.certController.image.pullPolicy }} + args: + - certcontroller + - --crd-requeue-interval={{ .Values.certController.requeueInterval }} + - --service-name={{ include "external-secrets.fullname" . 
}}-webhook + - --service-namespace={{ template "external-secrets.namespace" . }} + - --secret-name={{ include "external-secrets.fullname" . }}-webhook + - --secret-namespace={{ template "external-secrets.namespace" . }} + - --metrics-addr=:{{ .Values.certController.metrics.listen.port }} + - --healthz-addr={{ .Values.certController.readinessProbe.address }}:{{ .Values.certController.readinessProbe.port }} + - --loglevel={{ .Values.certController.log.level }} + - --zap-time-encoding={{ .Values.certController.log.timeEncoding }} + {{- if not .Values.crds.createClusterSecretStore }} + - --crd-names=externalsecrets.external-secrets.io + - --crd-names=secretstores.external-secrets.io + {{- end }} + {{- if .Values.installCRDs }} + - --enable-partial-cache=true + {{- end }} + {{- range $key, $value := .Values.certController.extraArgs }} + {{- if $value }} + - --{{ $key }}={{ $value }} + {{- else }} + - --{{ $key }} + {{- end }} + {{- end }} + ports: + - containerPort: {{ .Values.certController.metrics.listen.port }} + protocol: TCP + name: metrics + readinessProbe: + httpGet: + port: {{ .Values.certController.readinessProbe.port }} + path: /readyz + initialDelaySeconds: 20 + periodSeconds: 5 + {{- with .Values.certController.extraEnv }} + env: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.certController.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if .Values.certController.extraVolumeMounts }} + volumeMounts: + {{- toYaml .Values.certController.extraVolumeMounts | nindent 12 }} + {{- end }} + {{- if .Values.certController.extraVolumes }} + volumes: + {{- toYaml .Values.certController.extraVolumes | nindent 8 }} + {{- end }} + {{- with .Values.certController.nodeSelector | default .Values.global.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.certController.affinity | default .Values.global.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.certController.tolerations | default .Values.global.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.certController.topologySpreadConstraints | default .Values.global.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.certController.priorityClassName }} + priorityClassName: {{ .Values.certController.priorityClassName }} + {{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-poddisruptionbudget.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-poddisruptionbudget.yaml new file mode 100644 index 00000000..e61cb8eb --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-poddisruptionbudget.yaml @@ -0,0 +1,19 @@ +{{- if and .Values.certController.create .Values.certController.podDisruptionBudget.enabled (not .Values.webhook.certManager.enabled) }} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ include "external-secrets.fullname" . }}-cert-controller-pdb + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets-cert-controller.labels" . | nindent 4 }} +spec: + {{- if .Values.certController.podDisruptionBudget.minAvailable }} + minAvailable: {{ .Values.certController.podDisruptionBudget.minAvailable }} + {{- end }} + {{- if .Values.certController.podDisruptionBudget.maxUnavailable }} + maxUnavailable: {{ .Values.certController.podDisruptionBudget.maxUnavailable }} + {{- end }} + selector: + matchLabels: + {{- include "external-secrets-cert-controller.selectorLabels" . 
| nindent 6 }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-rbac.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-rbac.yaml new file mode 100644 index 00000000..84a0c110 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-rbac.yaml @@ -0,0 +1,86 @@ +{{- if and .Values.certController.create .Values.certController.rbac.create (not .Values.webhook.certManager.enabled) -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "external-secrets.fullname" . }}-cert-controller + labels: + {{- include "external-secrets-cert-controller.labels" . | nindent 4 }} +rules: + - apiGroups: + - "apiextensions.k8s.io" + resources: + - "customresourcedefinitions" + verbs: + - "get" + - "list" + - "watch" + - "update" + - "patch" + - apiGroups: + - "admissionregistration.k8s.io" + resources: + - "validatingwebhookconfigurations" + verbs: + - "list" + - "watch" + - "get" + - apiGroups: + - "admissionregistration.k8s.io" + resources: + - "validatingwebhookconfigurations" + resourceNames: + - "secretstore-validate" + - "externalsecret-validate" + verbs: + - "update" + - "patch" + - apiGroups: + - "" + resources: + - "endpoints" + verbs: + - "list" + - "get" + - "watch" + - apiGroups: + - "" + resources: + - "events" + verbs: + - "create" + - "patch" + - apiGroups: + - "" + resources: + - "secrets" + verbs: + - "get" + - "list" + - "watch" + - "update" + - "patch" + - apiGroups: + - "coordination.k8s.io" + resources: + - "leases" + verbs: + - "get" + - "create" + - "update" + - "patch" +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "external-secrets.fullname" . }}-cert-controller + labels: + {{- include "external-secrets-cert-controller.labels" . 
| nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "external-secrets.fullname" . }}-cert-controller +subjects: + - name: {{ include "external-secrets-cert-controller.serviceAccountName" . }} + namespace: {{ template "external-secrets.namespace" . }} + kind: ServiceAccount +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-service.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-service.yaml new file mode 100644 index 00000000..12cb4f4d --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-service.yaml @@ -0,0 +1,28 @@ +{{- if and .Values.certController.create .Values.certController.metrics.service.enabled (not .Values.webhook.certManager.enabled) }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "external-secrets.fullname" . }}-cert-controller-metrics + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets.labels" . | nindent 4 }} + {{- with .Values.certController.metrics.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: ClusterIP + {{- if .Values.service.ipFamilyPolicy }} + ipFamilyPolicy: {{ .Values.service.ipFamilyPolicy }} + {{- end }} + {{- if .Values.service.ipFamilies }} + ipFamilies: {{ .Values.service.ipFamilies | toYaml | nindent 2 }} + {{- end }} + ports: + - port: {{ .Values.certController.metrics.service.port }} + protocol: TCP + targetPort: metrics + name: metrics + selector: + {{- include "external-secrets-cert-controller.selectorLabels" .
| nindent 4 }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-serviceaccount.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-serviceaccount.yaml new file mode 100644 index 00000000..6a36f9d7 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/cert-controller-serviceaccount.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.certController.create .Values.certController.serviceAccount.create (not .Values.webhook.certManager.enabled) -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "external-secrets-cert-controller.serviceAccountName" . }} + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets-cert-controller.labels" . | nindent 4 }} + {{- with .Values.certController.serviceAccount.extraLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.certController.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/acraccesstoken.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/acraccesstoken.yaml new file mode 100644 index 00000000..3ad09232 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/acraccesstoken.yaml @@ -0,0 +1,204 @@ +{{- if .Values.installCRDs }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- with .Values.crds.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- if and .Values.crds.conversion.enabled .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "external-secrets.fullname" . 
}}-webhook + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.3 + labels: + external-secrets.io/component: controller + name: acraccesstokens.generators.external-secrets.io +spec: + group: generators.external-secrets.io + names: + categories: + - external-secrets + - external-secrets-generators + kind: ACRAccessToken + listKind: ACRAccessTokenList + plural: acraccesstokens + shortNames: + - acraccesstoken + singular: acraccesstoken + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + ACRAccessToken returns a Azure Container Registry token + that can be used for pushing/pulling images. + Note: by default it will return an ACR Refresh Token with full access + (depending on the identity). + This can be scoped down to the repository level using .spec.scope. + In case scope is defined it will return an ACR Access Token. + + See docs: https://github.com/Azure/acr/blob/main/docs/AAD-OAuth.md + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + ACRAccessTokenSpec defines how to generate the access token + e.g. how to authenticate and which registry to use. 
+ see: https://github.com/Azure/acr/blob/main/docs/AAD-OAuth.md#overview + properties: + auth: + properties: + managedIdentity: + description: ManagedIdentity uses Azure Managed Identity to authenticate with Azure. + properties: + identityId: + description: If multiple Managed Identity is assigned to the pod, you can select the one to be used + type: string + type: object + servicePrincipal: + description: ServicePrincipal uses Azure Service Principal credentials to authenticate with Azure. + properties: + secretRef: + description: |- + Configuration used to authenticate with Azure using static + credentials stored in a Kind=Secret. + properties: + clientId: + description: The Azure clientId of the service principle used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + clientSecret: + description: The Azure ClientSecret of the service principle used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + type: object + required: + - secretRef + type: object + workloadIdentity: + description: WorkloadIdentity uses Azure Workload Identity to authenticate with Azure. + properties: + serviceAccountRef: + description: |- + ServiceAccountRef specified the service account + that should be used when authenticating with WorkloadIdentity. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + type: object + type: object + environmentType: + default: PublicCloud + description: |- + EnvironmentType specifies the Azure cloud environment endpoints to use for + connecting and authenticating with Azure. By default it points to the public cloud AAD endpoint. + The following endpoints are available, also see here: https://github.com/Azure/go-autorest/blob/main/autorest/azure/environments.go#L152 + PublicCloud, USGovernmentCloud, ChinaCloud, GermanCloud + enum: + - PublicCloud + - USGovernmentCloud + - ChinaCloud + - GermanCloud + type: string + registry: + description: |- + the domain name of the ACR registry + e.g. foobarexample.azurecr.io + type: string + scope: + description: |- + Define the scope for the access token, e.g. pull/push access for a repository. + if not provided it will return a refresh token that has full scope. + Note: you need to pin it down to the repository level, there is no wildcard available. 
+ + examples: + repository:my-repository:pull,push + repository:my-repository:pull + + see docs for details: https://docs.docker.com/registry/spec/auth/scope/ + type: string + tenantId: + description: TenantID configures the Azure Tenant to send requests to. Required for ServicePrincipal auth type. + type: string + required: + - auth + - registry + type: object + type: object + served: true + storage: true + subresources: + status: {} +{{- if .Values.crds.conversion.enabled }} + conversion: + strategy: Webhook + webhook: + conversionReviewVersions: + - v1 + clientConfig: + service: + name: {{ include "external-secrets.fullname" . }}-webhook + namespace: {{ .Release.Namespace | quote }} + path: /convert +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/clusterexternalsecret.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/clusterexternalsecret.yaml new file mode 100644 index 00000000..c0be1fe0 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/clusterexternalsecret.yaml @@ -0,0 +1,666 @@ +{{- if and (.Values.installCRDs) (.Values.crds.createClusterExternalSecret) }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- with .Values.crds.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- if and .Values.crds.conversion.enabled .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "external-secrets.fullname" . 
}}-webhook + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.3 + labels: + external-secrets.io/component: controller + name: clusterexternalsecrets.external-secrets.io +spec: + group: external-secrets.io + names: + categories: + - external-secrets + kind: ClusterExternalSecret + listKind: ClusterExternalSecretList + plural: clusterexternalsecrets + shortNames: + - ces + singular: clusterexternalsecret + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .spec.externalSecretSpec.secretStoreRef.name + name: Store + type: string + - jsonPath: .spec.refreshTime + name: Refresh Interval + type: string + - jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: ClusterExternalSecret is the Schema for the clusterexternalsecrets API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ClusterExternalSecretSpec defines the desired state of ClusterExternalSecret. 
+ properties: + externalSecretMetadata: + description: The metadata of the external secrets to be created + properties: + annotations: + additionalProperties: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + type: object + externalSecretName: + description: The name of the external secrets to be created defaults to the name of the ClusterExternalSecret + type: string + externalSecretSpec: + description: The spec for the ExternalSecrets to be created + properties: + data: + description: Data defines the connection between the Kubernetes Secret keys and the Provider data + items: + description: ExternalSecretData defines the connection between the Kubernetes Secret key (spec.data.) and the Provider data. + properties: + remoteRef: + description: |- + RemoteRef points to the remote secret and defines + which secret (version/property/..) to fetch. + properties: + conversionStrategy: + default: Default + description: Used to define a conversion Strategy + enum: + - Default + - Unicode + type: string + decodingStrategy: + default: None + description: Used to define a decoding Strategy + enum: + - Auto + - Base64 + - Base64URL + - None + type: string + key: + description: Key is the key used in the Provider, mandatory + type: string + metadataPolicy: + default: None + description: Policy for fetching tags/labels from provider secrets, possible options are Fetch, None. Defaults to None + enum: + - None + - Fetch + type: string + property: + description: Used to select a specific property of the Provider value (if a map), if supported + type: string + version: + description: Used to select a specific version of the Provider value, if supported + type: string + required: + - key + type: object + secretKey: + description: |- + SecretKey defines the key in which the controller stores + the value. 
This is the key in the Kind=Secret + type: string + sourceRef: + description: |- + SourceRef allows you to override the source + from which the value will pulled from. + maxProperties: 1 + properties: + generatorRef: + description: |- + GeneratorRef points to a generator custom resource. + + Deprecated: The generatorRef is not implemented in .data[]. + this will be removed with v1. + properties: + apiVersion: + default: generators.external-secrets.io/v1alpha1 + description: Specify the apiVersion of the generator resource + type: string + kind: + description: Specify the Kind of the resource, e.g. Password, ACRAccessToken etc. + type: string + name: + description: Specify the name of the generator resource + type: string + required: + - kind + - name + type: object + storeRef: + description: SecretStoreRef defines which SecretStore to fetch the ExternalSecret data. + properties: + kind: + description: |- + Kind of the SecretStore resource (SecretStore or ClusterSecretStore) + Defaults to `SecretStore` + type: string + name: + description: Name of the SecretStore resource + type: string + required: + - name + type: object + type: object + required: + - remoteRef + - secretKey + type: object + type: array + dataFrom: + description: |- + DataFrom is used to fetch all properties from a specific Provider data + If multiple entries are specified, the Secret keys are merged in the specified order + items: + properties: + extract: + description: |- + Used to extract multiple key/value pairs from one secret + Note: Extract does not support sourceRef.Generator or sourceRef.GeneratorRef. 
+ properties: + conversionStrategy: + default: Default + description: Used to define a conversion Strategy + enum: + - Default + - Unicode + type: string + decodingStrategy: + default: None + description: Used to define a decoding Strategy + enum: + - Auto + - Base64 + - Base64URL + - None + type: string + key: + description: Key is the key used in the Provider, mandatory + type: string + metadataPolicy: + default: None + description: Policy for fetching tags/labels from provider secrets, possible options are Fetch, None. Defaults to None + enum: + - None + - Fetch + type: string + property: + description: Used to select a specific property of the Provider value (if a map), if supported + type: string + version: + description: Used to select a specific version of the Provider value, if supported + type: string + required: + - key + type: object + find: + description: |- + Used to find secrets based on tags or regular expressions + Note: Find does not support sourceRef.Generator or sourceRef.GeneratorRef. + properties: + conversionStrategy: + default: Default + description: Used to define a conversion Strategy + enum: + - Default + - Unicode + type: string + decodingStrategy: + default: None + description: Used to define a decoding Strategy + enum: + - Auto + - Base64 + - Base64URL + - None + type: string + name: + description: Finds secrets based on the name. + properties: + regexp: + description: Finds secrets base + type: string + type: object + path: + description: A root path to start the find operations. + type: string + tags: + additionalProperties: + type: string + description: Find secrets based on tags. + type: object + type: object + rewrite: + description: |- + Used to rewrite secret Keys after getting them from the secret Provider + Multiple Rewrite operations can be provided. They are applied in a layered order (first to last) + items: + properties: + regexp: + description: |- + Used to rewrite with regular expressions. 
+ The resulting key will be the output of a regexp.ReplaceAll operation. + properties: + source: + description: Used to define the regular expression of a re.Compiler. + type: string + target: + description: Used to define the target pattern of a ReplaceAll operation. + type: string + required: + - source + - target + type: object + transform: + description: |- + Used to apply string transformation on the secrets. + The resulting key will be the output of the template applied by the operation. + properties: + template: + description: |- + Used to define the template to apply on the secret name. + `.value ` will specify the secret name in the template. + type: string + required: + - template + type: object + type: object + type: array + sourceRef: + description: |- + SourceRef points to a store or generator + which contains secret values ready to use. + Use this in combination with Extract or Find pull values out of + a specific SecretStore. + When sourceRef points to a generator Extract or Find is not supported. + The generator returns a static map of values + maxProperties: 1 + properties: + generatorRef: + description: GeneratorRef points to a generator custom resource. + properties: + apiVersion: + default: generators.external-secrets.io/v1alpha1 + description: Specify the apiVersion of the generator resource + type: string + kind: + description: Specify the Kind of the resource, e.g. Password, ACRAccessToken etc. + type: string + name: + description: Specify the name of the generator resource + type: string + required: + - kind + - name + type: object + storeRef: + description: SecretStoreRef defines which SecretStore to fetch the ExternalSecret data. 
+ properties: + kind: + description: |- + Kind of the SecretStore resource (SecretStore or ClusterSecretStore) + Defaults to `SecretStore` + type: string + name: + description: Name of the SecretStore resource + type: string + required: + - name + type: object + type: object + type: object + type: array + refreshInterval: + default: 1h + description: |- + RefreshInterval is the amount of time before the values are read again from the SecretStore provider + Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h" + May be set to zero to fetch and create it once. Defaults to 1h. + type: string + secretStoreRef: + description: SecretStoreRef defines which SecretStore to fetch the ExternalSecret data. + properties: + kind: + description: |- + Kind of the SecretStore resource (SecretStore or ClusterSecretStore) + Defaults to `SecretStore` + type: string + name: + description: Name of the SecretStore resource + type: string + required: + - name + type: object + target: + default: + creationPolicy: Owner + deletionPolicy: Retain + description: |- + ExternalSecretTarget defines the Kubernetes Secret to be created + There can be only one target per ExternalSecret. + properties: + creationPolicy: + default: Owner + description: |- + CreationPolicy defines rules on how to create the resulting Secret + Defaults to 'Owner' + enum: + - Owner + - Orphan + - Merge + - None + type: string + deletionPolicy: + default: Retain + description: |- + DeletionPolicy defines rules on how to delete the resulting Secret + Defaults to 'Retain' + enum: + - Delete + - Merge + - Retain + type: string + immutable: + description: Immutable defines if the final secret will be immutable + type: boolean + name: + description: |- + Name defines the name of the Secret resource to be managed + This field is immutable + Defaults to the .metadata.name of the ExternalSecret resource + type: string + template: + description: Template defines a blueprint for the created Secret resource. 
+ properties: + data: + additionalProperties: + type: string + type: object + engineVersion: + default: v2 + description: |- + EngineVersion specifies the template engine version + that should be used to compile/execute the + template specified in .data and .templateFrom[]. + enum: + - v1 + - v2 + type: string + mergePolicy: + default: Replace + enum: + - Replace + - Merge + type: string + metadata: + description: ExternalSecretTemplateMetadata defines metadata fields for the Secret blueprint. + properties: + annotations: + additionalProperties: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + type: object + templateFrom: + items: + properties: + configMap: + properties: + items: + items: + properties: + key: + type: string + templateAs: + default: Values + enum: + - Values + - KeysAndValues + type: string + required: + - key + type: object + type: array + name: + type: string + required: + - items + - name + type: object + literal: + type: string + secret: + properties: + items: + items: + properties: + key: + type: string + templateAs: + default: Values + enum: + - Values + - KeysAndValues + type: string + required: + - key + type: object + type: array + name: + type: string + required: + - items + - name + type: object + target: + default: Data + enum: + - Data + - Annotations + - Labels + type: string + type: object + type: array + type: + type: string + type: object + type: object + type: object + namespaceSelector: + description: |- + The labels to select by to find the Namespaces to create the ExternalSecrets in. + Deprecated: Use NamespaceSelectors instead. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. 
+ properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaceSelectors: + description: A list of labels to select by to find the Namespaces to create the ExternalSecrets in. The selectors are ORed. + items: + description: |- + A label selector is a label query over a set of resources. The result of matchLabels and + matchExpressions are ANDed. An empty label selector matches all objects. A null + label selector matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. 
+ Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + type: array + namespaces: + description: Choose namespaces by name. This field is ORed with anything that NamespaceSelectors ends up choosing. + items: + type: string + type: array + refreshTime: + description: The time in which the controller should reconcile its objects and recheck namespaces for labels. + type: string + required: + - externalSecretSpec + type: object + status: + description: ClusterExternalSecretStatus defines the observed state of ClusterExternalSecret. + properties: + conditions: + items: + properties: + message: + type: string + status: + type: string + type: + type: string + required: + - status + - type + type: object + type: array + externalSecretName: + description: ExternalSecretName is the name of the ExternalSecrets created by the ClusterExternalSecret + type: string + failedNamespaces: + description: Failed namespaces are the namespaces that failed to apply an ExternalSecret + items: + description: ClusterExternalSecretNamespaceFailure represents a failed namespace deployment and it's reason. 
+ properties: + namespace: + description: Namespace is the namespace that failed when trying to apply an ExternalSecret + type: string + reason: + description: Reason is why the ExternalSecret failed to apply to the namespace + type: string + required: + - namespace + type: object + type: array + provisionedNamespaces: + description: ProvisionedNamespaces are the namespaces where the ClusterExternalSecret has secrets + items: + type: string + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} +{{- if .Values.crds.conversion.enabled }} + conversion: + strategy: Webhook + webhook: + conversionReviewVersions: + - v1 + clientConfig: + service: + name: {{ include "external-secrets.fullname" . }}-webhook + namespace: {{ .Release.Namespace | quote }} + path: /convert +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/clustersecretstore.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/clustersecretstore.yaml new file mode 100644 index 00000000..22c8009a --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/clustersecretstore.yaml @@ -0,0 +1,4640 @@ +{{- if and (.Values.installCRDs) (.Values.crds.createClusterSecretStore) }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- with .Values.crds.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- if and .Values.crds.conversion.enabled .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "external-secrets.fullname" . 
}}-webhook + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.3 + labels: + external-secrets.io/component: controller + name: clustersecretstores.external-secrets.io +spec: + group: external-secrets.io + names: + categories: + - external-secrets + kind: ClusterSecretStore + listKind: ClusterSecretStoreList + plural: clustersecretstores + shortNames: + - css + singular: clustersecretstore + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + - jsonPath: .status.conditions[?(@.type=="Ready")].reason + name: Status + type: string + deprecated: true + name: v1alpha1 + schema: + openAPIV3Schema: + description: ClusterSecretStore represents a secure external location for storing secrets, which can be referenced as part of `storeRef` fields. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: SecretStoreSpec defines the desired state of SecretStore. + properties: + controller: + description: |- + Used to select the correct ESO controller (think: ingress.ingressClassName) + The ESO controller is instantiated with a specific controller name and filters ES based on this property + type: string + provider: + description: Used to configure the provider. 
Only one provider may be set + maxProperties: 1 + minProperties: 1 + properties: + akeyless: + description: Akeyless configures this store to sync secrets using Akeyless Vault provider + properties: + akeylessGWApiURL: + description: Akeyless GW API Url from which the secrets to be fetched from. + type: string + authSecretRef: + description: Auth configures how the operator authenticates with Akeyless. + properties: + kubernetesAuth: + description: |- + Kubernetes authenticates with Akeyless by passing the ServiceAccount + token stored in the named Secret resource. + properties: + accessID: + description: the Akeyless Kubernetes auth-method access-id + type: string + k8sConfName: + description: Kubernetes-auth configuration name in Akeyless-Gateway + type: string + secretRef: + description: |- + Optional secret field containing a Kubernetes ServiceAccount JWT used + for authenticating with Akeyless. If a name is specified without a key, + `token` is the default. If one is not specified, the one bound to + the controller will be used. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + serviceAccountRef: + description: |- + Optional service account field containing the name of a kubernetes ServiceAccount. + If the service account is specified, the service account secret token JWT will be used + for authenticating with Akeyless. If the service account selector is not supplied, + the secretRef will be used instead. 
+ properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + required: + - accessID + - k8sConfName + type: object + secretRef: + description: |- + Reference to a Secret that contains the details + to authenticate with Akeyless. + properties: + accessID: + description: The SecretAccessID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + accessType: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. 
cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + accessTypeParam: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + type: object + caBundle: + description: |- + PEM/base64 encoded CA bundle used to validate Akeyless Gateway certificate. Only used + if the AkeylessGWApiURL URL is using HTTPS protocol. If not set the system root certificates + are used to validate the TLS connection. + format: byte + type: string + caProvider: + description: The provider for the CA bundle to use to validate Akeyless Gateway certificate. + properties: + key: + description: The key the value inside of the provider type to use, only used with "Secret" type + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: The namespace the Provider type is in. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + required: + - akeylessGWApiURL + - authSecretRef + type: object + alibaba: + description: Alibaba configures this store to sync secrets using Alibaba Cloud provider + properties: + auth: + description: AlibabaAuth contains a secretRef for credentials. + properties: + rrsa: + description: Authenticate against Alibaba using RRSA. 
+ properties: + oidcProviderArn: + type: string + oidcTokenFilePath: + type: string + roleArn: + type: string + sessionName: + type: string + required: + - oidcProviderArn + - oidcTokenFilePath + - roleArn + - sessionName + type: object + secretRef: + description: AlibabaAuthSecretRef holds secret references for Alibaba credentials. + properties: + accessKeyIDSecretRef: + description: The AccessKeyID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + accessKeySecretSecretRef: + description: The AccessKeySecret is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + required: + - accessKeyIDSecretRef + - accessKeySecretSecretRef + type: object + type: object + regionID: + description: Alibaba Region to be used for the provider + type: string + required: + - auth + - regionID + type: object + aws: + description: AWS configures this store to sync secrets using AWS Secret Manager provider + properties: + auth: + description: |- + Auth defines the information necessary to authenticate against AWS + if not set aws sdk will infer credentials from your environment + see: https://docs.aws.amazon.com/sdk-for-go/v1/developer-guide/configuring-sdk.html#specifying-credentials + properties: + jwt: + description: Authenticate against AWS using service account tokens. + properties: + serviceAccountRef: + description: A reference to a ServiceAccount resource. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + type: object + secretRef: + description: |- + AWSAuthSecretRef holds secret references for AWS credentials + both AccessKeyID and SecretAccessKey must be defined in order to properly authenticate. + properties: + accessKeyIDSecretRef: + description: The AccessKeyID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. 
+ type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + secretAccessKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + type: object + region: + description: AWS Region to be used for the provider + type: string + role: + description: Role is a Role ARN which the SecretManager provider will assume + type: string + service: + description: Service defines which service should be used to fetch the secrets + enum: + - SecretsManager + - ParameterStore + type: string + required: + - region + - service + type: object + azurekv: + description: AzureKV configures this store to sync secrets using Azure Key Vault provider + properties: + authSecretRef: + description: Auth configures how the operator authenticates with Azure. Required for ServicePrincipal auth type. + properties: + clientId: + description: The Azure clientId of the service principle used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. 
+ type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + clientSecret: + description: The Azure ClientSecret of the service principle used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + authType: + default: ServicePrincipal + description: |- + Auth type defines how to authenticate to the keyvault service. + Valid values are: + - "ServicePrincipal" (default): Using a service principal (tenantId, clientId, clientSecret) + - "ManagedIdentity": Using Managed Identity assigned to the pod (see aad-pod-identity) + enum: + - ServicePrincipal + - ManagedIdentity + - WorkloadIdentity + type: string + identityId: + description: If multiple Managed Identity is assigned to the pod, you can select the one to be used + type: string + serviceAccountRef: + description: |- + ServiceAccountRef specified the service account + that should be used when authenticating with WorkloadIdentity. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. 
+ type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + tenantId: + description: TenantID configures the Azure Tenant to send requests to. Required for ServicePrincipal auth type. + type: string + vaultUrl: + description: Vault Url from which the secrets to be fetched from. + type: string + required: + - vaultUrl + type: object + fake: + description: Fake configures a store with static key/value pairs + properties: + data: + items: + properties: + key: + type: string + value: + type: string + valueMap: + additionalProperties: + type: string + type: object + version: + type: string + required: + - key + type: object + type: array + required: + - data + type: object + gcpsm: + description: GCPSM configures this store to sync secrets using Google Cloud Platform Secret Manager provider + properties: + auth: + description: Auth defines the information necessary to authenticate against GCP + properties: + secretRef: + properties: + secretAccessKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + workloadIdentity: + properties: + clusterLocation: + type: string + clusterName: + type: string + clusterProjectID: + type: string + serviceAccountRef: + description: A reference to a ServiceAccount resource. 
+ properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + required: + - clusterLocation + - clusterName + - serviceAccountRef + type: object + type: object + projectID: + description: ProjectID project where secret is located + type: string + type: object + gitlab: + description: GitLab configures this store to sync secrets using GitLab Variables provider + properties: + auth: + description: Auth configures how secret-manager authenticates with a GitLab instance. + properties: + SecretRef: + properties: + accessToken: + description: AccessToken is used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - SecretRef + type: object + projectID: + description: ProjectID specifies a project where secrets are located. + type: string + url: + description: URL configures the GitLab instance URL. Defaults to https://gitlab.com/. 
+ type: string + required: + - auth + type: object + ibm: + description: IBM configures this store to sync secrets using IBM Cloud provider + properties: + auth: + description: Auth configures how secret-manager authenticates with the IBM secrets manager. + properties: + secretRef: + properties: + secretApiKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - secretRef + type: object + serviceUrl: + description: ServiceURL is the Endpoint URL that is specific to the Secrets Manager service instance + type: string + required: + - auth + type: object + kubernetes: + description: Kubernetes configures this store to sync secrets using a Kubernetes cluster provider + properties: + auth: + description: Auth configures how secret-manager authenticates with a Kubernetes instance. + maxProperties: 1 + minProperties: 1 + properties: + cert: + description: has both clientCert and clientKey as secretKeySelector + properties: + clientCert: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. 
Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + clientKey: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + serviceAccount: + description: points to a service account that should be used for authentication + properties: + serviceAccount: + description: A reference to a ServiceAccount resource. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + type: object + token: + description: use static token to authenticate with + properties: + bearerToken: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. 
Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + type: object + remoteNamespace: + default: default + description: Remote namespace to fetch the secrets from + type: string + server: + description: configures the Kubernetes server Address. + properties: + caBundle: + description: CABundle is a base64-encoded CA certificate + format: byte + type: string + caProvider: + description: 'see: https://external-secrets.io/v0.4.1/spec/#external-secrets.io/v1alpha1.CAProvider' + properties: + key: + description: The key the value inside of the provider type to use, only used with "Secret" type + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: The namespace the Provider type is in. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + url: + default: kubernetes.default + description: configures the Kubernetes server Address. + type: string + type: object + required: + - auth + type: object + oracle: + description: Oracle configures this store to sync secrets using Oracle Vault provider + properties: + auth: + description: |- + Auth configures how secret-manager authenticates with the Oracle Vault. + If empty, instance principal is used. Optionally, the authenticating principal type + and/or user data may be supplied for the use of workload identity and user principal. + properties: + secretRef: + description: SecretRef to pass through sensitive information. 
+ properties: + fingerprint: + description: Fingerprint is the fingerprint of the API private key. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + privatekey: + description: PrivateKey is the user's API Signing Key in PEM format, used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - fingerprint + - privatekey + type: object + tenancy: + description: Tenancy is the tenancy OCID where user is located. + type: string + user: + description: User is an access OCID specific to the account. + type: string + required: + - secretRef + - tenancy + - user + type: object + compartment: + description: |- + Compartment is the vault compartment OCID. + Required for PushSecret + type: string + encryptionKey: + description: |- + EncryptionKey is the OCID of the encryption key within the vault. + Required for PushSecret + type: string + principalType: + description: |- + The type of principal to use for authentication. If left blank, the Auth struct will + determine the principal type. 
This optional field must be specified if using + workload identity. + enum: + - "" + - UserPrincipal + - InstancePrincipal + - Workload + type: string + region: + description: Region is the region where vault is located. + type: string + serviceAccountRef: + description: |- + ServiceAccountRef specified the service account + that should be used when authenticating with WorkloadIdentity. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + vault: + description: Vault is the vault's OCID of the specific vault where secret is located. + type: string + required: + - region + - vault + type: object + passworddepot: + description: Configures a store to sync secrets with a Password Depot instance. + properties: + auth: + description: Auth configures how secret-manager authenticates with a Password Depot instance. + properties: + secretRef: + properties: + credentials: + description: Username / Password is used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + type: object + required: + - secretRef + type: object + database: + description: Database to use as source + type: string + host: + description: URL configures the Password Depot instance URL. + type: string + required: + - auth + - database + - host + type: object + vault: + description: Vault configures this store to sync secrets using Hashi provider + properties: + auth: + description: Auth configures how secret-manager authenticates with the Vault server. + properties: + appRole: + description: |- + AppRole authenticates with Vault using the App Role auth mechanism, + with the role and secret stored in a Kubernetes Secret resource. + properties: + path: + default: approle + description: |- + Path where the App Role authentication backend is mounted + in Vault, e.g: "approle" + type: string + roleId: + description: |- + RoleID configured in the App Role authentication backend when setting + up the authentication backend in Vault. + type: string + secretRef: + description: |- + Reference to a key in a Secret that contains the App Role secret used + to authenticate with Vault. + The `key` field must be specified and denotes which entry within the Secret + resource is used as the app role secret. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + required: + - path + - roleId + - secretRef + type: object + cert: + description: |- + Cert authenticates with TLS Certificates by passing client certificate, private key and ca certificate + Cert authentication method + properties: + clientCert: + description: |- + ClientCert is a certificate to authenticate using the Cert Vault + authentication method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + secretRef: + description: |- + SecretRef to a key in a Secret resource containing client private key to + authenticate with Vault using the Cert authentication method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + jwt: + description: |- + Jwt authenticates with Vault by passing role and JWT token using the + JWT/OIDC authentication method + properties: + kubernetesServiceAccountToken: + description: |- + Optional ServiceAccountToken specifies the Kubernetes service account for which to request + a token for with the `TokenRequest` API. 
+ properties: + audiences: + description: |- + Optional audiences field that will be used to request a temporary Kubernetes service + account token for the service account referenced by `serviceAccountRef`. + Defaults to a single audience `vault` it not specified. + items: + type: string + type: array + expirationSeconds: + description: |- + Optional expiration time in seconds that will be used to request a temporary + Kubernetes service account token for the service account referenced by + `serviceAccountRef`. + Defaults to 10 minutes. + format: int64 + type: integer + serviceAccountRef: + description: Service account field containing the name of a kubernetes ServiceAccount. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + required: + - serviceAccountRef + type: object + path: + default: jwt + description: |- + Path where the JWT authentication backend is mounted + in Vault, e.g: "jwt" + type: string + role: + description: |- + Role is a JWT role to authenticate using the JWT/OIDC Vault + authentication method + type: string + secretRef: + description: |- + Optional SecretRef that refers to a key in a Secret resource containing JWT token to + authenticate with Vault using the JWT/OIDC authentication method. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. 
+ type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - path + type: object + kubernetes: + description: |- + Kubernetes authenticates with Vault by passing the ServiceAccount + token stored in the named Secret resource to the Vault server. + properties: + mountPath: + default: kubernetes + description: |- + Path where the Kubernetes authentication backend is mounted in Vault, e.g: + "kubernetes" + type: string + role: + description: |- + A required field containing the Vault Role to assume. A Role binds a + Kubernetes ServiceAccount with a set of Vault policies. + type: string + secretRef: + description: |- + Optional secret field containing a Kubernetes ServiceAccount JWT used + for authenticating with Vault. If a name is specified without a key, + `token` is the default. If one is not specified, the one bound to + the controller will be used. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + serviceAccountRef: + description: |- + Optional service account field containing the name of a kubernetes ServiceAccount. + If the service account is specified, the service account secret token JWT will be used + for authenticating with Vault. If the service account selector is not supplied, + the secretRef will be used instead. 
+ properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + required: + - mountPath + - role + type: object + ldap: + description: |- + Ldap authenticates with Vault by passing username/password pair using + the LDAP authentication method + properties: + path: + default: ldap + description: |- + Path where the LDAP authentication backend is mounted + in Vault, e.g: "ldap" + type: string + secretRef: + description: |- + SecretRef to a key in a Secret resource containing password for the LDAP + user used to authenticate with Vault using the LDAP authentication + method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + username: + description: |- + Username is a LDAP user name used to authenticate using the LDAP Vault + authentication method + type: string + required: + - path + - username + type: object + tokenSecretRef: + description: TokenSecretRef authenticates with Vault by presenting a token. 
+ properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + caBundle: + description: |- + PEM encoded CA bundle used to validate Vault server certificate. Only used + if the Server URL is using HTTPS protocol. This parameter is ignored for + plain HTTP protocol connection. If not set the system root certificates + are used to validate the TLS connection. + format: byte + type: string + caProvider: + description: The provider for the CA bundle to use to validate Vault server certificate. + properties: + key: + description: The key the value inside of the provider type to use, only used with "Secret" type + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: The namespace the Provider type is in. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + forwardInconsistent: + description: |- + ForwardInconsistent tells Vault to forward read-after-write requests to the Vault + leader instead of simply retrying within a loop. This can increase performance if + the option is enabled serverside. + https://www.vaultproject.io/docs/configuration/replication#allow_forwarding_via_header + type: boolean + namespace: + description: |- + Name of the vault namespace. Namespaces is a set of features within Vault Enterprise that allows + Vault environments to support Secure Multi-tenancy. e.g: "ns1". 
+ More about namespaces can be found here https://www.vaultproject.io/docs/enterprise/namespaces + type: string + path: + description: |- + Path is the mount path of the Vault KV backend endpoint, e.g: + "secret". The v2 KV secret engine version specific "/data" path suffix + for fetching secrets from Vault is optional and will be appended + if not present in specified path. + type: string + readYourWrites: + description: |- + ReadYourWrites ensures isolated read-after-write semantics by + providing discovered cluster replication states in each request. + More information about eventual consistency in Vault can be found here + https://www.vaultproject.io/docs/enterprise/consistency + type: boolean + server: + description: 'Server is the connection address for the Vault server, e.g: "https://vault.example.com:8200".' + type: string + version: + default: v2 + description: |- + Version is the Vault KV secret engine version. This can be either "v1" or + "v2". Version defaults to "v2". + enum: + - v1 + - v2 + type: string + required: + - auth + - server + type: object + webhook: + description: Webhook configures this store to sync secrets using a generic templated webhook + properties: + body: + description: Body + type: string + caBundle: + description: |- + PEM encoded CA bundle used to validate webhook server certificate. Only used + if the Server URL is using HTTPS protocol. This parameter is ignored for + plain HTTP protocol connection. If not set the system root certificates + are used to validate the TLS connection. + format: byte + type: string + caProvider: + description: The provider for the CA bundle to use to validate webhook server certificate. + properties: + key: + description: The key the value inside of the provider type to use, only used with "Secret" type + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: The namespace the Provider type is in. 
+ type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + headers: + additionalProperties: + type: string + description: Headers + type: object + method: + description: Webhook Method + type: string + result: + description: Result formatting + properties: + jsonPath: + description: Json path of return value + type: string + type: object + secrets: + description: |- + Secrets to fill in templates + These secrets will be passed to the templating function as key value pairs under the given name + items: + properties: + name: + description: Name of this secret in templates + type: string + secretRef: + description: Secret ref to fill in credentials + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - name + - secretRef + type: object + type: array + timeout: + description: Timeout + type: string + url: + description: Webhook url to call + type: string + required: + - result + - url + type: object + yandexlockbox: + description: YandexLockbox configures this store to sync secrets using Yandex Lockbox provider + properties: + apiEndpoint: + description: Yandex.Cloud API endpoint (e.g. 
'api.cloud.yandex.net:443') + type: string + auth: + description: Auth defines the information necessary to authenticate against Yandex Lockbox + properties: + authorizedKeySecretRef: + description: The authorized key used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + caProvider: + description: The provider for the CA bundle to use to validate Yandex.Cloud server certificate. + properties: + certSecretRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - auth + type: object + type: object + retrySettings: + description: Used to configure http retries if failed + properties: + maxRetries: + format: int32 + type: integer + retryInterval: + type: string + type: object + required: + - provider + type: object + status: + description: SecretStoreStatus defines the observed state of the SecretStore. 
+ properties: + conditions: + items: + properties: + lastTransitionTime: + format: date-time + type: string + message: + type: string + reason: + type: string + status: + type: string + type: + type: string + required: + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: false + subresources: + status: {} + - additionalPrinterColumns: + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + - jsonPath: .status.conditions[?(@.type=="Ready")].reason + name: Status + type: string + - jsonPath: .status.capabilities + name: Capabilities + type: string + - jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: ClusterSecretStore represents a secure external location for storing secrets, which can be referenced as part of `storeRef` fields. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: SecretStoreSpec defines the desired state of SecretStore. + properties: + conditions: + description: Used to constraint a ClusterSecretStore to specific namespaces. 
Relevant only to ClusterSecretStore + items: + description: |- + ClusterSecretStoreCondition describes a condition by which to choose namespaces to process ExternalSecrets in + for a ClusterSecretStore instance. + properties: + namespaceRegexes: + description: Choose namespaces by using regex matching + items: + type: string + type: array + namespaceSelector: + description: Choose namespace using a labelSelector + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. 
+ type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: Choose namespaces by name + items: + type: string + type: array + type: object + type: array + controller: + description: |- + Used to select the correct ESO controller (think: ingress.ingressClassName) + The ESO controller is instantiated with a specific controller name and filters ES based on this property + type: string + provider: + description: Used to configure the provider. Only one provider may be set + maxProperties: 1 + minProperties: 1 + properties: + akeyless: + description: Akeyless configures this store to sync secrets using Akeyless Vault provider + properties: + akeylessGWApiURL: + description: Akeyless GW API Url from which the secrets to be fetched from. + type: string + authSecretRef: + description: Auth configures how the operator authenticates with Akeyless. + properties: + kubernetesAuth: + description: |- + Kubernetes authenticates with Akeyless by passing the ServiceAccount + token stored in the named Secret resource. + properties: + accessID: + description: the Akeyless Kubernetes auth-method access-id + type: string + k8sConfName: + description: Kubernetes-auth configuration name in Akeyless-Gateway + type: string + secretRef: + description: |- + Optional secret field containing a Kubernetes ServiceAccount JWT used + for authenticating with Akeyless. If a name is specified without a key, + `token` is the default. If one is not specified, the one bound to + the controller will be used. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. 
cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + serviceAccountRef: + description: |- + Optional service account field containing the name of a kubernetes ServiceAccount. + If the service account is specified, the service account secret token JWT will be used + for authenticating with Akeyless. If the service account selector is not supplied, + the secretRef will be used instead. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + required: + - accessID + - k8sConfName + type: object + secretRef: + description: |- + Reference to a Secret that contains the details + to authenticate with Akeyless. + properties: + accessID: + description: The SecretAccessID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + accessType: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. 
+ properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + accessTypeParam: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + type: object + caBundle: + description: |- + PEM/base64 encoded CA bundle used to validate Akeyless Gateway certificate. Only used + if the AkeylessGWApiURL URL is using HTTPS protocol. If not set the system root certificates + are used to validate the TLS connection. + format: byte + type: string + caProvider: + description: The provider for the CA bundle to use to validate Akeyless Gateway certificate. + properties: + key: + description: The key where the CA certificate can be found in the Secret or ConfigMap. + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: |- + The namespace the Provider type is in. + Can only be defined when used in a ClusterSecretStore. 
+ type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + required: + - akeylessGWApiURL + - authSecretRef + type: object + alibaba: + description: Alibaba configures this store to sync secrets using Alibaba Cloud provider + properties: + auth: + description: AlibabaAuth contains a secretRef for credentials. + properties: + rrsa: + description: Authenticate against Alibaba using RRSA. + properties: + oidcProviderArn: + type: string + oidcTokenFilePath: + type: string + roleArn: + type: string + sessionName: + type: string + required: + - oidcProviderArn + - oidcTokenFilePath + - roleArn + - sessionName + type: object + secretRef: + description: AlibabaAuthSecretRef holds secret references for Alibaba credentials. + properties: + accessKeyIDSecretRef: + description: The AccessKeyID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + accessKeySecretSecretRef: + description: The AccessKeySecret is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. 
cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - accessKeyIDSecretRef + - accessKeySecretSecretRef + type: object + type: object + regionID: + description: Alibaba Region to be used for the provider + type: string + required: + - auth + - regionID + type: object + aws: + description: AWS configures this store to sync secrets using AWS Secret Manager provider + properties: + additionalRoles: + description: AdditionalRoles is a chained list of Role ARNs which the provider will sequentially assume before assuming the Role + items: + type: string + type: array + auth: + description: |- + Auth defines the information necessary to authenticate against AWS + if not set aws sdk will infer credentials from your environment + see: https://docs.aws.amazon.com/sdk-for-go/v1/developer-guide/configuring-sdk.html#specifying-credentials + properties: + jwt: + description: Authenticate against AWS using service account tokens. + properties: + serviceAccountRef: + description: A reference to a ServiceAccount resource. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + type: object + secretRef: + description: |- + AWSAuthSecretRef holds secret references for AWS credentials + both AccessKeyID and SecretAccessKey must be defined in order to properly authenticate. 
+ properties: + accessKeyIDSecretRef: + description: The AccessKeyID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + secretAccessKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + sessionTokenSecretRef: + description: |- + The SessionToken used for authentication + This must be defined if AccessKeyID and SecretAccessKey are temporary credentials + see: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_use-resources.html + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + type: object + type: object + externalID: + description: AWS External ID set on assumed IAM roles + type: string + prefix: + description: Prefix adds a prefix to all retrieved values. + type: string + region: + description: AWS Region to be used for the provider + type: string + role: + description: Role is a Role ARN which the provider will assume + type: string + secretsManager: + description: SecretsManager defines how the provider behaves when interacting with AWS SecretsManager + properties: + forceDeleteWithoutRecovery: + description: |- + Specifies whether to delete the secret without any recovery window. You + can't use both this parameter and RecoveryWindowInDays in the same call. + If you don't use either, then by default Secrets Manager uses a 30 day + recovery window. + see: https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_DeleteSecret.html#SecretsManager-DeleteSecret-request-ForceDeleteWithoutRecovery + type: boolean + recoveryWindowInDays: + description: |- + The number of days from 7 to 30 that Secrets Manager waits before + permanently deleting the secret. You can't use both this parameter and + ForceDeleteWithoutRecovery in the same call. If you don't use either, + then by default Secrets Manager uses a 30 day recovery window. + see: https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_DeleteSecret.html#SecretsManager-DeleteSecret-request-RecoveryWindowInDays + format: int64 + type: integer + type: object + service: + description: Service defines which service should be used to fetch the secrets + enum: + - SecretsManager + - ParameterStore + type: string + sessionTags: + description: AWS STS assume role session tags + items: + properties: + key: + type: string + value: + type: string + required: + - key + - value + type: object + type: array + transitiveTagKeys: + description: AWS STS assume role transitive session tags. 
Required when multiple rules are used with the provider + items: + type: string + type: array + required: + - region + - service + type: object + azurekv: + description: AzureKV configures this store to sync secrets using Azure Key Vault provider + properties: + authSecretRef: + description: Auth configures how the operator authenticates with Azure. Required for ServicePrincipal auth type. Optional for WorkloadIdentity. + properties: + clientCertificate: + description: The Azure ClientCertificate of the service principle used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + clientId: + description: The Azure clientId of the service principle or managed identity used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + clientSecret: + description: The Azure ClientSecret of the service principle used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. 
+ type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + tenantId: + description: The Azure tenantId of the managed identity used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + authType: + default: ServicePrincipal + description: |- + Auth type defines how to authenticate to the keyvault service. + Valid values are: + - "ServicePrincipal" (default): Using a service principal (tenantId, clientId, clientSecret) + - "ManagedIdentity": Using Managed Identity assigned to the pod (see aad-pod-identity) + enum: + - ServicePrincipal + - ManagedIdentity + - WorkloadIdentity + type: string + environmentType: + default: PublicCloud + description: |- + EnvironmentType specifies the Azure cloud environment endpoints to use for + connecting and authenticating with Azure. By default it points to the public cloud AAD endpoint. 
+ The following endpoints are available, also see here: https://github.com/Azure/go-autorest/blob/main/autorest/azure/environments.go#L152 + PublicCloud, USGovernmentCloud, ChinaCloud, GermanCloud + enum: + - PublicCloud + - USGovernmentCloud + - ChinaCloud + - GermanCloud + type: string + identityId: + description: If multiple Managed Identity is assigned to the pod, you can select the one to be used + type: string + serviceAccountRef: + description: |- + ServiceAccountRef specified the service account + that should be used when authenticating with WorkloadIdentity. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + tenantId: + description: TenantID configures the Azure Tenant to send requests to. Required for ServicePrincipal auth type. Optional for WorkloadIdentity. + type: string + vaultUrl: + description: Vault Url from which the secrets to be fetched from. + type: string + required: + - vaultUrl + type: object + beyondtrust: + description: Beyondtrust configures this store to sync secrets using Password Safe provider. + properties: + auth: + description: Auth configures how the operator authenticates with Beyondtrust. + properties: + certificate: + description: Content of the certificate (cert.pem) for use when authenticating with an OAuth client Id using a Client Certificate. + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. 
+ properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + certificateKey: + description: Certificate private key (key.pem). For use when authenticating with an OAuth client Id + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + clientId: + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. 
Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + clientSecret: + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + required: + - clientId + - clientSecret + type: object + server: + description: Auth configures how API server works. + properties: + apiUrl: + type: string + clientTimeOutSeconds: + description: Timeout specifies a time limit for requests made by this Client. The timeout includes connection time, any redirects, and reading the response body. Defaults to 45 seconds. + type: integer + retrievalType: + description: The secret retrieval type. SECRET = Secrets Safe (credential, text, file). MANAGED_ACCOUNT = Password Safe account associated with a system. + type: string + separator: + description: A character that separates the folder names. 
+ type: string + verifyCA: + type: boolean + required: + - apiUrl + - verifyCA + type: object + required: + - auth + - server + type: object + bitwardensecretsmanager: + description: BitwardenSecretsManager configures this store to sync secrets using BitwardenSecretsManager provider + properties: + apiURL: + type: string + auth: + description: |- + Auth configures how secret-manager authenticates with a bitwarden machine account instance. + Make sure that the token being used has permissions on the given secret. + properties: + secretRef: + description: BitwardenSecretsManagerSecretRef contains the credential ref to the bitwarden instance. + properties: + credentials: + description: AccessToken used for the bitwarden instance. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - credentials + type: object + required: + - secretRef + type: object + bitwardenServerSDKURL: + type: string + caBundle: + description: |- + Base64 encoded certificate for the bitwarden server sdk. The sdk MUST run with HTTPS to make sure no MITM attack + can be performed. + type: string + caProvider: + description: 'see: https://external-secrets.io/latest/spec/#external-secrets.io/v1alpha1.CAProvider' + properties: + key: + description: The key where the CA certificate can be found in the Secret or ConfigMap. + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: |- + The namespace the Provider type is in. 
+ Can only be defined when used in a ClusterSecretStore. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + identityURL: + type: string + organizationID: + description: OrganizationID determines which organization this secret store manages. + type: string + projectID: + description: ProjectID determines which project this secret store manages. + type: string + required: + - auth + - organizationID + - projectID + type: object + chef: + description: Chef configures this store to sync secrets with chef server + properties: + auth: + description: Auth defines the information necessary to authenticate against chef Server + properties: + secretRef: + description: ChefAuthSecretRef holds secret references for chef server login credentials. + properties: + privateKeySecretRef: + description: SecretKey is the Signing Key in PEM format, used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - privateKeySecretRef + type: object + required: + - secretRef + type: object + serverUrl: + description: ServerURL is the chef server URL used to connect to. 
If using orgs you should include your org in the url and terminate the url with a "/" + type: string + username: + description: UserName should be the user ID on the chef server + type: string + required: + - auth + - serverUrl + - username + type: object + conjur: + description: Conjur configures this store to sync secrets using conjur provider + properties: + auth: + properties: + apikey: + properties: + account: + type: string + apiKeyRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + userRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - account + - apiKeyRef + - userRef + type: object + jwt: + properties: + account: + type: string + hostId: + description: |- + Optional HostID for JWT authentication. This may be used depending + on how the Conjur JWT authenticator policy is configured. 
+ type: string + secretRef: + description: |- + Optional SecretRef that refers to a key in a Secret resource containing JWT token to + authenticate with Conjur using the JWT authentication method. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + serviceAccountRef: + description: |- + Optional ServiceAccountRef specifies the Kubernetes service account for which to request + a token for with the `TokenRequest` API. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + serviceID: + description: The conjur authn jwt webservice id + type: string + required: + - account + - serviceID + type: object + type: object + caBundle: + type: string + caProvider: + description: |- + Used to provide custom certificate authority (CA) certificates + for a secret store. The CAProvider points to a Secret or ConfigMap resource + that contains a PEM-encoded certificate. + properties: + key: + description: The key where the CA certificate can be found in the Secret or ConfigMap. 
+ type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: |- + The namespace the Provider type is in. + Can only be defined when used in a ClusterSecretStore. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + url: + type: string + required: + - auth + - url + type: object + delinea: + description: |- + Delinea DevOps Secrets Vault + https://docs.delinea.com/online-help/products/devops-secrets-vault/current + properties: + clientId: + description: ClientID is the non-secret part of the credential. + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + clientSecret: + description: ClientSecret is the secret part of the credential. + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. 
+ type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + tenant: + description: Tenant is the chosen hostname / site name. + type: string + tld: + description: |- + TLD is based on the server location that was chosen during provisioning. + If unset, defaults to "com". + type: string + urlTemplate: + description: |- + URLTemplate + If unset, defaults to "https://%s.secretsvaultcloud.%s/v1/%s%s". + type: string + required: + - clientId + - clientSecret + - tenant + type: object + device42: + description: Device42 configures this store to sync secrets using the Device42 provider + properties: + auth: + description: Auth configures how secret-manager authenticates with a Device42 instance. + properties: + secretRef: + properties: + credentials: + description: Username / Password is used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - secretRef + type: object + host: + description: URL configures the Device42 instance URL. 
+ type: string + required: + - auth + - host + type: object + doppler: + description: Doppler configures this store to sync secrets using the Doppler provider + properties: + auth: + description: Auth configures how the Operator authenticates with the Doppler API + properties: + secretRef: + properties: + dopplerToken: + description: |- + The DopplerToken is used for authentication. + See https://docs.doppler.com/reference/api#authentication for auth token types. + The Key attribute defaults to dopplerToken if not specified. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + required: + - dopplerToken + type: object + required: + - secretRef + type: object + config: + description: Doppler config (required if not using a Service Token) + type: string + format: + description: Format enables the downloading of secrets as a file (string) + enum: + - json + - dotnet-json + - env + - yaml + - docker + type: string + nameTransformer: + description: Environment variable compatible name transforms that change secret names to a different format + enum: + - upper-camel + - camel + - lower-snake + - tf-var + - dotnet-env + - lower-kebab + type: string + project: + description: Doppler project (required if not using a Service Token) + type: string + required: + - auth + type: object + fake: + description: Fake configures a store with static key/value pairs + properties: + data: + items: + properties: + key: + type: string + value: + type: string + valueMap: + additionalProperties: + type: string + description: 'Deprecated: ValueMap is deprecated and is intended to be removed in the future, use the `value` field instead.' + type: object + version: + type: string + required: + - key + type: object + type: array + required: + - data + type: object + fortanix: + description: Fortanix configures this store to sync secrets using the Fortanix provider + properties: + apiKey: + description: APIKey is the API token to access SDKMS Applications. + properties: + secretRef: + description: SecretRef is a reference to a secret containing the SDKMS API Key. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + type: object + apiUrl: + description: APIURL is the URL of SDKMS API. Defaults to `sdkms.fortanix.com`. + type: string + type: object + gcpsm: + description: GCPSM configures this store to sync secrets using Google Cloud Platform Secret Manager provider + properties: + auth: + description: Auth defines the information necessary to authenticate against GCP + properties: + secretRef: + properties: + secretAccessKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + workloadIdentity: + properties: + clusterLocation: + type: string + clusterName: + type: string + clusterProjectID: + type: string + serviceAccountRef: + description: A reference to a ServiceAccount resource. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + required: + - name + type: object + required: + - clusterLocation + - clusterName + - serviceAccountRef + type: object + type: object + location: + description: Location optionally defines a location for a secret + type: string + projectID: + description: ProjectID project where secret is located + type: string + type: object + gitlab: + description: GitLab configures this store to sync secrets using GitLab Variables provider + properties: + auth: + description: Auth configures how secret-manager authenticates with a GitLab instance. + properties: + SecretRef: + properties: + accessToken: + description: AccessToken is used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - SecretRef + type: object + environment: + description: Environment environment_scope of gitlab CI/CD variables (Please see https://docs.gitlab.com/ee/ci/environments/#create-a-static-environment on how to create environments) + type: string + groupIDs: + description: GroupIDs specify, which gitlab groups to pull secrets from. Group secrets are read from left to right followed by the project variables. + items: + type: string + type: array + inheritFromGroups: + description: InheritFromGroups specifies whether parent groups should be discovered and checked for secrets. + type: boolean + projectID: + description: ProjectID specifies a project where secrets are located. + type: string + url: + description: URL configures the GitLab instance URL. Defaults to https://gitlab.com/. 
+ type: string + required: + - auth + type: object + ibm: + description: IBM configures this store to sync secrets using IBM Cloud provider + properties: + auth: + description: Auth configures how secret-manager authenticates with the IBM secrets manager. + maxProperties: 1 + minProperties: 1 + properties: + containerAuth: + description: IBM Container-based auth with IAM Trusted Profile. + properties: + iamEndpoint: + type: string + profile: + description: the IBM Trusted Profile + type: string + tokenLocation: + description: Location the token is mounted on the pod + type: string + required: + - profile + type: object + secretRef: + properties: + secretApiKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + type: object + serviceUrl: + description: ServiceURL is the Endpoint URL that is specific to the Secrets Manager service instance + type: string + required: + - auth + type: object + infisical: + description: Infisical configures this store to sync secrets using the Infisical provider + properties: + auth: + description: Auth configures how the Operator authenticates with the Infisical API + properties: + universalAuthCredentials: + properties: + clientId: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. 
Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + clientSecret: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - clientId + - clientSecret + type: object + type: object + hostAPI: + default: https://app.infisical.com/api + type: string + secretsScope: + properties: + environmentSlug: + type: string + projectSlug: + type: string + secretsPath: + default: / + type: string + required: + - environmentSlug + - projectSlug + type: object + required: + - auth + - secretsScope + type: object + keepersecurity: + description: KeeperSecurity configures this store to sync secrets using the KeeperSecurity provider + properties: + authRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. 
+ type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + folderID: + type: string + required: + - authRef + - folderID + type: object + kubernetes: + description: Kubernetes configures this store to sync secrets using a Kubernetes cluster provider + properties: + auth: + description: Auth configures how secret-manager authenticates with a Kubernetes instance. + maxProperties: 1 + minProperties: 1 + properties: + cert: + description: has both clientCert and clientKey as secretKeySelector + properties: + clientCert: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + clientKey: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + type: object + serviceAccount: + description: points to a service account that should be used for authentication + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + token: + description: use static token to authenticate with + properties: + bearerToken: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + type: object + authRef: + description: A reference to a secret that contains the auth information. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. 
Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + remoteNamespace: + default: default + description: Remote namespace to fetch the secrets from + type: string + server: + description: configures the Kubernetes server Address. + properties: + caBundle: + description: CABundle is a base64-encoded CA certificate + format: byte + type: string + caProvider: + description: 'see: https://external-secrets.io/v0.4.1/spec/#external-secrets.io/v1alpha1.CAProvider' + properties: + key: + description: The key where the CA certificate can be found in the Secret or ConfigMap. + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: |- + The namespace the Provider type is in. + Can only be defined when used in a ClusterSecretStore. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + url: + default: kubernetes.default + description: configures the Kubernetes server Address. + type: string + type: object + type: object + onboardbase: + description: Onboardbase configures this store to sync secrets using the Onboardbase provider + properties: + apiHost: + default: https://public.onboardbase.com/api/v1/ + description: APIHost use this to configure the host url for the API for selfhosted installation, default is https://public.onboardbase.com/api/v1/ + type: string + auth: + description: Auth configures how the Operator authenticates with the Onboardbase API + properties: + apiKeyRef: + description: |- + OnboardbaseAPIKey is the APIKey generated by an admin account. + It is used to recognize and authorize access to a project and environment within onboardbase + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. 
Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + passcodeRef: + description: OnboardbasePasscode is the passcode attached to the API Key + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - apiKeyRef + - passcodeRef + type: object + environment: + default: development + description: Environment is the name of an environment within a project to pull the secrets from + type: string + project: + default: development + description: Project is an onboardbase project that the secrets should be pulled from + type: string + required: + - apiHost + - auth + - environment + - project + type: object + onepassword: + description: OnePassword configures this store to sync secrets using the 1Password Cloud provider + properties: + auth: + description: Auth defines the information necessary to authenticate against OnePassword Connect Server + properties: + secretRef: + description: OnePasswordAuthSecretRef holds secret references for 1Password credentials. + properties: + connectTokenSecretRef: + description: The ConnectToken is used for authentication to a 1Password Connect Server.
+ properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - connectTokenSecretRef + type: object + required: + - secretRef + type: object + connectHost: + description: ConnectHost defines the OnePassword Connect Server to connect to + type: string + vaults: + additionalProperties: + type: integer + description: Vaults defines which OnePassword vaults to search in which order + type: object + required: + - auth + - connectHost + - vaults + type: object + oracle: + description: Oracle configures this store to sync secrets using Oracle Vault provider + properties: + auth: + description: |- + Auth configures how secret-manager authenticates with the Oracle Vault. + If empty, use the instance principal, otherwise the user credentials specified in Auth. + properties: + secretRef: + description: SecretRef to pass through sensitive information. + properties: + fingerprint: + description: Fingerprint is the fingerprint of the API private key. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + privatekey: + description: PrivateKey is the user's API Signing Key in PEM format, used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - fingerprint + - privatekey + type: object + tenancy: + description: Tenancy is the tenancy OCID where user is located. + type: string + user: + description: User is an access OCID specific to the account. + type: string + required: + - secretRef + - tenancy + - user + type: object + compartment: + description: |- + Compartment is the vault compartment OCID. + Required for PushSecret + type: string + encryptionKey: + description: |- + EncryptionKey is the OCID of the encryption key within the vault. + Required for PushSecret + type: string + principalType: + description: |- + The type of principal to use for authentication. If left blank, the Auth struct will + determine the principal type. This optional field must be specified if using + workload identity. + enum: + - "" + - UserPrincipal + - InstancePrincipal + - Workload + type: string + region: + description: Region is the region where vault is located. + type: string + serviceAccountRef: + description: |- + ServiceAccountRef specified the service account + that should be used when authenticating with WorkloadIdentity. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. 
IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + vault: + description: Vault is the vault's OCID of the specific vault where secret is located. + type: string + required: + - region + - vault + type: object + passbolt: + properties: + auth: + description: Auth defines the information necessary to authenticate against Passbolt Server + properties: + passwordSecretRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + privateKeySecretRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. 
cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - passwordSecretRef + - privateKeySecretRef + type: object + host: + description: Host defines the Passbolt Server to connect to + type: string + required: + - auth + - host + type: object + passworddepot: + description: Configures a store to sync secrets with a Password Depot instance. + properties: + auth: + description: Auth configures how secret-manager authenticates with a Password Depot instance. + properties: + secretRef: + properties: + credentials: + description: Username / Password is used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - secretRef + type: object + database: + description: Database to use as source + type: string + host: + description: URL configures the Password Depot instance URL. + type: string + required: + - auth + - database + - host + type: object + previder: + description: Previder configures this store to sync secrets using the Previder provider + properties: + auth: + description: PreviderAuth contains a secretRef for credentials. + properties: + secretRef: + description: PreviderAuthSecretRef holds secret references for Previder Vault credentials. + properties: + accessToken: + description: The AccessToken is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. 
+ type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - accessToken + type: object + type: object + baseUri: + type: string + required: + - auth + type: object + pulumi: + description: Pulumi configures this store to sync secrets using the Pulumi provider + properties: + accessToken: + description: AccessToken is the access tokens to sign in to the Pulumi Cloud Console. + properties: + secretRef: + description: SecretRef is a reference to a secret containing the Pulumi API token. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + apiUrl: + default: https://api.pulumi.com/api/esc + description: APIURL is the URL of the Pulumi API. + type: string + environment: + description: |- + Environment are YAML documents composed of static key-value pairs, programmatic expressions, + dynamically retrieved values from supported providers including all major clouds, + and other Pulumi ESC environments. + To create a new environment, visit https://www.pulumi.com/docs/esc/environments/ for more information. + type: string + organization: + description: |- + Organization are a space to collaborate on shared projects and stacks. + To create a new organization, visit https://app.pulumi.com/ and click "New Organization". 
+ type: string + project: + description: Project is the name of the Pulumi ESC project the environment belongs to. + type: string + required: + - accessToken + - environment + - organization + - project + type: object + scaleway: + description: Scaleway + properties: + accessKey: + description: AccessKey is the non-secret part of the api key. + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + apiUrl: + description: APIURL is the url of the api to use. Defaults to https://api.scaleway.com + type: string + projectId: + description: 'ProjectID is the id of your project, which you can find in the console: https://console.scaleway.com/project/settings' + type: string + region: + description: 'Region where your secrets are located: https://developers.scaleway.com/en/quickstart/#region-and-zone' + type: string + secretKey: + description: SecretKey is the non-secret part of the api key. + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. 
+ type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + required: + - accessKey + - projectId + - region + - secretKey + type: object + secretserver: + description: |- + SecretServer configures this store to sync secrets using SecretServer provider + https://docs.delinea.com/online-help/secret-server/start.htm + properties: + password: + description: Password is the secret server account password. + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + serverURL: + description: |- + ServerURL + URL to your secret server installation + type: string + username: + description: Username is the secret server account username. + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. 
+ type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + required: + - password + - serverURL + - username + type: object + senhasegura: + description: Senhasegura configures this store to sync secrets using senhasegura provider + properties: + auth: + description: Auth defines parameters to authenticate in senhasegura + properties: + clientId: + type: string + clientSecretSecretRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + required: + - clientId + - clientSecretSecretRef + type: object + ignoreSslCertificate: + default: false + description: IgnoreSslCertificate defines if SSL certificate must be ignored + type: boolean + module: + description: Module defines which senhasegura module should be used to get secrets + type: string + url: + description: URL of senhasegura + type: string + required: + - auth + - module + - url + type: object + vault: + description: Vault configures this store to sync secrets using Hashi provider + properties: + auth: + description: Auth configures how secret-manager authenticates with the Vault server. + properties: + appRole: + description: |- + AppRole authenticates with Vault using the App Role auth mechanism, + with the role and secret stored in a Kubernetes Secret resource. + properties: + path: + default: approle + description: |- + Path where the App Role authentication backend is mounted + in Vault, e.g: "approle" + type: string + roleId: + description: |- + RoleID configured in the App Role authentication backend when setting + up the authentication backend in Vault. + type: string + roleRef: + description: |- + Reference to a key in a Secret that contains the App Role ID used + to authenticate with Vault. + The `key` field must be specified and denotes which entry within the Secret + resource is used as the app role id. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + secretRef: + description: |- + Reference to a key in a Secret that contains the App Role secret used + to authenticate with Vault. + The `key` field must be specified and denotes which entry within the Secret + resource is used as the app role secret. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - path + - secretRef + type: object + cert: + description: |- + Cert authenticates with TLS Certificates by passing client certificate, private key and ca certificate + Cert authentication method + properties: + clientCert: + description: |- + ClientCert is a certificate to authenticate using the Cert Vault + authentication method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + secretRef: + description: |- + SecretRef to a key in a Secret resource containing client private key to + authenticate with Vault using the Cert authentication method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. 
Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + iam: + description: |- + Iam authenticates with vault by passing a special AWS request signed with AWS IAM credentials + AWS IAM authentication method + properties: + externalID: + description: AWS External ID set on assumed IAM roles + type: string + jwt: + description: Specify a service account with IRSA enabled + properties: + serviceAccountRef: + description: A reference to a ServiceAccount resource. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + type: object + path: + description: 'Path where the AWS auth method is enabled in Vault, e.g: "aws"' + type: string + region: + description: AWS region + type: string + role: + description: This is the AWS role to be assumed before talking to vault + type: string + secretRef: + description: Specify credentials in a Secret object + properties: + accessKeyIDSecretRef: + description: The AccessKeyID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. 
Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + secretAccessKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + sessionTokenSecretRef: + description: |- + The SessionToken used for authentication + This must be defined if AccessKeyID and SecretAccessKey are temporary credentials + see: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_use-resources.html + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + vaultAwsIamServerID: + description: 'X-Vault-AWS-IAM-Server-ID is an additional header used by Vault IAM auth method to mitigate against different types of replay attacks. 
More details here: https://developer.hashicorp.com/vault/docs/auth/aws' + type: string + vaultRole: + description: Vault Role. In vault, a role describes an identity with a set of permissions, groups, or policies you want to attach a user of the secrets engine + type: string + required: + - vaultRole + type: object + jwt: + description: |- + Jwt authenticates with Vault by passing role and JWT token using the + JWT/OIDC authentication method + properties: + kubernetesServiceAccountToken: + description: |- + Optional ServiceAccountToken specifies the Kubernetes service account for which to request + a token for with the `TokenRequest` API. + properties: + audiences: + description: |- + Optional audiences field that will be used to request a temporary Kubernetes service + account token for the service account referenced by `serviceAccountRef`. + Defaults to a single audience `vault` it not specified. + Deprecated: use serviceAccountRef.Audiences instead + items: + type: string + type: array + expirationSeconds: + description: |- + Optional expiration time in seconds that will be used to request a temporary + Kubernetes service account token for the service account referenced by + `serviceAccountRef`. + Deprecated: this will be removed in the future. + Defaults to 10 minutes. + format: int64 + type: integer + serviceAccountRef: + description: Service account field containing the name of a kubernetes ServiceAccount. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. 
cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + required: + - serviceAccountRef + type: object + path: + default: jwt + description: |- + Path where the JWT authentication backend is mounted + in Vault, e.g: "jwt" + type: string + role: + description: |- + Role is a JWT role to authenticate using the JWT/OIDC Vault + authentication method + type: string + secretRef: + description: |- + Optional SecretRef that refers to a key in a Secret resource containing JWT token to + authenticate with Vault using the JWT/OIDC authentication method. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - path + type: object + kubernetes: + description: |- + Kubernetes authenticates with Vault by passing the ServiceAccount + token stored in the named Secret resource to the Vault server. + properties: + mountPath: + default: kubernetes + description: |- + Path where the Kubernetes authentication backend is mounted in Vault, e.g: + "kubernetes" + type: string + role: + description: |- + A required field containing the Vault Role to assume. A Role binds a + Kubernetes ServiceAccount with a set of Vault policies. + type: string + secretRef: + description: |- + Optional secret field containing a Kubernetes ServiceAccount JWT used + for authenticating with Vault. If a name is specified without a key, + `token` is the default. If one is not specified, the one bound to + the controller will be used. 
+ properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + serviceAccountRef: + description: |- + Optional service account field containing the name of a kubernetes ServiceAccount. + If the service account is specified, the service account secret token JWT will be used + for authenticating with Vault. If the service account selector is not supplied, + the secretRef will be used instead. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + required: + - name + type: object + required: + - mountPath + - role + type: object + ldap: + description: |- + Ldap authenticates with Vault by passing username/password pair using + the LDAP authentication method + properties: + path: + default: ldap + description: |- + Path where the LDAP authentication backend is mounted + in Vault, e.g: "ldap" + type: string + secretRef: + description: |- + SecretRef to a key in a Secret resource containing password for the LDAP + user used to authenticate with Vault using the LDAP authentication + method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + username: + description: |- + Username is a LDAP user name used to authenticate using the LDAP Vault + authentication method + type: string + required: + - path + - username + type: object + namespace: + description: |- + Name of the vault namespace to authenticate to. This can be different than the namespace your secret is in. + Namespaces is a set of features within Vault Enterprise that allows + Vault environments to support Secure Multi-tenancy. e.g: "ns1". + More about namespaces can be found here https://www.vaultproject.io/docs/enterprise/namespaces + This will default to Vault.Namespace field if set, or empty otherwise + type: string + tokenSecretRef: + description: TokenSecretRef authenticates with Vault by presenting a token. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. 
Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + userPass: + description: UserPass authenticates with Vault by passing username/password pair + properties: + path: + default: user + description: |- + Path where the UserPassword authentication backend is mounted + in Vault, e.g: "user" + type: string + secretRef: + description: |- + SecretRef to a key in a Secret resource containing password for the + user used to authenticate with Vault using the UserPass authentication + method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + username: + description: |- + Username is a user name used to authenticate using the UserPass Vault + authentication method + type: string + required: + - path + - username + type: object + type: object + caBundle: + description: |- + PEM encoded CA bundle used to validate Vault server certificate. Only used + if the Server URL is using HTTPS protocol. This parameter is ignored for + plain HTTP protocol connection. If not set the system root certificates + are used to validate the TLS connection. + format: byte + type: string + caProvider: + description: The provider for the CA bundle to use to validate Vault server certificate. 
+ properties: + key: + description: The key where the CA certificate can be found in the Secret or ConfigMap. + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: |- + The namespace the Provider type is in. + Can only be defined when used in a ClusterSecretStore. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + forwardInconsistent: + description: |- + ForwardInconsistent tells Vault to forward read-after-write requests to the Vault + leader instead of simply retrying within a loop. This can increase performance if + the option is enabled serverside. + https://www.vaultproject.io/docs/configuration/replication#allow_forwarding_via_header + type: boolean + headers: + additionalProperties: + type: string + description: Headers to be added in Vault request + type: object + namespace: + description: |- + Name of the vault namespace. Namespaces is a set of features within Vault Enterprise that allows + Vault environments to support Secure Multi-tenancy. e.g: "ns1". + More about namespaces can be found here https://www.vaultproject.io/docs/enterprise/namespaces + type: string + path: + description: |- + Path is the mount path of the Vault KV backend endpoint, e.g: + "secret". The v2 KV secret engine version specific "/data" path suffix + for fetching secrets from Vault is optional and will be appended + if not present in specified path. + type: string + readYourWrites: + description: |- + ReadYourWrites ensures isolated read-after-write semantics by + providing discovered cluster replication states in each request. 
+ More information about eventual consistency in Vault can be found here + https://www.vaultproject.io/docs/enterprise/consistency + type: boolean + server: + description: 'Server is the connection address for the Vault server, e.g: "https://vault.example.com:8200".' + type: string + tls: + description: |- + The configuration used for client side related TLS communication, when the Vault server + requires mutual authentication. Only used if the Server URL is using HTTPS protocol. + This parameter is ignored for plain HTTP protocol connection. + It's worth noting this configuration is different from the "TLS certificates auth method", + which is available under the `auth.cert` section. + properties: + certSecretRef: + description: |- + CertSecretRef is a certificate added to the transport layer + when communicating with the Vault server. + If no key for the Secret is specified, external-secret will default to 'tls.crt'. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + keySecretRef: + description: |- + KeySecretRef to a key in a Secret resource containing client private key + added to the transport layer when communicating with the Vault server. + If no key for the Secret is specified, external-secret will default to 'tls.key'. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. 
+ type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + version: + default: v2 + description: |- + Version is the Vault KV secret engine version. This can be either "v1" or + "v2". Version defaults to "v2". + enum: + - v1 + - v2 + type: string + required: + - auth + - server + type: object + webhook: + description: Webhook configures this store to sync secrets using a generic templated webhook + properties: + body: + description: Body + type: string + caBundle: + description: |- + PEM encoded CA bundle used to validate webhook server certificate. Only used + if the Server URL is using HTTPS protocol. This parameter is ignored for + plain HTTP protocol connection. If not set the system root certificates + are used to validate the TLS connection. + format: byte + type: string + caProvider: + description: The provider for the CA bundle to use to validate webhook server certificate. + properties: + key: + description: The key the value inside of the provider type to use, only used with "Secret" type + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: The namespace the Provider type is in. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". 
+ enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + headers: + additionalProperties: + type: string + description: Headers + type: object + method: + description: Webhook Method + type: string + result: + description: Result formatting + properties: + jsonPath: + description: Json path of return value + type: string + type: object + secrets: + description: |- + Secrets to fill in templates + These secrets will be passed to the templating function as key value pairs under the given name + items: + properties: + name: + description: Name of this secret in templates + type: string + secretRef: + description: Secret ref to fill in credentials + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - name + - secretRef + type: object + type: array + timeout: + description: Timeout + type: string + url: + description: Webhook url to call + type: string + required: + - result + - url + type: object + yandexcertificatemanager: + description: YandexCertificateManager configures this store to sync secrets using Yandex Certificate Manager provider + properties: + apiEndpoint: + description: Yandex.Cloud API endpoint (e.g. 
'api.cloud.yandex.net:443') + type: string + auth: + description: Auth defines the information necessary to authenticate against Yandex Certificate Manager + properties: + authorizedKeySecretRef: + description: The authorized key used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + caProvider: + description: The provider for the CA bundle to use to validate Yandex.Cloud server certificate. + properties: + certSecretRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - auth + type: object + yandexlockbox: + description: YandexLockbox configures this store to sync secrets using Yandex Lockbox provider + properties: + apiEndpoint: + description: Yandex.Cloud API endpoint (e.g. 
'api.cloud.yandex.net:443') + type: string + auth: + description: Auth defines the information necessary to authenticate against Yandex Lockbox + properties: + authorizedKeySecretRef: + description: The authorized key used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + caProvider: + description: The provider for the CA bundle to use to validate Yandex.Cloud server certificate. + properties: + certSecretRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - auth + type: object + type: object + refreshInterval: + description: Used to configure store refresh interval in seconds. Empty or 0 will default to the controller config. 
+ type: integer + retrySettings: + description: Used to configure http retries if failed + properties: + maxRetries: + format: int32 + type: integer + retryInterval: + type: string + type: object + required: + - provider + type: object + status: + description: SecretStoreStatus defines the observed state of the SecretStore. + properties: + capabilities: + description: SecretStoreCapabilities defines the possible operations a SecretStore can do. + type: string + conditions: + items: + properties: + lastTransitionTime: + format: date-time + type: string + message: + type: string + reason: + type: string + status: + type: string + type: + type: string + required: + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} +{{- if .Values.crds.conversion.enabled }} + conversion: + strategy: Webhook + webhook: + conversionReviewVersions: + - v1 + clientConfig: + service: + name: {{ include "external-secrets.fullname" . }}-webhook + namespace: {{ .Release.Namespace | quote }} + path: /convert +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/ecrauthorizationtoken.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/ecrauthorizationtoken.yaml new file mode 100644 index 00000000..f0180592 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/ecrauthorizationtoken.yaml @@ -0,0 +1,178 @@ +{{- if .Values.installCRDs }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- with .Values.crds.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- if and .Values.crds.conversion.enabled .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "external-secrets.fullname" . 
}}-webhook + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.3 + labels: + external-secrets.io/component: controller + name: ecrauthorizationtokens.generators.external-secrets.io +spec: + group: generators.external-secrets.io + names: + categories: + - external-secrets + - external-secrets-generators + kind: ECRAuthorizationToken + listKind: ECRAuthorizationTokenList + plural: ecrauthorizationtokens + shortNames: + - ecrauthorizationtoken + singular: ecrauthorizationtoken + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + ECRAuthorizationTokenSpec uses the GetAuthorizationToken API to retrieve an + authorization token. + The authorization token is valid for 12 hours. + The authorizationToken returned is a base64 encoded string that can be decoded + and used in a docker login command to authenticate to a registry. + For more information, see Registry authentication (https://docs.aws.amazon.com/AmazonECR/latest/userguide/Registries.html#registry_auth) in the Amazon Elastic Container Registry User Guide. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + auth: + description: Auth defines how to authenticate with AWS + properties: + jwt: + description: Authenticate against AWS using service account tokens. 
+ properties: + serviceAccountRef: + description: A reference to a ServiceAccount resource. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + type: object + secretRef: + description: |- + AWSAuthSecretRef holds secret references for AWS credentials + both AccessKeyID and SecretAccessKey must be defined in order to properly authenticate. + properties: + accessKeyIDSecretRef: + description: The AccessKeyID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + secretAccessKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. 
Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + sessionTokenSecretRef: + description: |- + The SessionToken used for authentication + This must be defined if AccessKeyID and SecretAccessKey are temporary credentials + see: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_use-resources.html + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + type: object + region: + description: Region specifies the region to operate in. + type: string + role: + description: |- + You can assume a role before making calls to the + desired AWS service. + type: string + required: + - region + type: object + type: object + served: true + storage: true + subresources: + status: {} +{{- if .Values.crds.conversion.enabled }} + conversion: + strategy: Webhook + webhook: + conversionReviewVersions: + - v1 + clientConfig: + service: + name: {{ include "external-secrets.fullname" . 
}}-webhook + namespace: {{ .Release.Namespace | quote }} + path: /convert +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/externalsecret.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/externalsecret.yaml new file mode 100644 index 00000000..c2dabe68 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/externalsecret.yaml @@ -0,0 +1,820 @@ +{{- if .Values.installCRDs }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- with .Values.crds.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- if and .Values.crds.conversion.enabled .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "external-secrets.fullname" . }}-webhook + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.3 + labels: + external-secrets.io/component: controller + name: externalsecrets.external-secrets.io +spec: + group: external-secrets.io + names: + categories: + - external-secrets + kind: ExternalSecret + listKind: ExternalSecretList + plural: externalsecrets + shortNames: + - es + singular: externalsecret + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.secretStoreRef.name + name: Store + type: string + - jsonPath: .spec.refreshInterval + name: Refresh Interval + type: string + - jsonPath: .status.conditions[?(@.type=="Ready")].reason + name: Status + type: string + deprecated: true + name: v1alpha1 + schema: + openAPIV3Schema: + description: ExternalSecret is the Schema for the external-secrets API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ExternalSecretSpec defines the desired state of ExternalSecret. + properties: + data: + description: Data defines the connection between the Kubernetes Secret keys and the Provider data + items: + description: ExternalSecretData defines the connection between the Kubernetes Secret key (spec.data.) and the Provider data. + properties: + remoteRef: + description: ExternalSecretDataRemoteRef defines Provider data location. + properties: + conversionStrategy: + default: Default + description: Used to define a conversion Strategy + enum: + - Default + - Unicode + type: string + key: + description: Key is the key used in the Provider, mandatory + type: string + property: + description: Used to select a specific property of the Provider value (if a map), if supported + type: string + version: + description: Used to select a specific version of the Provider value, if supported + type: string + required: + - key + type: object + secretKey: + type: string + required: + - remoteRef + - secretKey + type: object + type: array + dataFrom: + description: |- + DataFrom is used to fetch all properties from a specific Provider data + If multiple entries are specified, the Secret keys are merged in the specified order + items: + description: ExternalSecretDataRemoteRef defines Provider data location. 
+ properties: + conversionStrategy: + default: Default + description: Used to define a conversion Strategy + enum: + - Default + - Unicode + type: string + key: + description: Key is the key used in the Provider, mandatory + type: string + property: + description: Used to select a specific property of the Provider value (if a map), if supported + type: string + version: + description: Used to select a specific version of the Provider value, if supported + type: string + required: + - key + type: object + type: array + refreshInterval: + default: 1h + description: |- + RefreshInterval is the amount of time before the values are read again from the SecretStore provider + Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h" + May be set to zero to fetch and create it once. Defaults to 1h. + type: string + secretStoreRef: + description: SecretStoreRef defines which SecretStore to fetch the ExternalSecret data. + properties: + kind: + description: |- + Kind of the SecretStore resource (SecretStore or ClusterSecretStore) + Defaults to `SecretStore` + type: string + name: + description: Name of the SecretStore resource + type: string + required: + - name + type: object + target: + description: |- + ExternalSecretTarget defines the Kubernetes Secret to be created + There can be only one target per ExternalSecret. + properties: + creationPolicy: + default: Owner + description: |- + CreationPolicy defines rules on how to create the resulting Secret + Defaults to 'Owner' + enum: + - Owner + - Merge + - None + type: string + immutable: + description: Immutable defines if the final secret will be immutable + type: boolean + name: + description: |- + Name defines the name of the Secret resource to be managed + This field is immutable + Defaults to the .metadata.name of the ExternalSecret resource + type: string + template: + description: Template defines a blueprint for the created Secret resource. 
+ properties: + data: + additionalProperties: + type: string + type: object + engineVersion: + default: v1 + description: |- + EngineVersion specifies the template engine version + that should be used to compile/execute the + template specified in .data and .templateFrom[]. + enum: + - v1 + - v2 + type: string + metadata: + description: ExternalSecretTemplateMetadata defines metadata fields for the Secret blueprint. + properties: + annotations: + additionalProperties: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + type: object + templateFrom: + items: + maxProperties: 1 + minProperties: 1 + properties: + configMap: + properties: + items: + items: + properties: + key: + type: string + required: + - key + type: object + type: array + name: + type: string + required: + - items + - name + type: object + secret: + properties: + items: + items: + properties: + key: + type: string + required: + - key + type: object + type: array + name: + type: string + required: + - items + - name + type: object + type: object + type: array + type: + type: string + type: object + type: object + required: + - secretStoreRef + - target + type: object + status: + properties: + binding: + description: Binding represents a servicebinding.io Provisioned Service reference to the secret + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + conditions: + items: + properties: + lastTransitionTime: + format: date-time + type: string + message: + type: string + reason: + type: string + status: + type: string + type: + type: string + required: + - status + - type + type: object + type: array + refreshTime: + description: |- + refreshTime is the time and date the external secret was fetched and + the target secret updated + format: date-time + nullable: true + type: string + syncedResourceVersion: + description: SyncedResourceVersion keeps track of the last synced version + type: string + type: object + type: object + served: true + storage: false + subresources: + status: {} + - additionalPrinterColumns: + - jsonPath: .spec.secretStoreRef.name + name: Store + type: string + - jsonPath: .spec.refreshInterval + name: Refresh Interval + type: string + - jsonPath: .status.conditions[?(@.type=="Ready")].reason + name: Status + type: string + - jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: ExternalSecret is the Schema for the external-secrets API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ExternalSecretSpec defines the desired state of ExternalSecret. + properties: + data: + description: Data defines the connection between the Kubernetes Secret keys and the Provider data + items: + description: ExternalSecretData defines the connection between the Kubernetes Secret key (spec.data.) and the Provider data. + properties: + remoteRef: + description: |- + RemoteRef points to the remote secret and defines + which secret (version/property/..) to fetch. + properties: + conversionStrategy: + default: Default + description: Used to define a conversion Strategy + enum: + - Default + - Unicode + type: string + decodingStrategy: + default: None + description: Used to define a decoding Strategy + enum: + - Auto + - Base64 + - Base64URL + - None + type: string + key: + description: Key is the key used in the Provider, mandatory + type: string + metadataPolicy: + default: None + description: Policy for fetching tags/labels from provider secrets, possible options are Fetch, None. Defaults to None + enum: + - None + - Fetch + type: string + property: + description: Used to select a specific property of the Provider value (if a map), if supported + type: string + version: + description: Used to select a specific version of the Provider value, if supported + type: string + required: + - key + type: object + secretKey: + description: |- + SecretKey defines the key in which the controller stores + the value. This is the key in the Kind=Secret + type: string + sourceRef: + description: |- + SourceRef allows you to override the source + from which the value will pulled from. + maxProperties: 1 + properties: + generatorRef: + description: |- + GeneratorRef points to a generator custom resource. + + Deprecated: The generatorRef is not implemented in .data[]. + this will be removed with v1. 
+ properties: + apiVersion: + default: generators.external-secrets.io/v1alpha1 + description: Specify the apiVersion of the generator resource + type: string + kind: + description: Specify the Kind of the resource, e.g. Password, ACRAccessToken etc. + type: string + name: + description: Specify the name of the generator resource + type: string + required: + - kind + - name + type: object + storeRef: + description: SecretStoreRef defines which SecretStore to fetch the ExternalSecret data. + properties: + kind: + description: |- + Kind of the SecretStore resource (SecretStore or ClusterSecretStore) + Defaults to `SecretStore` + type: string + name: + description: Name of the SecretStore resource + type: string + required: + - name + type: object + type: object + required: + - remoteRef + - secretKey + type: object + type: array + dataFrom: + description: |- + DataFrom is used to fetch all properties from a specific Provider data + If multiple entries are specified, the Secret keys are merged in the specified order + items: + properties: + extract: + description: |- + Used to extract multiple key/value pairs from one secret + Note: Extract does not support sourceRef.Generator or sourceRef.GeneratorRef. + properties: + conversionStrategy: + default: Default + description: Used to define a conversion Strategy + enum: + - Default + - Unicode + type: string + decodingStrategy: + default: None + description: Used to define a decoding Strategy + enum: + - Auto + - Base64 + - Base64URL + - None + type: string + key: + description: Key is the key used in the Provider, mandatory + type: string + metadataPolicy: + default: None + description: Policy for fetching tags/labels from provider secrets, possible options are Fetch, None. 
Defaults to None + enum: + - None + - Fetch + type: string + property: + description: Used to select a specific property of the Provider value (if a map), if supported + type: string + version: + description: Used to select a specific version of the Provider value, if supported + type: string + required: + - key + type: object + find: + description: |- + Used to find secrets based on tags or regular expressions + Note: Find does not support sourceRef.Generator or sourceRef.GeneratorRef. + properties: + conversionStrategy: + default: Default + description: Used to define a conversion Strategy + enum: + - Default + - Unicode + type: string + decodingStrategy: + default: None + description: Used to define a decoding Strategy + enum: + - Auto + - Base64 + - Base64URL + - None + type: string + name: + description: Finds secrets based on the name. + properties: + regexp: + description: Finds secrets base + type: string + type: object + path: + description: A root path to start the find operations. + type: string + tags: + additionalProperties: + type: string + description: Find secrets based on tags. + type: object + type: object + rewrite: + description: |- + Used to rewrite secret Keys after getting them from the secret Provider + Multiple Rewrite operations can be provided. They are applied in a layered order (first to last) + items: + properties: + regexp: + description: |- + Used to rewrite with regular expressions. + The resulting key will be the output of a regexp.ReplaceAll operation. + properties: + source: + description: Used to define the regular expression of a re.Compiler. + type: string + target: + description: Used to define the target pattern of a ReplaceAll operation. + type: string + required: + - source + - target + type: object + transform: + description: |- + Used to apply string transformation on the secrets. + The resulting key will be the output of the template applied by the operation. 
+ properties: + template: + description: |- + Used to define the template to apply on the secret name. + `.value ` will specify the secret name in the template. + type: string + required: + - template + type: object + type: object + type: array + sourceRef: + description: |- + SourceRef points to a store or generator + which contains secret values ready to use. + Use this in combination with Extract or Find pull values out of + a specific SecretStore. + When sourceRef points to a generator Extract or Find is not supported. + The generator returns a static map of values + maxProperties: 1 + properties: + generatorRef: + description: GeneratorRef points to a generator custom resource. + properties: + apiVersion: + default: generators.external-secrets.io/v1alpha1 + description: Specify the apiVersion of the generator resource + type: string + kind: + description: Specify the Kind of the resource, e.g. Password, ACRAccessToken etc. + type: string + name: + description: Specify the name of the generator resource + type: string + required: + - kind + - name + type: object + storeRef: + description: SecretStoreRef defines which SecretStore to fetch the ExternalSecret data. + properties: + kind: + description: |- + Kind of the SecretStore resource (SecretStore or ClusterSecretStore) + Defaults to `SecretStore` + type: string + name: + description: Name of the SecretStore resource + type: string + required: + - name + type: object + type: object + type: object + type: array + refreshInterval: + default: 1h + description: |- + RefreshInterval is the amount of time before the values are read again from the SecretStore provider + Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h" + May be set to zero to fetch and create it once. Defaults to 1h. + type: string + secretStoreRef: + description: SecretStoreRef defines which SecretStore to fetch the ExternalSecret data. 
+ properties: + kind: + description: |- + Kind of the SecretStore resource (SecretStore or ClusterSecretStore) + Defaults to `SecretStore` + type: string + name: + description: Name of the SecretStore resource + type: string + required: + - name + type: object + target: + default: + creationPolicy: Owner + deletionPolicy: Retain + description: |- + ExternalSecretTarget defines the Kubernetes Secret to be created + There can be only one target per ExternalSecret. + properties: + creationPolicy: + default: Owner + description: |- + CreationPolicy defines rules on how to create the resulting Secret + Defaults to 'Owner' + enum: + - Owner + - Orphan + - Merge + - None + type: string + deletionPolicy: + default: Retain + description: |- + DeletionPolicy defines rules on how to delete the resulting Secret + Defaults to 'Retain' + enum: + - Delete + - Merge + - Retain + type: string + immutable: + description: Immutable defines if the final secret will be immutable + type: boolean + name: + description: |- + Name defines the name of the Secret resource to be managed + This field is immutable + Defaults to the .metadata.name of the ExternalSecret resource + type: string + template: + description: Template defines a blueprint for the created Secret resource. + properties: + data: + additionalProperties: + type: string + type: object + engineVersion: + default: v2 + description: |- + EngineVersion specifies the template engine version + that should be used to compile/execute the + template specified in .data and .templateFrom[]. + enum: + - v1 + - v2 + type: string + mergePolicy: + default: Replace + enum: + - Replace + - Merge + type: string + metadata: + description: ExternalSecretTemplateMetadata defines metadata fields for the Secret blueprint. 
+ properties: + annotations: + additionalProperties: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + type: object + templateFrom: + items: + properties: + configMap: + properties: + items: + items: + properties: + key: + type: string + templateAs: + default: Values + enum: + - Values + - KeysAndValues + type: string + required: + - key + type: object + type: array + name: + type: string + required: + - items + - name + type: object + literal: + type: string + secret: + properties: + items: + items: + properties: + key: + type: string + templateAs: + default: Values + enum: + - Values + - KeysAndValues + type: string + required: + - key + type: object + type: array + name: + type: string + required: + - items + - name + type: object + target: + default: Data + enum: + - Data + - Annotations + - Labels + type: string + type: object + type: array + type: + type: string + type: object + type: object + type: object + status: + properties: + binding: + description: Binding represents a servicebinding.io Provisioned Service reference to the secret + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + conditions: + items: + properties: + lastTransitionTime: + format: date-time + type: string + message: + type: string + reason: + type: string + status: + type: string + type: + type: string + required: + - status + - type + type: object + type: array + refreshTime: + description: |- + refreshTime is the time and date the external secret was fetched and + the target secret updated + format: date-time + nullable: true + type: string + syncedResourceVersion: + description: SyncedResourceVersion keeps track of the last synced version + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} +{{- if .Values.crds.conversion.enabled }} + conversion: + strategy: Webhook + webhook: + conversionReviewVersions: + - v1 + clientConfig: + service: + name: {{ include "external-secrets.fullname" . }}-webhook + namespace: {{ .Release.Namespace | quote }} + path: /convert +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/fake.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/fake.yaml new file mode 100644 index 00000000..6aa03176 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/fake.yaml @@ -0,0 +1,87 @@ +{{- if .Values.installCRDs }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- with .Values.crds.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- if and .Values.crds.conversion.enabled .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "external-secrets.fullname" . 
}}-webhook + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.3 + labels: + external-secrets.io/component: controller + name: fakes.generators.external-secrets.io +spec: + group: generators.external-secrets.io + names: + categories: + - external-secrets + - external-secrets-generators + kind: Fake + listKind: FakeList + plural: fakes + shortNames: + - fake + singular: fake + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + Fake generator is used for testing. It lets you define + a static set of credentials that is always returned. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: FakeSpec contains the static data. + properties: + controller: + description: |- + Used to select the correct ESO controller (think: ingress.ingressClassName) + The ESO controller is instantiated with a specific controller name and filters VDS based on this property + type: string + data: + additionalProperties: + type: string + description: |- + Data defines the static data returned + by this generator. 
+ type: object + type: object + type: object + served: true + storage: true + subresources: + status: {} +{{- if .Values.crds.conversion.enabled }} + conversion: + strategy: Webhook + webhook: + conversionReviewVersions: + - v1 + clientConfig: + service: + name: {{ include "external-secrets.fullname" . }}-webhook + namespace: {{ .Release.Namespace | quote }} + path: /convert +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/gcraccesstoken.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/gcraccesstoken.yaml new file mode 100644 index 00000000..2da8a843 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/gcraccesstoken.yaml @@ -0,0 +1,139 @@ +{{- if .Values.installCRDs }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- with .Values.crds.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- if and .Values.crds.conversion.enabled .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "external-secrets.fullname" . }}-webhook + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.3 + labels: + external-secrets.io/component: controller + name: gcraccesstokens.generators.external-secrets.io +spec: + group: generators.external-secrets.io + names: + categories: + - external-secrets + - external-secrets-generators + kind: GCRAccessToken + listKind: GCRAccessTokenList + plural: gcraccesstokens + shortNames: + - gcraccesstoken + singular: gcraccesstoken + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + GCRAccessToken generates an GCP access token + that can be used to authenticate with GCR. 
+ properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + auth: + description: Auth defines the means for authenticating with GCP + properties: + secretRef: + properties: + secretAccessKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + workloadIdentity: + properties: + clusterLocation: + type: string + clusterName: + type: string + clusterProjectID: + type: string + serviceAccountRef: + description: A reference to a ServiceAccount resource. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. 
IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + required: + - clusterLocation + - clusterName + - serviceAccountRef + type: object + type: object + projectID: + description: ProjectID defines which project to use to authenticate with + type: string + required: + - auth + - projectID + type: object + type: object + served: true + storage: true + subresources: + status: {} +{{- if .Values.crds.conversion.enabled }} + conversion: + strategy: Webhook + webhook: + conversionReviewVersions: + - v1 + clientConfig: + service: + name: {{ include "external-secrets.fullname" . }}-webhook + namespace: {{ .Release.Namespace | quote }} + path: /convert +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/githubaccesstoken.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/githubaccesstoken.yaml new file mode 100644 index 00000000..a94a89f4 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/githubaccesstoken.yaml @@ -0,0 +1,113 @@ +{{- if .Values.installCRDs }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- with .Values.crds.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- if and .Values.crds.conversion.enabled .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "external-secrets.fullname" . 
}}-webhook + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.3 + labels: + external-secrets.io/component: controller + name: githubaccesstokens.generators.external-secrets.io +spec: + group: generators.external-secrets.io + names: + categories: + - external-secrets + - external-secrets-generators + kind: GithubAccessToken + listKind: GithubAccessTokenList + plural: githubaccesstokens + shortNames: + - githubaccesstoken + singular: githubaccesstoken + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: GithubAccessToken generates ghs_ accessToken + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + appID: + type: string + auth: + description: Auth configures how ESO authenticates with a Github instance. + properties: + privateKey: + properties: + secretRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. 
+ type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - secretRef + type: object + required: + - privateKey + type: object + installID: + type: string + url: + description: URL configures the Github instance URL. Defaults to https://github.com/. + type: string + required: + - appID + - auth + - installID + type: object + type: object + served: true + storage: true + subresources: + status: {} +{{- if .Values.crds.conversion.enabled }} + conversion: + strategy: Webhook + webhook: + conversionReviewVersions: + - v1 + clientConfig: + service: + name: {{ include "external-secrets.fullname" . }}-webhook + namespace: {{ .Release.Namespace | quote }} + path: /convert +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/password.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/password.yaml new file mode 100644 index 00000000..ce250e8c --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/password.yaml @@ -0,0 +1,109 @@ +{{- if .Values.installCRDs }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- with .Values.crds.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- if and .Values.crds.conversion.enabled .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "external-secrets.fullname" . 
}}-webhook + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.3 + labels: + external-secrets.io/component: controller + name: passwords.generators.external-secrets.io +spec: + group: generators.external-secrets.io + names: + categories: + - external-secrets + - external-secrets-generators + kind: Password + listKind: PasswordList + plural: passwords + shortNames: + - password + singular: password + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + Password generates a random password based on the + configuration parameters in spec. + You can specify the length, characterset and other attributes. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: PasswordSpec controls the behavior of the password generator. + properties: + allowRepeat: + default: false + description: set AllowRepeat to true to allow repeating characters. + type: boolean + digits: + description: |- + Digits specifies the number of digits in the generated + password. If omitted it defaults to 25% of the length of the password + type: integer + length: + default: 24 + description: |- + Length of the password to be generated. 
+ Defaults to 24 + type: integer + noUpper: + default: false + description: Set NoUpper to disable uppercase characters + type: boolean + symbolCharacters: + description: |- + SymbolCharacters specifies the special characters that should be used + in the generated password. + type: string + symbols: + description: |- + Symbols specifies the number of symbol characters in the generated + password. If omitted it defaults to 25% of the length of the password + type: integer + required: + - allowRepeat + - length + - noUpper + type: object + type: object + served: true + storage: true + subresources: + status: {} +{{- if .Values.crds.conversion.enabled }} + conversion: + strategy: Webhook + webhook: + conversionReviewVersions: + - v1 + clientConfig: + service: + name: {{ include "external-secrets.fullname" . }}-webhook + namespace: {{ .Release.Namespace | quote }} + path: /convert +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/pushsecret.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/pushsecret.yaml new file mode 100644 index 00000000..596b565e --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/pushsecret.yaml @@ -0,0 +1,388 @@ +{{- if and (.Values.installCRDs) (.Values.crds.createPushSecret) }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- with .Values.crds.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- if and .Values.crds.conversion.enabled .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "external-secrets.fullname" . 
}}-webhook + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.3 + labels: + external-secrets.io/component: controller + name: pushsecrets.external-secrets.io +spec: + group: external-secrets.io + names: + categories: + - external-secrets + kind: PushSecret + listKind: PushSecretList + plural: pushsecrets + singular: pushsecret + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + - jsonPath: .status.conditions[?(@.type=="Ready")].reason + name: Status + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: PushSecretSpec configures the behavior of the PushSecret. + properties: + data: + description: Secret Data that should be pushed to providers + items: + properties: + conversionStrategy: + default: None + description: Used to define a conversion Strategy for the secret keys + enum: + - None + - ReverseUnicode + type: string + match: + description: Match a given Secret Key to be pushed to the provider. + properties: + remoteRef: + description: Remote Refs to push to providers. 
+ properties: + property: + description: Name of the property in the resulting secret + type: string + remoteKey: + description: Name of the resulting provider secret. + type: string + required: + - remoteKey + type: object + secretKey: + description: Secret Key to be pushed + type: string + required: + - remoteRef + type: object + metadata: + description: |- + Metadata is metadata attached to the secret. + The structure of metadata is provider specific, please look it up in the provider documentation. + x-kubernetes-preserve-unknown-fields: true + required: + - match + type: object + type: array + deletionPolicy: + default: None + description: 'Deletion Policy to handle Secrets in the provider. Possible Values: "Delete/None". Defaults to "None".' + enum: + - Delete + - None + type: string + refreshInterval: + description: The Interval to which External Secrets will try to push a secret definition + type: string + secretStoreRefs: + items: + properties: + kind: + default: SecretStore + description: |- + Kind of the SecretStore resource (SecretStore or ClusterSecretStore) + Defaults to `SecretStore` + type: string + labelSelector: + description: Optionally, sync to secret stores with label selector + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. 
This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + name: + description: Optionally, sync to the SecretStore of the given name + type: string + type: object + type: array + selector: + description: The Secret Selector (k8s source) for the Push Secret + properties: + secret: + description: Select a Secret to Push. + properties: + name: + description: Name of the Secret. The Secret must exist in the same namespace as the PushSecret manifest. + type: string + required: + - name + type: object + required: + - secret + type: object + template: + description: Template defines a blueprint for the created Secret resource. + properties: + data: + additionalProperties: + type: string + type: object + engineVersion: + default: v2 + description: |- + EngineVersion specifies the template engine version + that should be used to compile/execute the + template specified in .data and .templateFrom[]. + enum: + - v1 + - v2 + type: string + mergePolicy: + default: Replace + enum: + - Replace + - Merge + type: string + metadata: + description: ExternalSecretTemplateMetadata defines metadata fields for the Secret blueprint. 
+ properties: + annotations: + additionalProperties: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + type: object + templateFrom: + items: + properties: + configMap: + properties: + items: + items: + properties: + key: + type: string + templateAs: + default: Values + enum: + - Values + - KeysAndValues + type: string + required: + - key + type: object + type: array + name: + type: string + required: + - items + - name + type: object + literal: + type: string + secret: + properties: + items: + items: + properties: + key: + type: string + templateAs: + default: Values + enum: + - Values + - KeysAndValues + type: string + required: + - key + type: object + type: array + name: + type: string + required: + - items + - name + type: object + target: + default: Data + enum: + - Data + - Annotations + - Labels + type: string + type: object + type: array + type: + type: string + type: object + updatePolicy: + default: Replace + description: 'UpdatePolicy to handle Secrets in the provider. Possible Values: "Replace/IfNotExists". Defaults to "Replace".' + enum: + - Replace + - IfNotExists + type: string + required: + - secretStoreRefs + - selector + type: object + status: + description: PushSecretStatus indicates the history of the status of PushSecret. + properties: + conditions: + items: + description: PushSecretStatusCondition indicates the status of the PushSecret. + properties: + lastTransitionTime: + format: date-time + type: string + message: + type: string + reason: + type: string + status: + type: string + type: + description: PushSecretConditionType indicates the condition of the PushSecret. 
+ type: string + required: + - status + - type + type: object + type: array + refreshTime: + description: |- + refreshTime is the time and date the external secret was fetched and + the target secret updated + format: date-time + nullable: true + type: string + syncedPushSecrets: + additionalProperties: + additionalProperties: + properties: + conversionStrategy: + default: None + description: Used to define a conversion Strategy for the secret keys + enum: + - None + - ReverseUnicode + type: string + match: + description: Match a given Secret Key to be pushed to the provider. + properties: + remoteRef: + description: Remote Refs to push to providers. + properties: + property: + description: Name of the property in the resulting secret + type: string + remoteKey: + description: Name of the resulting provider secret. + type: string + required: + - remoteKey + type: object + secretKey: + description: Secret Key to be pushed + type: string + required: + - remoteRef + type: object + metadata: + description: |- + Metadata is metadata attached to the secret. + The structure of metadata is provider specific, please look it up in the provider documentation. + x-kubernetes-preserve-unknown-fields: true + required: + - match + type: object + type: object + description: |- + Synced PushSecrets, including secrets that already exist in provider. + Matches secret stores to PushSecretData that was stored to that secret store. + type: object + syncedResourceVersion: + description: SyncedResourceVersion keeps track of the last synced version. + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} +{{- if .Values.crds.conversion.enabled }} + conversion: + strategy: Webhook + webhook: + conversionReviewVersions: + - v1 + clientConfig: + service: + name: {{ include "external-secrets.fullname" . 
}}-webhook + namespace: {{ .Release.Namespace | quote }} + path: /convert +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/secretstore.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/secretstore.yaml new file mode 100644 index 00000000..b5cfb3b0 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/secretstore.yaml @@ -0,0 +1,4640 @@ +{{- if .Values.installCRDs }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- with .Values.crds.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- if and .Values.crds.conversion.enabled .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "external-secrets.fullname" . }}-webhook + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.3 + labels: + external-secrets.io/component: controller + name: secretstores.external-secrets.io +spec: + group: external-secrets.io + names: + categories: + - external-secrets + kind: SecretStore + listKind: SecretStoreList + plural: secretstores + shortNames: + - ss + singular: secretstore + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + - jsonPath: .status.conditions[?(@.type=="Ready")].reason + name: Status + type: string + deprecated: true + name: v1alpha1 + schema: + openAPIV3Schema: + description: SecretStore represents a secure external location for storing secrets, which can be referenced as part of `storeRef` fields. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: SecretStoreSpec defines the desired state of SecretStore. + properties: + controller: + description: |- + Used to select the correct ESO controller (think: ingress.ingressClassName) + The ESO controller is instantiated with a specific controller name and filters ES based on this property + type: string + provider: + description: Used to configure the provider. Only one provider may be set + maxProperties: 1 + minProperties: 1 + properties: + akeyless: + description: Akeyless configures this store to sync secrets using Akeyless Vault provider + properties: + akeylessGWApiURL: + description: Akeyless GW API Url from which the secrets to be fetched from. + type: string + authSecretRef: + description: Auth configures how the operator authenticates with Akeyless. + properties: + kubernetesAuth: + description: |- + Kubernetes authenticates with Akeyless by passing the ServiceAccount + token stored in the named Secret resource. + properties: + accessID: + description: the Akeyless Kubernetes auth-method access-id + type: string + k8sConfName: + description: Kubernetes-auth configuration name in Akeyless-Gateway + type: string + secretRef: + description: |- + Optional secret field containing a Kubernetes ServiceAccount JWT used + for authenticating with Akeyless. If a name is specified without a key, + `token` is the default. If one is not specified, the one bound to + the controller will be used. 
+ properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + serviceAccountRef: + description: |- + Optional service account field containing the name of a kubernetes ServiceAccount. + If the service account is specified, the service account secret token JWT will be used + for authenticating with Akeyless. If the service account selector is not supplied, + the secretRef will be used instead. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + required: + - accessID + - k8sConfName + type: object + secretRef: + description: |- + Reference to a Secret that contains the details + to authenticate with Akeyless. + properties: + accessID: + description: The SecretAccessID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. 
+ type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + accessType: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + accessTypeParam: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + type: object + caBundle: + description: |- + PEM/base64 encoded CA bundle used to validate Akeyless Gateway certificate. Only used + if the AkeylessGWApiURL URL is using HTTPS protocol. If not set the system root certificates + are used to validate the TLS connection. 
+ format: byte + type: string + caProvider: + description: The provider for the CA bundle to use to validate Akeyless Gateway certificate. + properties: + key: + description: The key the value inside of the provider type to use, only used with "Secret" type + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: The namespace the Provider type is in. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + required: + - akeylessGWApiURL + - authSecretRef + type: object + alibaba: + description: Alibaba configures this store to sync secrets using Alibaba Cloud provider + properties: + auth: + description: AlibabaAuth contains a secretRef for credentials. + properties: + rrsa: + description: Authenticate against Alibaba using RRSA. + properties: + oidcProviderArn: + type: string + oidcTokenFilePath: + type: string + roleArn: + type: string + sessionName: + type: string + required: + - oidcProviderArn + - oidcTokenFilePath + - roleArn + - sessionName + type: object + secretRef: + description: AlibabaAuthSecretRef holds secret references for Alibaba credentials. + properties: + accessKeyIDSecretRef: + description: The AccessKeyID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + accessKeySecretSecretRef: + description: The AccessKeySecret is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - accessKeyIDSecretRef + - accessKeySecretSecretRef + type: object + type: object + regionID: + description: Alibaba Region to be used for the provider + type: string + required: + - auth + - regionID + type: object + aws: + description: AWS configures this store to sync secrets using AWS Secret Manager provider + properties: + auth: + description: |- + Auth defines the information necessary to authenticate against AWS + if not set aws sdk will infer credentials from your environment + see: https://docs.aws.amazon.com/sdk-for-go/v1/developer-guide/configuring-sdk.html#specifying-credentials + properties: + jwt: + description: Authenticate against AWS using service account tokens. + properties: + serviceAccountRef: + description: A reference to a ServiceAccount resource. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + required: + - name + type: object + type: object + secretRef: + description: |- + AWSAuthSecretRef holds secret references for AWS credentials + both AccessKeyID and SecretAccessKey must be defined in order to properly authenticate. + properties: + accessKeyIDSecretRef: + description: The AccessKeyID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + secretAccessKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + type: object + type: object + region: + description: AWS Region to be used for the provider + type: string + role: + description: Role is a Role ARN which the SecretManager provider will assume + type: string + service: + description: Service defines which service should be used to fetch the secrets + enum: + - SecretsManager + - ParameterStore + type: string + required: + - region + - service + type: object + azurekv: + description: AzureKV configures this store to sync secrets using Azure Key Vault provider + properties: + authSecretRef: + description: Auth configures how the operator authenticates with Azure. Required for ServicePrincipal auth type. + properties: + clientId: + description: The Azure clientId of the service principle used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + clientSecret: + description: The Azure ClientSecret of the service principle used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + type: object + authType: + default: ServicePrincipal + description: |- + Auth type defines how to authenticate to the keyvault service. + Valid values are: + - "ServicePrincipal" (default): Using a service principal (tenantId, clientId, clientSecret) + - "ManagedIdentity": Using Managed Identity assigned to the pod (see aad-pod-identity) + enum: + - ServicePrincipal + - ManagedIdentity + - WorkloadIdentity + type: string + identityId: + description: If multiple Managed Identity is assigned to the pod, you can select the one to be used + type: string + serviceAccountRef: + description: |- + ServiceAccountRef specified the service account + that should be used when authenticating with WorkloadIdentity. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + tenantId: + description: TenantID configures the Azure Tenant to send requests to. Required for ServicePrincipal auth type. + type: string + vaultUrl: + description: Vault Url from which the secrets to be fetched from. 
+ type: string + required: + - vaultUrl + type: object + fake: + description: Fake configures a store with static key/value pairs + properties: + data: + items: + properties: + key: + type: string + value: + type: string + valueMap: + additionalProperties: + type: string + type: object + version: + type: string + required: + - key + type: object + type: array + required: + - data + type: object + gcpsm: + description: GCPSM configures this store to sync secrets using Google Cloud Platform Secret Manager provider + properties: + auth: + description: Auth defines the information necessary to authenticate against GCP + properties: + secretRef: + properties: + secretAccessKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + workloadIdentity: + properties: + clusterLocation: + type: string + clusterName: + type: string + clusterProjectID: + type: string + serviceAccountRef: + description: A reference to a ServiceAccount resource. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. 
cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + required: + - clusterLocation + - clusterName + - serviceAccountRef + type: object + type: object + projectID: + description: ProjectID project where secret is located + type: string + type: object + gitlab: + description: GitLab configures this store to sync secrets using GitLab Variables provider + properties: + auth: + description: Auth configures how secret-manager authenticates with a GitLab instance. + properties: + SecretRef: + properties: + accessToken: + description: AccessToken is used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - SecretRef + type: object + projectID: + description: ProjectID specifies a project where secrets are located. + type: string + url: + description: URL configures the GitLab instance URL. Defaults to https://gitlab.com/. + type: string + required: + - auth + type: object + ibm: + description: IBM configures this store to sync secrets using IBM Cloud provider + properties: + auth: + description: Auth configures how secret-manager authenticates with the IBM secrets manager. + properties: + secretRef: + properties: + secretApiKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. 
+ type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - secretRef + type: object + serviceUrl: + description: ServiceURL is the Endpoint URL that is specific to the Secrets Manager service instance + type: string + required: + - auth + type: object + kubernetes: + description: Kubernetes configures this store to sync secrets using a Kubernetes cluster provider + properties: + auth: + description: Auth configures how secret-manager authenticates with a Kubernetes instance. + maxProperties: 1 + minProperties: 1 + properties: + cert: + description: has both clientCert and clientKey as secretKeySelector + properties: + clientCert: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + clientKey: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. 
+ type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + serviceAccount: + description: points to a service account that should be used for authentication + properties: + serviceAccount: + description: A reference to a ServiceAccount resource. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + type: object + token: + description: use static token to authenticate with + properties: + bearerToken: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + type: object + remoteNamespace: + default: default + description: Remote namespace to fetch the secrets from + type: string + server: + description: configures the Kubernetes server Address. 
+ properties: + caBundle: + description: CABundle is a base64-encoded CA certificate + format: byte + type: string + caProvider: + description: 'see: https://external-secrets.io/v0.4.1/spec/#external-secrets.io/v1alpha1.CAProvider' + properties: + key: + description: The key the value inside of the provider type to use, only used with "Secret" type + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: The namespace the Provider type is in. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + url: + default: kubernetes.default + description: configures the Kubernetes server Address. + type: string + type: object + required: + - auth + type: object + oracle: + description: Oracle configures this store to sync secrets using Oracle Vault provider + properties: + auth: + description: |- + Auth configures how secret-manager authenticates with the Oracle Vault. + If empty, instance principal is used. Optionally, the authenticating principal type + and/or user data may be supplied for the use of workload identity and user principal. + properties: + secretRef: + description: SecretRef to pass through sensitive information. + properties: + fingerprint: + description: Fingerprint is the fingerprint of the API private key. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + privatekey: + description: PrivateKey is the user's API Signing Key in PEM format, used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - fingerprint + - privatekey + type: object + tenancy: + description: Tenancy is the tenancy OCID where user is located. + type: string + user: + description: User is an access OCID specific to the account. + type: string + required: + - secretRef + - tenancy + - user + type: object + compartment: + description: |- + Compartment is the vault compartment OCID. + Required for PushSecret + type: string + encryptionKey: + description: |- + EncryptionKey is the OCID of the encryption key within the vault. + Required for PushSecret + type: string + principalType: + description: |- + The type of principal to use for authentication. If left blank, the Auth struct will + determine the principal type. This optional field must be specified if using + workload identity. + enum: + - "" + - UserPrincipal + - InstancePrincipal + - Workload + type: string + region: + description: Region is the region where vault is located. + type: string + serviceAccountRef: + description: |- + ServiceAccountRef specified the service account + that should be used when authenticating with WorkloadIdentity. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. 
IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + vault: + description: Vault is the vault's OCID of the specific vault where secret is located. + type: string + required: + - region + - vault + type: object + passworddepot: + description: Configures a store to sync secrets with a Password Depot instance. + properties: + auth: + description: Auth configures how secret-manager authenticates with a Password Depot instance. + properties: + secretRef: + properties: + credentials: + description: Username / Password is used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - secretRef + type: object + database: + description: Database to use as source + type: string + host: + description: URL configures the Password Depot instance URL. + type: string + required: + - auth + - database + - host + type: object + vault: + description: Vault configures this store to sync secrets using Hashi provider + properties: + auth: + description: Auth configures how secret-manager authenticates with the Vault server. 
+ properties: + appRole: + description: |- + AppRole authenticates with Vault using the App Role auth mechanism, + with the role and secret stored in a Kubernetes Secret resource. + properties: + path: + default: approle + description: |- + Path where the App Role authentication backend is mounted + in Vault, e.g: "approle" + type: string + roleId: + description: |- + RoleID configured in the App Role authentication backend when setting + up the authentication backend in Vault. + type: string + secretRef: + description: |- + Reference to a key in a Secret that contains the App Role secret used + to authenticate with Vault. + The `key` field must be specified and denotes which entry within the Secret + resource is used as the app role secret. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - path + - roleId + - secretRef + type: object + cert: + description: |- + Cert authenticates with TLS Certificates by passing client certificate, private key and ca certificate + Cert authentication method + properties: + clientCert: + description: |- + ClientCert is a certificate to authenticate using the Cert Vault + authentication method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. 
Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + secretRef: + description: |- + SecretRef to a key in a Secret resource containing client private key to + authenticate with Vault using the Cert authentication method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + jwt: + description: |- + Jwt authenticates with Vault by passing role and JWT token using the + JWT/OIDC authentication method + properties: + kubernetesServiceAccountToken: + description: |- + Optional ServiceAccountToken specifies the Kubernetes service account for which to request + a token for with the `TokenRequest` API. + properties: + audiences: + description: |- + Optional audiences field that will be used to request a temporary Kubernetes service + account token for the service account referenced by `serviceAccountRef`. + Defaults to a single audience `vault` it not specified. + items: + type: string + type: array + expirationSeconds: + description: |- + Optional expiration time in seconds that will be used to request a temporary + Kubernetes service account token for the service account referenced by + `serviceAccountRef`. + Defaults to 10 minutes. + format: int64 + type: integer + serviceAccountRef: + description: Service account field containing the name of a kubernetes ServiceAccount. 
+ properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + required: + - serviceAccountRef + type: object + path: + default: jwt + description: |- + Path where the JWT authentication backend is mounted + in Vault, e.g: "jwt" + type: string + role: + description: |- + Role is a JWT role to authenticate using the JWT/OIDC Vault + authentication method + type: string + secretRef: + description: |- + Optional SecretRef that refers to a key in a Secret resource containing JWT token to + authenticate with Vault using the JWT/OIDC authentication method. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - path + type: object + kubernetes: + description: |- + Kubernetes authenticates with Vault by passing the ServiceAccount + token stored in the named Secret resource to the Vault server. 
+ properties: + mountPath: + default: kubernetes + description: |- + Path where the Kubernetes authentication backend is mounted in Vault, e.g: + "kubernetes" + type: string + role: + description: |- + A required field containing the Vault Role to assume. A Role binds a + Kubernetes ServiceAccount with a set of Vault policies. + type: string + secretRef: + description: |- + Optional secret field containing a Kubernetes ServiceAccount JWT used + for authenticating with Vault. If a name is specified without a key, + `token` is the default. If one is not specified, the one bound to + the controller will be used. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + serviceAccountRef: + description: |- + Optional service account field containing the name of a kubernetes ServiceAccount. + If the service account is specified, the service account secret token JWT will be used + for authenticating with Vault. If the service account selector is not supplied, + the secretRef will be used instead. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. 
cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + required: + - mountPath + - role + type: object + ldap: + description: |- + Ldap authenticates with Vault by passing username/password pair using + the LDAP authentication method + properties: + path: + default: ldap + description: |- + Path where the LDAP authentication backend is mounted + in Vault, e.g: "ldap" + type: string + secretRef: + description: |- + SecretRef to a key in a Secret resource containing password for the LDAP + user used to authenticate with Vault using the LDAP authentication + method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + username: + description: |- + Username is a LDAP user name used to authenticate using the LDAP Vault + authentication method + type: string + required: + - path + - username + type: object + tokenSecretRef: + description: TokenSecretRef authenticates with Vault by presenting a token. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + type: object + caBundle: + description: |- + PEM encoded CA bundle used to validate Vault server certificate. Only used + if the Server URL is using HTTPS protocol. This parameter is ignored for + plain HTTP protocol connection. If not set the system root certificates + are used to validate the TLS connection. + format: byte + type: string + caProvider: + description: The provider for the CA bundle to use to validate Vault server certificate. + properties: + key: + description: The key the value inside of the provider type to use, only used with "Secret" type + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: The namespace the Provider type is in. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + forwardInconsistent: + description: |- + ForwardInconsistent tells Vault to forward read-after-write requests to the Vault + leader instead of simply retrying within a loop. This can increase performance if + the option is enabled serverside. + https://www.vaultproject.io/docs/configuration/replication#allow_forwarding_via_header + type: boolean + namespace: + description: |- + Name of the vault namespace. Namespaces is a set of features within Vault Enterprise that allows + Vault environments to support Secure Multi-tenancy. e.g: "ns1". + More about namespaces can be found here https://www.vaultproject.io/docs/enterprise/namespaces + type: string + path: + description: |- + Path is the mount path of the Vault KV backend endpoint, e.g: + "secret". The v2 KV secret engine version specific "/data" path suffix + for fetching secrets from Vault is optional and will be appended + if not present in specified path. 
+ type: string + readYourWrites: + description: |- + ReadYourWrites ensures isolated read-after-write semantics by + providing discovered cluster replication states in each request. + More information about eventual consistency in Vault can be found here + https://www.vaultproject.io/docs/enterprise/consistency + type: boolean + server: + description: 'Server is the connection address for the Vault server, e.g: "https://vault.example.com:8200".' + type: string + version: + default: v2 + description: |- + Version is the Vault KV secret engine version. This can be either "v1" or + "v2". Version defaults to "v2". + enum: + - v1 + - v2 + type: string + required: + - auth + - server + type: object + webhook: + description: Webhook configures this store to sync secrets using a generic templated webhook + properties: + body: + description: Body + type: string + caBundle: + description: |- + PEM encoded CA bundle used to validate webhook server certificate. Only used + if the Server URL is using HTTPS protocol. This parameter is ignored for + plain HTTP protocol connection. If not set the system root certificates + are used to validate the TLS connection. + format: byte + type: string + caProvider: + description: The provider for the CA bundle to use to validate webhook server certificate. + properties: + key: + description: The key the value inside of the provider type to use, only used with "Secret" type + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: The namespace the Provider type is in. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". 
+ enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + headers: + additionalProperties: + type: string + description: Headers + type: object + method: + description: Webhook Method + type: string + result: + description: Result formatting + properties: + jsonPath: + description: Json path of return value + type: string + type: object + secrets: + description: |- + Secrets to fill in templates + These secrets will be passed to the templating function as key value pairs under the given name + items: + properties: + name: + description: Name of this secret in templates + type: string + secretRef: + description: Secret ref to fill in credentials + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - name + - secretRef + type: object + type: array + timeout: + description: Timeout + type: string + url: + description: Webhook url to call + type: string + required: + - result + - url + type: object + yandexlockbox: + description: YandexLockbox configures this store to sync secrets using Yandex Lockbox provider + properties: + apiEndpoint: + description: Yandex.Cloud API endpoint (e.g. 'api.cloud.yandex.net:443') + type: string + auth: + description: Auth defines the information necessary to authenticate against Yandex Lockbox + properties: + authorizedKeySecretRef: + description: The authorized key used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. 
Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + caProvider: + description: The provider for the CA bundle to use to validate Yandex.Cloud server certificate. + properties: + certSecretRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - auth + type: object + type: object + retrySettings: + description: Used to configure http retries if failed + properties: + maxRetries: + format: int32 + type: integer + retryInterval: + type: string + type: object + required: + - provider + type: object + status: + description: SecretStoreStatus defines the observed state of the SecretStore. 
+ properties: + conditions: + items: + properties: + lastTransitionTime: + format: date-time + type: string + message: + type: string + reason: + type: string + status: + type: string + type: + type: string + required: + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: false + subresources: + status: {} + - additionalPrinterColumns: + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + - jsonPath: .status.conditions[?(@.type=="Ready")].reason + name: Status + type: string + - jsonPath: .status.capabilities + name: Capabilities + type: string + - jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + name: v1beta1 + schema: + openAPIV3Schema: + description: SecretStore represents a secure external location for storing secrets, which can be referenced as part of `storeRef` fields. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: SecretStoreSpec defines the desired state of SecretStore. + properties: + conditions: + description: Used to constrain a ClusterSecretStore to specific namespaces.
Relevant only to ClusterSecretStore + items: + description: |- + ClusterSecretStoreCondition describes a condition by which to choose namespaces to process ExternalSecrets in + for a ClusterSecretStore instance. + properties: + namespaceRegexes: + description: Choose namespaces by using regex matching + items: + type: string + type: array + namespaceSelector: + description: Choose namespace using a labelSelector + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. 
+ type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: Choose namespaces by name + items: + type: string + type: array + type: object + type: array + controller: + description: |- + Used to select the correct ESO controller (think: ingress.ingressClassName) + The ESO controller is instantiated with a specific controller name and filters ES based on this property + type: string + provider: + description: Used to configure the provider. Only one provider may be set + maxProperties: 1 + minProperties: 1 + properties: + akeyless: + description: Akeyless configures this store to sync secrets using Akeyless Vault provider + properties: + akeylessGWApiURL: + description: Akeyless GW API Url from which the secrets to be fetched from. + type: string + authSecretRef: + description: Auth configures how the operator authenticates with Akeyless. + properties: + kubernetesAuth: + description: |- + Kubernetes authenticates with Akeyless by passing the ServiceAccount + token stored in the named Secret resource. + properties: + accessID: + description: the Akeyless Kubernetes auth-method access-id + type: string + k8sConfName: + description: Kubernetes-auth configuration name in Akeyless-Gateway + type: string + secretRef: + description: |- + Optional secret field containing a Kubernetes ServiceAccount JWT used + for authenticating with Akeyless. If a name is specified without a key, + `token` is the default. If one is not specified, the one bound to + the controller will be used. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. 
cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + serviceAccountRef: + description: |- + Optional service account field containing the name of a kubernetes ServiceAccount. + If the service account is specified, the service account secret token JWT will be used + for authenticating with Akeyless. If the service account selector is not supplied, + the secretRef will be used instead. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + required: + - accessID + - k8sConfName + type: object + secretRef: + description: |- + Reference to a Secret that contains the details + to authenticate with Akeyless. + properties: + accessID: + description: The SecretAccessID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + accessType: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. 
+ properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + accessTypeParam: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + type: object + caBundle: + description: |- + PEM/base64 encoded CA bundle used to validate Akeyless Gateway certificate. Only used + if the AkeylessGWApiURL URL is using HTTPS protocol. If not set the system root certificates + are used to validate the TLS connection. + format: byte + type: string + caProvider: + description: The provider for the CA bundle to use to validate Akeyless Gateway certificate. + properties: + key: + description: The key where the CA certificate can be found in the Secret or ConfigMap. + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: |- + The namespace the Provider type is in. + Can only be defined when used in a ClusterSecretStore. 
+ type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + required: + - akeylessGWApiURL + - authSecretRef + type: object + alibaba: + description: Alibaba configures this store to sync secrets using Alibaba Cloud provider + properties: + auth: + description: AlibabaAuth contains a secretRef for credentials. + properties: + rrsa: + description: Authenticate against Alibaba using RRSA. + properties: + oidcProviderArn: + type: string + oidcTokenFilePath: + type: string + roleArn: + type: string + sessionName: + type: string + required: + - oidcProviderArn + - oidcTokenFilePath + - roleArn + - sessionName + type: object + secretRef: + description: AlibabaAuthSecretRef holds secret references for Alibaba credentials. + properties: + accessKeyIDSecretRef: + description: The AccessKeyID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + accessKeySecretSecretRef: + description: The AccessKeySecret is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. 
cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - accessKeyIDSecretRef + - accessKeySecretSecretRef + type: object + type: object + regionID: + description: Alibaba Region to be used for the provider + type: string + required: + - auth + - regionID + type: object + aws: + description: AWS configures this store to sync secrets using AWS Secret Manager provider + properties: + additionalRoles: + description: AdditionalRoles is a chained list of Role ARNs which the provider will sequentially assume before assuming the Role + items: + type: string + type: array + auth: + description: |- + Auth defines the information necessary to authenticate against AWS + if not set aws sdk will infer credentials from your environment + see: https://docs.aws.amazon.com/sdk-for-go/v1/developer-guide/configuring-sdk.html#specifying-credentials + properties: + jwt: + description: Authenticate against AWS using service account tokens. + properties: + serviceAccountRef: + description: A reference to a ServiceAccount resource. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + type: object + secretRef: + description: |- + AWSAuthSecretRef holds secret references for AWS credentials + both AccessKeyID and SecretAccessKey must be defined in order to properly authenticate. 
+ properties: + accessKeyIDSecretRef: + description: The AccessKeyID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + secretAccessKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + sessionTokenSecretRef: + description: |- + The SessionToken used for authentication + This must be defined if AccessKeyID and SecretAccessKey are temporary credentials + see: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_use-resources.html + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + type: object + type: object + externalID: + description: AWS External ID set on assumed IAM roles + type: string + prefix: + description: Prefix adds a prefix to all retrieved values. + type: string + region: + description: AWS Region to be used for the provider + type: string + role: + description: Role is a Role ARN which the provider will assume + type: string + secretsManager: + description: SecretsManager defines how the provider behaves when interacting with AWS SecretsManager + properties: + forceDeleteWithoutRecovery: + description: |- + Specifies whether to delete the secret without any recovery window. You + can't use both this parameter and RecoveryWindowInDays in the same call. + If you don't use either, then by default Secrets Manager uses a 30 day + recovery window. + see: https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_DeleteSecret.html#SecretsManager-DeleteSecret-request-ForceDeleteWithoutRecovery + type: boolean + recoveryWindowInDays: + description: |- + The number of days from 7 to 30 that Secrets Manager waits before + permanently deleting the secret. You can't use both this parameter and + ForceDeleteWithoutRecovery in the same call. If you don't use either, + then by default Secrets Manager uses a 30 day recovery window. + see: https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_DeleteSecret.html#SecretsManager-DeleteSecret-request-RecoveryWindowInDays + format: int64 + type: integer + type: object + service: + description: Service defines which service should be used to fetch the secrets + enum: + - SecretsManager + - ParameterStore + type: string + sessionTags: + description: AWS STS assume role session tags + items: + properties: + key: + type: string + value: + type: string + required: + - key + - value + type: object + type: array + transitiveTagKeys: + description: AWS STS assume role transitive session tags. 
Required when multiple rules are used with the provider + items: + type: string + type: array + required: + - region + - service + type: object + azurekv: + description: AzureKV configures this store to sync secrets using Azure Key Vault provider + properties: + authSecretRef: + description: Auth configures how the operator authenticates with Azure. Required for ServicePrincipal auth type. Optional for WorkloadIdentity. + properties: + clientCertificate: + description: The Azure ClientCertificate of the service principal used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + clientId: + description: The Azure clientId of the service principal or managed identity used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + clientSecret: + description: The Azure ClientSecret of the service principal used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required.
+ type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + tenantId: + description: The Azure tenantId of the managed identity used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + authType: + default: ServicePrincipal + description: |- + Auth type defines how to authenticate to the keyvault service. + Valid values are: + - "ServicePrincipal" (default): Using a service principal (tenantId, clientId, clientSecret) + - "ManagedIdentity": Using Managed Identity assigned to the pod (see aad-pod-identity) + enum: + - ServicePrincipal + - ManagedIdentity + - WorkloadIdentity + type: string + environmentType: + default: PublicCloud + description: |- + EnvironmentType specifies the Azure cloud environment endpoints to use for + connecting and authenticating with Azure. By default it points to the public cloud AAD endpoint. 
+ The following endpoints are available, also see here: https://github.com/Azure/go-autorest/blob/main/autorest/azure/environments.go#L152 + PublicCloud, USGovernmentCloud, ChinaCloud, GermanCloud + enum: + - PublicCloud + - USGovernmentCloud + - ChinaCloud + - GermanCloud + type: string + identityId: + description: If multiple Managed Identities are assigned to the pod, you can select the one to be used + type: string + serviceAccountRef: + description: |- + ServiceAccountRef specifies the service account + that should be used when authenticating with WorkloadIdentity. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + tenantId: + description: TenantID configures the Azure Tenant to send requests to. Required for ServicePrincipal auth type. Optional for WorkloadIdentity. + type: string + vaultUrl: + description: Vault Url from which the secrets are to be fetched. + type: string + required: + - vaultUrl + type: object + beyondtrust: + description: Beyondtrust configures this store to sync secrets using Password Safe provider. + properties: + auth: + description: Auth configures how the operator authenticates with Beyondtrust. + properties: + certificate: + description: Content of the certificate (cert.pem) for use when authenticating with an OAuth client Id using a Client Certificate. + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value.
+ properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + certificateKey: + description: Certificate private key (key.pem). For use when authenticating with an OAuth client Id + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + clientId: + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. 
Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + clientSecret: + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + required: + - clientId + - clientSecret + type: object + server: + description: Auth configures how API server works. + properties: + apiUrl: + type: string + clientTimeOutSeconds: + description: Timeout specifies a time limit for requests made by this Client. The timeout includes connection time, any redirects, and reading the response body. Defaults to 45 seconds. + type: integer + retrievalType: + description: The secret retrieval type. SECRET = Secrets Safe (credential, text, file). MANAGED_ACCOUNT = Password Safe account associated with a system. + type: string + separator: + description: A character that separates the folder names. 
+ type: string + verifyCA: + type: boolean + required: + - apiUrl + - verifyCA + type: object + required: + - auth + - server + type: object + bitwardensecretsmanager: + description: BitwardenSecretsManager configures this store to sync secrets using BitwardenSecretsManager provider + properties: + apiURL: + type: string + auth: + description: |- + Auth configures how secret-manager authenticates with a bitwarden machine account instance. + Make sure that the token being used has permissions on the given secret. + properties: + secretRef: + description: BitwardenSecretsManagerSecretRef contains the credential ref to the bitwarden instance. + properties: + credentials: + description: AccessToken used for the bitwarden instance. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - credentials + type: object + required: + - secretRef + type: object + bitwardenServerSDKURL: + type: string + caBundle: + description: |- + Base64 encoded certificate for the bitwarden server sdk. The sdk MUST run with HTTPS to make sure no MITM attack + can be performed. + type: string + caProvider: + description: 'see: https://external-secrets.io/latest/spec/#external-secrets.io/v1alpha1.CAProvider' + properties: + key: + description: The key where the CA certificate can be found in the Secret or ConfigMap. + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: |- + The namespace the Provider type is in. 
+ Can only be defined when used in a ClusterSecretStore. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + identityURL: + type: string + organizationID: + description: OrganizationID determines which organization this secret store manages. + type: string + projectID: + description: ProjectID determines which project this secret store manages. + type: string + required: + - auth + - organizationID + - projectID + type: object + chef: + description: Chef configures this store to sync secrets with chef server + properties: + auth: + description: Auth defines the information necessary to authenticate against chef Server + properties: + secretRef: + description: ChefAuthSecretRef holds secret references for chef server login credentials. + properties: + privateKeySecretRef: + description: SecretKey is the Signing Key in PEM format, used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - privateKeySecretRef + type: object + required: + - secretRef + type: object + serverUrl: + description: ServerURL is the chef server URL used to connect to. 
If using orgs you should include your org in the url and terminate the url with a "/" + type: string + username: + description: UserName should be the user ID on the chef server + type: string + required: + - auth + - serverUrl + - username + type: object + conjur: + description: Conjur configures this store to sync secrets using conjur provider + properties: + auth: + properties: + apikey: + properties: + account: + type: string + apiKeyRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + userRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - account + - apiKeyRef + - userRef + type: object + jwt: + properties: + account: + type: string + hostId: + description: |- + Optional HostID for JWT authentication. This may be used depending + on how the Conjur JWT authenticator policy is configured. 
+ type: string + secretRef: + description: |- + Optional SecretRef that refers to a key in a Secret resource containing JWT token to + authenticate with Conjur using the JWT authentication method. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + serviceAccountRef: + description: |- + Optional ServiceAccountRef specifies the Kubernetes service account for which to request + a token for with the `TokenRequest` API. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + serviceID: + description: The conjur authn jwt webservice id + type: string + required: + - account + - serviceID + type: object + type: object + caBundle: + type: string + caProvider: + description: |- + Used to provide custom certificate authority (CA) certificates + for a secret store. The CAProvider points to a Secret or ConfigMap resource + that contains a PEM-encoded certificate. + properties: + key: + description: The key where the CA certificate can be found in the Secret or ConfigMap. 
+ type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: |- + The namespace the Provider type is in. + Can only be defined when used in a ClusterSecretStore. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + url: + type: string + required: + - auth + - url + type: object + delinea: + description: |- + Delinea DevOps Secrets Vault + https://docs.delinea.com/online-help/products/devops-secrets-vault/current + properties: + clientId: + description: ClientID is the non-secret part of the credential. + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + clientSecret: + description: ClientSecret is the secret part of the credential. + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. 
+ type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + tenant: + description: Tenant is the chosen hostname / site name. + type: string + tld: + description: |- + TLD is based on the server location that was chosen during provisioning. + If unset, defaults to "com". + type: string + urlTemplate: + description: |- + URLTemplate + If unset, defaults to "https://%s.secretsvaultcloud.%s/v1/%s%s". + type: string + required: + - clientId + - clientSecret + - tenant + type: object + device42: + description: Device42 configures this store to sync secrets using the Device42 provider + properties: + auth: + description: Auth configures how secret-manager authenticates with a Device42 instance. + properties: + secretRef: + properties: + credentials: + description: Username / Password is used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - secretRef + type: object + host: + description: URL configures the Device42 instance URL. 
+ type: string + required: + - auth + - host + type: object + doppler: + description: Doppler configures this store to sync secrets using the Doppler provider + properties: + auth: + description: Auth configures how the Operator authenticates with the Doppler API + properties: + secretRef: + properties: + dopplerToken: + description: |- + The DopplerToken is used for authentication. + See https://docs.doppler.com/reference/api#authentication for auth token types. + The Key attribute defaults to dopplerToken if not specified. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + required: + - dopplerToken + type: object + required: + - secretRef + type: object + config: + description: Doppler config (required if not using a Service Token) + type: string + format: + description: Format enables the downloading of secrets as a file (string) + enum: + - json + - dotnet-json + - env + - yaml + - docker + type: string + nameTransformer: + description: Environment variable compatible name transforms that change secret names to a different format + enum: + - upper-camel + - camel + - lower-snake + - tf-var + - dotnet-env + - lower-kebab + type: string + project: + description: Doppler project (required if not using a Service Token) + type: string + required: + - auth + type: object + fake: + description: Fake configures a store with static key/value pairs + properties: + data: + items: + properties: + key: + type: string + value: + type: string + valueMap: + additionalProperties: + type: string + description: 'Deprecated: ValueMap is deprecated and is intended to be removed in the future, use the `value` field instead.' + type: object + version: + type: string + required: + - key + type: object + type: array + required: + - data + type: object + fortanix: + description: Fortanix configures this store to sync secrets using the Fortanix provider + properties: + apiKey: + description: APIKey is the API token to access SDKMS Applications. + properties: + secretRef: + description: SecretRef is a reference to a secret containing the SDKMS API Key. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + type: object + apiUrl: + description: APIURL is the URL of SDKMS API. Defaults to `sdkms.fortanix.com`. + type: string + type: object + gcpsm: + description: GCPSM configures this store to sync secrets using Google Cloud Platform Secret Manager provider + properties: + auth: + description: Auth defines the information necessary to authenticate against GCP + properties: + secretRef: + properties: + secretAccessKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + workloadIdentity: + properties: + clusterLocation: + type: string + clusterName: + type: string + clusterProjectID: + type: string + serviceAccountRef: + description: A reference to a ServiceAccount resource. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + required: + - name + type: object + required: + - clusterLocation + - clusterName + - serviceAccountRef + type: object + type: object + location: + description: Location optionally defines a location for a secret + type: string + projectID: + description: ProjectID project where secret is located + type: string + type: object + gitlab: + description: GitLab configures this store to sync secrets using GitLab Variables provider + properties: + auth: + description: Auth configures how secret-manager authenticates with a GitLab instance. + properties: + SecretRef: + properties: + accessToken: + description: AccessToken is used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - SecretRef + type: object + environment: + description: Environment environment_scope of gitlab CI/CD variables (Please see https://docs.gitlab.com/ee/ci/environments/#create-a-static-environment on how to create environments) + type: string + groupIDs: + description: GroupIDs specify, which gitlab groups to pull secrets from. Group secrets are read from left to right followed by the project variables. + items: + type: string + type: array + inheritFromGroups: + description: InheritFromGroups specifies whether parent groups should be discovered and checked for secrets. + type: boolean + projectID: + description: ProjectID specifies a project where secrets are located. + type: string + url: + description: URL configures the GitLab instance URL. Defaults to https://gitlab.com/. 
+ type: string + required: + - auth + type: object + ibm: + description: IBM configures this store to sync secrets using IBM Cloud provider + properties: + auth: + description: Auth configures how secret-manager authenticates with the IBM secrets manager. + maxProperties: 1 + minProperties: 1 + properties: + containerAuth: + description: IBM Container-based auth with IAM Trusted Profile. + properties: + iamEndpoint: + type: string + profile: + description: the IBM Trusted Profile + type: string + tokenLocation: + description: Location the token is mounted on the pod + type: string + required: + - profile + type: object + secretRef: + properties: + secretApiKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + type: object + serviceUrl: + description: ServiceURL is the Endpoint URL that is specific to the Secrets Manager service instance + type: string + required: + - auth + type: object + infisical: + description: Infisical configures this store to sync secrets using the Infisical provider + properties: + auth: + description: Auth configures how the Operator authenticates with the Infisical API + properties: + universalAuthCredentials: + properties: + clientId: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. 
Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + clientSecret: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - clientId + - clientSecret + type: object + type: object + hostAPI: + default: https://app.infisical.com/api + type: string + secretsScope: + properties: + environmentSlug: + type: string + projectSlug: + type: string + secretsPath: + default: / + type: string + required: + - environmentSlug + - projectSlug + type: object + required: + - auth + - secretsScope + type: object + keepersecurity: + description: KeeperSecurity configures this store to sync secrets using the KeeperSecurity provider + properties: + authRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. 
+ type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + folderID: + type: string + required: + - authRef + - folderID + type: object + kubernetes: + description: Kubernetes configures this store to sync secrets using a Kubernetes cluster provider + properties: + auth: + description: Auth configures how secret-manager authenticates with a Kubernetes instance. + maxProperties: 1 + minProperties: 1 + properties: + cert: + description: has both clientCert and clientKey as secretKeySelector + properties: + clientCert: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + clientKey: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + type: object + serviceAccount: + description: points to a service account that should be used for authentication + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + token: + description: use static token to authenticate with + properties: + bearerToken: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + type: object + authRef: + description: A reference to a secret that contains the auth information. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. 
Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + remoteNamespace: + default: default + description: Remote namespace to fetch the secrets from + type: string + server: + description: configures the Kubernetes server Address. + properties: + caBundle: + description: CABundle is a base64-encoded CA certificate + format: byte + type: string + caProvider: + description: 'see: https://external-secrets.io/v0.4.1/spec/#external-secrets.io/v1alpha1.CAProvider' + properties: + key: + description: The key where the CA certificate can be found in the Secret or ConfigMap. + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: |- + The namespace the Provider type is in. + Can only be defined when used in a ClusterSecretStore. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + url: + default: kubernetes.default + description: configures the Kubernetes server Address. + type: string + type: object + type: object + onboardbase: + description: Onboardbase configures this store to sync secrets using the Onboardbase provider + properties: + apiHost: + default: https://public.onboardbase.com/api/v1/ + description: APIHost use this to configure the host url for the API for selfhosted installation, default is https://public.onboardbase.com/api/v1/ + type: string + auth: + description: Auth configures how the Operator authenticates with the Onboardbase API + properties: + apiKeyRef: + description: |- + OnboardbaseAPIKey is the APIKey generated by an admin account. + It is used to recognize and authorize access to a project and environment within onboardbase + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. 
Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + passcodeRef: + description: OnboardbasePasscode is the passcode attached to the API Key + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - apiKeyRef + - passcodeRef + type: object + environment: + default: development + description: Environment is the name of an environment within a project to pull the secrets from + type: string + project: + default: development + description: Project is an onboardbase project that the secrets should be pulled from + type: string + required: + - apiHost + - auth + - environment + - project + type: object + onepassword: + description: OnePassword configures this store to sync secrets using the 1Password Cloud provider + properties: + auth: + description: Auth defines the information necessary to authenticate against OnePassword Connect Server + properties: + secretRef: + description: OnePasswordAuthSecretRef holds secret references for 1Password credentials. + properties: + connectTokenSecretRef: + description: The ConnectToken is used for authentication to a 1Password Connect Server.
+ properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - connectTokenSecretRef + type: object + required: + - secretRef + type: object + connectHost: + description: ConnectHost defines the OnePassword Connect Server to connect to + type: string + vaults: + additionalProperties: + type: integer + description: Vaults defines which OnePassword vaults to search in which order + type: object + required: + - auth + - connectHost + - vaults + type: object + oracle: + description: Oracle configures this store to sync secrets using Oracle Vault provider + properties: + auth: + description: |- + Auth configures how secret-manager authenticates with the Oracle Vault. + If empty, use the instance principal, otherwise the user credentials specified in Auth. + properties: + secretRef: + description: SecretRef to pass through sensitive information. + properties: + fingerprint: + description: Fingerprint is the fingerprint of the API private key. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + privatekey: + description: PrivateKey is the user's API Signing Key in PEM format, used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - fingerprint + - privatekey + type: object + tenancy: + description: Tenancy is the tenancy OCID where user is located. + type: string + user: + description: User is an access OCID specific to the account. + type: string + required: + - secretRef + - tenancy + - user + type: object + compartment: + description: |- + Compartment is the vault compartment OCID. + Required for PushSecret + type: string + encryptionKey: + description: |- + EncryptionKey is the OCID of the encryption key within the vault. + Required for PushSecret + type: string + principalType: + description: |- + The type of principal to use for authentication. If left blank, the Auth struct will + determine the principal type. This optional field must be specified if using + workload identity. + enum: + - "" + - UserPrincipal + - InstancePrincipal + - Workload + type: string + region: + description: Region is the region where vault is located. + type: string + serviceAccountRef: + description: |- + ServiceAccountRef specified the service account + that should be used when authenticating with WorkloadIdentity. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. 
IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + vault: + description: Vault is the vault's OCID of the specific vault where secret is located. + type: string + required: + - region + - vault + type: object + passbolt: + properties: + auth: + description: Auth defines the information necessary to authenticate against Passbolt Server + properties: + passwordSecretRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + privateKeySecretRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. 
cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - passwordSecretRef + - privateKeySecretRef + type: object + host: + description: Host defines the Passbolt Server to connect to + type: string + required: + - auth + - host + type: object + passworddepot: + description: Configures a store to sync secrets with a Password Depot instance. + properties: + auth: + description: Auth configures how secret-manager authenticates with a Password Depot instance. + properties: + secretRef: + properties: + credentials: + description: Username / Password is used for authentication. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - secretRef + type: object + database: + description: Database to use as source + type: string + host: + description: URL configures the Password Depot instance URL. + type: string + required: + - auth + - database + - host + type: object + previder: + description: Previder configures this store to sync secrets using the Previder provider + properties: + auth: + description: PreviderAuth contains a secretRef for credentials. + properties: + secretRef: + description: PreviderAuthSecretRef holds secret references for Previder Vault credentials. + properties: + accessToken: + description: The AccessToken is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. 
+ type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - accessToken + type: object + type: object + baseUri: + type: string + required: + - auth + type: object + pulumi: + description: Pulumi configures this store to sync secrets using the Pulumi provider + properties: + accessToken: + description: AccessToken is the access tokens to sign in to the Pulumi Cloud Console. + properties: + secretRef: + description: SecretRef is a reference to a secret containing the Pulumi API token. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + apiUrl: + default: https://api.pulumi.com/api/esc + description: APIURL is the URL of the Pulumi API. + type: string + environment: + description: |- + Environment are YAML documents composed of static key-value pairs, programmatic expressions, + dynamically retrieved values from supported providers including all major clouds, + and other Pulumi ESC environments. + To create a new environment, visit https://www.pulumi.com/docs/esc/environments/ for more information. + type: string + organization: + description: |- + Organization are a space to collaborate on shared projects and stacks. + To create a new organization, visit https://app.pulumi.com/ and click "New Organization". 
+ type: string + project: + description: Project is the name of the Pulumi ESC project the environment belongs to. + type: string + required: + - accessToken + - environment + - organization + - project + type: object + scaleway: + description: Scaleway + properties: + accessKey: + description: AccessKey is the non-secret part of the api key. + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + apiUrl: + description: APIURL is the url of the api to use. Defaults to https://api.scaleway.com + type: string + projectId: + description: 'ProjectID is the id of your project, which you can find in the console: https://console.scaleway.com/project/settings' + type: string + region: + description: 'Region where your secrets are located: https://developers.scaleway.com/en/quickstart/#region-and-zone' + type: string + secretKey: + description: SecretKey is the non-secret part of the api key. + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. 
+ type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + required: + - accessKey + - projectId + - region + - secretKey + type: object + secretserver: + description: |- + SecretServer configures this store to sync secrets using SecretServer provider + https://docs.delinea.com/online-help/secret-server/start.htm + properties: + password: + description: Password is the secret server account password. + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + serverURL: + description: |- + ServerURL + URL to your secret server installation + type: string + username: + description: Username is the secret server account username. + properties: + secretRef: + description: SecretRef references a key in a secret that will be used as value. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. 
+ type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + value: + description: Value can be specified directly to set a value without using a secret. + type: string + type: object + required: + - password + - serverURL + - username + type: object + senhasegura: + description: Senhasegura configures this store to sync secrets using senhasegura provider + properties: + auth: + description: Auth defines parameters to authenticate in senhasegura + properties: + clientId: + type: string + clientSecretSecretRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + required: + - clientId + - clientSecretSecretRef + type: object + ignoreSslCertificate: + default: false + description: IgnoreSslCertificate defines if SSL certificate must be ignored + type: boolean + module: + description: Module defines which senhasegura module should be used to get secrets + type: string + url: + description: URL of senhasegura + type: string + required: + - auth + - module + - url + type: object + vault: + description: Vault configures this store to sync secrets using Hashi provider + properties: + auth: + description: Auth configures how secret-manager authenticates with the Vault server. + properties: + appRole: + description: |- + AppRole authenticates with Vault using the App Role auth mechanism, + with the role and secret stored in a Kubernetes Secret resource. + properties: + path: + default: approle + description: |- + Path where the App Role authentication backend is mounted + in Vault, e.g: "approle" + type: string + roleId: + description: |- + RoleID configured in the App Role authentication backend when setting + up the authentication backend in Vault. + type: string + roleRef: + description: |- + Reference to a key in a Secret that contains the App Role ID used + to authenticate with Vault. + The `key` field must be specified and denotes which entry within the Secret + resource is used as the app role id. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + secretRef: + description: |- + Reference to a key in a Secret that contains the App Role secret used + to authenticate with Vault. + The `key` field must be specified and denotes which entry within the Secret + resource is used as the app role secret. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - path + - secretRef + type: object + cert: + description: |- + Cert authenticates with TLS Certificates by passing client certificate, private key and ca certificate + Cert authentication method + properties: + clientCert: + description: |- + ClientCert is a certificate to authenticate using the Cert Vault + authentication method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + secretRef: + description: |- + SecretRef to a key in a Secret resource containing client private key to + authenticate with Vault using the Cert authentication method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. 
Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + iam: + description: |- + Iam authenticates with vault by passing a special AWS request signed with AWS IAM credentials + AWS IAM authentication method + properties: + externalID: + description: AWS External ID set on assumed IAM roles + type: string + jwt: + description: Specify a service account with IRSA enabled + properties: + serviceAccountRef: + description: A reference to a ServiceAccount resource. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + type: object + path: + description: 'Path where the AWS auth method is enabled in Vault, e.g: "aws"' + type: string + region: + description: AWS region + type: string + role: + description: This is the AWS role to be assumed before talking to vault + type: string + secretRef: + description: Specify credentials in a Secret object + properties: + accessKeyIDSecretRef: + description: The AccessKeyID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. 
Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + secretAccessKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + sessionTokenSecretRef: + description: |- + The SessionToken used for authentication + This must be defined if AccessKeyID and SecretAccessKey are temporary credentials + see: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_use-resources.html + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + vaultAwsIamServerID: + description: 'X-Vault-AWS-IAM-Server-ID is an additional header used by Vault IAM auth method to mitigate against different types of replay attacks. 
More details here: https://developer.hashicorp.com/vault/docs/auth/aws' + type: string + vaultRole: + description: Vault Role. In vault, a role describes an identity with a set of permissions, groups, or policies you want to attach a user of the secrets engine + type: string + required: + - vaultRole + type: object + jwt: + description: |- + Jwt authenticates with Vault by passing role and JWT token using the + JWT/OIDC authentication method + properties: + kubernetesServiceAccountToken: + description: |- + Optional ServiceAccountToken specifies the Kubernetes service account for which to request + a token for with the `TokenRequest` API. + properties: + audiences: + description: |- + Optional audiences field that will be used to request a temporary Kubernetes service + account token for the service account referenced by `serviceAccountRef`. + Defaults to a single audience `vault` it not specified. + Deprecated: use serviceAccountRef.Audiences instead + items: + type: string + type: array + expirationSeconds: + description: |- + Optional expiration time in seconds that will be used to request a temporary + Kubernetes service account token for the service account referenced by + `serviceAccountRef`. + Deprecated: this will be removed in the future. + Defaults to 10 minutes. + format: int64 + type: integer + serviceAccountRef: + description: Service account field containing the name of a kubernetes ServiceAccount. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. 
cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + required: + - serviceAccountRef + type: object + path: + default: jwt + description: |- + Path where the JWT authentication backend is mounted + in Vault, e.g: "jwt" + type: string + role: + description: |- + Role is a JWT role to authenticate using the JWT/OIDC Vault + authentication method + type: string + secretRef: + description: |- + Optional SecretRef that refers to a key in a Secret resource containing JWT token to + authenticate with Vault using the JWT/OIDC authentication method. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - path + type: object + kubernetes: + description: |- + Kubernetes authenticates with Vault by passing the ServiceAccount + token stored in the named Secret resource to the Vault server. + properties: + mountPath: + default: kubernetes + description: |- + Path where the Kubernetes authentication backend is mounted in Vault, e.g: + "kubernetes" + type: string + role: + description: |- + A required field containing the Vault Role to assume. A Role binds a + Kubernetes ServiceAccount with a set of Vault policies. + type: string + secretRef: + description: |- + Optional secret field containing a Kubernetes ServiceAccount JWT used + for authenticating with Vault. If a name is specified without a key, + `token` is the default. If one is not specified, the one bound to + the controller will be used. 
+ properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + serviceAccountRef: + description: |- + Optional service account field containing the name of a kubernetes ServiceAccount. + If the service account is specified, the service account secret token JWT will be used + for authenticating with Vault. If the service account selector is not supplied, + the secretRef will be used instead. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + required: + - name + type: object + required: + - mountPath + - role + type: object + ldap: + description: |- + Ldap authenticates with Vault by passing username/password pair using + the LDAP authentication method + properties: + path: + default: ldap + description: |- + Path where the LDAP authentication backend is mounted + in Vault, e.g: "ldap" + type: string + secretRef: + description: |- + SecretRef to a key in a Secret resource containing password for the LDAP + user used to authenticate with Vault using the LDAP authentication + method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + username: + description: |- + Username is a LDAP user name used to authenticate using the LDAP Vault + authentication method + type: string + required: + - path + - username + type: object + namespace: + description: |- + Name of the vault namespace to authenticate to. This can be different than the namespace your secret is in. + Namespaces is a set of features within Vault Enterprise that allows + Vault environments to support Secure Multi-tenancy. e.g: "ns1". + More about namespaces can be found here https://www.vaultproject.io/docs/enterprise/namespaces + This will default to Vault.Namespace field if set, or empty otherwise + type: string + tokenSecretRef: + description: TokenSecretRef authenticates with Vault by presenting a token. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. 
Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + userPass: + description: UserPass authenticates with Vault by passing username/password pair + properties: + path: + default: user + description: |- + Path where the UserPassword authentication backend is mounted + in Vault, e.g: "user" + type: string + secretRef: + description: |- + SecretRef to a key in a Secret resource containing password for the + user used to authenticate with Vault using the UserPass authentication + method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + username: + description: |- + Username is a user name used to authenticate using the UserPass Vault + authentication method + type: string + required: + - path + - username + type: object + type: object + caBundle: + description: |- + PEM encoded CA bundle used to validate Vault server certificate. Only used + if the Server URL is using HTTPS protocol. This parameter is ignored for + plain HTTP protocol connection. If not set the system root certificates + are used to validate the TLS connection. + format: byte + type: string + caProvider: + description: The provider for the CA bundle to use to validate Vault server certificate. 
+ properties: + key: + description: The key where the CA certificate can be found in the Secret or ConfigMap. + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: |- + The namespace the Provider type is in. + Can only be defined when used in a ClusterSecretStore. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + forwardInconsistent: + description: |- + ForwardInconsistent tells Vault to forward read-after-write requests to the Vault + leader instead of simply retrying within a loop. This can increase performance if + the option is enabled serverside. + https://www.vaultproject.io/docs/configuration/replication#allow_forwarding_via_header + type: boolean + headers: + additionalProperties: + type: string + description: Headers to be added in Vault request + type: object + namespace: + description: |- + Name of the vault namespace. Namespaces is a set of features within Vault Enterprise that allows + Vault environments to support Secure Multi-tenancy. e.g: "ns1". + More about namespaces can be found here https://www.vaultproject.io/docs/enterprise/namespaces + type: string + path: + description: |- + Path is the mount path of the Vault KV backend endpoint, e.g: + "secret". The v2 KV secret engine version specific "/data" path suffix + for fetching secrets from Vault is optional and will be appended + if not present in specified path. + type: string + readYourWrites: + description: |- + ReadYourWrites ensures isolated read-after-write semantics by + providing discovered cluster replication states in each request. 
+ More information about eventual consistency in Vault can be found here + https://www.vaultproject.io/docs/enterprise/consistency + type: boolean + server: + description: 'Server is the connection address for the Vault server, e.g: "https://vault.example.com:8200".' + type: string + tls: + description: |- + The configuration used for client side related TLS communication, when the Vault server + requires mutual authentication. Only used if the Server URL is using HTTPS protocol. + This parameter is ignored for plain HTTP protocol connection. + It's worth noting this configuration is different from the "TLS certificates auth method", + which is available under the `auth.cert` section. + properties: + certSecretRef: + description: |- + CertSecretRef is a certificate added to the transport layer + when communicating with the Vault server. + If no key for the Secret is specified, external-secret will default to 'tls.crt'. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + keySecretRef: + description: |- + KeySecretRef to a key in a Secret resource containing client private key + added to the transport layer when communicating with the Vault server. + If no key for the Secret is specified, external-secret will default to 'tls.key'. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. 
+ type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + version: + default: v2 + description: |- + Version is the Vault KV secret engine version. This can be either "v1" or + "v2". Version defaults to "v2". + enum: + - v1 + - v2 + type: string + required: + - auth + - server + type: object + webhook: + description: Webhook configures this store to sync secrets using a generic templated webhook + properties: + body: + description: Body + type: string + caBundle: + description: |- + PEM encoded CA bundle used to validate webhook server certificate. Only used + if the Server URL is using HTTPS protocol. This parameter is ignored for + plain HTTP protocol connection. If not set the system root certificates + are used to validate the TLS connection. + format: byte + type: string + caProvider: + description: The provider for the CA bundle to use to validate webhook server certificate. + properties: + key: + description: The key the value inside of the provider type to use, only used with "Secret" type + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: The namespace the Provider type is in. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". 
+ enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + headers: + additionalProperties: + type: string + description: Headers + type: object + method: + description: Webhook Method + type: string + result: + description: Result formatting + properties: + jsonPath: + description: Json path of return value + type: string + type: object + secrets: + description: |- + Secrets to fill in templates + These secrets will be passed to the templating function as key value pairs under the given name + items: + properties: + name: + description: Name of this secret in templates + type: string + secretRef: + description: Secret ref to fill in credentials + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - name + - secretRef + type: object + type: array + timeout: + description: Timeout + type: string + url: + description: Webhook url to call + type: string + required: + - result + - url + type: object + yandexcertificatemanager: + description: YandexCertificateManager configures this store to sync secrets using Yandex Certificate Manager provider + properties: + apiEndpoint: + description: Yandex.Cloud API endpoint (e.g. 
'api.cloud.yandex.net:443') + type: string + auth: + description: Auth defines the information necessary to authenticate against Yandex Certificate Manager + properties: + authorizedKeySecretRef: + description: The authorized key used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + caProvider: + description: The provider for the CA bundle to use to validate Yandex.Cloud server certificate. + properties: + certSecretRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - auth + type: object + yandexlockbox: + description: YandexLockbox configures this store to sync secrets using Yandex Lockbox provider + properties: + apiEndpoint: + description: Yandex.Cloud API endpoint (e.g. 
'api.cloud.yandex.net:443') + type: string + auth: + description: Auth defines the information necessary to authenticate against Yandex Lockbox + properties: + authorizedKeySecretRef: + description: The authorized key used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + caProvider: + description: The provider for the CA bundle to use to validate Yandex.Cloud server certificate. + properties: + certSecretRef: + description: |- + A reference to a specific 'key' within a Secret resource, + In some instances, `key` is a required field. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + required: + - auth + type: object + type: object + refreshInterval: + description: Used to configure store refresh interval in seconds. Empty or 0 will default to the controller config. 
+ type: integer + retrySettings: + description: Used to configure http retries if failed + properties: + maxRetries: + format: int32 + type: integer + retryInterval: + type: string + type: object + required: + - provider + type: object + status: + description: SecretStoreStatus defines the observed state of the SecretStore. + properties: + capabilities: + description: SecretStoreCapabilities defines the possible operations a SecretStore can do. + type: string + conditions: + items: + properties: + lastTransitionTime: + format: date-time + type: string + message: + type: string + reason: + type: string + status: + type: string + type: + type: string + required: + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} +{{- if .Values.crds.conversion.enabled }} + conversion: + strategy: Webhook + webhook: + conversionReviewVersions: + - v1 + clientConfig: + service: + name: {{ include "external-secrets.fullname" . }}-webhook + namespace: {{ .Release.Namespace | quote }} + path: /convert +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/uuid.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/uuid.yaml new file mode 100644 index 00000000..d12aaf2f --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/uuid.yaml @@ -0,0 +1,72 @@ +{{- if .Values.installCRDs }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- with .Values.crds.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- if and .Values.crds.conversion.enabled .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "external-secrets.fullname" . 
}}-webhook + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.3 + labels: + external-secrets.io/component: controller + name: uuids.generators.external-secrets.io +spec: + group: generators.external-secrets.io + names: + categories: + - external-secrets + - external-secrets-generators + kind: UUID + listKind: UUIDList + plural: uuids + shortNames: + - uuids + singular: uuid + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: UUID generates a version 1 UUID (e56657e3-764f-11ef-a397-65231a88c216). + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: UUIDSpec controls the behavior of the uuid generator. + type: object + type: object + served: true + storage: true + subresources: + status: {} +{{- if .Values.crds.conversion.enabled }} + conversion: + strategy: Webhook + webhook: + conversionReviewVersions: + - v1 + clientConfig: + service: + name: {{ include "external-secrets.fullname" . 
}}-webhook + namespace: {{ .Release.Namespace | quote }} + path: /convert +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/vaultdynamicsecret.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/vaultdynamicsecret.yaml new file mode 100644 index 00000000..8459dbba --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/vaultdynamicsecret.yaml @@ -0,0 +1,708 @@ +{{- if .Values.installCRDs }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- with .Values.crds.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- if and .Values.crds.conversion.enabled .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "external-secrets.fullname" . }}-webhook + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.3 + labels: + external-secrets.io/component: controller + name: vaultdynamicsecrets.generators.external-secrets.io +spec: + group: generators.external-secrets.io + names: + categories: + - external-secrets + - external-secrets-generators + kind: VaultDynamicSecret + listKind: VaultDynamicSecretList + plural: vaultdynamicsecrets + shortNames: + - vaultdynamicsecret + singular: vaultdynamicsecret + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. 
+ Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + controller: + description: |- + Used to select the correct ESO controller (think: ingress.ingressClassName) + The ESO controller is instantiated with a specific controller name and filters VDS based on this property + type: string + method: + description: Vault API method to use (GET/POST/other) + type: string + parameters: + description: Parameters to pass to Vault write (for non-GET methods) + x-kubernetes-preserve-unknown-fields: true + path: + description: Vault path to obtain the dynamic secret from + type: string + provider: + description: Vault provider common spec + properties: + auth: + description: Auth configures how secret-manager authenticates with the Vault server. + properties: + appRole: + description: |- + AppRole authenticates with Vault using the App Role auth mechanism, + with the role and secret stored in a Kubernetes Secret resource. + properties: + path: + default: approle + description: |- + Path where the App Role authentication backend is mounted + in Vault, e.g: "approle" + type: string + roleId: + description: |- + RoleID configured in the App Role authentication backend when setting + up the authentication backend in Vault. + type: string + roleRef: + description: |- + Reference to a key in a Secret that contains the App Role ID used + to authenticate with Vault. + The `key` field must be specified and denotes which entry within the Secret + resource is used as the app role id. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. 
+ type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + secretRef: + description: |- + Reference to a key in a Secret that contains the App Role secret used + to authenticate with Vault. + The `key` field must be specified and denotes which entry within the Secret + resource is used as the app role secret. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - path + - secretRef + type: object + cert: + description: |- + Cert authenticates with TLS Certificates by passing client certificate, private key and ca certificate + Cert authentication method + properties: + clientCert: + description: |- + ClientCert is a certificate to authenticate using the Cert Vault + authentication method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + type: object + secretRef: + description: |- + SecretRef to a key in a Secret resource containing client private key to + authenticate with Vault using the Cert authentication method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + iam: + description: |- + Iam authenticates with vault by passing a special AWS request signed with AWS IAM credentials + AWS IAM authentication method + properties: + externalID: + description: AWS External ID set on assumed IAM roles + type: string + jwt: + description: Specify a service account with IRSA enabled + properties: + serviceAccountRef: + description: A reference to a ServiceAccount resource. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. 
+ type: string + required: + - name + type: object + type: object + path: + description: 'Path where the AWS auth method is enabled in Vault, e.g: "aws"' + type: string + region: + description: AWS region + type: string + role: + description: This is the AWS role to be assumed before talking to vault + type: string + secretRef: + description: Specify credentials in a Secret object + properties: + accessKeyIDSecretRef: + description: The AccessKeyID is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + secretAccessKeySecretRef: + description: The SecretAccessKey is used for authentication + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + sessionTokenSecretRef: + description: |- + The SessionToken used for authentication + This must be defined if AccessKeyID and SecretAccessKey are temporary credentials + see: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_use-resources.html + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. 
Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + vaultAwsIamServerID: + description: 'X-Vault-AWS-IAM-Server-ID is an additional header used by Vault IAM auth method to mitigate against different types of replay attacks. More details here: https://developer.hashicorp.com/vault/docs/auth/aws' + type: string + vaultRole: + description: Vault Role. In vault, a role describes an identity with a set of permissions, groups, or policies you want to attach a user of the secrets engine + type: string + required: + - vaultRole + type: object + jwt: + description: |- + Jwt authenticates with Vault by passing role and JWT token using the + JWT/OIDC authentication method + properties: + kubernetesServiceAccountToken: + description: |- + Optional ServiceAccountToken specifies the Kubernetes service account for which to request + a token for with the `TokenRequest` API. + properties: + audiences: + description: |- + Optional audiences field that will be used to request a temporary Kubernetes service + account token for the service account referenced by `serviceAccountRef`. + Defaults to a single audience `vault` it not specified. + Deprecated: use serviceAccountRef.Audiences instead + items: + type: string + type: array + expirationSeconds: + description: |- + Optional expiration time in seconds that will be used to request a temporary + Kubernetes service account token for the service account referenced by + `serviceAccountRef`. + Deprecated: this will be removed in the future. + Defaults to 10 minutes. 
+ format: int64 + type: integer + serviceAccountRef: + description: Service account field containing the name of a kubernetes ServiceAccount. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + required: + - serviceAccountRef + type: object + path: + default: jwt + description: |- + Path where the JWT authentication backend is mounted + in Vault, e.g: "jwt" + type: string + role: + description: |- + Role is a JWT role to authenticate using the JWT/OIDC Vault + authentication method + type: string + secretRef: + description: |- + Optional SecretRef that refers to a key in a Secret resource containing JWT token to + authenticate with Vault using the JWT/OIDC authentication method. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + required: + - path + type: object + kubernetes: + description: |- + Kubernetes authenticates with Vault by passing the ServiceAccount + token stored in the named Secret resource to the Vault server. 
+ properties: + mountPath: + default: kubernetes + description: |- + Path where the Kubernetes authentication backend is mounted in Vault, e.g: + "kubernetes" + type: string + role: + description: |- + A required field containing the Vault Role to assume. A Role binds a + Kubernetes ServiceAccount with a set of Vault policies. + type: string + secretRef: + description: |- + Optional secret field containing a Kubernetes ServiceAccount JWT used + for authenticating with Vault. If a name is specified without a key, + `token` is the default. If one is not specified, the one bound to + the controller will be used. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + serviceAccountRef: + description: |- + Optional service account field containing the name of a kubernetes ServiceAccount. + If the service account is specified, the service account secret token JWT will be used + for authenticating with Vault. If the service account selector is not supplied, + the secretRef will be used instead. + properties: + audiences: + description: |- + Audience specifies the `aud` claim for the service account token + If the service account uses a well-known annotation for e.g. IRSA or GCP Workload Identity + then this audiences will be appended to the list + items: + type: string + type: array + name: + description: The name of the ServiceAccount resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. 
cluster-scoped defaults + to the namespace of the referent. + type: string + required: + - name + type: object + required: + - mountPath + - role + type: object + ldap: + description: |- + Ldap authenticates with Vault by passing username/password pair using + the LDAP authentication method + properties: + path: + default: ldap + description: |- + Path where the LDAP authentication backend is mounted + in Vault, e.g: "ldap" + type: string + secretRef: + description: |- + SecretRef to a key in a Secret resource containing password for the LDAP + user used to authenticate with Vault using the LDAP authentication + method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + username: + description: |- + Username is a LDAP user name used to authenticate using the LDAP Vault + authentication method + type: string + required: + - path + - username + type: object + namespace: + description: |- + Name of the vault namespace to authenticate to. This can be different than the namespace your secret is in. + Namespaces is a set of features within Vault Enterprise that allows + Vault environments to support Secure Multi-tenancy. e.g: "ns1". + More about namespaces can be found here https://www.vaultproject.io/docs/enterprise/namespaces + This will default to Vault.Namespace field if set, or empty otherwise + type: string + tokenSecretRef: + description: TokenSecretRef authenticates with Vault by presenting a token. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. 
Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + userPass: + description: UserPass authenticates with Vault by passing username/password pair + properties: + path: + default: user + description: |- + Path where the UserPassword authentication backend is mounted + in Vault, e.g: "user" + type: string + secretRef: + description: |- + SecretRef to a key in a Secret resource containing password for the + user used to authenticate with Vault using the UserPass authentication + method + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + username: + description: |- + Username is a user name used to authenticate using the UserPass Vault + authentication method + type: string + required: + - path + - username + type: object + type: object + caBundle: + description: |- + PEM encoded CA bundle used to validate Vault server certificate. Only used + if the Server URL is using HTTPS protocol. This parameter is ignored for + plain HTTP protocol connection. If not set the system root certificates + are used to validate the TLS connection. + format: byte + type: string + caProvider: + description: The provider for the CA bundle to use to validate Vault server certificate. 
+ properties: + key: + description: The key where the CA certificate can be found in the Secret or ConfigMap. + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: |- + The namespace the Provider type is in. + Can only be defined when used in a ClusterSecretStore. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + forwardInconsistent: + description: |- + ForwardInconsistent tells Vault to forward read-after-write requests to the Vault + leader instead of simply retrying within a loop. This can increase performance if + the option is enabled serverside. + https://www.vaultproject.io/docs/configuration/replication#allow_forwarding_via_header + type: boolean + headers: + additionalProperties: + type: string + description: Headers to be added in Vault request + type: object + namespace: + description: |- + Name of the vault namespace. Namespaces is a set of features within Vault Enterprise that allows + Vault environments to support Secure Multi-tenancy. e.g: "ns1". + More about namespaces can be found here https://www.vaultproject.io/docs/enterprise/namespaces + type: string + path: + description: |- + Path is the mount path of the Vault KV backend endpoint, e.g: + "secret". The v2 KV secret engine version specific "/data" path suffix + for fetching secrets from Vault is optional and will be appended + if not present in specified path. + type: string + readYourWrites: + description: |- + ReadYourWrites ensures isolated read-after-write semantics by + providing discovered cluster replication states in each request. 
+ More information about eventual consistency in Vault can be found here + https://www.vaultproject.io/docs/enterprise/consistency + type: boolean + server: + description: 'Server is the connection address for the Vault server, e.g: "https://vault.example.com:8200".' + type: string + tls: + description: |- + The configuration used for client side related TLS communication, when the Vault server + requires mutual authentication. Only used if the Server URL is using HTTPS protocol. + This parameter is ignored for plain HTTP protocol connection. + It's worth noting this configuration is different from the "TLS certificates auth method", + which is available under the `auth.cert` section. + properties: + certSecretRef: + description: |- + CertSecretRef is a certificate added to the transport layer + when communicating with the Vault server. + If no key for the Secret is specified, external-secret will default to 'tls.crt'. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. + type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + keySecretRef: + description: |- + KeySecretRef to a key in a Secret resource containing client private key + added to the transport layer when communicating with the Vault server. + If no key for the Secret is specified, external-secret will default to 'tls.key'. + properties: + key: + description: |- + The key of the entry in the Secret resource's `data` field to be used. Some instances of this field may be + defaulted, in others it may be required. + type: string + name: + description: The name of the Secret resource being referred to. 
+ type: string + namespace: + description: |- + Namespace of the resource being referred to. Ignored if referent is not cluster-scoped. cluster-scoped defaults + to the namespace of the referent. + type: string + type: object + type: object + version: + default: v2 + description: |- + Version is the Vault KV secret engine version. This can be either "v1" or + "v2". Version defaults to "v2". + enum: + - v1 + - v2 + type: string + required: + - auth + - server + type: object + resultType: + default: Data + description: |- + Result type defines which data is returned from the generator. + By default it is the "data" section of the Vault API response. + When using e.g. /auth/token/create the "data" section is empty but + the "auth" section contains the generated token. + Please refer to the vault docs regarding the result data structure. + enum: + - Data + - Auth + type: string + required: + - path + - provider + type: object + type: object + served: true + storage: true + subresources: + status: {} +{{- if .Values.crds.conversion.enabled }} + conversion: + strategy: Webhook + webhook: + conversionReviewVersions: + - v1 + clientConfig: + service: + name: {{ include "external-secrets.fullname" . }}-webhook + namespace: {{ .Release.Namespace | quote }} + path: /convert +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/webhook.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/webhook.yaml new file mode 100644 index 00000000..9e0c42cc --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/crds/webhook.yaml @@ -0,0 +1,158 @@ +{{- if .Values.installCRDs }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- with .Values.crds.annotations }} + {{- toYaml . 
| nindent 4}} + {{- end }} + {{- if and .Values.crds.conversion.enabled .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ include "external-secrets.fullname" . }}-webhook + {{- end }} + controller-gen.kubebuilder.io/version: v0.16.3 + labels: + external-secrets.io/component: controller + name: webhooks.generators.external-secrets.io +spec: + group: generators.external-secrets.io + names: + categories: + - external-secrets + - external-secrets-generators + kind: Webhook + listKind: WebhookList + plural: webhooks + shortNames: + - webhookl + singular: webhook + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + Webhook connects to a third party API server to handle the secrets generation + configuration parameters in spec. + You can specify the server, the token, and additional body parameters. + See documentation for the full API specification for requests and responses. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: WebhookSpec controls the behavior of the external generator. Any body parameters should be passed to the server through the parameters field. 
+ properties: + body: + description: Body + type: string + caBundle: + description: |- + PEM encoded CA bundle used to validate webhook server certificate. Only used + if the Server URL is using HTTPS protocol. This parameter is ignored for + plain HTTP protocol connection. If not set the system root certificates + are used to validate the TLS connection. + format: byte + type: string + caProvider: + description: The provider for the CA bundle to use to validate webhook server certificate. + properties: + key: + description: The key the value inside of the provider type to use, only used with "Secret" type + type: string + name: + description: The name of the object located at the provider type. + type: string + namespace: + description: The namespace the Provider type is in. + type: string + type: + description: The type of provider to use such as "Secret", or "ConfigMap". + enum: + - Secret + - ConfigMap + type: string + required: + - name + - type + type: object + headers: + additionalProperties: + type: string + description: Headers + type: object + method: + description: Webhook Method + type: string + result: + description: Result formatting + properties: + jsonPath: + description: Json path of return value + type: string + type: object + secrets: + description: |- + Secrets to fill in templates + These secrets will be passed to the templating function as key value pairs under the given name + items: + properties: + name: + description: Name of this secret in templates + type: string + secretRef: + description: Secret ref to fill in credentials + properties: + key: + description: The key where the token is found. + type: string + name: + description: The name of the Secret resource being referred to. 
+ type: string + type: object + required: + - name + - secretRef + type: object + type: array + timeout: + description: Timeout + type: string + url: + description: Webhook url to call + type: string + required: + - result + - url + type: object + type: object + served: true + storage: true + subresources: + status: {} +{{- if .Values.crds.conversion.enabled }} + conversion: + strategy: Webhook + webhook: + conversionReviewVersions: + - v1 + clientConfig: + service: + name: {{ include "external-secrets.fullname" . }}-webhook + namespace: {{ .Release.Namespace | quote }} + path: /convert +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/deployment.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/deployment.yaml new file mode 100644 index 00000000..75a908e6 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/deployment.yaml @@ -0,0 +1,146 @@ +{{- if .Values.createOperator }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "external-secrets.fullname" . }} + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets.labels" . | nindent 4 }} + {{- with .Values.deploymentAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + replicas: {{ .Values.replicaCount }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + selector: + matchLabels: + {{- include "external-secrets.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "external-secrets.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "external-secrets.serviceAccountName" . 
}} + automountServiceAccountToken: {{ .Values.serviceAccount.automount }} + {{- with .Values.podSecurityContext }} + {{- if and (.enabled) (gt (keys . | len) 1) }} + securityContext: + {{- include "external-secrets.renderSecurityContext" (dict "securityContext" . "context" $) | nindent 8 }} + {{- end }} + {{- end }} + hostNetwork: {{ .Values.hostNetwork }} + containers: + - name: {{ .Chart.Name }} + {{- with .Values.securityContext }} + {{- if and (.enabled) (gt (keys . | len) 1) }} + securityContext: + {{- include "external-secrets.renderSecurityContext" (dict "securityContext" . "context" $) | nindent 12 }} + {{- end }} + {{- end }} + image: {{ include "external-secrets.image" (dict "chartAppVersion" .Chart.AppVersion "image" .Values.image) | trim }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- if or (.Values.leaderElect) (.Values.scopedNamespace) (.Values.processClusterStore) (.Values.processClusterExternalSecret) (.Values.concurrent) (.Values.extraArgs) }} + args: + {{- if .Values.leaderElect }} + - --enable-leader-election=true + {{- end }} + {{- if .Values.scopedNamespace }} + - --namespace={{ .Values.scopedNamespace }} + {{- end }} + {{- if and .Values.scopedNamespace .Values.scopedRBAC }} + - --enable-cluster-store-reconciler=false + - --enable-cluster-external-secret-reconciler=false + {{- else }} + {{- if not .Values.processClusterStore }} + - --enable-cluster-store-reconciler=false + {{- end }} + {{- if not .Values.processClusterExternalSecret }} + - --enable-cluster-external-secret-reconciler=false + {{- end }} + {{- end }} + {{- if not .Values.processPushSecret }} + - --enable-push-secret-reconciler=false + {{- end }} + {{- if .Values.controllerClass }} + - --controller-class={{ .Values.controllerClass }} + {{- end }} + {{- if .Values.extendedMetricLabels }} + - --enable-extended-metric-labels={{ .Values.extendedMetricLabels }} + {{- end }} + {{- if .Values.concurrent }} + - --concurrent={{ .Values.concurrent }} + {{- end }} + {{- range 
$key, $value := .Values.extraArgs }} + {{- if $value }} + - --{{ $key }}={{ $value }} + {{- else }} + - --{{ $key }} + {{- end }} + {{- end }} + {{- end }} + - --metrics-addr=:{{ .Values.metrics.listen.port }} + - --loglevel={{ .Values.log.level }} + - --zap-time-encoding={{ .Values.log.timeEncoding }} + ports: + - containerPort: {{ .Values.metrics.listen.port }} + protocol: TCP + name: metrics + {{- with .Values.extraEnv }} + env: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if .Values.extraVolumeMounts }} + volumeMounts: + {{- toYaml .Values.extraVolumeMounts | nindent 12 }} + {{- end }} + {{- if .Values.extraContainers }} + {{ toYaml .Values.extraContainers | nindent 8}} + {{- end }} + dnsPolicy: {{ .Values.dnsPolicy }} + {{- if .Values.dnsConfig }} + dnsConfig: + {{- toYaml .Values.dnsConfig | nindent 8 }} + {{- end }} + {{- if .Values.extraVolumes }} + volumes: + {{- toYaml .Values.extraVolumes | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector | default .Values.global.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity | default .Values.global.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations | default .Values.global.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.topologySpreadConstraints | default .Values.global.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + {{- if .Values.podSpecExtra }} + {{- toYaml .Values.podSpecExtra | nindent 6 }} + {{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/extra-manifests.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/extra-manifests.yaml new file mode 100644 index 00000000..1dfe8f48 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/extra-manifests.yaml @@ -0,0 +1,4 @@ +{{- range .Values.extraObjects }} +--- +{{ include "external-secrets.render" (dict "value" . "context" $) }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/poddisruptionbudget.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/poddisruptionbudget.yaml new file mode 100644 index 00000000..7b75ca3f --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/poddisruptionbudget.yaml @@ -0,0 +1,19 @@ +{{- if .Values.podDisruptionBudget.enabled }} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ include "external-secrets.fullname" . }}-pdb + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets.labels" . | nindent 4 }} +spec: + {{- if .Values.podDisruptionBudget.minAvailable }} + minAvailable: {{ .Values.podDisruptionBudget.minAvailable }} + {{- end }} + {{- if .Values.podDisruptionBudget.maxUnavailable }} + maxUnavailable: {{ .Values.podDisruptionBudget.maxUnavailable }} + {{- end }} + selector: + matchLabels: + {{- include "external-secrets.selectorLabels" . 
| nindent 6 }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/rbac.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/rbac.yaml new file mode 100644 index 00000000..4f4ab48f --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/rbac.yaml @@ -0,0 +1,301 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +{{- if and .Values.scopedNamespace .Values.scopedRBAC }} +kind: Role +{{- else }} +kind: ClusterRole +{{- end }} +metadata: + name: {{ include "external-secrets.fullname" . }}-controller + {{- if and .Values.scopedNamespace .Values.scopedRBAC }} + namespace: {{ .Values.scopedNamespace | quote }} + {{- end }} + labels: + {{- include "external-secrets.labels" . | nindent 4 }} +rules: + - apiGroups: + - "external-secrets.io" + resources: + - "secretstores" + - "clustersecretstores" + - "externalsecrets" + - "clusterexternalsecrets" + - "pushsecrets" + verbs: + - "get" + - "list" + - "watch" + - apiGroups: + - "external-secrets.io" + resources: + - "externalsecrets" + - "externalsecrets/status" + - "externalsecrets/finalizers" + - "secretstores" + - "secretstores/status" + - "secretstores/finalizers" + - "clustersecretstores" + - "clustersecretstores/status" + - "clustersecretstores/finalizers" + - "clusterexternalsecrets" + - "clusterexternalsecrets/status" + - "clusterexternalsecrets/finalizers" + - "pushsecrets" + - "pushsecrets/status" + - "pushsecrets/finalizers" + verbs: + - "get" + - "update" + - "patch" + - apiGroups: + - "generators.external-secrets.io" + resources: + - "acraccesstokens" + - "ecrauthorizationtokens" + - "fakes" + - "gcraccesstokens" + - "githubaccesstokens" + - "passwords" + - "vaultdynamicsecrets" + - "webhooks" + verbs: + - "get" + - "list" + - "watch" + - apiGroups: + - "" + resources: + - "serviceaccounts" + - "namespaces" + verbs: + - "get" + - "list" + - "watch" + - apiGroups: + - "" + 
resources: + - "configmaps" + verbs: + - "get" + - "list" + - "watch" + - apiGroups: + - "" + resources: + - "secrets" + verbs: + - "get" + - "list" + - "watch" + - "create" + - "update" + - "delete" + - "patch" + - apiGroups: + - "" + resources: + - "serviceaccounts/token" + verbs: + - "create" + - apiGroups: + - "" + resources: + - "events" + verbs: + - "create" + - "patch" + - apiGroups: + - "external-secrets.io" + resources: + - "externalsecrets" + verbs: + - "create" + - "update" + - "delete" +--- +apiVersion: rbac.authorization.k8s.io/v1 +{{- if and .Values.scopedNamespace .Values.scopedRBAC }} +kind: Role +{{- else }} +kind: ClusterRole +{{- end }} +metadata: + name: {{ include "external-secrets.fullname" . }}-view + {{- if and .Values.scopedNamespace .Values.scopedRBAC }} + namespace: {{ .Values.scopedNamespace | quote }} + {{- end }} + labels: + {{- include "external-secrets.labels" . | nindent 4 }} + rbac.authorization.k8s.io/aggregate-to-view: "true" + rbac.authorization.k8s.io/aggregate-to-edit: "true" + rbac.authorization.k8s.io/aggregate-to-admin: "true" +rules: + - apiGroups: + - "external-secrets.io" + resources: + - "externalsecrets" + - "secretstores" + - "clustersecretstores" + - "pushsecrets" + verbs: + - "get" + - "watch" + - "list" + - apiGroups: + - "generators.external-secrets.io" + resources: + - "acraccesstokens" + - "ecrauthorizationtokens" + - "fakes" + - "gcraccesstokens" + - "githubaccesstokens" + - "passwords" + - "vaultdynamicsecrets" + - "webhooks" + verbs: + - "get" + - "watch" + - "list" +--- +apiVersion: rbac.authorization.k8s.io/v1 +{{- if and .Values.scopedNamespace .Values.scopedRBAC }} +kind: Role +{{- else }} +kind: ClusterRole +{{- end }} +metadata: + name: {{ include "external-secrets.fullname" . }}-edit + {{- if and .Values.scopedNamespace .Values.scopedRBAC }} + namespace: {{ .Values.scopedNamespace | quote }} + {{- end }} + labels: + {{- include "external-secrets.labels" . 
| nindent 4 }} + rbac.authorization.k8s.io/aggregate-to-edit: "true" + rbac.authorization.k8s.io/aggregate-to-admin: "true" +rules: + - apiGroups: + - "external-secrets.io" + resources: + - "externalsecrets" + - "secretstores" + - "clustersecretstores" + - "pushsecrets" + verbs: + - "create" + - "delete" + - "deletecollection" + - "patch" + - "update" + - apiGroups: + - "generators.external-secrets.io" + resources: + - "acraccesstokens" + - "ecrauthorizationtokens" + - "fakes" + - "gcraccesstokens" + - "githubaccesstokens" + - "passwords" + - "vaultdynamicsecrets" + - "webhooks" + verbs: + - "create" + - "delete" + - "deletecollection" + - "patch" + - "update" +--- +apiVersion: rbac.authorization.k8s.io/v1 +{{- if and .Values.scopedNamespace .Values.scopedRBAC }} +kind: RoleBinding +{{- else }} +kind: ClusterRoleBinding +{{- end }} +metadata: + name: {{ include "external-secrets.fullname" . }}-controller + {{- if and .Values.scopedNamespace .Values.scopedRBAC }} + namespace: {{ .Values.scopedNamespace | quote }} + {{- end }} + labels: + {{- include "external-secrets.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + {{- if and .Values.scopedNamespace .Values.scopedRBAC }} + kind: Role + {{- else }} + kind: ClusterRole + {{- end }} + name: {{ include "external-secrets.fullname" . }}-controller +subjects: + - name: {{ include "external-secrets.serviceAccountName" . }} + namespace: {{ template "external-secrets.namespace" . }} + kind: ServiceAccount +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "external-secrets.fullname" . }}-leaderelection + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets.labels" . 
| nindent 4 }} +rules: + - apiGroups: + - "" + resources: + - "configmaps" + resourceNames: + - "external-secrets-controller" + verbs: + - "get" + - "update" + - "patch" + - apiGroups: + - "" + resources: + - "configmaps" + verbs: + - "create" + - apiGroups: + - "coordination.k8s.io" + resources: + - "leases" + verbs: + - "get" + - "create" + - "update" + - "patch" +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "external-secrets.fullname" . }}-leaderelection + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "external-secrets.fullname" . }}-leaderelection +subjects: + - kind: ServiceAccount + name: {{ include "external-secrets.serviceAccountName" . }} + namespace: {{ template "external-secrets.namespace" . }} +{{- if .Values.rbac.servicebindings.create }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "external-secrets.fullname" . }}-servicebindings + labels: + servicebinding.io/controller: "true" + {{- include "external-secrets.labels" . | nindent 4 }} +rules: + - apiGroups: + - "external-secrets.io" + resources: + - "externalsecrets" + verbs: + - "get" + - "list" + - "watch" +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/service.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/service.yaml new file mode 100644 index 00000000..94859a34 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/service.yaml @@ -0,0 +1,28 @@ +{{- if .Values.metrics.service.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "external-secrets.fullname" . }}-metrics + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets.labels" . 
| nindent 4 }} + {{- with .Values.metrics.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: ClusterIP + {{- if .Values.service.ipFamilyPolicy }} + ipFamilyPolicy: {{ .Values.service.ipFamilyPolicy }} + {{- end }} + {{- if .Values.service.ipFamilies }} + ipFamilies: {{ .Values.service.ipFamilies | toYaml | nindent 2 }} + {{- end }} + ports: + - port: {{ .Values.metrics.service.port }} + protocol: TCP + targetPort: metrics + name: metrics + selector: + {{- include "external-secrets.selectorLabels" . | nindent 4 }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/serviceaccount.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/serviceaccount.yaml new file mode 100644 index 00000000..ceaa98e1 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/serviceaccount.yaml @@ -0,0 +1,16 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "external-secrets.serviceAccountName" . }} + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.extraLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/servicemonitor.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/servicemonitor.yaml new file mode 100644 index 00000000..31451791 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/servicemonitor.yaml @@ -0,0 +1,164 @@ +{{- if and ( .Capabilities.APIVersions.Has "monitoring.coreos.com/v1" ) .Values.serviceMonitor.enabled -}} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "external-secrets.fullname" . }}-metrics + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets.labels" . | nindent 4 }} +spec: + type: ClusterIP + {{- if .Values.service.ipFamilyPolicy }} + ipFamilyPolicy: {{ .Values.service.ipFamilyPolicy }} + {{- end }} + {{- if .Values.service.ipFamilies }} + ipFamilies: {{ .Values.service.ipFamilies | toYaml | nindent 2 }} + {{- end }} + ports: + - port: {{ .Values.metrics.service.port }} + protocol: TCP + name: metrics + selector: + {{- include "external-secrets.selectorLabels" . | nindent 4 }} +--- +apiVersion: "monitoring.coreos.com/v1" +kind: ServiceMonitor +metadata: + labels: + {{- include "external-secrets.labels" . | nindent 4 }} +{{- if .Values.serviceMonitor.additionalLabels }} +{{ toYaml .Values.serviceMonitor.additionalLabels | indent 4 }} +{{- end }} + name: {{ include "external-secrets.fullname" . }}-metrics + namespace: {{ .Values.serviceMonitor.namespace | default (include "external-secrets.namespace" .) | quote }} +spec: + selector: + matchLabels: + {{- include "external-secrets.selectorLabels" . | nindent 6 }} + namespaceSelector: + matchNames: + - {{ template "external-secrets.namespace" . 
}} + endpoints: + - port: metrics + interval: {{ .Values.serviceMonitor.interval }} + scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }} + honorLabels: {{ .Values.serviceMonitor.honorLabels }} + {{- with .Values.serviceMonitor.metricRelabelings }} + metricRelabelings: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.serviceMonitor.relabelings }} + relabelings: + {{- toYaml . | nindent 6 }} + {{- end }} +--- +{{- if .Values.webhook.create }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "external-secrets.fullname" . }}-webhook-metrics + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets-webhook-metrics.labels" . | nindent 4 }} +spec: + type: ClusterIP + {{- if .Values.service.ipFamilyPolicy }} + ipFamilyPolicy: {{ .Values.service.ipFamilyPolicy }} + {{- end }} + {{- if .Values.service.ipFamilies }} + ipFamilies: {{ .Values.service.ipFamilies | toYaml | nindent 2 }} + {{- end }} + ports: + - port: {{ .Values.webhook.metrics.service.port }} + protocol: TCP + name: metrics + selector: + {{- include "external-secrets-webhook.selectorLabels" . | nindent 4 }} +--- +apiVersion: "monitoring.coreos.com/v1" +kind: ServiceMonitor +metadata: + labels: + {{- include "external-secrets-webhook.labels" . | nindent 4 }} +{{- if .Values.serviceMonitor.additionalLabels }} +{{ toYaml .Values.serviceMonitor.additionalLabels | indent 4 }} +{{- end }} + name: {{ include "external-secrets.fullname" . }}-webhook-metrics + namespace: {{ .Values.serviceMonitor.namespace | default (include "external-secrets.namespace" .) | quote }} +spec: + selector: + matchLabels: + {{- include "external-secrets-webhook-metrics.labels" . | nindent 6 }} + namespaceSelector: + matchNames: + - {{ template "external-secrets.namespace" . 
}} + endpoints: + - port: metrics + interval: {{ .Values.serviceMonitor.interval }} + scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }} + honorLabels: {{ .Values.serviceMonitor.honorLabels }} + {{- with .Values.serviceMonitor.metricRelabelings }} + metricRelabelings: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.serviceMonitor.relabelings }} + relabelings: + {{- toYaml . | nindent 6 }} + {{- end }} +{{- end }} +--- +{{- if .Values.certController.create }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "external-secrets.fullname" . }}-cert-controller-metrics + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets-cert-controller-metrics.labels" . | nindent 4 }} +spec: + type: ClusterIP + {{- if .Values.service.ipFamilyPolicy }} + ipFamilyPolicy: {{ .Values.service.ipFamilyPolicy }} + {{- end }} + {{- if .Values.service.ipFamilies }} + ipFamilies: {{ .Values.service.ipFamilies | toYaml | nindent 2 }} + {{- end }} + ports: + - port: {{ .Values.certController.metrics.listen.port }} + protocol: TCP + name: metrics + selector: + {{- include "external-secrets-cert-controller.selectorLabels" . | nindent 4 }} +--- +apiVersion: "monitoring.coreos.com/v1" +kind: ServiceMonitor +metadata: + labels: + {{- include "external-secrets-cert-controller.labels" . | nindent 4 }} +{{- if .Values.serviceMonitor.additionalLabels }} +{{ toYaml .Values.serviceMonitor.additionalLabels | indent 4 }} +{{- end }} + name: {{ include "external-secrets.fullname" . }}-cert-controller-metrics + namespace: {{ .Values.serviceMonitor.namespace | default (include "external-secrets.namespace" .) | quote }} +spec: + selector: + matchLabels: + {{- include "external-secrets-cert-controller-metrics.labels" . | nindent 6 }} + namespaceSelector: + matchNames: + - {{ template "external-secrets.namespace" . 
}} + endpoints: + - port: metrics + interval: {{ .Values.serviceMonitor.interval }} + scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }} + honorLabels: {{ .Values.serviceMonitor.honorLabels }} + {{- with .Values.serviceMonitor.metricRelabelings }} + metricRelabelings: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.serviceMonitor.relabelings }} + relabelings: + {{- toYaml . | nindent 6 }} + {{- end }} +{{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/validatingwebhook.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/validatingwebhook.yaml new file mode 100644 index 00000000..63b39763 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/validatingwebhook.yaml @@ -0,0 +1,78 @@ +{{- if .Values.webhook.create }} +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + name: secretstore-validate + labels: + external-secrets.io/component: webhook + {{- with .Values.commonLabels }} + {{ toYaml . | nindent 4 }} + {{- end }} + {{- if and .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + annotations: + cert-manager.io/inject-ca-from: {{ template "external-secrets.namespace" . }}/{{ include "external-secrets.fullname" . }}-webhook + {{- end }} +webhooks: +- name: "validate.secretstore.external-secrets.io" + rules: + - apiGroups: ["external-secrets.io"] + apiVersions: ["v1beta1"] + operations: ["CREATE", "UPDATE", "DELETE"] + resources: ["secretstores"] + scope: "Namespaced" + clientConfig: + service: + namespace: {{ template "external-secrets.namespace" . }} + name: {{ include "external-secrets.fullname" . 
}}-webhook + path: /validate-external-secrets-io-v1beta1-secretstore + admissionReviewVersions: ["v1", "v1beta1"] + sideEffects: None + timeoutSeconds: 5 + +- name: "validate.clustersecretstore.external-secrets.io" + rules: + - apiGroups: ["external-secrets.io"] + apiVersions: ["v1beta1"] + operations: ["CREATE", "UPDATE", "DELETE"] + resources: ["clustersecretstores"] + scope: "Cluster" + clientConfig: + service: + namespace: {{ template "external-secrets.namespace" . }} + name: {{ include "external-secrets.fullname" . }}-webhook + path: /validate-external-secrets-io-v1beta1-clustersecretstore + admissionReviewVersions: ["v1", "v1beta1"] + sideEffects: None + timeoutSeconds: 5 +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + name: externalsecret-validate + labels: + external-secrets.io/component: webhook + {{- with .Values.commonLabels }} + {{ toYaml . | nindent 4 }} + {{- end }} + {{- if and .Values.webhook.certManager.enabled .Values.webhook.certManager.addInjectorAnnotations }} + annotations: + cert-manager.io/inject-ca-from: {{ template "external-secrets.namespace" . }}/{{ include "external-secrets.fullname" . }}-webhook + {{- end }} +webhooks: +- name: "validate.externalsecret.external-secrets.io" + rules: + - apiGroups: ["external-secrets.io"] + apiVersions: ["v1beta1"] + operations: ["CREATE", "UPDATE", "DELETE"] + resources: ["externalsecrets"] + scope: "Namespaced" + clientConfig: + service: + namespace: {{ template "external-secrets.namespace" . }} + name: {{ include "external-secrets.fullname" . 
}}-webhook + path: /validate-external-secrets-io-v1beta1-externalsecret + admissionReviewVersions: ["v1", "v1beta1"] + sideEffects: None + timeoutSeconds: 5 + failurePolicy: {{ .Values.webhook.failurePolicy}} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-certificate.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-certificate.yaml new file mode 100644 index 00000000..adb19fd9 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-certificate.yaml @@ -0,0 +1,30 @@ +{{- if and .Values.webhook.create .Values.webhook.certManager.enabled .Values.webhook.certManager.cert.create }} +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ include "external-secrets.fullname" . }}-webhook + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets-webhook.labels" . | nindent 4 }} + external-secrets.io/component: webhook + {{- with .Values.webhook.certManager.cert.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + commonName: {{ include "external-secrets.fullname" . }}-webhook + dnsNames: + - {{ include "external-secrets.fullname" . }}-webhook + - {{ include "external-secrets.fullname" . }}-webhook.{{ template "external-secrets.namespace" . }} + - {{ include "external-secrets.fullname" . }}-webhook.{{ template "external-secrets.namespace" . }}.svc + issuerRef: + {{- toYaml .Values.webhook.certManager.cert.issuerRef | nindent 4 }} + {{- with .Values.webhook.certManager.cert.duration }} + duration: {{ . | quote }} + {{- end }} + {{- with .Values.webhook.certManager.cert.renewBefore }} + renewBefore: {{ . | quote }} + {{- end }} + secretName: {{ include "external-secrets.fullname" . 
}}-webhook +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-deployment.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-deployment.yaml new file mode 100644 index 00000000..7419a426 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-deployment.yaml @@ -0,0 +1,128 @@ +{{- if .Values.webhook.create }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "external-secrets.fullname" . }}-webhook + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets-webhook.labels" . | nindent 4 }} + {{- with .Values.webhook.deploymentAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + replicas: {{ .Values.webhook.replicaCount }} + revisionHistoryLimit: {{ .Values.webhook.revisionHistoryLimit }} + selector: + matchLabels: + {{- include "external-secrets-webhook.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.webhook.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "external-secrets-webhook.labels" . | nindent 8 }} + {{- with .Values.webhook.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.webhook.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + hostNetwork: {{ .Values.webhook.hostNetwork}} + serviceAccountName: {{ include "external-secrets-webhook.serviceAccountName" . }} + automountServiceAccountToken: {{ .Values.webhook.serviceAccount.automount }} + {{- with .Values.webhook.podSecurityContext }} + {{- if and (.enabled) (gt (keys . | len) 1) }} + securityContext: + {{- include "external-secrets.renderSecurityContext" (dict "securityContext" . 
"context" $) | nindent 8 }} + {{- end }} + {{- end }} + containers: + - name: webhook + {{- with .Values.webhook.securityContext }} + {{- if and (.enabled) (gt (keys . | len) 1) }} + securityContext: + {{- include "external-secrets.renderSecurityContext" (dict "securityContext" . "context" $) | nindent 12 }} + {{- end }} + {{- end }} + image: {{ include "external-secrets.image" (dict "chartAppVersion" .Chart.AppVersion "image" .Values.webhook.image) | trim }} + imagePullPolicy: {{ .Values.webhook.image.pullPolicy }} + args: + - webhook + - --port={{ .Values.webhook.port }} + - --dns-name={{ include "external-secrets.fullname" . }}-webhook.{{ template "external-secrets.namespace" . }}.svc + - --cert-dir={{ .Values.webhook.certDir }} + - --check-interval={{ .Values.webhook.certCheckInterval }} + - --metrics-addr=:{{ .Values.webhook.metrics.listen.port }} + - --healthz-addr={{ .Values.webhook.readinessProbe.address }}:{{ .Values.webhook.readinessProbe.port }} + - --loglevel={{ .Values.webhook.log.level }} + - --zap-time-encoding={{ .Values.webhook.log.timeEncoding }} + {{- if .Values.webhook.lookaheadInterval }} + - --lookahead-interval={{ .Values.webhook.lookaheadInterval }} + {{- end }} + {{- range $key, $value := .Values.webhook.extraArgs }} + {{- if $value }} + - --{{ $key }}={{ $value }} + {{- else }} + - --{{ $key }} + {{- end }} + {{- end }} + ports: + - containerPort: {{ .Values.webhook.metrics.listen.port }} + protocol: TCP + name: metrics + - containerPort: {{ .Values.webhook.port }} + protocol: TCP + name: webhook + readinessProbe: + httpGet: + port: {{ .Values.webhook.readinessProbe.port }} + path: /readyz + initialDelaySeconds: 20 + periodSeconds: 5 + {{- with .Values.webhook.extraEnv }} + env: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.webhook.resources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }} + volumeMounts: + - name: certs + mountPath: {{ .Values.webhook.certDir }} + readOnly: true + {{- if .Values.webhook.extraVolumeMounts }} + {{- toYaml .Values.webhook.extraVolumeMounts | nindent 12 }} + {{- end }} + volumes: + - name: certs + secret: + secretName: {{ include "external-secrets.fullname" . }}-webhook + {{- if .Values.webhook.extraVolumes }} + {{- toYaml .Values.webhook.extraVolumes | nindent 8 }} + {{- end }} + {{- with .Values.webhook.nodeSelector | default .Values.global.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.webhook.affinity | default .Values.global.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.webhook.tolerations | default .Values.global.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.webhook.topologySpreadConstraints | default .Values.global.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.webhook.priorityClassName }} + priorityClassName: {{ .Values.webhook.priorityClassName }} + {{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-poddisruptionbudget.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-poddisruptionbudget.yaml new file mode 100644 index 00000000..58345ba6 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-poddisruptionbudget.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.webhook.create .Values.webhook.podDisruptionBudget.enabled }} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ include "external-secrets.fullname" . }}-webhook-pdb + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets-webhook.labels" . 
| nindent 4 }} + external-secrets.io/component: webhook +spec: + {{- if .Values.webhook.podDisruptionBudget.minAvailable }} + minAvailable: {{ .Values.webhook.podDisruptionBudget.minAvailable }} + {{- end }} + {{- if .Values.webhook.podDisruptionBudget.maxUnavailable }} + maxUnavailable: {{ .Values.webhook.podDisruptionBudget.maxUnavailable }} + {{- end }} + selector: + matchLabels: + {{- include "external-secrets-webhook.selectorLabels" . | nindent 6 }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-secret.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-secret.yaml new file mode 100644 index 00000000..fa7760ed --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-secret.yaml @@ -0,0 +1,14 @@ +{{- if and .Values.webhook.create (not .Values.webhook.certManager.enabled) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "external-secrets.fullname" . }}-webhook + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets-webhook.labels" . | nindent 4 }} + external-secrets.io/component: webhook + {{- with .Values.webhook.secretAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-service.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-service.yaml new file mode 100644 index 00000000..59dbddc9 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-service.yaml @@ -0,0 +1,37 @@ +{{- if .Values.webhook.create }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "external-secrets.fullname" . }}-webhook + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets-webhook.labels" . 
| nindent 4 }} + external-secrets.io/component: webhook + {{- if .Values.webhook.metrics.service.enabled }} + {{- with .Values.webhook.metrics.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- end }} +spec: + type: ClusterIP + {{- if .Values.service.ipFamilyPolicy }} + ipFamilyPolicy: {{ .Values.service.ipFamilyPolicy }} + {{- end }} + {{- if .Values.service.ipFamilies }} + ipFamilies: {{ .Values.service.ipFamilies | toYaml | nindent 2 }} + {{- end }} + ports: + - port: 443 + targetPort: {{ .Values.webhook.port }} + protocol: TCP + name: webhook + {{- if .Values.webhook.metrics.service.enabled }} + - port: {{ .Values.webhook.metrics.service.port }} + protocol: TCP + targetPort: metrics + name: metrics + {{- end }} + selector: + {{- include "external-secrets-webhook.selectorLabels" . | nindent 4 }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-serviceaccount.yaml b/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-serviceaccount.yaml new file mode 100644 index 00000000..19362184 --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/templates/webhook-serviceaccount.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.webhook.create .Values.webhook.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "external-secrets-webhook.serviceAccountName" . }} + namespace: {{ template "external-secrets.namespace" . }} + labels: + {{- include "external-secrets-webhook.labels" . | nindent 4 }} + {{- with .Values.webhook.serviceAccount.extraLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.webhook.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +{{- end }} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/values.schema.json b/packages/system/external-secrets-operator/charts/external-secrets/values.schema.json new file mode 100644 index 00000000..08cef96a --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/values.schema.json @@ -0,0 +1,905 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": { + "affinity": { + "properties": {}, + "type": "object" + }, + "bitwarden-sdk-server": { + "properties": { + "enabled": { + "type": "boolean" + } + }, + "type": "object" + }, + "certController": { + "properties": { + "affinity": { + "properties": {}, + "type": "object" + }, + "create": { + "type": "boolean" + }, + "deploymentAnnotations": { + "properties": {}, + "type": "object" + }, + "extraArgs": { + "properties": {}, + "type": "object" + }, + "extraEnv": { + "type": "array" + }, + "extraVolumeMounts": { + "type": "array" + }, + "extraVolumes": { + "type": "array" + }, + "fullnameOverride": { + "type": "string" + }, + "hostNetwork": { + "type": "boolean" + }, + "image": { + "properties": { + "flavour": { + "type": "string" + }, + "pullPolicy": { + "type": "string" + }, + "repository": { + "type": "string" + }, + "tag": { + "type": "string" + } + }, + "type": "object" + }, + "imagePullSecrets": { + "type": "array" + }, + "log": { + "properties": { + "level": { + "type": "string" + }, + "timeEncoding": { + "type": "string" + } + }, + "type": "object" + }, + "metrics": { + "properties": { + "listen": { + "properties": { + "port": { + "type": "integer" + } + }, + "type": "object" + }, + "service": { + "properties": { + "annotations": { + "properties": {}, + "type": "object" + }, + "enabled": { + "type": "boolean" + }, + "port": { + "type": "integer" + } + }, + "type": "object" + } + }, + "type": "object" + }, + "nameOverride": { + "type": "string" + }, + "nodeSelector": { + "properties": {}, + "type": 
"object" + }, + "podAnnotations": { + "properties": {}, + "type": "object" + }, + "podDisruptionBudget": { + "properties": { + "enabled": { + "type": "boolean" + }, + "minAvailable": { + "type": "integer" + } + }, + "type": "object" + }, + "podLabels": { + "properties": {}, + "type": "object" + }, + "podSecurityContext": { + "properties": { + "enabled": { + "type": "boolean" + } + }, + "type": "object" + }, + "priorityClassName": { + "type": "string" + }, + "rbac": { + "properties": { + "create": { + "type": "boolean" + } + }, + "type": "object" + }, + "readinessProbe": { + "properties": { + "address": { + "type": "string" + }, + "port": { + "type": "integer" + } + }, + "type": "object" + }, + "replicaCount": { + "type": "integer" + }, + "requeueInterval": { + "type": "string" + }, + "resources": { + "properties": {}, + "type": "object" + }, + "revisionHistoryLimit": { + "type": "integer" + }, + "securityContext": { + "properties": { + "allowPrivilegeEscalation": { + "type": "boolean" + }, + "capabilities": { + "properties": { + "drop": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "type": "object" + }, + "enabled": { + "type": "boolean" + }, + "readOnlyRootFilesystem": { + "type": "boolean" + }, + "runAsNonRoot": { + "type": "boolean" + }, + "runAsUser": { + "type": "integer" + }, + "seccompProfile": { + "properties": { + "type": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object" + }, + "serviceAccount": { + "properties": { + "annotations": { + "properties": {}, + "type": "object" + }, + "automount": { + "type": "boolean" + }, + "create": { + "type": "boolean" + }, + "extraLabels": { + "properties": {}, + "type": "object" + }, + "name": { + "type": "string" + } + }, + "type": "object" + }, + "tolerations": { + "type": "array" + }, + "topologySpreadConstraints": { + "type": "array" + } + }, + "type": "object" + }, + "commonLabels": { + "properties": {}, + "type": "object" + }, + "concurrent": { + "type": "integer" + 
}, + "controllerClass": { + "type": "string" + }, + "crds": { + "properties": { + "annotations": { + "properties": {}, + "type": "object" + }, + "conversion": { + "properties": { + "enabled": { + "type": "boolean" + } + }, + "type": "object" + }, + "createClusterExternalSecret": { + "type": "boolean" + }, + "createClusterSecretStore": { + "type": "boolean" + }, + "createPushSecret": { + "type": "boolean" + } + }, + "type": "object" + }, + "createOperator": { + "type": "boolean" + }, + "deploymentAnnotations": { + "properties": {}, + "type": "object" + }, + "dnsConfig": { + "properties": {}, + "type": "object" + }, + "dnsPolicy": { + "type": "string" + }, + "extendedMetricLabels": { + "type": "boolean" + }, + "extraArgs": { + "properties": {}, + "type": "object" + }, + "extraContainers": { + "type": "array" + }, + "extraEnv": { + "type": "array" + }, + "extraObjects": { + "type": "array" + }, + "extraVolumeMounts": { + "type": "array" + }, + "extraVolumes": { + "type": "array" + }, + "fullnameOverride": { + "type": "string" + }, + "global": { + "properties": { + "affinity": { + "properties": {}, + "type": "object" + }, + "compatibility": { + "properties": { + "openshift": { + "properties": { + "adaptSecurityContext": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object" + }, + "nodeSelector": { + "properties": {}, + "type": "object" + }, + "tolerations": { + "type": "array" + }, + "topologySpreadConstraints": { + "type": "array" + } + }, + "type": "object" + }, + "hostNetwork": { + "type": "boolean" + }, + "image": { + "properties": { + "flavour": { + "type": "string" + }, + "pullPolicy": { + "type": "string" + }, + "repository": { + "type": "string" + }, + "tag": { + "type": "string" + } + }, + "type": "object" + }, + "imagePullSecrets": { + "type": "array" + }, + "installCRDs": { + "type": "boolean" + }, + "leaderElect": { + "type": "boolean" + }, + "log": { + "properties": { + "level": { + "type": "string" + }, + "timeEncoding": { + "type": 
"string" + } + }, + "type": "object" + }, + "metrics": { + "properties": { + "listen": { + "properties": { + "port": { + "type": "integer" + } + }, + "type": "object" + }, + "service": { + "properties": { + "annotations": { + "properties": {}, + "type": "object" + }, + "enabled": { + "type": "boolean" + }, + "port": { + "type": "integer" + } + }, + "type": "object" + } + }, + "type": "object" + }, + "nameOverride": { + "type": "string" + }, + "namespaceOverride": { + "type": "string" + }, + "nodeSelector": { + "properties": {}, + "type": "object" + }, + "podAnnotations": { + "properties": {}, + "type": "object" + }, + "podDisruptionBudget": { + "properties": { + "enabled": { + "type": "boolean" + }, + "minAvailable": { + "type": "integer" + } + }, + "type": "object" + }, + "podLabels": { + "properties": {}, + "type": "object" + }, + "podSecurityContext": { + "properties": { + "enabled": { + "type": "boolean" + } + }, + "type": "object" + }, + "podSpecExtra": { + "properties": {}, + "type": "object" + }, + "priorityClassName": { + "type": "string" + }, + "processClusterExternalSecret": { + "type": "boolean" + }, + "processClusterStore": { + "type": "boolean" + }, + "processPushSecret": { + "type": "boolean" + }, + "rbac": { + "properties": { + "create": { + "type": "boolean" + }, + "servicebindings": { + "properties": { + "create": { + "type": "boolean" + } + }, + "type": "object" + } + }, + "type": "object" + }, + "replicaCount": { + "type": "integer" + }, + "resources": { + "properties": {}, + "type": "object" + }, + "revisionHistoryLimit": { + "type": "integer" + }, + "scopedNamespace": { + "type": "string" + }, + "scopedRBAC": { + "type": "boolean" + }, + "securityContext": { + "properties": { + "allowPrivilegeEscalation": { + "type": "boolean" + }, + "capabilities": { + "properties": { + "drop": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "type": "object" + }, + "enabled": { + "type": "boolean" + }, + "readOnlyRootFilesystem": { + 
"type": "boolean" + }, + "runAsNonRoot": { + "type": "boolean" + }, + "runAsUser": { + "type": "integer" + }, + "seccompProfile": { + "properties": { + "type": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object" + }, + "service": { + "properties": { + "ipFamilies": { + "type": "array" + }, + "ipFamilyPolicy": { + "type": "string" + } + }, + "type": "object" + }, + "serviceAccount": { + "properties": { + "annotations": { + "properties": {}, + "type": "object" + }, + "automount": { + "type": "boolean" + }, + "create": { + "type": "boolean" + }, + "extraLabels": { + "properties": {}, + "type": "object" + }, + "name": { + "type": "string" + } + }, + "type": "object" + }, + "serviceMonitor": { + "properties": { + "additionalLabels": { + "properties": {}, + "type": "object" + }, + "enabled": { + "type": "boolean" + }, + "honorLabels": { + "type": "boolean" + }, + "interval": { + "type": "string" + }, + "metricRelabelings": { + "type": "array" + }, + "namespace": { + "type": "string" + }, + "relabelings": { + "type": "array" + }, + "scrapeTimeout": { + "type": "string" + } + }, + "type": "object" + }, + "tolerations": { + "type": "array" + }, + "topologySpreadConstraints": { + "type": "array" + }, + "webhook": { + "properties": { + "affinity": { + "properties": {}, + "type": "object" + }, + "certCheckInterval": { + "type": "string" + }, + "certDir": { + "type": "string" + }, + "certManager": { + "properties": { + "addInjectorAnnotations": { + "type": "boolean" + }, + "cert": { + "properties": { + "annotations": { + "properties": {}, + "type": "object" + }, + "create": { + "type": "boolean" + }, + "duration": { + "type": "string" + }, + "issuerRef": { + "properties": { + "group": { + "type": "string" + }, + "kind": { + "type": "string" + }, + "name": { + "type": "string" + } + }, + "type": "object" + }, + "renewBefore": { + "type": "string" + } + }, + "type": "object" + }, + "enabled": { + "type": "boolean" + } + }, + "type": "object" + }, + 
"create": { + "type": "boolean" + }, + "deploymentAnnotations": { + "properties": {}, + "type": "object" + }, + "extraArgs": { + "properties": {}, + "type": "object" + }, + "extraEnv": { + "type": "array" + }, + "extraVolumeMounts": { + "type": "array" + }, + "extraVolumes": { + "type": "array" + }, + "failurePolicy": { + "type": "string" + }, + "fullnameOverride": { + "type": "string" + }, + "hostNetwork": { + "type": "boolean" + }, + "image": { + "properties": { + "flavour": { + "type": "string" + }, + "pullPolicy": { + "type": "string" + }, + "repository": { + "type": "string" + }, + "tag": { + "type": "string" + } + }, + "type": "object" + }, + "imagePullSecrets": { + "type": "array" + }, + "log": { + "properties": { + "level": { + "type": "string" + }, + "timeEncoding": { + "type": "string" + } + }, + "type": "object" + }, + "lookaheadInterval": { + "type": "string" + }, + "metrics": { + "properties": { + "listen": { + "properties": { + "port": { + "type": "integer" + } + }, + "type": "object" + }, + "service": { + "properties": { + "annotations": { + "properties": {}, + "type": "object" + }, + "enabled": { + "type": "boolean" + }, + "port": { + "type": "integer" + } + }, + "type": "object" + } + }, + "type": "object" + }, + "nameOverride": { + "type": "string" + }, + "nodeSelector": { + "properties": {}, + "type": "object" + }, + "podAnnotations": { + "properties": {}, + "type": "object" + }, + "podDisruptionBudget": { + "properties": { + "enabled": { + "type": "boolean" + }, + "minAvailable": { + "type": "integer" + } + }, + "type": "object" + }, + "podLabels": { + "properties": {}, + "type": "object" + }, + "podSecurityContext": { + "properties": { + "enabled": { + "type": "boolean" + } + }, + "type": "object" + }, + "port": { + "type": "integer" + }, + "priorityClassName": { + "type": "string" + }, + "rbac": { + "properties": { + "create": { + "type": "boolean" + } + }, + "type": "object" + }, + "readinessProbe": { + "properties": { + "address": { + 
"type": "string" + }, + "port": { + "type": "integer" + } + }, + "type": "object" + }, + "replicaCount": { + "type": "integer" + }, + "resources": { + "properties": {}, + "type": "object" + }, + "revisionHistoryLimit": { + "type": "integer" + }, + "secretAnnotations": { + "properties": {}, + "type": "object" + }, + "securityContext": { + "properties": { + "allowPrivilegeEscalation": { + "type": "boolean" + }, + "capabilities": { + "properties": { + "drop": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "type": "object" + }, + "enabled": { + "type": "boolean" + }, + "readOnlyRootFilesystem": { + "type": "boolean" + }, + "runAsNonRoot": { + "type": "boolean" + }, + "runAsUser": { + "type": "integer" + }, + "seccompProfile": { + "properties": { + "type": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object" + }, + "serviceAccount": { + "properties": { + "annotations": { + "properties": {}, + "type": "object" + }, + "automount": { + "type": "boolean" + }, + "create": { + "type": "boolean" + }, + "extraLabels": { + "properties": {}, + "type": "object" + }, + "name": { + "type": "string" + } + }, + "type": "object" + }, + "tolerations": { + "type": "array" + }, + "topologySpreadConstraints": { + "type": "array" + } + }, + "type": "object" + } + }, + "type": "object" +} diff --git a/packages/system/external-secrets-operator/charts/external-secrets/values.yaml b/packages/system/external-secrets-operator/charts/external-secrets/values.yaml new file mode 100644 index 00000000..21f4a94c --- /dev/null +++ b/packages/system/external-secrets-operator/charts/external-secrets/values.yaml @@ -0,0 +1,532 @@ +global: + nodeSelector: {} + tolerations: [] + topologySpreadConstraints: [] + affinity: {} + compatibility: + openshift: + # -- Manages the securityContext properties to make them compatible with OpenShift. + # Possible values: + # auto - Apply configurations if it is detected that OpenShift is the target platform. 
+ # force - Always apply configurations. + # disabled - No modification applied. + adaptSecurityContext: auto + +replicaCount: 1 + +bitwarden-sdk-server: + enabled: false + +# -- Specifies the amount of historic ReplicaSets k8s should keep (see https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#clean-up-policy) +revisionHistoryLimit: 10 + +image: + repository: oci.external-secrets.io/external-secrets/external-secrets + pullPolicy: IfNotPresent + # -- The image tag to use. The default is the chart appVersion. + tag: "" + # -- The flavour of tag you want to use + # There are different image flavours available, like distroless and ubi. + # Please see GitHub release notes for image tags for these flavors. + # By default, the distroless image is used. + flavour: "" + +# -- If set, install and upgrade CRDs through helm chart. +installCRDs: true + +crds: + # -- If true, create CRDs for Cluster External Secret. + createClusterExternalSecret: true + # -- If true, create CRDs for Cluster Secret Store. + createClusterSecretStore: true + # -- If true, create CRDs for Push Secret. + createPushSecret: true + annotations: {} + conversion: + enabled: true + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" +namespaceOverride: "" + +# -- Additional labels added to all helm chart resources. +commonLabels: {} + +# -- If true, external-secrets will perform leader election between instances to ensure no more +# than one instance of external-secrets operates at a time. +leaderElect: false + +# -- If set external secrets will filter matching +# Secret Stores with the appropriate controller values. +controllerClass: "" + +# -- If true external secrets will use recommended kubernetes +# annotations as prometheus metric labels. +extendedMetricLabels: false + +# -- If set external secrets are only reconciled in the +# provided namespace +scopedNamespace: "" + +# -- Must be used with scopedNamespace. 
If true, create scoped RBAC roles under the scoped namespace +# and implicitly disable cluster stores and cluster external secrets +scopedRBAC: false + +# -- if true, the operator will process cluster external secret. Else, it will ignore them. +processClusterExternalSecret: true + +# -- if true, the operator will process cluster store. Else, it will ignore them. +processClusterStore: true + +# -- if true, the operator will process push secret. Else, it will ignore them. +processPushSecret: true + +# -- Specifies whether an external secret operator deployment should be created. +createOperator: true + +# -- Specifies the number of concurrent ExternalSecret Reconciles external-secret executes at +# a time. +concurrent: 1 +# -- Specifies Log Params for the controller +log: + level: info + timeEncoding: epoch +service: + # -- Set the ip family policy to configure dual-stack see [Configure dual-stack](https://kubernetes.io/docs/concepts/services-networking/dual-stack/#services) + ipFamilyPolicy: "" + # -- Sets the families that should be supported and the order in which they should be applied to ClusterIP as well. Can be IPv4 and/or IPv6. + ipFamilies: [] + +serviceAccount: + # -- Specifies whether a service account should be created. + create: true + # -- Automounts the service account token in all containers of the pod + automount: true + # -- Annotations to add to the service account. + annotations: {} + # -- Extra Labels to add to the service account. + extraLabels: {} + # -- The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template. + name: "" + +rbac: + # -- Specifies whether role and rolebinding resources should be created. + create: true + + servicebindings: + # -- Specifies whether a clusterrole to give servicebindings read access should be created. + create: true + +## -- Extra environment variables to add to container. +extraEnv: [] + +## -- Map of extra arguments to pass to container.
+extraArgs: {} + +## -- Extra volumes to pass to pod. +extraVolumes: [] + +## -- Extra Kubernetes objects to deploy with the helm chart +extraObjects: [] + +## -- Extra volumes to mount to the container. +extraVolumeMounts: [] + +## -- Extra containers to add to the pod. +extraContainers: [] + +# -- Annotations to add to Deployment +deploymentAnnotations: {} + +# -- Annotations to add to Pod +podAnnotations: {} + +podLabels: {} + +podSecurityContext: + enabled: true + # fsGroup: 2000 + +securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + enabled: true + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + +resources: {} + # requests: + # cpu: 10m + # memory: 32Mi + +serviceMonitor: + # -- Specifies whether to create a ServiceMonitor resource for collecting Prometheus metrics + enabled: false + + # -- namespace where you want to install ServiceMonitors + namespace: "" + + # -- Additional labels + additionalLabels: {} + + # -- Interval to scrape metrics + interval: 30s + + # -- Timeout if metrics can't be retrieved in given time interval + scrapeTimeout: 25s + + # -- Let prometheus add an exported_ prefix to conflicting labels + honorLabels: false + + # -- Metric relabel configs to apply to samples before ingestion. [Metric Relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs) + metricRelabelings: [] + # - action: replace + # regex: (.*) + # replacement: $1 + # sourceLabels: + # - exported_namespace + # targetLabel: namespace + + # -- Relabel configs to apply to samples before ingestion. 
[Relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + +metrics: + + listen: + port: 8080 + + service: + # -- Enable if you use another monitoring tool than Prometheus to scrape the metrics + enabled: false + + # -- Metrics service port to scrape + port: 8080 + + # -- Additional service annotations + annotations: {} + +nodeSelector: {} + +tolerations: [] + +topologySpreadConstraints: [] + +affinity: {} + +# -- Pod priority class name. +priorityClassName: "" + +# -- Pod disruption budget - for more details see https://kubernetes.io/docs/concepts/workloads/pods/disruptions/ +podDisruptionBudget: + enabled: false + minAvailable: 1 + # maxUnavailable: 1 + +# -- Run the controller on the host network +hostNetwork: false + +webhook: + # -- Specifies whether a webhook deployment should be created. + create: true + # -- Specifies the interval at which to check if the cert is valid + certCheckInterval: "5m" + # -- Specifies the lookaheadInterval for certificate validity + lookaheadInterval: "" + replicaCount: 1 + # -- Specifies Log Params to the Webhook + log: + level: info + timeEncoding: epoch + # -- Specifies the amount of historic ReplicaSets k8s should keep (see https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#clean-up-policy) + revisionHistoryLimit: 10 + + certDir: /tmp/certs + # -- Specifies whether validating webhooks should be created with failurePolicy: Fail or Ignore + failurePolicy: Fail + # -- Specifies if webhook pod should use hostNetwork or not. + hostNetwork: false + image: + repository: oci.external-secrets.io/external-secrets/external-secrets + pullPolicy: IfNotPresent + # -- The image tag to use. The default is the chart appVersion.
+ tag: "" + # -- The flavour of tag you want to use + flavour: "" + imagePullSecrets: [] + nameOverride: "" + fullnameOverride: "" + # -- The port the webhook will listen to + port: 10250 + rbac: + # -- Specifies whether role and rolebinding resources should be created. + create: true + serviceAccount: + # -- Specifies whether a service account should be created. + create: true + # -- Automounts the service account token in all containers of the pod + automount: true + # -- Annotations to add to the service account. + annotations: {} + # -- Extra Labels to add to the service account. + extraLabels: {} + # -- The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template. + name: "" + nodeSelector: {} + + certManager: + # -- Enabling cert-manager support will disable the built in secret and + # switch to using cert-manager (installed separately) to automatically issue + # and renew the webhook certificate. This chart does not install + # cert-manager for you, See https://cert-manager.io/docs/ + enabled: false + # -- Automatically add the cert-manager.io/inject-ca-from annotation to the + # webhooks and CRDs. As long as you have the cert-manager CA Injector + # enabled, this will automatically setup your webhook's CA to the one used + # by cert-manager. See https://cert-manager.io/docs/concepts/ca-injector + addInjectorAnnotations: true + cert: + # -- Create a certificate resource within this chart. See + # https://cert-manager.io/docs/usage/certificate/ + create: true + # -- For the Certificate created by this chart, setup the issuer. See + # https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.IssuerSpec + issuerRef: + group: cert-manager.io + kind: "Issuer" + name: "my-issuer" + # -- Set the requested duration (i.e. lifetime) of the Certificate. See + # https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec + # One year by default. 
+ duration: "8760h" + # -- How long before the currently issued certificate’s expiry + # cert-manager should renew the certificate. See + # https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec + # Note that renewBefore should be greater than .webhook.lookaheadInterval + # since the webhook will check this far in advance that the certificate is + # valid. + renewBefore: "" + # -- Add extra annotations to the Certificate resource. + annotations: {} + + tolerations: [] + + topologySpreadConstraints: [] + + affinity: {} + + # -- Pod priority class name. + priorityClassName: "" + + # -- Pod disruption budget - for more details see https://kubernetes.io/docs/concepts/workloads/pods/disruptions/ + podDisruptionBudget: + enabled: false + minAvailable: 1 + # maxUnavailable: 1 + + metrics: + + listen: + port: 8080 + + service: + # -- Enable if you use another monitoring tool than Prometheus to scrape the metrics + enabled: false + + # -- Metrics service port to scrape + port: 8080 + + # -- Additional service annotations + annotations: {} + + + readinessProbe: + # -- Address for readiness probe + address: "" + # -- ReadinessProbe port for kubelet + port: 8081 + + + ## -- Extra environment variables to add to container. + extraEnv: [] + + ## -- Map of extra arguments to pass to container. + extraArgs: {} + + ## -- Extra volumes to pass to pod. + extraVolumes: [] + + ## -- Extra volumes to mount to the container. 
+ extraVolumeMounts: [] + + # -- Annotations to add to Secret + secretAnnotations: {} + + # -- Annotations to add to Deployment + deploymentAnnotations: {} + + # -- Annotations to add to Pod + podAnnotations: {} + + podLabels: {} + + podSecurityContext: + enabled: true + # fsGroup: 2000 + + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + enabled: true + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + + resources: {} + # requests: + # cpu: 10m + # memory: 32Mi + +certController: + # -- Specifies whether a certificate controller deployment should be created. + create: true + requeueInterval: "5m" + replicaCount: 1 + # -- Specifies Log Params for the certController + log: + level: info + timeEncoding: epoch + # -- Specifies the amount of historic ReplicaSets k8s should keep (see https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#clean-up-policy) + revisionHistoryLimit: 10 + + image: + repository: oci.external-secrets.io/external-secrets/external-secrets + pullPolicy: IfNotPresent + tag: "" + flavour: "" + imagePullSecrets: [] + nameOverride: "" + fullnameOverride: "" + rbac: + # -- Specifies whether role and rolebinding resources should be created. + create: true + serviceAccount: + # -- Specifies whether a service account should be created. + create: true + # -- Automounts the service account token in all containers of the pod + automount: true + # -- Annotations to add to the service account. + annotations: {} + # -- Extra Labels to add to the service account. + extraLabels: {} + # -- The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template. + name: "" + nodeSelector: {} + + tolerations: [] + + topologySpreadConstraints: [] + + affinity: {} + + # -- Run the certController on the host network + hostNetwork: false + + # -- Pod priority class name.
+ priorityClassName: "" + + # -- Pod disruption budget - for more details see https://kubernetes.io/docs/concepts/workloads/pods/disruptions/ + podDisruptionBudget: + enabled: false + minAvailable: 1 + # maxUnavailable: 1 + + metrics: + + listen: + port: 8080 + + service: + # -- Enable if you use another monitoring tool than Prometheus to scrape the metrics + enabled: false + + # -- Metrics service port to scrape + port: 8080 + + # -- Additional service annotations + annotations: {} + + readinessProbe: + # -- Address for readiness probe + address: "" + # -- ReadinessProbe port for kubelet + port: 8081 + + ## -- Extra environment variables to add to container. + extraEnv: [] + + ## -- Map of extra arguments to pass to container. + extraArgs: {} + + + ## -- Extra volumes to pass to pod. + extraVolumes: [] + + ## -- Extra volumes to mount to the container. + extraVolumeMounts: [] + + # -- Annotations to add to Deployment + deploymentAnnotations: {} + + # -- Annotations to add to Pod + podAnnotations: {} + + podLabels: {} + + podSecurityContext: + enabled: true + # fsGroup: 2000 + + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + enabled: true + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + + resources: {} + # requests: + # cpu: 10m + # memory: 32Mi + +# -- Specifies `dnsPolicy` to deployment +dnsPolicy: ClusterFirst + +# -- Specifies `dnsOptions` to deployment +dnsConfig: {} + +# -- Any extra pod spec on the deployment +podSpecExtra: {} diff --git a/packages/system/external-secrets-operator/values.yaml b/packages/system/external-secrets-operator/values.yaml new file mode 100644 index 00000000..0deabf46 --- /dev/null +++ b/packages/system/external-secrets-operator/values.yaml @@ -0,0 +1,4 @@ +external-secrets: + bitwarden-sdk-server: + enabled: false + installCRDs: true From eda62ff77be366ac5e08764dff5e07dcc5c04626 Mon Sep 17 00:00:00 2001 From: Mr Khachaturov 
<105451445+mrkhachaturov@users.noreply.github.com> Date: Fri, 4 Oct 2024 13:56:39 +0300 Subject: [PATCH 27/41] External-dns and new clusterissuer dns01 Cloudflare (#374) Overview This pull request introduces the integration of External-DNS into the full bundles and adds support for a dns01 ClusterIssuer using Cloudflare. It enhances the DNS management capabilities for our deployments by allowing dynamic DNS record management directly from Kubernetes resources. Changes Made 1. **External-DNS Integration:** - Added External-DNS to the full deployment bundles. - Configured External-DNS to automatically manage DNS records for services within the Kubernetes cluster ( we must discuss how to configure external-dns via configmap or create an application in tenant `external-dns` where we can define values). We must define some additional annotations for ingresses in order to make external-dns work , so we must discuss this also which is best method to configure it ( from configmap or dashboard ). **2. dns01 ClusterIssuer for Cloudflare:** - Implemented support for a dns01 ClusterIssuer using Cloudflare. - This allows for automated certificate issuance via DNS challenge, leveraging Cloudflare as the DNS provider. - The configuration can be defined in the Cozystack ConfigMap 3. Default Ingress Configuration: - Updated the default Ingress resources to use Cloudflare for DNS challenges. - Ensured that if the Cloudflare issuer is defined in the Cozystack ConfigMap, it will be utilized for all default Ingresses, streamlining the deployment process and improving reliability. **Benefits** - Automated DNS Management: With External-DNS, DNS entries will be created and updated automatically based on the state of Kubernetes resources, reducing manual overhead. - Seamless Certificate Management: The dns01 ClusterIssuer integration allows for automated SSL/TLS certificate issuance, enhancing security for deployed applications. 
- Flexibility in Configuration: Users can easily switch between different issuers by updating the Cozystack ConfigMap, providing flexibility in the choice of DNS and certificate management solutions. ## Summary by CodeRabbit ## Release Notes - **New Features** - Introduced a new `external-dns` release with support for managing DNS records in Kubernetes. - Added configuration options for DNS synchronization policies and provider settings. - Implemented a new lookup for issuer types in Ingress configurations. - Expanded configuration with new entries for `external-dns` in multiple deployment files, enhancing deployment flexibility. - **Documentation** - Comprehensive README and configuration schema for the `external-dns` Helm chart added, detailing installation and customization options. - **Improvements** - Enhanced RBAC configuration for flexible permissions management. - Updated annotations and health check configurations for better service monitoring. --------- Signed-off-by: Andrei Kvapil Co-authored-by: Andrei Kvapil Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- .../core/platform/bundles/distro-full.yaml | 8 +- .../core/platform/bundles/distro-hosted.yaml | 6 + packages/core/platform/bundles/paas-full.yaml | 8 +- .../core/platform/bundles/paas-hosted.yaml | 6 + .../extra/ingress/templates/dashboard.yaml | 63 ++-- .../monitoring/templates/alerta/alerta.yaml | 7 +- .../monitoring/templates/grafana/grafana.yaml | 7 +- .../templates/cluster-issuers.yaml | 89 ++++-- packages/system/external-dns/.helmignore | 3 + packages/system/external-dns/Chart.yaml | 3 + packages/system/external-dns/Makefile | 10 + .../charts/external-dns/.helmignore | 23 ++ .../charts/external-dns/CHANGELOG.md | 219 +++++++++++++ .../charts/external-dns/Chart.yaml | 33 ++ .../charts/external-dns/README.md | 182 +++++++++++ .../charts/external-dns/README.md.gotmpl | 91 ++++++ .../charts/external-dns/RELEASE.md | 10 + 
.../charts/external-dns/ci/ci-values.yaml | 2 + .../charts/external-dns/crds/dnsendpoint.yaml | 102 ++++++ .../charts/external-dns/templates/NOTES.txt | 7 + .../external-dns/templates/_helpers.tpl | 95 ++++++ .../external-dns/templates/clusterrole.yaml | 127 ++++++++ .../templates/clusterrolebinding.yaml | 16 + .../external-dns/templates/deployment.yaml | 209 ++++++++++++ .../charts/external-dns/templates/secret.yaml | 13 + .../external-dns/templates/service.yaml | 36 +++ .../templates/serviceaccount.yaml | 17 + .../templates/servicemonitor.yaml | 86 +++++ .../charts/external-dns/values.schema.json | 91 ++++++ .../charts/external-dns/values.yaml | 297 ++++++++++++++++++ packages/system/external-dns/values.yaml | 23 ++ 31 files changed, 1823 insertions(+), 66 deletions(-) create mode 100644 packages/system/external-dns/.helmignore create mode 100644 packages/system/external-dns/Chart.yaml create mode 100644 packages/system/external-dns/Makefile create mode 100644 packages/system/external-dns/charts/external-dns/.helmignore create mode 100644 packages/system/external-dns/charts/external-dns/CHANGELOG.md create mode 100644 packages/system/external-dns/charts/external-dns/Chart.yaml create mode 100644 packages/system/external-dns/charts/external-dns/README.md create mode 100644 packages/system/external-dns/charts/external-dns/README.md.gotmpl create mode 100644 packages/system/external-dns/charts/external-dns/RELEASE.md create mode 100644 packages/system/external-dns/charts/external-dns/ci/ci-values.yaml create mode 100644 packages/system/external-dns/charts/external-dns/crds/dnsendpoint.yaml create mode 100644 packages/system/external-dns/charts/external-dns/templates/NOTES.txt create mode 100644 packages/system/external-dns/charts/external-dns/templates/_helpers.tpl create mode 100644 packages/system/external-dns/charts/external-dns/templates/clusterrole.yaml create mode 100644 packages/system/external-dns/charts/external-dns/templates/clusterrolebinding.yaml create 
mode 100644 packages/system/external-dns/charts/external-dns/templates/deployment.yaml create mode 100644 packages/system/external-dns/charts/external-dns/templates/secret.yaml create mode 100644 packages/system/external-dns/charts/external-dns/templates/service.yaml create mode 100644 packages/system/external-dns/charts/external-dns/templates/serviceaccount.yaml create mode 100644 packages/system/external-dns/charts/external-dns/templates/servicemonitor.yaml create mode 100644 packages/system/external-dns/charts/external-dns/values.schema.json create mode 100644 packages/system/external-dns/charts/external-dns/values.yaml create mode 100644 packages/system/external-dns/values.yaml diff --git a/packages/core/platform/bundles/distro-full.yaml b/packages/core/platform/bundles/distro-full.yaml index 061e27b9..6cd88fbb 100644 --- a/packages/core/platform/bundles/distro-full.yaml +++ b/packages/core/platform/bundles/distro-full.yaml @@ -142,8 +142,14 @@ releases: namespace: cozy-telepresence dependsOn: [] +- name: external-dns + releaseName: external-dns + chart: cozy-external-dns + namespace: cozy-external-dns + dependsOn: [cilium] + - name: external-secrets-operator releaseName: external-secrets-operator chart: cozy-external-secrets-operator namespace: cozy-external-secrets-operator - dependsOn: [cilium] \ No newline at end of file + dependsOn: [cilium] diff --git a/packages/core/platform/bundles/distro-hosted.yaml b/packages/core/platform/bundles/distro-hosted.yaml index 5be68fbb..09f6f1f5 100644 --- a/packages/core/platform/bundles/distro-hosted.yaml +++ b/packages/core/platform/bundles/distro-hosted.yaml @@ -93,6 +93,12 @@ releases: namespace: cozy-telepresence dependsOn: [] +- name: external-dns + releaseName: external-dns + chart: cozy-external-dns + namespace: cozy-external-dns + dependsOn: [] + - name: external-secrets-operator releaseName: external-secrets-operator chart: cozy-external-secrets-operator diff --git a/packages/core/platform/bundles/paas-full.yaml 
b/packages/core/platform/bundles/paas-full.yaml index ed9a81c5..78481828 100644 --- a/packages/core/platform/bundles/paas-full.yaml +++ b/packages/core/platform/bundles/paas-full.yaml @@ -217,8 +217,14 @@ releases: privileged: true dependsOn: [cilium,kubeovn,capi-operator] +- name: external-dns + releaseName: external-dns + chart: cozy-external-dns + namespace: cozy-external-dns + dependsOn: [cilium,kubeovn] + - name: external-secrets-operator releaseName: external-secrets-operator chart: cozy-external-secrets-operator namespace: cozy-external-secrets-operator - dependsOn: [cilium,kubeovn] \ No newline at end of file + dependsOn: [cilium,kubeovn] diff --git a/packages/core/platform/bundles/paas-hosted.yaml b/packages/core/platform/bundles/paas-hosted.yaml index 2f63f870..63500982 100644 --- a/packages/core/platform/bundles/paas-hosted.yaml +++ b/packages/core/platform/bundles/paas-hosted.yaml @@ -99,6 +99,12 @@ releases: namespace: cozy-telepresence dependsOn: [] +- name: external-dns + releaseName: external-dns + chart: cozy-external-dns + namespace: cozy-external-dns + dependsOn: [cilium,kubeovn] + - name: external-secrets-operator releaseName: external-secrets-operator chart: cozy-external-secrets-operator diff --git a/packages/extra/ingress/templates/dashboard.yaml b/packages/extra/ingress/templates/dashboard.yaml index 940fdefe..106f2e6a 100644 --- a/packages/extra/ingress/templates/dashboard.yaml +++ b/packages/extra/ingress/templates/dashboard.yaml @@ -1,29 +1,36 @@ -{{- $myNS := lookup "v1" "Namespace" "" .Release.Namespace }} -{{- $host := index $myNS.metadata.annotations "namespace.cozystack.io/host" }} -{{- if .Values.dashboard }} -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - annotations: - cert-manager.io/cluster-issuer: letsencrypt-prod - acme.cert-manager.io/http01-ingress-class: tenant-root - name: dashboard-{{ .Release.Namespace }} - namespace: cozy-dashboard -spec: - ingressClassName: {{ .Release.Namespace }} - rules: - - host: 
dashboard.{{ $host }} - http: - paths: - - backend: - service: - name: dashboard - port: - number: 80 - path: / - pathType: Prefix - tls: - - hosts: - - dashboard.{{ $host }} - secretName: dashboard-{{ .Release.Namespace }}-tls +{{- $cozyConfig := lookup "v1" "ConfigMap" "cozy-system" "cozystack" }} +{{- $issuerType := (index $cozyConfig.data "clusterissuer") | default "http01" }} + +{{- $myNS := lookup "v1" "Namespace" "" .Release.Namespace }} +{{- $host := index $myNS.metadata.annotations "namespace.cozystack.io/host" }} + +{{- if .Values.dashboard }} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + {{- if eq $issuerType "cloudflare" }} + {{- else }} + acme.cert-manager.io/http01-ingress-class: {{ .Release.Namespace }} + {{- end }} + name: dashboard-{{ .Release.Namespace }} + namespace: cozy-dashboard +spec: + ingressClassName: {{ .Release.Namespace }} + rules: + - host: dashboard.{{ $host }} + http: + paths: + - backend: + service: + name: dashboard + port: + number: 80 + path: / + pathType: Prefix + tls: + - hosts: + - dashboard.{{ $host }} + secretName: dashboard-{{ .Release.Namespace }}-tls {{- end }} diff --git a/packages/extra/monitoring/templates/alerta/alerta.yaml b/packages/extra/monitoring/templates/alerta/alerta.yaml index e723661e..18932f56 100644 --- a/packages/extra/monitoring/templates/alerta/alerta.yaml +++ b/packages/extra/monitoring/templates/alerta/alerta.yaml @@ -1,3 +1,6 @@ +{{- $cozyConfig := lookup "v1" "ConfigMap" "cozy-system" "cozystack" }} +{{- $issuerType := (index $cozyConfig.data "clusterissuer") | default "http01" }} + {{- $myNS := lookup "v1" "Namespace" "" .Release.Namespace }} {{- $ingress := index $myNS.metadata.annotations "namespace.cozystack.io/ingress" }} {{- $host := index $myNS.metadata.annotations "namespace.cozystack.io/host" }} @@ -146,7 +149,9 @@ metadata: app: alerta annotations: - acme.cert-manager.io/http01-ingress-class: {{ $ingress }}
cert-manager.io/cluster-issuer: letsencrypt-prod + {{- if ne $issuerType "cloudflare" }} + acme.cert-manager.io/http01-ingress-class: {{ $ingress }} + {{- end }} spec: ingressClassName: {{ $ingress }} tls: diff --git a/packages/extra/monitoring/templates/grafana/grafana.yaml b/packages/extra/monitoring/templates/grafana/grafana.yaml index 5cbff82f..4e1e65a6 100644 --- a/packages/extra/monitoring/templates/grafana/grafana.yaml +++ b/packages/extra/monitoring/templates/grafana/grafana.yaml @@ -1,3 +1,6 @@ +{{- $cozyConfig := lookup "v1" "ConfigMap" "cozy-system" "cozystack" }} +{{- $issuerType := (index $cozyConfig.data "clusterissuer") | default "http01" }} + {{- $myNS := lookup "v1" "Namespace" "" .Release.Namespace }} {{- $ingress := index $myNS.metadata.annotations "namespace.cozystack.io/ingress" }} {{- $host := index $myNS.metadata.annotations "namespace.cozystack.io/host" }} @@ -90,7 +93,9 @@ spec: ingress: metadata: annotations: - acme.cert-manager.io/http01-ingress-class: "{{ $ingress }}" + {{- if ne $issuerType "cloudflare" }} + acme.cert-manager.io/http01-ingress-class: "{{ $ingress }}" + {{- end }} cert-manager.io/cluster-issuer: letsencrypt-prod spec: ingressClassName: "{{ $ingress }}" diff --git a/packages/system/cert-manager-issuers/templates/cluster-issuers.yaml b/packages/system/cert-manager-issuers/templates/cluster-issuers.yaml index ec52575f..2d8b050d 100644 --- a/packages/system/cert-manager-issuers/templates/cluster-issuers.yaml +++ b/packages/system/cert-manager-issuers/templates/cluster-issuers.yaml @@ -1,35 +1,57 @@ -apiVersion: cert-manager.io/v1 -kind: ClusterIssuer -metadata: - annotations: - name: letsencrypt-prod -spec: - acme: - privateKeySecretRef: - name: letsencrypt-prod - server: https://acme-v02.api.letsencrypt.org/directory - solvers: - - http01: - ingress: - class: nginx ---- -apiVersion: cert-manager.io/v1 -kind: ClusterIssuer -metadata: - name: letsencrypt-stage -spec: - acme: - privateKeySecretRef: +{{- $cozyConfig := lookup
"v1" "ConfigMap" "cozy-system" "cozystack" }} +{{- $issuerType := (index $cozyConfig.data "clusterissuer") | default "http01" }} + +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: letsencrypt-prod +spec: + acme: + privateKeySecretRef: + name: letsencrypt-prod + server: https://acme-v02.api.letsencrypt.org/directory + solvers: + - {{- if eq $issuerType "cloudflare" }} + dns01: + cloudflare: + apiTokenSecretRef: + name: cloudflare-api-token-secret + key: api-token + {{- else }} + http01: + ingress: + class: nginx + {{- end }} + +--- + +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: letsencrypt-stage +spec: + acme: + privateKeySecretRef: name: letsencrypt-stage - server: https://acme-staging-v02.api.letsencrypt.org/directory - solvers: - - http01: - ingress: - class: nginx ---- -apiVersion: cert-manager.io/v1 -kind: ClusterIssuer -metadata: - name: selfsigned-cluster-issuer -spec: - selfSigned: {} + server: https://acme-staging-v02.api.letsencrypt.org/directory + solvers: + - {{- if eq $issuerType "cloudflare" }} + dns01: + cloudflare: + apiTokenSecretRef: + name: cloudflare-api-token-secret + key: api-token + {{- else }} + http01: + ingress: + class: nginx + {{- end }} + +--- + +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: selfsigned-cluster-issuer +spec: + selfSigned: {} \ No newline at end of file diff --git a/packages/system/external-dns/.helmignore b/packages/system/external-dns/.helmignore new file mode 100644 index 00000000..d5c178e8 --- /dev/null +++ b/packages/system/external-dns/.helmignore @@ -0,0 +1,3 @@ +images +hack +.gitkeep diff --git a/packages/system/external-dns/Chart.yaml b/packages/system/external-dns/Chart.yaml new file mode 100644 index 00000000..5223150a --- /dev/null +++ b/packages/system/external-dns/Chart.yaml @@ -0,0 +1,3 @@ +apiVersion: v2 +name: cozy-external-dns +version: 0.0.0 # Placeholder, the actual version will be automatically set during the build process diff --git
a/packages/system/external-dns/Makefile b/packages/system/external-dns/Makefile new file mode 100644 index 00000000..1ddfa773 --- /dev/null +++ b/packages/system/external-dns/Makefile @@ -0,0 +1,10 @@ +export NAME=external-dns +export NAMESPACE=cozy-$(NAME) + +include ../../../scripts/package.mk + +update: + rm -rf charts + helm repo add external-dns https://kubernetes-sigs.github.io/external-dns/ + helm repo update external-dns + helm pull external-dns/external-dns --untar --untardir charts \ No newline at end of file diff --git a/packages/system/external-dns/charts/external-dns/.helmignore b/packages/system/external-dns/charts/external-dns/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/packages/system/external-dns/charts/external-dns/CHANGELOG.md b/packages/system/external-dns/charts/external-dns/CHANGELOG.md new file mode 100644 index 00000000..02b467e1 --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/CHANGELOG.md @@ -0,0 +1,219 @@ +# ExternalDNS Helm Chart Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +--- + + + +## [UNRELEASED] + +## [v1.15.0] - 2023-09-10 + +### Changed + +- Updated _ExternalDNS_ OCI image version to [v0.15.0](https://github.com/kubernetes-sigs/external-dns/releases/tag/v0.15.0). 
([#xxxx](https://github.com/kubernetes-sigs/external-dns/pull/xxxx)) _@stevehipwell_ + +### Fixed + +- Fixed `provider.webhook.resources` behavior to correctly leverage resource limits. ([#4560](https://github.com/kubernetes-sigs/external-dns/pull/4560)) _@crutonjohn_ +- Fixed `provider.webhook.imagePullPolicy` behavior to correctly leverage pull policy. ([#4643](https://github.com/kubernetes-sigs/external-dns/pull/4643)) _@kimsondrup_ +- Fixed to add correct webhook metric port to `Service` and `ServiceMonitor`. ([#4643](https://github.com/kubernetes-sigs/external-dns/pull/4643)) _@kimsondrup_ +- Fixed to no longer require the unauthenticated webhook provider port to be exposed for health probes. ([#4691](https://github.com/kubernetes-sigs/external-dns/pull/4691)) _@kimsondrup_ & _@hatrx_ + +## [v1.14.5] - 2023-06-10 + +### Added + +- Added support for `extraContainers` argument. ([#4432](https://github.com/kubernetes-sigs/external-dns/pull/4432)) _@omerap12_ +- Added support for setting `excludeDomains` argument. ([#4380](https://github.com/kubernetes-sigs/external-dns/pull/4380)) _@bford-evs_ + +### Changed + +- Updated _ExternalDNS_ OCI image version to [v0.14.2](https://github.com/kubernetes-sigs/external-dns/releases/tag/v0.14.2). ([#4541](https://github.com/kubernetes-sigs/external-dns/pull/4541)) _@stevehipwell_ +- Updated `DNSEndpoint` CRD. ([#4541](https://github.com/kubernetes-sigs/external-dns/pull/4541)) _@stevehipwell_ +- Changed the implementation for `revisionHistoryLimit` to be more generic. ([#4541](https://github.com/kubernetes-sigs/external-dns/pull/4541)) _@stevehipwell_ + +### Fixed + +- Fixed the `ServiceMonitor` job name to correctly use the instance label. ([#4541](https://github.com/kubernetes-sigs/external-dns/pull/4541)) _@stevehipwell_ + +## [v1.14.4] - 2023-04-03 + +### Added + +- Added support for setting `dnsConfig`. 
([#4265](https://github.com/kubernetes-sigs/external-dns/pull/4265)) _@davhdavh_ +- Added support for `DNSEndpoint` CRD. ([#4322](https://github.com/kubernetes-sigs/external-dns/pull/4322)) _@onedr0p_ + +### Changed + +- Updated _ExternalDNS_ OCI image version to [v0.14.1](https://github.com/kubernetes-sigs/external-dns/releases/tag/v0.14.1). ([#4357](https://github.com/kubernetes-sigs/external-dns/pull/4357)) _@stevehipwell_ + +## [v1.14.3] - 2023-01-26 + +### Fixed + +- Fixed args for webhook deployment. ([#4202](https://github.com/kubernetes-sigs/external-dns/pull/4202)) [@webwurst](https://github.com/webwurst) +- Fixed support for `gateway-grpcroute`, `gateway-tlsroute`, `gateway-tcproute` & `gateway-udproute`. ([#4205](https://github.com/kubernetes-sigs/external-dns/pull/4205)) [@orenlevi111](https://github.com/orenlevi111) +- Fixed incorrect implementation for setting the `automountServiceAccountToken`. ([#4208](https://github.com/kubernetes-sigs/external-dns/pull/4208)) [@stevehipwell](https://github.com/stevehipwell) + +## [v1.14.2] - 2024-01-22 + +### Fixed + +- Restore template support in `.Values.provider` and `.Values.provider.name` + +## [v1.14.1] - 2024-01-11 + +### Fixed + +- Fixed webhook install failure: `"http-webhook-metrics": must be no more than 15 characters`. ([#4173](https://github.com/kubernetes-sigs/external-dns/pull/4173)) [@gabe565](https://github.com/gabe565) + +## [v1.14.0] - 2024-01-10 + +### Added + +- Added the option to explicitly enable or disable service account token automounting. ([#3983](https://github.com/kubernetes-sigs/external-dns/pull/3983)) [@gilles-gosuin](https://github.com/gilles-gosuin) +- Added the option to configure revisionHistoryLimit on the K8s Deployment resource. ([#4008](https://github.com/kubernetes-sigs/external-dns/pull/4008)) [@arnisoph](https://github.com/arnisoph) +- Added support for webhook providers, as a sidecar. 
([#4032](https://github.com/kubernetes-sigs/external-dns/pull/4032)) [@mloiseleur](https://github.com/mloiseleur) +- Added the option to configure ipFamilyPolicy and ipFamilies of external-dns Service. ([#4153](https://github.com/kubernetes-sigs/external-dns/pull/4153)) [@dongjiang1989](https://github.com/dongjiang1989) + +### Changed + +- Avoid unnecessary pod restart on each helm chart version. ([#4103](https://github.com/kubernetes-sigs/external-dns/pull/4103)) [@jkroepke](https://github.com/jkroepke) +- Updated _ExternalDNS_ OCI image version to [v0.14.0](https://github.com/kubernetes-sigs/external-dns/releases/tag/v0.14.0). ([#4073](https://github.com/kubernetes-sigs/external-dns/pull/4073)) [@appkins](https://github.com/appkins) + +### Deprecated + +- The `secretConfiguration` value has been deprecated in favour of creating secrets external to the Helm chart and configuring their use via the `extraVolumes` & `extraVolumeMounts` values. ([#4161](https://github.com/kubernetes-sigs/external-dns/pull/4161)) [@stevehipwell](https://github.com/stevehipwell) + +## [v1.13.1] - 2023-09-07 + +### Added + +- Added RBAC for Traefik to ClusterRole. ([#3325](https://github.com/kubernetes-sigs/external-dns/pull/3325)) [@ThomasK33](https://github.com/thomask33) +- Added support for init containers. ([#3838](https://github.com/kubernetes-sigs/external-dns/pull/3838)) [@calvinbui](https://github.com/calvinbui) + +### Changed + +- Disallowed privilege escalation in container security context and set the seccomp profile type to `RuntimeDefault`. ([#3689](https://github.com/kubernetes-sigs/external-dns/pull/3689)) [@nrvnrvn](https://github.com/nrvnrvn) +- Updated _ExternalDNS_ OCI image version to [v0.13.6](https://github.com/kubernetes-sigs/external-dns/releases/tag/v0.13.6). ([#3917](https://github.com/kubernetes-sigs/external-dns/pull/3917)) [@stevehipwell](https://github.com/stevehipwell) + +### Removed + +- Removed RBAC rule for already removed `contour-ingressroute` source.
([#3764](https://github.com/kubernetes-sigs/external-dns/pull/3764)) [@johngmyers](https://github.com/johngmyers) + +## [v1.13.0] - 2023-03-30 + +### All Changes + +- Updated _ExternalDNS_ version to [v0.13.5](https://github.com/kubernetes-sigs/external-dns/releases/tag/v0.13.5). ([#3661](https://github.com/kubernetes-sigs/external-dns/pull/3661)) [@GMartinez-Sisti](https://github.com/GMartinez-Sisti) +- Adding missing gateway-httproute cluster role permission. ([#3541](https://github.com/kubernetes-sigs/external-dns/pull/3541)) [@nicon89](https://github.com/nicon89) + +## [v1.12.2] - 2023-03-30 + +### All Changes + +- Added support for ServiceMonitor relabelling. ([#3366](https://github.com/kubernetes-sigs/external-dns/pull/3366)) [@jkroepke](https://github.com/jkroepke) +- Updated chart icon path. ([#3492](https://github.com/kubernetes-sigs/external-dns/pull/3494)) [kundan2707](https://github.com/kundan2707) +- Added RBAC for Gateway-API resources to ClusterRole. ([#3499](https://github.com/kubernetes-sigs/external-dns/pull/3499)) [@michaelvl](https://github.com/MichaelVL) +- Added RBAC for F5 VirtualServer to ClusterRole. ([#3503](https://github.com/kubernetes-sigs/external-dns/pull/3503)) [@mikejoh](https://github.com/mikejoh) +- Added support for running ExternalDNS with namespaced scope. ([#3403](https://github.com/kubernetes-sigs/external-dns/pull/3403)) [@jkroepke](https://github.com/jkroepke) +- Updated _ExternalDNS_ version to [v0.13.4](https://github.com/kubernetes-sigs/external-dns/releases/tag/v0.13.4). ([#3516](https://github.com/kubernetes-sigs/external-dns/pull/3516)) [@stevehipwell](https://github.com/stevehipwell) + +## [v1.12.1] - 2023-02-06 + +### All Changes + +- Updated _ExternalDNS_ version to [v0.13.2](https://github.com/kubernetes-sigs/external-dns/releases/tag/v0.13.2). 
([#3371](https://github.com/kubernetes-sigs/external-dns/pull/3371)) [@stevehipwell](https://github.com/stevehipwell) +- Added `secretConfiguration.subPath` to mount specific files from secret as a sub-path. ([#3227](https://github.com/kubernetes-sigs/external-dns/pull/3227)) [@jkroepke](https://github.com/jkroepke) +- Changed to use `registry.k8s.io` instead of `k8s.gcr.io`. ([#3261](https://github.com/kubernetes-sigs/external-dns/pull/3261)) [@johngmyers](https://github.com/johngmyers) + +## [v1.12.0] - 2022-11-29 + +### All Changes + +- Added ability to provide ExternalDNS with secret configuration via `secretConfiguration`. ([#3144](https://github.com/kubernetes-sigs/external-dns/pull/3144)) [@jkroepke](https://github.com/jkroepke) +- Added the ability to template `provider` & `extraArgs`. ([#3144](https://github.com/kubernetes-sigs/external-dns/pull/3144)) [@jkroepke](https://github.com/jkroepke) +- Added the ability to customise the service account labels. ([#3145](https://github.com/kubernetes-sigs/external-dns/pull/3145)) [@jkroepke](https://github.com/jkroepke) +- Updated _ExternalDNS_ version to [v0.13.1](https://github.com/kubernetes-sigs/external-dns/releases/tag/v0.13.1). ([#3197](https://github.com/kubernetes-sigs/external-dns/pull/3197)) [@stevehipwell](https://github.com/stevehipwell) + +## [v1.11.0] - 2022-08-10 + +### Added + +- Added support to configure `dnsPolicy` on the Helm chart deployment. [@michelzanini](https://github.com/michelzanini) +- Added ability to customise the deployment strategy. [mac-chaffee](https://github.com/mac-chaffee) + +### Changed + +- Updated _ExternalDNS_ version to [v0.12.2](https://github.com/kubernetes-sigs/external-dns/releases/tag/v0.12.2). [@stevehipwell](https://github.com/stevehipwell) +- Changed default deployment strategy to `Recreate`. [mac-chaffee](https://github.com/mac-chaffee) + +## [v1.10.1] - 2022-07-11 + +### Fixed + +- Fixed incorrect addition of `namespace` to `ClusterRole` & `ClusterRoleBinding`. 
[@stevehipwell](https://github.com/stevehipwell) + +## [v1.10.0] - 2022-07-08 + +### Added + +- Added `commonLabels` value to allow the addition of labels to all resources. [@stevehipwell](https://github.com/stevehipwell) +- Added support for [Process Namespace Sharing](https://kubernetes.io/docs/tasks/configure-pod-container/share-process-namespace/) via the `shareProcessNamespace` + value. ([#2715](https://github.com/kubernetes-sigs/external-dns/pull/2715)) [@wolffberg](https://github.com/wolffberg) + +### Changed + +- Update _ExternalDNS_ version to [v0.12.0](https://github.com/kubernetes-sigs/external-dns/releases/tag/v0.12.0). [@vojtechmares](https://github.com/vojtechmares) +- Set resource namespaces to `{{ .Release.Namespace }}` in the templates instead of waiting until apply time for inference. [@stevehipwell](https://github.com/stevehipwell) +- Fixed `rbac.additionalPermissions` default value.([#2796](https://github.com/kubernetes-sigs/external-dns/pull/2796)) [@tamalsaha](https://github.com/tamalsaha) + +## [v1.9.0] - 2022-04-19 + +### Changed + +- Update _ExternalDNS_ version to [v0.11.0](https://github.com/kubernetes-sigs/external-dns/releases/tag/v0.11.0). ([#2690](https://github.com/kubernetes-sigs/external-dns/pull/2690)) [@stevehipwell](https://github.com/stevehipwell) + +## [v1.8.0] - 2022-04-13 + +### Added + +- Add annotations to Deployment. ([#2477](https://github.com/kubernetes-sigs/external-dns/pull/2477)) [@beastob](https://github.com/beastob) + +### Changed + +- Fix RBAC for `istio-virtualservice` source when `istio-gateway` isn't also added. 
([#2564](https://github.com/kubernetes-sigs/external-dns/pull/2564)) [@mcwarman](https://github.com/mcwarman) + + +[UNRELEASED]: https://github.com/kubernetes-sigs/external-dns/tree/master/charts/external-dns +[v1.15.0]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.15.0 +[v1.14.5]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.14.5 +[v1.14.4]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.14.4 +[v1.14.3]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.14.3 +[v1.14.2]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.14.2 +[v1.14.1]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.14.1 +[v1.14.0]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.14.0 +[v1.13.1]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.13.1 +[v1.13.0]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.13.0 +[v1.12.2]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.12.2 +[v1.12.1]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.12.1 +[v1.12.0]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.12.0 +[v1.11.0]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.11.0 +[v1.10.1]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.10.1 +[v1.10.0]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.10.0 +[v1.9.0]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.9.0 +[v1.8.0]: https://github.com/kubernetes-sigs/external-dns/releases/tag/external-dns-helm-chart-1.8.0 diff --git 
a/packages/system/external-dns/charts/external-dns/Chart.yaml b/packages/system/external-dns/charts/external-dns/Chart.yaml new file mode 100644 index 00000000..c7245bd1 --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/Chart.yaml @@ -0,0 +1,33 @@ +annotations: + artifacthub.io/changes: | + - kind: changed + description: "Updated _ExternalDNS_ OCI image version to [v0.15.0](https://github.com/kubernetes-sigs/external-dns/releases/tag/v0.15.0)." + - kind: fixed + description: "Fixed `provider.webhook.resources` behavior to correctly leverage resource limits." + - kind: fixed + description: "Fixed `provider.webhook.imagePullPolicy` behavior to correctly leverage pull policy." + - kind: fixed + description: "Fixed to add correct webhook metric port to `Service` and `ServiceMonitor`." + - kind: fixed + description: "Fixed to no longer require the unauthenticated webhook provider port to be exposed for health probes." +apiVersion: v2 +appVersion: 0.15.0 +description: ExternalDNS synchronizes exposed Kubernetes Services and Ingresses with + DNS providers. 
+home: https://github.com/kubernetes-sigs/external-dns/ +icon: https://github.com/kubernetes-sigs/external-dns/raw/master/docs/img/external-dns.png +keywords: +- kubernetes +- externaldns +- external-dns +- dns +- service +- ingress +maintainers: +- email: steve.hipwell@gmail.com + name: stevehipwell +name: external-dns +sources: +- https://github.com/kubernetes-sigs/external-dns/ +type: application +version: 1.15.0 diff --git a/packages/system/external-dns/charts/external-dns/README.md b/packages/system/external-dns/charts/external-dns/README.md new file mode 100644 index 00000000..9b21ecde --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/README.md @@ -0,0 +1,182 @@ +# external-dns + +![Version: 1.15.0](https://img.shields.io/badge/Version-1.15.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.15.0](https://img.shields.io/badge/AppVersion-0.15.0-informational?style=flat-square) + +ExternalDNS synchronizes exposed Kubernetes Services and Ingresses with DNS providers. + +**Homepage:** <https://github.com/kubernetes-sigs/external-dns/> + +## Maintainers + +| Name | Email | Url | +| ---- | ------ | --- | +| stevehipwell | <steve.hipwell@gmail.com> | | + +## Source Code + +* <https://github.com/kubernetes-sigs/external-dns/> + +## Installing the Chart + +Before you can install the chart you will need to add the `external-dns` repo to [Helm](https://helm.sh/). + +```shell +helm repo add external-dns https://kubernetes-sigs.github.io/external-dns/ +``` + +After you've installed the repo you can install the chart. + +```shell +helm upgrade --install external-dns external-dns/external-dns --version 1.15.0 +``` + +## Providers + +Configuring the _ExternalDNS_ provider should be done via the `provider.name` value with provider specific configuration being set via the `provider.<name>.<key>` values, where supported, and the `extraArgs` value. For legacy support `provider` can be set to the name of the provider with all additional configuration being set via the `extraArgs` value.
+See [documentation](https://kubernetes-sigs.github.io/external-dns/#new-providers) for more info on available providers and tutorials. + +### Providers with Specific Configuration Support + +| Provider | Supported | +|------------------------|------------| +| `webhook` | ✅ | + +### Other Providers + +For set up for a specific provider using the Helm chart, see the following links: + +- [AWS](https://github.com/kubernetes-sigs/external-dns/blob/master/docs/tutorials/aws.md#using-helm-with-oidc) +- [akamai-edgedns](https://github.com/kubernetes-sigs/external-dns/blob/master/docs/tutorials/akamai-edgedns.md#using-helm) +- [cloudflare](https://github.com/kubernetes-sigs/external-dns/blob/master/docs/tutorials/cloudflare.md#using-helm) +- [digitalocean](https://github.com/kubernetes-sigs/external-dns/blob/master/docs/tutorials/digitalocean.md#using-helm) +- [godaddy](https://github.com/kubernetes-sigs/external-dns/blob/master/docs/tutorials/godaddy.md#using-helm) +- [ns1](https://github.com/kubernetes-sigs/external-dns/blob/master/docs/tutorials/ns1.md#using-helm) +- [plural](https://github.com/kubernetes-sigs/external-dns/blob/master/docs/tutorials/plural.md#using-helm) + +## Namespaced Scoped Installation + +external-dns supports running on a namespaced only scope, too. +If `namespaced=true` is defined, the helm chart will set up `Roles` and `RoleBindings` instead of `ClusterRoles` and `ClusterRoleBindings`. + +### Limited Support + +Not all sources are supported in namespaced scope, since some sources depend on cluster-wide resources. +For example: Source `node` isn't supported, since `kind: Node` has scope `Cluster`. +Sources like `istio-virtualservice` only work if all resources like `Gateway` and `VirtualService` are present in the same +namespace as `external-dns`. + +The annotation `external-dns.alpha.kubernetes.io/endpoints-type: NodeExternalIP` is not supported.
+ +If `namespaced` is set to `true`, please ensure that `sources` contains only supported sources (Default: `service,ingress`). + +### Support Matrix + +| Source | Supported | Infos | +|------------------------|------------|------------------------| +| `ingress` | ✅ | | +| `istio-gateway` | ✅ | | +| `istio-virtualservice` | ✅ | | +| `crd` | ✅ | | +| `kong-tcpingress` | ✅ | | +| `openshift-route` | ✅ | | +| `skipper-routegroup` | ✅ | | +| `gloo-proxy` | ✅ | | +| `contour-httpproxy` | ✅ | | +| `service` | ⚠️️ | NodePort not supported | +| `node` | ❌ | | +| `pod` | ❌ | | + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity settings for `Pod` [scheduling](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). If an explicit label selector is not provided for pod affinity or pod anti-affinity one will be created from the pod selector labels. | +| automountServiceAccountToken | bool | `nil` | Set this to `false` to [opt out of API credential automounting](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#opt-out-of-api-credential-automounting) for the `Pod`. | +| commonLabels | object | `{}` | Labels to add to all chart resources. | +| deploymentAnnotations | object | `{}` | Annotations to add to the `Deployment`. | +| deploymentStrategy | object | `{"type":"Recreate"}` | [Deployment Strategy](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#strategy). | +| dnsConfig | object | `nil` | [DNS config](https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-dns-config) for the pod, if not set the default will be used. | +| dnsPolicy | string | `nil` | [DNS policy](https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-s-dns-policy) for the pod, if not set the default will be used.
| +| domainFilters | list | `[]` | | +| env | list | `[]` | [Environment variables](https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/) for the `external-dns` container. | +| excludeDomains | list | `[]` | | +| extraArgs | list | `[]` | Extra arguments to provide to _ExternalDNS_. | +| extraContainers | object | `{}` | Extra containers to add to the `Deployment`. | +| extraVolumeMounts | list | `[]` | Extra [volume mounts](https://kubernetes.io/docs/concepts/storage/volumes/) for the `external-dns` container. | +| extraVolumes | list | `[]` | Extra [volumes](https://kubernetes.io/docs/concepts/storage/volumes/) for the `Pod`. | +| fullnameOverride | string | `nil` | Override the full name of the chart. | +| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for the `external-dns` container. | +| image.repository | string | `"registry.k8s.io/external-dns/external-dns"` | Image repository for the `external-dns` container. | +| image.tag | string | `nil` | Image tag for the `external-dns` container, this will default to `.Chart.AppVersion` if not set. | +| imagePullSecrets | list | `[]` | Image pull secrets. | +| initContainers | list | `[]` | [Init containers](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/) to add to the `Pod` definition. | +| interval | string | `"1m"` | Interval for DNS updates. | +| livenessProbe | object | See _values.yaml_ | [Liveness probe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/) configuration for the `external-dns` container. | +| logFormat | string | `"text"` | Log format. | +| logLevel | string | `"info"` | Log level. | +| nameOverride | string | `nil` | Override the name of the chart. | +| namespaced | bool | `false` | If `true`, _ExternalDNS_ will run in a namespaced scope (`Role` and `RoleBinding` will be namespaced too).
| +| nodeSelector | object | `{}` | Node labels to match for `Pod` [scheduling](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). | +| podAnnotations | object | `{}` | Annotations to add to the `Pod`. | +| podLabels | object | `{}` | Labels to add to the `Pod`. | +| podSecurityContext | object | See _values.yaml_ | [Pod security context](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#podsecuritycontext-v1-core), this supports full customisation. | +| policy | string | `"upsert-only"` | How DNS records are synchronized between sources and providers; available values are `sync` & `upsert-only`. | +| priorityClassName | string | `nil` | Priority class name for the `Pod`. | +| provider.name | string | `"aws"` | _ExternalDNS_ provider name; for the available providers and how to configure them see [README](https://github.com/kubernetes-sigs/external-dns/blob/master/charts/external-dns/README.md#providers). | +| provider.webhook.args | list | `[]` | Extra arguments to provide for the `webhook` container. | +| provider.webhook.env | list | `[]` | [Environment variables](https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/) for the `webhook` container. | +| provider.webhook.extraVolumeMounts | list | `[]` | Extra [volume mounts](https://kubernetes.io/docs/concepts/storage/volumes/) for the `webhook` container. | +| provider.webhook.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for the `webhook` container. | +| provider.webhook.image.repository | string | `nil` | Image repository for the `webhook` container. | +| provider.webhook.image.tag | string | `nil` | Image tag for the `webhook` container. | +| provider.webhook.livenessProbe | object | See _values.yaml_ | [Liveness probe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/) configuration for the `webhook` container.
| +| provider.webhook.readinessProbe | object | See _values.yaml_ | [Readiness probe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/) configuration for the `webhook` container. | +| provider.webhook.resources | object | `{}` | [Resources](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) for the `webhook` container. | +| provider.webhook.securityContext | object | See _values.yaml_ | [Pod security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for the `webhook` container. | +| provider.webhook.service.port | int | `8080` | Webhook exposed HTTP port for the service. | +| provider.webhook.serviceMonitor | object | See _values.yaml_ | Optional [Service Monitor](https://prometheus-operator.dev/docs/operator/design/#servicemonitor) configuration for the `webhook` container. | +| rbac.additionalPermissions | list | `[]` | Additional rules to add to the `ClusterRole`. | +| rbac.create | bool | `true` | If `true`, create a `ClusterRole` & `ClusterRoleBinding` with access to the Kubernetes API. | +| readinessProbe | object | See _values.yaml_ | [Readiness probe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/) configuration for the `external-dns` container. | +| registry | string | `"txt"` | Specify the registry for storing ownership and labels. Valid values are `txt`, `aws-sd`, `dynamodb` & `noop`. | +| resources | object | `{}` | [Resources](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) for the `external-dns` container. | +| revisionHistoryLimit | int | `nil` | Specify the number of old `ReplicaSets` to retain to allow rollback of the `Deployment`. | +| secretConfiguration.data | object | `{}` | `Secret` data.
| +| secretConfiguration.enabled | bool | `false` | If `true`, create a `Secret` to store sensitive provider configuration (**DEPRECATED**). | +| secretConfiguration.mountPath | string | `nil` | Mount path for the `Secret`, this can be templated. | +| secretConfiguration.subPath | string | `nil` | Sub-path for mounting the `Secret`, this can be templated. | +| securityContext | object | See _values.yaml_ | [Security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for the `external-dns` container. | +| service.annotations | object | `{}` | Service annotations. | +| service.ipFamilies | list | `[]` | Service IP families. | +| service.ipFamilyPolicy | string | `nil` | Service IP family policy. | +| service.port | int | `7979` | Service HTTP port. | +| serviceAccount.annotations | object | `{}` | Annotations to add to the service account. | +| serviceAccount.automountServiceAccountToken | string | `nil` | Set this to `false` to [opt out of API credential automounting](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#opt-out-of-api-credential-automounting) for the `ServiceAccount`. | +| serviceAccount.create | bool | `true` | If `true`, create a new `ServiceAccount`. | +| serviceAccount.labels | object | `{}` | Labels to add to the service account. | +| serviceAccount.name | string | `nil` | If this is set and `serviceAccount.create` is `true` this will be used for the created `ServiceAccount` name, if set and `serviceAccount.create` is `false` then this will define an existing `ServiceAccount` to use. | +| serviceMonitor.additionalLabels | object | `{}` | Additional labels for the `ServiceMonitor`. | +| serviceMonitor.annotations | object | `{}` | Annotations to add to the `ServiceMonitor`. | +| serviceMonitor.bearerTokenFile | string | `nil` | Provide a bearer token file for the `ServiceMonitor`. 
| +| serviceMonitor.enabled | bool | `false` | If `true`, create a `ServiceMonitor` resource to support the _Prometheus Operator_. | +| serviceMonitor.interval | string | `nil` | If set override the _Prometheus_ default interval. | +| serviceMonitor.metricRelabelings | list | `[]` | [Metric relabel configs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs) to apply to samples before ingestion. | +| serviceMonitor.namespace | string | `nil` | If set create the `ServiceMonitor` in an alternate namespace. | +| serviceMonitor.relabelings | list | `[]` | [Relabel configs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) to apply to samples before ingestion. | +| serviceMonitor.scheme | string | `nil` | If set overrides the _Prometheus_ default scheme. | +| serviceMonitor.scrapeTimeout | string | `nil` | If set override the _Prometheus_ default scrape timeout. | +| serviceMonitor.targetLabels | list | `[]` | Provide target labels for the `ServiceMonitor`. | +| serviceMonitor.tlsConfig | object | `{}` | Configure the `ServiceMonitor` [TLS config](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#tlsconfig). | +| shareProcessNamespace | bool | `false` | If `true`, the `Pod` will have [process namespace sharing](https://kubernetes.io/docs/tasks/configure-pod-container/share-process-namespace/) enabled. | +| sources | list | `["service","ingress"]` | _Kubernetes_ resources to monitor for DNS entries. | +| terminationGracePeriodSeconds | int | `nil` | Termination grace period for the `Pod` in seconds. | +| tolerations | list | `[]` | Node taints which will be tolerated for `Pod` [scheduling](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). | +| topologySpreadConstraints | list | `[]` | Topology spread constraints for `Pod` [scheduling](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). 
If an explicit label selector is not provided one will be created from the pod selector labels. | +| triggerLoopOnEvent | bool | `false` | If `true`, triggers the run loop on create/update/delete events in addition to the regular interval. | +| txtOwnerId | string | `nil` | Specify an identifier for this instance of _ExternalDNS_ when using a registry other than `noop`. | +| txtPrefix | string | `nil` | Specify a prefix for the domain names of TXT records created for the `txt` registry. Mutually exclusive with `txtSuffix`. | +| txtSuffix | string | `nil` | Specify a suffix for the domain names of TXT records created for the `txt` registry. Mutually exclusive with `txtPrefix`. | + +---------------------------------------------- + +Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/). diff --git a/packages/system/external-dns/charts/external-dns/README.md.gotmpl b/packages/system/external-dns/charts/external-dns/README.md.gotmpl new file mode 100644 index 00000000..e313a2ba --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/README.md.gotmpl @@ -0,0 +1,91 @@ +{{ template "chart.header" . }} +{{ template "chart.deprecationWarning" . }} + +{{ template "chart.badgesSection" . }} + +{{ template "chart.description" . }} + +{{ template "chart.homepageLine" . }} + +{{ template "chart.maintainersSection" . }} + +{{ template "chart.sourcesSection" . }} + +## Installing the Chart + +Before you can install the chart you will need to add the `external-dns` repo to [Helm](https://helm.sh/). + +```shell +helm repo add external-dns https://kubernetes-sigs.github.io/external-dns/ +``` + +After you've installed the repo you can install the chart. + +```shell +helm upgrade --install {{ template "chart.name" . }} external-dns/{{ template "chart.name" . }} --version {{ template "chart.version" .
}} +``` + +## Providers + +Configuring the _ExternalDNS_ provider should be done via the `provider.name` value with provider specific configuration being set via the `provider.<name>.<key>` values, where supported, and the `extraArgs` value. For legacy support `provider` can be set to the name of the provider with all additional configuration being set via the `extraArgs` value. +See [documentation](https://kubernetes-sigs.github.io/external-dns/#new-providers) for more info on available providers and tutorials. + +### Providers with Specific Configuration Support + +| Provider | Supported | +|------------------------|------------| +| `webhook` | ✅ | + +### Other Providers + +For set up for a specific provider using the Helm chart, see the following links: + +- [AWS](https://github.com/kubernetes-sigs/external-dns/blob/master/docs/tutorials/aws.md#using-helm-with-oidc) +- [akamai-edgedns](https://github.com/kubernetes-sigs/external-dns/blob/master/docs/tutorials/akamai-edgedns.md#using-helm) +- [cloudflare](https://github.com/kubernetes-sigs/external-dns/blob/master/docs/tutorials/cloudflare.md#using-helm) +- [digitalocean](https://github.com/kubernetes-sigs/external-dns/blob/master/docs/tutorials/digitalocean.md#using-helm) +- [godaddy](https://github.com/kubernetes-sigs/external-dns/blob/master/docs/tutorials/godaddy.md#using-helm) +- [ns1](https://github.com/kubernetes-sigs/external-dns/blob/master/docs/tutorials/ns1.md#using-helm) +- [plural](https://github.com/kubernetes-sigs/external-dns/blob/master/docs/tutorials/plural.md#using-helm) + +## Namespaced Scoped Installation + +external-dns supports running on a namespaced only scope, too. +If `namespaced=true` is defined, the helm chart will set up `Roles` and `RoleBindings` instead of `ClusterRoles` and `ClusterRoleBindings`. + +### Limited Support + +Not all sources are supported in namespaced scope, since some sources depend on cluster-wide resources.
+For example: Source `node` isn't supported, since `kind: Node` has scope `Cluster`. +Sources like `istio-virtualservice` only work if all resources like `Gateway` and `VirtualService` are present in the same +namespace as `external-dns`. + +The annotation `external-dns.alpha.kubernetes.io/endpoints-type: NodeExternalIP` is not supported. + +If `namespaced` is set to `true`, please ensure that `sources` contains only supported sources (Default: `service,ingress`). + +### Support Matrix + +| Source | Supported | Infos | +|------------------------|------------|------------------------| +| `ingress` | ✅ | | +| `istio-gateway` | ✅ | | +| `istio-virtualservice` | ✅ | | +| `crd` | ✅ | | +| `kong-tcpingress` | ✅ | | +| `openshift-route` | ✅ | | +| `skipper-routegroup` | ✅ | | +| `gloo-proxy` | ✅ | | +| `contour-httpproxy` | ✅ | | +| `service` | ⚠️️ | NodePort not supported | +| `node` | ❌ | | +| `pod` | ❌ | | + + +{{ template "chart.requirementsSection" . }} + +{{ template "chart.valuesSection" . }} + +---------------------------------------------- + +Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/). diff --git a/packages/system/external-dns/charts/external-dns/RELEASE.md b/packages/system/external-dns/charts/external-dns/RELEASE.md new file mode 100644 index 00000000..02634a30 --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/RELEASE.md @@ -0,0 +1,10 @@ +### Changed + +- Updated _ExternalDNS_ OCI image version to [v0.15.0](https://github.com/kubernetes-sigs/external-dns/releases/tag/v0.15.0). ([#xxxx](https://github.com/kubernetes-sigs/external-dns/pull/xxxx)) _@stevehipwell_ + +### Fixed + +- Fixed `provider.webhook.resources` behavior to correctly leverage resource limits. ([#4560](https://github.com/kubernetes-sigs/external-dns/pull/4560)) _@crutonjohn_ +- Fixed `provider.webhook.imagePullPolicy` behavior to correctly leverage pull policy.
([#4643](https://github.com/kubernetes-sigs/external-dns/pull/4643)) _@kimsondrup_ +- Fixed to add correct webhook metric port to `Service` and `ServiceMonitor`. ([#4643](https://github.com/kubernetes-sigs/external-dns/pull/4643)) _@kimsondrup_ +- Fixed to no longer require the unauthenticated webhook provider port to be exposed for health probes. ([#4691](https://github.com/kubernetes-sigs/external-dns/pull/4691)) _@kimsondrup_ & _@hatrx_ diff --git a/packages/system/external-dns/charts/external-dns/ci/ci-values.yaml b/packages/system/external-dns/charts/external-dns/ci/ci-values.yaml new file mode 100644 index 00000000..4d278e94 --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/ci/ci-values.yaml @@ -0,0 +1,2 @@ +provider: + name: inmemory diff --git a/packages/system/external-dns/charts/external-dns/crds/dnsendpoint.yaml b/packages/system/external-dns/charts/external-dns/crds/dnsendpoint.yaml new file mode 100644 index 00000000..822cd850 --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/crds/dnsendpoint.yaml @@ -0,0 +1,102 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: dnsendpoints.externaldns.k8s.io + annotations: + api-approved.kubernetes.io: https://github.com/kubernetes-sigs/external-dns/pull/2007 +spec: + group: externaldns.k8s.io + names: + kind: DNSEndpoint + listKind: DNSEndpointList + plural: dnsendpoints + singular: dnsendpoint + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. 
+ Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: DNSEndpointSpec defines the desired state of DNSEndpoint + properties: + endpoints: + items: + description: + Endpoint is a high-level way of a connection between + a service and an IP + properties: + dnsName: + description: The hostname of the DNS record + type: string + labels: + additionalProperties: + type: string + description: Labels stores labels defined for the Endpoint + type: object + providerSpecific: + description: ProviderSpecific stores provider specific config + items: + description: + ProviderSpecificProperty holds the name and value + of a configuration which is specific to individual DNS providers + properties: + name: + type: string + value: + type: string + type: object + type: array + recordTTL: + description: TTL for the record + format: int64 + type: integer + recordType: + description: + RecordType type of record, e.g. CNAME, A, AAAA, + SRV, TXT etc + type: string + setIdentifier: + description: + Identifier to distinguish multiple records with + the same name and type (e.g. Route53 records with routing + policies other than 'simple') + type: string + targets: + description: The targets the DNS record points to + items: + type: string + type: array + type: object + type: array + type: object + status: + description: DNSEndpointStatus defines the observed state of DNSEndpoint + properties: + observedGeneration: + description: The generation observed by the external-dns controller. 
+ format: int64 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/packages/system/external-dns/charts/external-dns/templates/NOTES.txt b/packages/system/external-dns/charts/external-dns/templates/NOTES.txt new file mode 100644 index 00000000..5e37ecca --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/templates/NOTES.txt @@ -0,0 +1,7 @@ +*********************************************************************** +* External DNS * +*********************************************************************** + Chart version: {{ .Chart.Version }} + App version: {{ .Chart.AppVersion }} + Image tag: {{ include "external-dns.image" . }} +*********************************************************************** diff --git a/packages/system/external-dns/charts/external-dns/templates/_helpers.tpl b/packages/system/external-dns/charts/external-dns/templates/_helpers.tpl new file mode 100644 index 00000000..3ce55cd8 --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/templates/_helpers.tpl @@ -0,0 +1,95 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "external-dns.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "external-dns.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "external-dns.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "external-dns.labels" -}} +helm.sh/chart: {{ include "external-dns.chart" . }} +{{ include "external-dns.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- with .Values.commonLabels }} +{{ toYaml . }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "external-dns.selectorLabels" -}} +app.kubernetes.io/name: {{ include "external-dns.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "external-dns.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "external-dns.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +The image to use +*/}} +{{- define "external-dns.image" -}} +{{- printf "%s:%s" .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }} +{{- end }} + +{{/* +Provider name, Keeps backward compatibility on provider +*/}} +{{- define "external-dns.providerName" -}} +{{- if eq (typeOf .Values.provider) "string" }} +{{- .Values.provider }} +{{- else }} +{{- .Values.provider.name }} +{{- end }} +{{- end }} + +{{/* +The image to use for optional webhook sidecar +*/}} +{{- define "external-dns.webhookImage" -}} +{{- with .image }} +{{- if or (empty .repository) (empty .tag) }} +{{- fail "ERROR: webhook provider needs an image repository and a tag" }} +{{- end }} +{{- printf "%s:%s" .repository .tag }} +{{- end }} +{{- end }} diff --git a/packages/system/external-dns/charts/external-dns/templates/clusterrole.yaml 
b/packages/system/external-dns/charts/external-dns/templates/clusterrole.yaml new file mode 100644 index 00000000..44f72bd2 --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/templates/clusterrole.yaml @@ -0,0 +1,127 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: {{ .Values.namespaced | ternary "Role" "ClusterRole" }} +metadata: + name: {{ template "external-dns.fullname" . }} + labels: + {{- include "external-dns.labels" . | nindent 4 }} +rules: +{{- if and (not .Values.namespaced) (or (has "node" .Values.sources) (has "pod" .Values.sources) (has "service" .Values.sources) (has "contour-httpproxy" .Values.sources) (has "gloo-proxy" .Values.sources) (has "openshift-route" .Values.sources) (has "skipper-routegroup" .Values.sources)) }} + - apiGroups: [""] + resources: ["nodes"] + verbs: ["list","watch"] +{{- end }} +{{- if or (has "pod" .Values.sources) (has "service" .Values.sources) (has "contour-httpproxy" .Values.sources) (has "gloo-proxy" .Values.sources) (has "openshift-route" .Values.sources) (has "skipper-routegroup" .Values.sources) }} + - apiGroups: [""] + resources: ["pods"] + verbs: ["get","watch","list"] +{{- end }} +{{- if or (has "service" .Values.sources) (has "contour-httpproxy" .Values.sources) (has "gloo-proxy" .Values.sources) (has "istio-gateway" .Values.sources) (has "istio-virtualservice" .Values.sources) (has "openshift-route" .Values.sources) (has "skipper-routegroup" .Values.sources) }} + - apiGroups: [""] + resources: ["services","endpoints"] + verbs: ["get","watch","list"] +{{- end }} +{{- if or (has "ingress" .Values.sources) (has "contour-httpproxy" .Values.sources) (has "openshift-route" .Values.sources) (has "skipper-routegroup" .Values.sources) }} + - apiGroups: ["extensions","networking.k8s.io"] + resources: ["ingresses"] + verbs: ["get","watch","list"] +{{- end }} +{{- if or (has "istio-gateway" .Values.sources) (has "istio-virtualservice" .Values.sources) }} + - apiGroups: 
["networking.istio.io"] + resources: ["gateways"] + verbs: ["get","watch","list"] +{{- end }} + +{{- if has "istio-virtualservice" .Values.sources }} + - apiGroups: ["networking.istio.io"] + resources: ["virtualservices"] + verbs: ["get","watch","list"] +{{- end }} +{{- if has "ambassador-host" .Values.sources }} + - apiGroups: ["getambassador.io"] + resources: ["hosts","ingresses"] + verbs: ["get","watch","list"] +{{- end }} +{{- if has "contour-httpproxy" .Values.sources }} + - apiGroups: ["projectcontour.io"] + resources: ["httpproxies"] + verbs: ["get","watch","list"] +{{- end }} +{{- if has "crd" .Values.sources }} + - apiGroups: ["externaldns.k8s.io"] + resources: ["dnsendpoints"] + verbs: ["get","watch","list"] + - apiGroups: ["externaldns.k8s.io"] + resources: ["dnsendpoints/status"] + verbs: ["*"] +{{- end }} +{{- if or (has "gateway-httproute" .Values.sources) (has "gateway-grpcroute" .Values.sources) (has "gateway-tlsroute" .Values.sources) (has "gateway-tcproute" .Values.sources) (has "gateway-udproute" .Values.sources) }} + - apiGroups: ["gateway.networking.k8s.io"] + resources: ["gateways"] + verbs: ["get","watch","list"] + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["get","watch","list"] +{{- end }} +{{- if has "gateway-httproute" .Values.sources }} + - apiGroups: ["gateway.networking.k8s.io"] + resources: ["httproutes"] + verbs: ["get","watch","list"] +{{- end }} +{{- if has "gateway-grpcroute" .Values.sources }} + - apiGroups: ["gateway.networking.k8s.io"] + resources: ["grpcroutes"] + verbs: ["get","watch","list"] +{{- end }} +{{- if has "gateway-tlsroute" .Values.sources }} + - apiGroups: ["gateway.networking.k8s.io"] + resources: ["tlsroutes"] + verbs: ["get","watch","list"] +{{- end }} +{{- if has "gateway-tcproute" .Values.sources }} + - apiGroups: ["gateway.networking.k8s.io"] + resources: ["tcproutes"] + verbs: ["get","watch","list"] +{{- end }} +{{- if has "gateway-udproute" .Values.sources }} + - apiGroups: 
["gateway.networking.k8s.io"] + resources: ["udproutes"] + verbs: ["get","watch","list"] +{{- end }} +{{- if has "gloo-proxy" .Values.sources }} + - apiGroups: ["gloo.solo.io","gateway.solo.io"] + resources: ["proxies","virtualservices"] + verbs: ["get","watch","list"] +{{- end }} +{{- if has "kong-tcpingress" .Values.sources }} + - apiGroups: ["configuration.konghq.com"] + resources: ["tcpingresses"] + verbs: ["get","watch","list"] +{{- end }} +{{- if has "traefik-proxy" .Values.sources }} + - apiGroups: ["traefik.containo.us", "traefik.io"] + resources: ["ingressroutes", "ingressroutetcps", "ingressrouteudps"] + verbs: ["get","watch","list"] +{{- end }} +{{- if has "openshift-route" .Values.sources }} + - apiGroups: ["route.openshift.io"] + resources: ["routes"] + verbs: ["get","watch","list"] +{{- end }} +{{- if has "skipper-routegroup" .Values.sources }} + - apiGroups: ["zalando.org"] + resources: ["routegroups"] + verbs: ["get","watch","list"] + - apiGroups: ["zalando.org"] + resources: ["routegroups/status"] + verbs: ["patch","update"] +{{- end }} +{{- if has "f5-virtualserver" .Values.sources }} + - apiGroups: ["cis.f5.com"] + resources: ["virtualservers"] + verbs: ["get","watch","list"] +{{- end }} +{{- with .Values.rbac.additionalPermissions }} + {{- toYaml . | nindent 2 }} +{{- end }} +{{- end }} diff --git a/packages/system/external-dns/charts/external-dns/templates/clusterrolebinding.yaml b/packages/system/external-dns/charts/external-dns/templates/clusterrolebinding.yaml new file mode 100644 index 00000000..74a51476 --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/templates/clusterrolebinding.yaml @@ -0,0 +1,16 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: {{ .Values.namespaced | ternary "RoleBinding" "ClusterRoleBinding" }} +metadata: + name: {{ printf "%s-viewer" (include "external-dns.fullname" .) }} + labels: + {{- include "external-dns.labels" . 
| nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: {{ .Values.namespaced | ternary "Role" "ClusterRole" }} + name: {{ template "external-dns.fullname" . }} +subjects: + - kind: ServiceAccount + name: {{ template "external-dns.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- end }} diff --git a/packages/system/external-dns/charts/external-dns/templates/deployment.yaml b/packages/system/external-dns/charts/external-dns/templates/deployment.yaml new file mode 100644 index 00000000..02e9b397 --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/templates/deployment.yaml @@ -0,0 +1,209 @@ +{{- $providerName := tpl (include "external-dns.providerName" .) $ }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "external-dns.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "external-dns.labels" . | nindent 4 }} + {{- with .Values.deploymentAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "external-dns.selectorLabels" . | nindent 6 }} + strategy: + {{- toYaml .Values.deploymentStrategy | nindent 4 }} + {{- if not (has (quote .Values.revisionHistoryLimit) (list "" (quote ""))) }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit | int64 }} + {{- end }} + template: + metadata: + labels: + {{- include "external-dns.selectorLabels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if or .Values.secretConfiguration.enabled .Values.podAnnotations }} + annotations: + {{- if .Values.secretConfiguration.enabled }} + checksum/secret: {{ tpl (toYaml .Values.secretConfiguration.data) . | sha256sum }} + {{- end }} + {{- with .Values.podAnnotations }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + spec: + {{- if not (quote .Values.automountServiceAccountToken | empty) }} + automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} + {{- end }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "external-dns.serviceAccountName" . }} + {{- with .Values.shareProcessNamespace }} + shareProcessNamespace: {{ . }} + {{- end }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.priorityClassName }} + priorityClassName: {{ . | quote }} + {{- end }} + {{- with .Values.terminationGracePeriodSeconds }} + terminationGracePeriodSeconds: {{ . }} + {{- end }} + {{- with .Values.dnsPolicy }} + dnsPolicy: {{ . }} + {{- end }} + {{- with .Values.dnsConfig }} + dnsConfig: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.initContainers }} + initContainers: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + {{- with .Values.extraContainers }} + {{- toYaml . | nindent 8 }} + {{- end }} + - name: external-dns + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + image: {{ include "external-dns.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- with .Values.env }} + env: + {{- toYaml . | nindent 12 }} + {{- end }} + args: + - --log-level={{ .Values.logLevel }} + - --log-format={{ .Values.logFormat }} + - --interval={{ .Values.interval }} + {{- if .Values.triggerLoopOnEvent }} + - --events + {{- end }} + {{- range .Values.sources }} + - --source={{ . 
}} + {{- end }} + - --policy={{ .Values.policy }} + - --registry={{ .Values.registry }} + {{- if .Values.txtOwnerId }} + - --txt-owner-id={{ .Values.txtOwnerId }} + {{- end }} + {{- if .Values.txtPrefix }} + - --txt-prefix={{ .Values.txtPrefix }} + {{- end }} + {{- if and (eq .Values.txtPrefix "") (ne .Values.txtSuffix "") }} + - --txt-suffix={{ .Values.txtSuffix }} + {{- end }} + {{- if .Values.namespaced }} + - --namespace={{ .Release.Namespace }} + {{- end }} + {{- range .Values.domainFilters }} + - --domain-filter={{ . }} + {{- end }} + {{- range .Values.excludeDomains }} + - --exclude-domains={{ . }} + {{- end }} + - --provider={{ $providerName }} + {{- range .Values.extraArgs }} + - {{ tpl . $ }} + {{- end }} + ports: + - name: http + protocol: TCP + containerPort: 7979 + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- if or .Values.secretConfiguration.enabled .Values.extraVolumeMounts }} + volumeMounts: + {{- if .Values.secretConfiguration.enabled }} + - name: secrets + mountPath: {{ tpl .Values.secretConfiguration.mountPath $ }} + {{- with .Values.secretConfiguration.subPath }} + subPath: {{ tpl . $ }} + {{- end }} + {{- end }} + {{- with .Values.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if eq $providerName "webhook" }} + {{- with .Values.provider.webhook }} + - name: webhook + image: {{ include "external-dns.webhookImage" . }} + imagePullPolicy: {{ .image.pullPolicy }} + {{- with .env }} + env: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .args }} + args: + {{- toYaml . 
| nindent 12 }} + {{- end }} + ports: + - name: http-webhook + protocol: TCP + containerPort: 8080 + livenessProbe: + {{- toYaml .livenessProbe | nindent 12 }} + readinessProbe: + {{- toYaml .readinessProbe | nindent 12 }} + {{- if .extraVolumeMounts }} + volumeMounts: + {{- with .extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- end }} + {{- with .resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- end }} + {{- end }} + {{- if or .Values.secretConfiguration.enabled .Values.extraVolumes }} + volumes: + {{- if .Values.secretConfiguration.enabled }} + - name: secrets + secret: + secretName: {{ include "external-dns.fullname" . }} + {{- end }} + {{- with .Values.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/packages/system/external-dns/charts/external-dns/templates/secret.yaml b/packages/system/external-dns/charts/external-dns/templates/secret.yaml new file mode 100644 index 00000000..89ec1fe5 --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/templates/secret.yaml @@ -0,0 +1,13 @@ +{{- if .Values.secretConfiguration.enabled }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "external-dns.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "external-dns.labels" . 
| nindent 4 }} +data: +{{- range $key, $value := .Values.secretConfiguration.data }} + {{ $key }}: {{ tpl $value $ | b64enc | quote }} +{{- end }} +{{- end }} diff --git a/packages/system/external-dns/charts/external-dns/templates/service.yaml b/packages/system/external-dns/charts/external-dns/templates/service.yaml new file mode 100644 index 00000000..e55e2a36 --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/templates/service.yaml @@ -0,0 +1,36 @@ +{{- $providerName := include "external-dns.providerName" . }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "external-dns.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "external-dns.labels" . | nindent 4 }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: +{{- with .Values.service.ipFamilies }} + ipFamilies: + {{- toYaml . | nindent 4 }} +{{- end }} +{{- with .Values.service.ipFamilyPolicy }} + ipFamilyPolicy: {{ . }} +{{- end }} + type: ClusterIP + selector: + {{- include "external-dns.selectorLabels" . | nindent 4 }} + ports: + - name: http + port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + {{- if eq $providerName "webhook" }} + {{- with .Values.provider.webhook.service }} + - name: http-webhook + port: {{ .port }} + targetPort: http-webhook + protocol: TCP + {{- end }} + {{- end }} diff --git a/packages/system/external-dns/charts/external-dns/templates/serviceaccount.yaml b/packages/system/external-dns/charts/external-dns/templates/serviceaccount.yaml new file mode 100644 index 00000000..f627313a --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/templates/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "external-dns.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "external-dns.labels" . 
| nindent 4 }} + {{- with .Values.serviceAccount.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automountServiceAccountToken }} +{{- end }} diff --git a/packages/system/external-dns/charts/external-dns/templates/servicemonitor.yaml b/packages/system/external-dns/charts/external-dns/templates/servicemonitor.yaml new file mode 100644 index 00000000..004756c7 --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/templates/servicemonitor.yaml @@ -0,0 +1,86 @@ +{{- if .Values.serviceMonitor.enabled -}} +{{- $providerName := include "external-dns.providerName" . }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "external-dns.fullname" . }} + namespace: {{ default .Release.Namespace .Values.serviceMonitor.namespace }} + {{- with .Values.serviceMonitor.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + labels: + {{- include "external-dns.labels" . | nindent 4 }} + {{- with .Values.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + jobLabel: app.kubernetes.io/instance + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} + selector: + matchLabels: + {{- include "external-dns.selectorLabels" . | nindent 6 }} + endpoints: + - port: http + path: /metrics + {{- with .Values.serviceMonitor.interval }} + interval: {{ . }} + {{- end }} + {{- with .Values.serviceMonitor.scheme }} + scheme: {{ . }} + {{- end }} + {{- with .Values.serviceMonitor.bearerTokenFile }} + bearerTokenFile: {{ . }} + {{- end }} + {{- with .Values.serviceMonitor.tlsConfig }} + tlsConfig: + {{- toYaml .| nindent 8 }} + {{- end }} + {{- with .Values.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ . }} + {{- end }} + {{- with .Values.serviceMonitor.metricRelabelings }} + metricRelabelings: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.serviceMonitor.relabelings }} + relabelings: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if eq $providerName "webhook" }} + {{- with .Values.provider.webhook.serviceMonitor }} + - port: http-webhook + path: /metrics + {{- with .interval }} + interval: {{ . }} + {{- end }} + {{- with .scheme }} + scheme: {{ . }} + {{- end }} + {{- with .bearerTokenFile }} + bearerTokenFile: {{ . }} + {{- end }} + {{- with .tlsConfig }} + tlsConfig: + {{- toYaml .| nindent 8 }} + {{- end }} + {{- with .scrapeTimeout }} + scrapeTimeout: {{ . }} + {{- end }} + {{- with .metricRelabelings }} + metricRelabelings: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .relabelings }} + relabelings: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + {{- end }} + {{- with .Values.serviceMonitor.targetLabels }} + targetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/packages/system/external-dns/charts/external-dns/values.schema.json b/packages/system/external-dns/charts/external-dns/values.schema.json new file mode 100644 index 00000000..614deeac --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/values.schema.json @@ -0,0 +1,91 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "properties": { + "provider": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + } + } + } + ] + }, + "extraArgs": { + "type": "array", + "items": { + "type": "string" + } + }, + "secretConfiguration": { + "$comment": "This value is DEPRECATED as secrets should be configured external to the chart and exposed to the container via extraVolumes & extraVolumeMounts.", + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "mountPath": { + "type": [ + "string", + "null" + ] + }, + "subPath": { + "type": [ + "string", + "null" + ] + }, + "data": { + "type": "object", + "patternProperties": { + ".+": 
{ + "type": "string" + } + } + } + } + }, + "service": { + "type": "object", + "properties": { + "annotations": { + "type": "object" + }, + "ipFamilies": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "IPv6", + "IPv4" + ] + } + }, + "ipFamilyPolicy": { + "type": [ + "string", + "null" + ], + "items": { + "type": "string", + "enum": [ + "SingleStack", + "PreferDualStack", + "RequireDualStack" + ] + } + }, + "port": { + "type": "integer" + } + } + } + } +} diff --git a/packages/system/external-dns/charts/external-dns/values.yaml b/packages/system/external-dns/charts/external-dns/values.yaml new file mode 100644 index 00000000..9d7dea1b --- /dev/null +++ b/packages/system/external-dns/charts/external-dns/values.yaml @@ -0,0 +1,297 @@ +# Default values for external-dns. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +image: + # -- Image repository for the `external-dns` container. + repository: registry.k8s.io/external-dns/external-dns + # -- (string) Image tag for the `external-dns` container, this will default to `.Chart.AppVersion` if not set. + tag: + # -- Image pull policy for the `external-dns` container. + pullPolicy: IfNotPresent + +# -- Image pull secrets. +imagePullSecrets: [] + +# -- (string) Override the name of the chart. +nameOverride: + +# -- (string) Override the full name of the chart. +fullnameOverride: + +# -- Labels to add to all chart resources. +commonLabels: {} + +serviceAccount: + # -- If `true`, create a new `ServiceAccount`. + create: true + # -- Labels to add to the service account. + labels: {} + # -- Annotations to add to the service account. + annotations: {} + # -- (string) If this is set and `serviceAccount.create` is `true` this will be used for the created `ServiceAccount` name, if set and `serviceAccount.create` is `false` then this will define an existing `ServiceAccount` to use. 
+ name: + # -- Set this to `false` to [opt out of API credential automounting](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#opt-out-of-api-credential-automounting) for the `ServiceAccount`. + automountServiceAccountToken: + +service: + # -- Service annotations. + annotations: {} + # -- Service HTTP port. + port: 7979 + # -- Service IP families. + ipFamilies: [] + # -- (string) Service IP family policy. + ipFamilyPolicy: + +rbac: + # -- If `true`, create a `ClusterRole` & `ClusterRoleBinding` with access to the Kubernetes API. + create: true + # -- Additional rules to add to the `ClusterRole`. + additionalPermissions: [] + +# -- Annotations to add to the `Deployment`. +deploymentAnnotations: {} + +# -- Extra containers to add to the `Deployment`. +extraContainers: {} + +# -- [Deployment Strategy](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#strategy). +deploymentStrategy: + type: Recreate + +# -- (int) Specify the number of old `ReplicaSets` to retain to allow rollback of the `Deployment``. +revisionHistoryLimit: + +# -- Labels to add to the `Pod`. +podLabels: {} + +# -- Annotations to add to the `Pod`. +podAnnotations: {} + +# -- (bool) Set this to `false` to [opt out of API credential automounting](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#opt-out-of-api-credential-automounting) for the `Pod`. +automountServiceAccountToken: + +# -- If `true`, the `Pod` will have [process namespace sharing](https://kubernetes.io/docs/tasks/configure-pod-container/share-process-namespace/) enabled. +shareProcessNamespace: false + +# -- [Pod security context](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#podsecuritycontext-v1-core), this supports full customisation. +# @default -- See _values.yaml_ +podSecurityContext: + runAsNonRoot: true + fsGroup: 65534 + seccompProfile: + type: RuntimeDefault + +# -- (string) Priority class name for the `Pod`. 
+priorityClassName: + +# -- (int) Termination grace period for the `Pod` in seconds. +terminationGracePeriodSeconds: + +# -- (string) [DNS policy](https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-s-dns-policy) for the pod, if not set the default will be used. +dnsPolicy: + +# -- (object) [DNS config](https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-dns-config) for the pod, if not set the default will be used. +dnsConfig: + +# -- [Init containers](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/) to add to the `Pod` definition. +initContainers: [] + +# -- [Security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for the `external-dns` container. +# @default -- See _values.yaml_ +securityContext: + privileged: false + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + capabilities: + drop: ["ALL"] + +# -- [Environment variables](https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/) for the `external-dns` container. +env: [] + +# -- [Liveness probe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/) configuration for the `external-dns` container. +# @default -- See _values.yaml_ +livenessProbe: + httpGet: + path: /healthz + port: http + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 2 + successThreshold: 1 + +# -- [Readiness probe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/) configuration for the `external-dns` container. 
+# @default -- See _values.yaml_ +readinessProbe: + httpGet: + path: /healthz + port: http + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + successThreshold: 1 + +# -- Extra [volumes](https://kubernetes.io/docs/concepts/storage/volumes/) for the `Pod`. +extraVolumes: [] + +# -- Extra [volume mounts](https://kubernetes.io/docs/concepts/storage/volumes/) for the `external-dns` container. +extraVolumeMounts: [] + +# -- [Resources](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) for the `external-dns` container. +resources: {} + +# -- Node labels to match for `Pod` [scheduling](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). +nodeSelector: {} + +# -- Affinity settings for `Pod` [scheduling](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). If an explicit label selector is not provided for pod affinity or pod anti-affinity one will be created from the pod selector labels. +affinity: {} + +# -- Topology spread constraints for `Pod` [scheduling](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). If an explicit label selector is not provided one will be created from the pod selector labels. +topologySpreadConstraints: [] + +# -- Node taints which will be tolerated for `Pod` [scheduling](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/). +tolerations: [] + +serviceMonitor: + # -- If `true`, create a `ServiceMonitor` resource to support the _Prometheus Operator_. + enabled: false + # -- Additional labels for the `ServiceMonitor`. + additionalLabels: {} + # -- Annotations to add to the `ServiceMonitor`. + annotations: {} + # -- (string) If set create the `ServiceMonitor` in an alternate namespace. + namespace: + # -- (string) If set override the _Prometheus_ default interval. + interval: + # -- (string) If set override the _Prometheus_ default scrape timeout. 
+ scrapeTimeout: + # -- (string) If set overrides the _Prometheus_ default scheme. + scheme: + # -- Configure the `ServiceMonitor` [TLS config](https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#tlsconfig). + tlsConfig: {} + # -- (string) Provide a bearer token file for the `ServiceMonitor`. + bearerTokenFile: + # -- [Relabel configs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) to apply to samples before ingestion. + relabelings: [] + # -- [Metric relabel configs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs) to apply to samples before ingestion. + metricRelabelings: [] + # -- Provide target labels for the `ServiceMonitor`. + targetLabels: [] + +# -- Log level. +logLevel: info + +# -- Log format. +logFormat: text + +# -- Interval for DNS updates. +interval: 1m + +# -- If `true`, triggers run loop on create/update/delete events in addition of regular interval. +triggerLoopOnEvent: false + +# -- if `true`, _ExternalDNS_ will run in a namespaced scope (`Role`` and `Rolebinding`` will be namespaced too). +namespaced: false + +# -- _Kubernetes_ resources to monitor for DNS entries. +sources: + - service + - ingress + +# -- How DNS records are synchronized between sources and providers; available values are `sync` & `upsert-only`. +policy: upsert-only + +# -- Specify the registry for storing ownership and labels. +# Valid values are `txt`, `aws-sd`, `dynamodb` & `noop`. +registry: txt +# -- (string) Specify an identifier for this instance of _ExternalDNS_ wWhen using a registry other than `noop`. +txtOwnerId: +# -- (string) Specify a prefix for the domain names of TXT records created for the `txt` registry. +# Mutually exclusive with `txtSuffix`. +txtPrefix: +# -- (string) Specify a suffix for the domain names of TXT records created for the `txt` registry. +# Mutually exclusive with `txtPrefix`. 
+txtSuffix: + +## - Limit possible target zones by domain suffixes. +domainFilters: [] + +## -- Intentionally exclude domains from being managed. +excludeDomains: [] + +provider: + # -- _ExternalDNS_ provider name; for the available providers and how to configure them see [README](https://github.com/kubernetes-sigs/external-dns/blob/master/charts/external-dns/README.md#providers). + name: aws + webhook: + image: + # -- (string) Image repository for the `webhook` container. + repository: + # -- (string) Image tag for the `webhook` container. + tag: + # -- Image pull policy for the `webhook` container. + pullPolicy: IfNotPresent + # -- [Environment variables](https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/) for the `webhook` container. + env: [] + # -- Extra arguments to provide for the `webhook` container. + args: [] + # -- Extra [volume mounts](https://kubernetes.io/docs/concepts/storage/volumes/) for the `webhook` container. + extraVolumeMounts: [] + # -- [Resources](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) for the `webhook` container. + resources: {} + # -- [Pod security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for the `webhook` container. + # @default -- See _values.yaml_ + securityContext: {} + # -- [Liveness probe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/) configuration for the `external-dns` container. + # @default -- See _values.yaml_ + livenessProbe: + httpGet: + path: /healthz + port: http-webhook + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 2 + successThreshold: 1 + # -- [Readiness probe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/) configuration for the `webhook` container. 
+ # @default -- See _values.yaml_ + readinessProbe: + httpGet: + path: /healthz + port: http-webhook + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + successThreshold: 1 + service: + # -- Webhook exposed HTTP port for the service. + port: 8080 + # -- Optional [Service Monitor](https://prometheus-operator.dev/docs/operator/design/#servicemonitor) configuration for the `webhook` container. + # @default -- See _values.yaml_ + serviceMonitor: + interval: + scheme: + tlsConfig: {} + bearerTokenFile: + scrapeTimeout: + metricRelabelings: [] + relabelings: [] + +# -- Extra arguments to provide to _ExternalDNS_. +extraArgs: [] + +secretConfiguration: + # -- If `true`, create a `Secret` to store sensitive provider configuration (**DEPRECATED**). + enabled: false + # -- Mount path for the `Secret`, this can be templated. + mountPath: + # -- Sub-path for mounting the `Secret`, this can be templated. + subPath: + # -- `Secret` data. + data: {} diff --git a/packages/system/external-dns/values.yaml b/packages/system/external-dns/values.yaml new file mode 100644 index 00000000..33627179 --- /dev/null +++ b/packages/system/external-dns/values.yaml @@ -0,0 +1,23 @@ +external-dns: + # -- How DNS records are synchronized between sources and providers; available values are `sync` & `upsert-only`. + policy: upsert-only + # -- Specify the registry for storing ownership and labels. + # Valid values are `txt`, `aws-sd`, `dynamodb` & `noop`. + registry: txt + # -- (string) Specify an identifier for this instance of _ExternalDNS_ wWhen using a registry other than `noop`. + txtOwnerId: + # -- (string) Specify a prefix for the domain names of TXT records created for the `txt` registry. + # Mutually exclusive with `txtSuffix`. + txtPrefix: + # -- (string) Specify a suffix for the domain names of TXT records created for the `txt` registry. + # Mutually exclusive with `txtPrefix`. + txtSuffix: + + ## - Limit possible target zones by domain suffixes. 
+ domainFilters: [] + ## -- Intentionally exclude domains from being managed. + excludeDomains: [] + + # -- Specify the DNS provider (e.g., "aws", "google", "azure", etc.) + provider: + name: "" From 4f430a90470768b0590bdd0aab773d9f6ccb5d76 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Fri, 4 Oct 2024 13:31:09 +0200 Subject: [PATCH 28/41] cilium: Fix tunnel option (#392) Signed-off-by: Andrei Kvapil Signed-off-by: Andrei Kvapil --- packages/apps/kubernetes/Chart.yaml | 2 +- packages/apps/kubernetes/templates/helmreleases/cilium.yaml | 1 - packages/apps/versions_map | 3 ++- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/apps/kubernetes/Chart.yaml b/packages/apps/kubernetes/Chart.yaml index 7265232a..72b0c76a 100644 --- a/packages/apps/kubernetes/Chart.yaml +++ b/packages/apps/kubernetes/Chart.yaml @@ -16,7 +16,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.11.0 +version: 0.11.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
Versions are not expected to diff --git a/packages/apps/kubernetes/templates/helmreleases/cilium.yaml b/packages/apps/kubernetes/templates/helmreleases/cilium.yaml index 2cc7ac0d..50546df3 100644 --- a/packages/apps/kubernetes/templates/helmreleases/cilium.yaml +++ b/packages/apps/kubernetes/templates/helmreleases/cilium.yaml @@ -30,7 +30,6 @@ spec: retries: -1 values: cilium: - tunnel: disabled k8sServiceHost: {{ .Release.Name }}.{{ .Release.Namespace }}.svc k8sServicePort: 6443 routingMode: tunnel diff --git a/packages/apps/versions_map b/packages/apps/versions_map index 492cd1c1..17c99435 100644 --- a/packages/apps/versions_map +++ b/packages/apps/versions_map @@ -32,7 +32,8 @@ kubernetes 0.8.1 e54608d8 kubernetes 0.8.2 5ca8823 kubernetes 0.9.0 9b6dd19 kubernetes 0.10.0 ac5c38b -kubernetes 0.11.0 HEAD +kubernetes 0.11.0 4eaca42 +kubernetes 0.11.1 HEAD mysql 0.1.0 f642698 mysql 0.2.0 8b975ff0 mysql 0.3.0 5ca8823 From 9241fb99407898ce1a8c513f2627469826a03d72 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Fri, 4 Oct 2024 13:54:08 +0200 Subject: [PATCH 29/41] fix grpc address lookup in kubevirt-csi-driver (#393) Signed-off-by: Andrei Kvapil Signed-off-by: Andrei Kvapil --- packages/apps/kubernetes/images/kubevirt-csi-driver/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/apps/kubernetes/images/kubevirt-csi-driver/Dockerfile b/packages/apps/kubernetes/images/kubevirt-csi-driver/Dockerfile index b53c7b3d..bf4433d2 100644 --- a/packages/apps/kubernetes/images/kubevirt-csi-driver/Dockerfile +++ b/packages/apps/kubernetes/images/kubevirt-csi-driver/Dockerfile @@ -3,7 +3,7 @@ ARG builder_image=docker.io/library/golang:1.22.5 FROM ${builder_image} AS builder RUN git clone https://github.com/kubevirt/csi-driver /src/kubevirt-csi-driver \ && cd /src/kubevirt-csi-driver \ - && git checkout 35836e0c8b68d9916d29a838ea60cdd3fc6199cf + && git checkout fa92820448e583c7fd722dc20270544e0c3eca53 WORKDIR /src/kubevirt-csi-driver RUN make build 
From 111e9a582d6b6fa42aab27785925910792511049 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Fri, 4 Oct 2024 14:09:32 +0200 Subject: [PATCH 30/41] Prepare release v0.16.2 (#394) Signed-off-by: Andrei Kvapil --- manifests/cozystack-installer.yaml | 4 ++-- packages/apps/kubernetes/images/cluster-autoscaler.tag | 2 +- packages/apps/kubernetes/images/kubevirt-cloud-provider.tag | 2 +- packages/apps/kubernetes/images/kubevirt-csi-driver.tag | 2 +- packages/core/installer/values.yaml | 2 +- packages/core/testing/values.yaml | 2 +- packages/system/dashboard/values.yaml | 4 ++-- packages/system/kamaji/values.yaml | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/manifests/cozystack-installer.yaml b/manifests/cozystack-installer.yaml index d54bd748..c141374d 100644 --- a/manifests/cozystack-installer.yaml +++ b/manifests/cozystack-installer.yaml @@ -68,7 +68,7 @@ spec: serviceAccountName: cozystack containers: - name: cozystack - image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.1" + image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.2" env: - name: KUBERNETES_SERVICE_HOST value: localhost @@ -87,7 +87,7 @@ spec: fieldRef: fieldPath: metadata.name - name: darkhttpd - image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.1" + image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.2" command: - /usr/bin/darkhttpd - /cozystack/assets diff --git a/packages/apps/kubernetes/images/cluster-autoscaler.tag b/packages/apps/kubernetes/images/cluster-autoscaler.tag index 5f8ef3e0..05698855 100644 --- a/packages/apps/kubernetes/images/cluster-autoscaler.tag +++ b/packages/apps/kubernetes/images/cluster-autoscaler.tag @@ -1 +1 @@ -ghcr.io/aenix-io/cozystack/cluster-autoscaler:0.11.0@sha256:7f617de5a24de790a15d9e97c6287ff2b390922e6e74c7a665cbf498f634514d +ghcr.io/aenix-io/cozystack/cluster-autoscaler:0.11.1@sha256:7f617de5a24de790a15d9e97c6287ff2b390922e6e74c7a665cbf498f634514d diff --git a/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag 
b/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag index bb0cf834..e0a560a0 100644 --- a/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag +++ b/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag @@ -1 +1 @@ -ghcr.io/aenix-io/cozystack/kubevirt-cloud-provider:0.11.0@sha256:91e6843afa704ba7c513842bc3a612f2c0b295ce95aebe60fbb6be09709a1947 +ghcr.io/aenix-io/cozystack/kubevirt-cloud-provider:0.11.1@sha256:b5aa62a53be566b49dea635ce8f6b9280566e260f8493ff3d71f8c7501fb4cbc diff --git a/packages/apps/kubernetes/images/kubevirt-csi-driver.tag b/packages/apps/kubernetes/images/kubevirt-csi-driver.tag index d6150e58..781be025 100644 --- a/packages/apps/kubernetes/images/kubevirt-csi-driver.tag +++ b/packages/apps/kubernetes/images/kubevirt-csi-driver.tag @@ -1 +1 @@ -ghcr.io/aenix-io/cozystack/kubevirt-csi-driver:0.11.0@sha256:1a9e6592fc035dbaae27f308b934206858c2e0025d4c99cd906b51615cc9766c +ghcr.io/aenix-io/cozystack/kubevirt-csi-driver:0.11.1@sha256:705e20e638315501aaa8b8156ceb8b260086b21876aa994bec9d6c406955c6d4 diff --git a/packages/core/installer/values.yaml b/packages/core/installer/values.yaml index 65cbc04c..ab7ea85b 100644 --- a/packages/core/installer/values.yaml +++ b/packages/core/installer/values.yaml @@ -1,2 +1,2 @@ cozystack: - image: ghcr.io/aenix-io/cozystack/cozystack:v0.16.1@sha256:f27695d23d449f10888295bd2ba6c084c8fa4b81f109d4836ec9db528b943b62 + image: ghcr.io/aenix-io/cozystack/cozystack:v0.16.2@sha256:0ee9d03a0453f19cc8deabf9ee4b9c6d9cc61e4ba833546a62a2f6b2265868f3 diff --git a/packages/core/testing/values.yaml b/packages/core/testing/values.yaml index bb8aa2f2..3c1e0fcc 100644 --- a/packages/core/testing/values.yaml +++ b/packages/core/testing/values.yaml @@ -1,2 +1,2 @@ e2e: - image: ghcr.io/aenix-io/cozystack/e2e-sandbox:v0.16.1@sha256:25b298d621ec79431d106184d59849bbae634588742583d111628126ad8615c5 + image: 
ghcr.io/aenix-io/cozystack/e2e-sandbox:v0.16.2@sha256:25b298d621ec79431d106184d59849bbae634588742583d111628126ad8615c5 diff --git a/packages/system/dashboard/values.yaml b/packages/system/dashboard/values.yaml index 03a1c4ff..2e09ed4e 100644 --- a/packages/system/dashboard/values.yaml +++ b/packages/system/dashboard/values.yaml @@ -33,11 +33,11 @@ kubeapps: image: registry: ghcr.io/aenix-io/cozystack repository: dashboard - tag: v0.16.1 + tag: v0.16.2 digest: "sha256:4818712e9fc9c57cc321512760c3226af564a04e69d4b3ec9229ab91fd39abeb" kubeappsapis: image: registry: ghcr.io/aenix-io/cozystack repository: kubeapps-apis - tag: v0.16.1 + tag: v0.16.2 digest: "sha256:55bc8e2495933112c7cb4bb9e3b1fcb8df46aa14e27fa007f78388a9757e3238" diff --git a/packages/system/kamaji/values.yaml b/packages/system/kamaji/values.yaml index b3ccffe5..76af5b67 100644 --- a/packages/system/kamaji/values.yaml +++ b/packages/system/kamaji/values.yaml @@ -3,7 +3,7 @@ kamaji: deploy: false image: pullPolicy: IfNotPresent - tag: v0.16.1@sha256:95a9658cbbe1cbfbc42b9ab1df4f2a39342d7a8f1ff10a10b81b8656f3744c39 + tag: v0.16.2@sha256:95a9658cbbe1cbfbc42b9ab1df4f2a39342d7a8f1ff10a10b81b8656f3744c39 repository: ghcr.io/aenix-io/cozystack/kamaji resources: limits: From fe901efc66e43d11e8cb44f074f09bf74fe56b1d Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Mon, 7 Oct 2024 14:53:01 +0200 Subject: [PATCH 31/41] Fix spec field in cert-manager-issuers (#399) Signed-off-by: Andrei Kvapil Signed-off-by: Andrei Kvapil --- .../system/cert-manager-issuers/templates/cluster-issuers.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/system/cert-manager-issuers/templates/cluster-issuers.yaml b/packages/system/cert-manager-issuers/templates/cluster-issuers.yaml index 2d8b050d..6a70eef0 100644 --- a/packages/system/cert-manager-issuers/templates/cluster-issuers.yaml +++ b/packages/system/cert-manager-issuers/templates/cluster-issuers.yaml @@ -29,6 +29,7 @@ apiVersion: cert-manager.io/v1 
kind: ClusterIssuer metadata: name: letsencrypt-stage +spec: acme: privateKeySecretRef: name: letsencrypt-stage @@ -53,4 +54,4 @@ kind: ClusterIssuer metadata: name: selfsigned-cluster-issuer spec: - selfSigned: {} \ No newline at end of file + selfSigned: {} From 31a45c4d52a3e0778b85337bbc0eba03de97f776 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Mon, 7 Oct 2024 14:53:32 +0200 Subject: [PATCH 32/41] Add MachineHealthChecks for Kubernetes VMs (#398) Add `MachineHealthCheck` resource to continuously check Machine state. If Machine is not ready it will be recreated in 60 seconds after unavailability. (30 sec kubelet to stop posting the status + 30 sec MachineHealthCheck timeout) Fixes https://github.com/aenix-io/cozystack/issues/365 Signed-off-by: Andrei Kvapil ## Summary by CodeRabbit - **New Features** - Introduced a `MachineHealthCheck` resource to monitor the health of machine deployments in Kubernetes. - **Version Updates** - Updated the Kubernetes chart version from `0.11.1` to `0.12.0`. - Various packages' versions have been updated to reflect the latest revisions, ensuring accuracy in versioning. Signed-off-by: Andrei Kvapil --- packages/apps/kubernetes/Chart.yaml | 2 +- .../apps/kubernetes/templates/cluster.yaml | 20 +++++++++++++++++++ packages/apps/versions_map | 3 ++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/packages/apps/kubernetes/Chart.yaml b/packages/apps/kubernetes/Chart.yaml index 72b0c76a..7ae338d6 100644 --- a/packages/apps/kubernetes/Chart.yaml +++ b/packages/apps/kubernetes/Chart.yaml @@ -16,7 +16,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.11.1 +version: 0.12.0 # This is the version number of the application being deployed. 
This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/packages/apps/kubernetes/templates/cluster.yaml b/packages/apps/kubernetes/templates/cluster.yaml index 72358c24..637902af 100644 --- a/packages/apps/kubernetes/templates/cluster.yaml +++ b/packages/apps/kubernetes/templates/cluster.yaml @@ -210,6 +210,26 @@ spec: name: {{ $.Release.Name }}-{{ $groupName }}-{{ $kubevirtmachinetemplateHash }} namespace: default version: v1.30.1 +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineHealthCheck +metadata: + name: {{ $.Release.Name }}-{{ $groupName }} + namespace: {{ $.Release.Namespace }} +spec: + clusterName: {{ $.Release.Name }} + nodeStartupTimeout: 10m + selector: + matchLabels: + cluster.x-k8s.io/cluster-name: {{ $.Release.Name }} + cluster.x-k8s.io/deployment-name: {{ $.Release.Name }}-{{ $groupName }} + unhealthyConditions: + - type: Ready + status: Unknown + timeout: 30s + - type: Ready + status: "False" + timeout: 30s {{- end }} --- {{- /* diff --git a/packages/apps/versions_map b/packages/apps/versions_map index 17c99435..302bbcae 100644 --- a/packages/apps/versions_map +++ b/packages/apps/versions_map @@ -33,7 +33,8 @@ kubernetes 0.8.2 5ca8823 kubernetes 0.9.0 9b6dd19 kubernetes 0.10.0 ac5c38b kubernetes 0.11.0 4eaca42 -kubernetes 0.11.1 HEAD +kubernetes 0.11.1 4f430a90 +kubernetes 0.12.0 HEAD mysql 0.1.0 f642698 mysql 0.2.0 8b975ff0 mysql 0.3.0 5ca8823 From 845f22db5897787cc69c22b3cb6c35d48ef1b6ed Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Mon, 7 Oct 2024 21:11:27 +0200 Subject: [PATCH 33/41] increase capi-operator resources limits (#400) Signed-off-by: Andrei Kvapil Signed-off-by: Andrei Kvapil --- packages/system/capi-operator/values.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 packages/system/capi-operator/values.yaml diff --git a/packages/system/capi-operator/values.yaml b/packages/system/capi-operator/values.yaml new file mode 100644 
index 00000000..901d9d27 --- /dev/null +++ b/packages/system/capi-operator/values.yaml @@ -0,0 +1,8 @@ +cluster-api-operator: + resources: + limits: + cpu: 200m + memory: 512Mi + requests: + cpu: 100m + memory: 100Mi From 15001dc6ad9a20e41596dfad598a3f91b70b5ab2 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Mon, 7 Oct 2024 21:12:53 +0200 Subject: [PATCH 34/41] Fix ingress for grafana and alerta (#401) Signed-off-by: Andrei Kvapil Signed-off-by: Andrei Kvapil --- packages/extra/monitoring/templates/alerta/alerta.yaml | 8 ++++---- packages/extra/monitoring/templates/grafana/grafana.yaml | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/extra/monitoring/templates/alerta/alerta.yaml b/packages/extra/monitoring/templates/alerta/alerta.yaml index 18932f56..30aa2493 100644 --- a/packages/extra/monitoring/templates/alerta/alerta.yaml +++ b/packages/extra/monitoring/templates/alerta/alerta.yaml @@ -148,18 +148,18 @@ metadata: labels: app: alerta annotations: - acme.cert-manager.io/http01-ingress-class: {{ $ingress }} {{- if ne $issuerType "cloudflare" }} - acme.cert-manager.io/http01-ingress-class: {{ $ingress }} + acme.cert-manager.io/http01-ingress-class: {{ $ingress }} {{- end }} + cert-manager.io/cluster-issuer: letsencrypt-prod spec: ingressClassName: {{ $ingress }} tls: - hosts: - - "{{ .Values.host | default (printf "alerta.%s" $host) }}" + - "{{ printf "alerta.%s" (.Values.host | default $host) }}" secretName: alerta-tls rules: - - host: "{{ .Values.host | default (printf "alerta.%s" $host) }}" + - host: "{{ printf "alerta.%s" (.Values.host | default $host) }}" http: paths: - path: / diff --git a/packages/extra/monitoring/templates/grafana/grafana.yaml b/packages/extra/monitoring/templates/grafana/grafana.yaml index 4e1e65a6..bce84d13 100644 --- a/packages/extra/monitoring/templates/grafana/grafana.yaml +++ b/packages/extra/monitoring/templates/grafana/grafana.yaml @@ -25,7 +25,7 @@ spec: password: ${GF_DATABASE_PASSWORD} 
#ssl_mode: require server: - root_url: "https://{{ .Values.host | default (printf "grafana.%s" $host) }}" + root_url: "https://{{ printf "grafana.%s" (.Values.host | default $host) }}" security: admin_user: user admin_password: ${GF_PASSWORD} @@ -100,7 +100,7 @@ spec: spec: ingressClassName: "{{ $ingress }}" rules: - - host: "{{ .Values.host | default (printf "grafana.%s" $host) }}" + - host: "{{ printf "grafana.%s" (.Values.host | default $host) }}" http: paths: - backend: @@ -112,5 +112,5 @@ spec: pathType: Prefix tls: - hosts: - - "{{ .Values.host | default (printf "grafana.%s" $host) }}" + - "{{ printf "grafana.%s" (.Values.host | default $host) }}" secretName: grafana-ingress-tls From 65bdb7bd7b5617a7f377df23d77fd47fdda91b43 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Tue, 8 Oct 2024 10:56:51 +0200 Subject: [PATCH 35/41] Introduce optional components (#391) This PR introduces an extra option in the cozystack configmap. It allows enabling components that are optional in a specific bundle; example usage: ```yaml bundle-enable: telepresence,external-dns,external-secrets-operator ``` ## Summary by CodeRabbit - **New Features** - Added optional attributes to several release entries, allowing for more flexible deployment configurations. - Enhanced Helm release generation logic to consider both enabled and disabled states for components. - Improved namespace management by incorporating checks for optional components based on enablement and disablement. - **Bug Fixes** - Updated dependencies for `external-dns` to include `cilium` and `kubeovn`. - **Documentation** - Clarified the configurability of deployment components with the introduction of optional attributes. 
--------- Signed-off-by: Andrei Kvapil --- .../core/platform/bundles/distro-full.yaml | 3 +++ .../core/platform/bundles/distro-hosted.yaml | 3 +++ packages/core/platform/bundles/paas-full.yaml | 3 +++ .../core/platform/bundles/paas-hosted.yaml | 3 +++ .../core/platform/templates/helmreleases.yaml | 3 +++ .../core/platform/templates/namespaces.yaml | 20 ++++++++++++------- 6 files changed, 28 insertions(+), 7 deletions(-) diff --git a/packages/core/platform/bundles/distro-full.yaml b/packages/core/platform/bundles/distro-full.yaml index 6cd88fbb..88772cbe 100644 --- a/packages/core/platform/bundles/distro-full.yaml +++ b/packages/core/platform/bundles/distro-full.yaml @@ -140,16 +140,19 @@ releases: releaseName: traffic-manager chart: cozy-telepresence namespace: cozy-telepresence + optional: true dependsOn: [] - name: external-dns releaseName: external-dns chart: cozy-external-dns namespace: cozy-external-dns + optional: true dependsOn: [cilium] - name: external-secrets-operator releaseName: external-secrets-operator chart: cozy-external-secrets-operator namespace: cozy-external-secrets-operator + optional: true dependsOn: [cilium] diff --git a/packages/core/platform/bundles/distro-hosted.yaml b/packages/core/platform/bundles/distro-hosted.yaml index 09f6f1f5..2dd5535a 100644 --- a/packages/core/platform/bundles/distro-hosted.yaml +++ b/packages/core/platform/bundles/distro-hosted.yaml @@ -91,16 +91,19 @@ releases: releaseName: traffic-manager chart: cozy-telepresence namespace: cozy-telepresence + optional: true dependsOn: [] - name: external-dns releaseName: external-dns chart: cozy-external-dns namespace: cozy-external-dns + optional: true dependsOn: [] - name: external-secrets-operator releaseName: external-secrets-operator chart: cozy-external-secrets-operator namespace: cozy-external-secrets-operator + optional: true dependsOn: [] diff --git a/packages/core/platform/bundles/paas-full.yaml b/packages/core/platform/bundles/paas-full.yaml index 
78481828..0ecf1527 100644 --- a/packages/core/platform/bundles/paas-full.yaml +++ b/packages/core/platform/bundles/paas-full.yaml @@ -175,6 +175,7 @@ releases: releaseName: traffic-manager chart: cozy-telepresence namespace: cozy-telepresence + optional: true dependsOn: [cilium,kubeovn] - name: dashboard @@ -221,10 +222,12 @@ releases: releaseName: external-dns chart: cozy-external-dns namespace: cozy-external-dns + optional: true dependsOn: [cilium,kubeovn] - name: external-secrets-operator releaseName: external-secrets-operator chart: cozy-external-secrets-operator namespace: cozy-external-secrets-operator + optional: true dependsOn: [cilium,kubeovn] diff --git a/packages/core/platform/bundles/paas-hosted.yaml b/packages/core/platform/bundles/paas-hosted.yaml index 63500982..21af835f 100644 --- a/packages/core/platform/bundles/paas-hosted.yaml +++ b/packages/core/platform/bundles/paas-hosted.yaml @@ -97,18 +97,21 @@ releases: releaseName: traffic-manager chart: cozy-telepresence namespace: cozy-telepresence + optional: true dependsOn: [] - name: external-dns releaseName: external-dns chart: cozy-external-dns namespace: cozy-external-dns + optional: true dependsOn: [cilium,kubeovn] - name: external-secrets-operator releaseName: external-secrets-operator chart: cozy-external-secrets-operator namespace: cozy-external-secrets-operator + optional: true dependsOn: [] - name: dashboard diff --git a/packages/core/platform/templates/helmreleases.yaml b/packages/core/platform/templates/helmreleases.yaml index 06ef0463..7b438932 100644 --- a/packages/core/platform/templates/helmreleases.yaml +++ b/packages/core/platform/templates/helmreleases.yaml @@ -3,6 +3,7 @@ {{- $bundle := tpl (.Files.Get (printf "bundles/%s.yaml" $bundleName)) . 
| fromYaml }} {{- $dependencyNamespaces := dict }} {{- $disabledComponents := splitList "," ((index $cozyConfig.data "bundle-disable") | default "") }} +{{- $enabledComponents := splitList "," ((index $cozyConfig.data "bundle-enable") | default "") }} {{/* collect dependency namespaces from releases */}} {{- range $x := $bundle.releases }} @@ -11,6 +12,7 @@ {{- range $x := $bundle.releases }} {{- if not (has $x.name $disabledComponents) }} +{{- if and ($x.optional) (has $x.name $enabledComponents) }} --- apiVersion: helm.toolkit.fluxcd.io/v2 kind: HelmRelease @@ -65,3 +67,4 @@ spec: {{- end }} {{- end }} {{- end }} +{{- end }} diff --git a/packages/core/platform/templates/namespaces.yaml b/packages/core/platform/templates/namespaces.yaml index 669d852c..c7b174bd 100644 --- a/packages/core/platform/templates/namespaces.yaml +++ b/packages/core/platform/templates/namespaces.yaml @@ -1,17 +1,23 @@ {{- $cozyConfig := lookup "v1" "ConfigMap" "cozy-system" "cozystack" }} {{- $bundleName := index $cozyConfig.data "bundle-name" }} {{- $bundle := tpl (.Files.Get (printf "bundles/%s.yaml" $bundleName)) . 
| fromYaml }} +{{- $disabledComponents := splitList "," ((index $cozyConfig.data "bundle-disable") | default "") }} +{{- $enabledComponents := splitList "," ((index $cozyConfig.data "bundle-enable") | default "") }} {{- $namespaces := dict }} {{/* collect namespaces from releases */}} {{- range $x := $bundle.releases }} -{{- if not (hasKey $namespaces $x.namespace) }} -{{- $_ := set $namespaces $x.namespace false }} -{{- end }} -{{/* if at least one release requires a privileged namespace, then it should be privileged */}} -{{- if or $x.privileged (index $namespaces $x.namespace) }} -{{- $_ := set $namespaces $x.namespace true }} -{{- end }} + {{- if not (hasKey $namespaces $x.namespace) }} + {{- if not (has $x.name $disabledComponents) }} + {{- if and ($x.optional) (has $x.name $enabledComponents) }} + {{- $_ := set $namespaces $x.namespace false }} + {{- end }} + {{- end }} + {{- end }} + {{/* if at least one release requires a privileged namespace, then it should be privileged */}} + {{- if or $x.privileged (index $namespaces $x.namespace) }} + {{- $_ := set $namespaces $x.namespace true }} + {{- end }} {{- end }} {{/* Add extra namespaces */}} From cbadfef3d83df7e634335779e7ef736196835a31 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Tue, 8 Oct 2024 11:30:05 +0200 Subject: [PATCH 36/41] Fix optional=false components (#402) --- packages/core/platform/templates/helmreleases.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/core/platform/templates/helmreleases.yaml b/packages/core/platform/templates/helmreleases.yaml index 7b438932..1563a1a3 100644 --- a/packages/core/platform/templates/helmreleases.yaml +++ b/packages/core/platform/templates/helmreleases.yaml @@ -12,7 +12,7 @@ {{- range $x := $bundle.releases }} {{- if not (has $x.name $disabledComponents) }} -{{- if and ($x.optional) (has $x.name $enabledComponents) }} +{{- if or (not $x.optional) (and ($x.optional) (has $x.name $enabledComponents)) }} --- apiVersion: 
helm.toolkit.fluxcd.io/v2 kind: HelmRelease From 18d658c73d4e8c4904d7ac083669fb330bebec82 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Wed, 9 Oct 2024 17:45:47 +0200 Subject: [PATCH 37/41] Disable serviceLinks (#406) Fixes https://github.com/kubevirt/csi-driver/issues/120#issuecomment-2402467106 ## Summary by CodeRabbit - **New Features** - Introduced a new configuration option to disable service links for various Kubernetes deployments, enhancing service resolution control for the following: - Kafka - Cluster Autoscaler - CSI Controller - Cloud Controller Manager - RabbitMQ Signed-off-by: Andrei Kvapil --- packages/apps/kafka/templates/kafka.yaml | 2 ++ .../kubernetes/templates/cluster-autoscaler/deployment.yaml | 1 + packages/apps/kubernetes/templates/csi/deploy.yaml | 1 + packages/apps/kubernetes/templates/kccm/manager.yaml | 1 + packages/apps/rabbitmq/templates/rabbitmq.yaml | 2 ++ 5 files changed, 7 insertions(+) diff --git a/packages/apps/kafka/templates/kafka.yaml b/packages/apps/kafka/templates/kafka.yaml index 9f380b9b..eed36f63 100644 --- a/packages/apps/kafka/templates/kafka.yaml +++ b/packages/apps/kafka/templates/kafka.yaml @@ -76,3 +76,5 @@ spec: metadata: labels: policy.cozystack.io/allow-to-apiserver: "true" + spec: + enableServiceLinks: false diff --git a/packages/apps/kubernetes/templates/cluster-autoscaler/deployment.yaml b/packages/apps/kubernetes/templates/cluster-autoscaler/deployment.yaml index 934abe36..4ec017af 100644 --- a/packages/apps/kubernetes/templates/cluster-autoscaler/deployment.yaml +++ b/packages/apps/kubernetes/templates/cluster-autoscaler/deployment.yaml @@ -16,6 +16,7 @@ spec: app: {{ .Release.Name }}-cluster-autoscaler policy.cozystack.io/allow-to-apiserver: "true" spec: + enableServiceLinks: false tolerations: - key: CriticalAddonsOnly operator: Exists diff --git a/packages/apps/kubernetes/templates/csi/deploy.yaml b/packages/apps/kubernetes/templates/csi/deploy.yaml index 0b1a4c9a..d0d83e37 100644 --- 
a/packages/apps/kubernetes/templates/csi/deploy.yaml +++ b/packages/apps/kubernetes/templates/csi/deploy.yaml @@ -15,6 +15,7 @@ spec: app: {{ .Release.Name }}-kcsi-driver policy.cozystack.io/allow-to-apiserver: "true" spec: + enableServiceLinks: false serviceAccountName: {{ .Release.Name }}-kcsi priorityClassName: system-cluster-critical tolerations: diff --git a/packages/apps/kubernetes/templates/kccm/manager.yaml b/packages/apps/kubernetes/templates/kccm/manager.yaml index 0f581243..12e48245 100644 --- a/packages/apps/kubernetes/templates/kccm/manager.yaml +++ b/packages/apps/kubernetes/templates/kccm/manager.yaml @@ -15,6 +15,7 @@ spec: k8s-app: {{ .Release.Name }}-kccm policy.cozystack.io/allow-to-apiserver: "true" spec: + enableServiceLinks: false tolerations: - key: CriticalAddonsOnly operator: Exists diff --git a/packages/apps/rabbitmq/templates/rabbitmq.yaml b/packages/apps/rabbitmq/templates/rabbitmq.yaml index 1417af76..adf7e852 100644 --- a/packages/apps/rabbitmq/templates/rabbitmq.yaml +++ b/packages/apps/rabbitmq/templates/rabbitmq.yaml @@ -16,6 +16,8 @@ spec: statefulSet: spec: template: + spec: + enableServiceLinks: false metadata: labels: policy.cozystack.io/allow-to-apiserver: "true" From 4631ea26f76931808555e7022cba661256c6e3b8 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Wed, 9 Oct 2024 17:48:57 +0200 Subject: [PATCH 38/41] Update KubeVirt CSI Driver (#409) Signed-off-by: Andrei Kvapil Signed-off-by: Andrei Kvapil --- packages/apps/kubernetes/images/kubevirt-csi-driver/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/apps/kubernetes/images/kubevirt-csi-driver/Dockerfile b/packages/apps/kubernetes/images/kubevirt-csi-driver/Dockerfile index bf4433d2..b53c7b3d 100644 --- a/packages/apps/kubernetes/images/kubevirt-csi-driver/Dockerfile +++ b/packages/apps/kubernetes/images/kubevirt-csi-driver/Dockerfile @@ -3,7 +3,7 @@ ARG builder_image=docker.io/library/golang:1.22.5 FROM ${builder_image} AS builder RUN 
git clone https://github.com/kubevirt/csi-driver /src/kubevirt-csi-driver \ && cd /src/kubevirt-csi-driver \ - && git checkout fa92820448e583c7fd722dc20270544e0c3eca53 + && git checkout 35836e0c8b68d9916d29a838ea60cdd3fc6199cf WORKDIR /src/kubevirt-csi-driver RUN make build From c2b6636fe772a335d59c3d9bf529d67e5ba8671f Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Wed, 9 Oct 2024 17:53:50 +0200 Subject: [PATCH 39/41] fix specifying domainName for seaweedfs filer (#410) Signed-off-by: Andrei Kvapil Signed-off-by: Andrei Kvapil --- packages/extra/seaweedfs/templates/seaweedfs.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/extra/seaweedfs/templates/seaweedfs.yaml b/packages/extra/seaweedfs/templates/seaweedfs.yaml index e3bea096..70cf06d2 100644 --- a/packages/extra/seaweedfs/templates/seaweedfs.yaml +++ b/packages/extra/seaweedfs/templates/seaweedfs.yaml @@ -38,6 +38,10 @@ spec: storageClass: {{ . }} {{- end }} maxVolumes: 0 + + filer: + s3: + domainName: {{ .Values.host | default (printf "s3.%s" $host) }} s3: ingress: From 249bf35446a72a282a5a1583d8e09db5d2b2cae7 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Wed, 9 Oct 2024 20:32:45 +0200 Subject: [PATCH 40/41] Prepare release v0.16.3 (#411) Signed-off-by: Andrei Kvapil Signed-off-by: Andrei Kvapil --- hack/e2e.sh | 4 ++-- manifests/cozystack-installer.yaml | 4 ++-- packages/apps/kubernetes/images/cluster-autoscaler.tag | 2 +- packages/apps/kubernetes/images/kubevirt-cloud-provider.tag | 2 +- packages/apps/kubernetes/images/kubevirt-csi-driver.tag | 2 +- packages/core/installer/values.yaml | 2 +- packages/core/testing/values.yaml | 2 +- packages/extra/seaweedfs/Chart.yaml | 2 +- packages/extra/versions_map | 3 ++- packages/system/dashboard/values.yaml | 4 ++-- packages/system/kamaji/values.yaml | 2 +- packages/system/kubeovn/values.yaml | 2 +- 12 files changed, 16 insertions(+), 15 deletions(-) diff --git a/hack/e2e.sh b/hack/e2e.sh index 89637949..7f81f041 100755 --- a/hack/e2e.sh +++ 
b/hack/e2e.sh @@ -36,7 +36,7 @@ mkdir -p srv1 srv2 srv3 # Prepare cloud-init for i in 1 2 3; do - echo "local-hostname: srv$i" > "srv$i/meta-data" + echo "hostname: srv$i" > "srv$i/meta-data" echo '#cloud-config' > "srv$i/user-data" cat > "srv$i/network-config" <&1 | grep "rpc error"; do sleep 1; done' +timeout 180 sh -c 'while talosctl etcd members -n 192.168.123.11,192.168.123.12,192.168.123.13 -e 192.168.123.10 2>&1 | grep "rpc error"; do sleep 1; done' rm -f kubeconfig talosctl kubeconfig kubeconfig -e 192.168.123.10 -n 192.168.123.10 diff --git a/manifests/cozystack-installer.yaml b/manifests/cozystack-installer.yaml index c141374d..7a5cf921 100644 --- a/manifests/cozystack-installer.yaml +++ b/manifests/cozystack-installer.yaml @@ -68,7 +68,7 @@ spec: serviceAccountName: cozystack containers: - name: cozystack - image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.2" + image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.3" env: - name: KUBERNETES_SERVICE_HOST value: localhost @@ -87,7 +87,7 @@ spec: fieldRef: fieldPath: metadata.name - name: darkhttpd - image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.2" + image: "ghcr.io/aenix-io/cozystack/cozystack:v0.16.3" command: - /usr/bin/darkhttpd - /cozystack/assets diff --git a/packages/apps/kubernetes/images/cluster-autoscaler.tag b/packages/apps/kubernetes/images/cluster-autoscaler.tag index 05698855..6a7663a6 100644 --- a/packages/apps/kubernetes/images/cluster-autoscaler.tag +++ b/packages/apps/kubernetes/images/cluster-autoscaler.tag @@ -1 +1 @@ -ghcr.io/aenix-io/cozystack/cluster-autoscaler:0.11.1@sha256:7f617de5a24de790a15d9e97c6287ff2b390922e6e74c7a665cbf498f634514d +ghcr.io/aenix-io/cozystack/cluster-autoscaler:0.12.0@sha256:7f617de5a24de790a15d9e97c6287ff2b390922e6e74c7a665cbf498f634514d diff --git a/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag b/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag index e0a560a0..43f8e2c6 100644 --- 
a/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag +++ b/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag @@ -1 +1 @@ -ghcr.io/aenix-io/cozystack/kubevirt-cloud-provider:0.11.1@sha256:b5aa62a53be566b49dea635ce8f6b9280566e260f8493ff3d71f8c7501fb4cbc +ghcr.io/aenix-io/cozystack/kubevirt-cloud-provider:0.12.0@sha256:b9dc8e5f0296146b37b332b07b8cd74d1b0308786160b161c670c55005d3dbe9 diff --git a/packages/apps/kubernetes/images/kubevirt-csi-driver.tag b/packages/apps/kubernetes/images/kubevirt-csi-driver.tag index 781be025..c105815f 100644 --- a/packages/apps/kubernetes/images/kubevirt-csi-driver.tag +++ b/packages/apps/kubernetes/images/kubevirt-csi-driver.tag @@ -1 +1 @@ -ghcr.io/aenix-io/cozystack/kubevirt-csi-driver:0.11.1@sha256:705e20e638315501aaa8b8156ceb8b260086b21876aa994bec9d6c406955c6d4 +ghcr.io/aenix-io/cozystack/kubevirt-csi-driver:0.12.0@sha256:bd9175e1307c0afa828974df40edaa4ab905b869e1260a09675ceb1c1b248f1f diff --git a/packages/core/installer/values.yaml b/packages/core/installer/values.yaml index ab7ea85b..44f82771 100644 --- a/packages/core/installer/values.yaml +++ b/packages/core/installer/values.yaml @@ -1,2 +1,2 @@ cozystack: - image: ghcr.io/aenix-io/cozystack/cozystack:v0.16.2@sha256:0ee9d03a0453f19cc8deabf9ee4b9c6d9cc61e4ba833546a62a2f6b2265868f3 + image: ghcr.io/aenix-io/cozystack/cozystack:v0.16.3@sha256:7f281a046f648a53e9b957c6297e5e62c53b39a2816240720c142da0d1d79700 diff --git a/packages/core/testing/values.yaml b/packages/core/testing/values.yaml index 3c1e0fcc..4c3ede9b 100644 --- a/packages/core/testing/values.yaml +++ b/packages/core/testing/values.yaml @@ -1,2 +1,2 @@ e2e: - image: ghcr.io/aenix-io/cozystack/e2e-sandbox:v0.16.2@sha256:25b298d621ec79431d106184d59849bbae634588742583d111628126ad8615c5 + image: ghcr.io/aenix-io/cozystack/e2e-sandbox:v0.16.3@sha256:25b298d621ec79431d106184d59849bbae634588742583d111628126ad8615c5 diff --git a/packages/extra/seaweedfs/Chart.yaml b/packages/extra/seaweedfs/Chart.yaml index 
fbb08137..c3f187ef 100644 --- a/packages/extra/seaweedfs/Chart.yaml +++ b/packages/extra/seaweedfs/Chart.yaml @@ -16,7 +16,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.2.0 +version: 0.2.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/packages/extra/versions_map b/packages/extra/versions_map index fc446b7e..f5fadb77 100644 --- a/packages/extra/versions_map +++ b/packages/extra/versions_map @@ -15,4 +15,5 @@ monitoring 1.3.0 6c5cf5b monitoring 1.4.0 adaf603b monitoring 1.5.0 HEAD seaweedfs 0.1.0 5ca8823 -seaweedfs 0.2.0 HEAD +seaweedfs 0.2.0 9e33dc0 +seaweedfs 0.2.1 HEAD diff --git a/packages/system/dashboard/values.yaml b/packages/system/dashboard/values.yaml index 2e09ed4e..200152ea 100644 --- a/packages/system/dashboard/values.yaml +++ b/packages/system/dashboard/values.yaml @@ -33,11 +33,11 @@ kubeapps: image: registry: ghcr.io/aenix-io/cozystack repository: dashboard - tag: v0.16.2 + tag: v0.16.3 digest: "sha256:4818712e9fc9c57cc321512760c3226af564a04e69d4b3ec9229ab91fd39abeb" kubeappsapis: image: registry: ghcr.io/aenix-io/cozystack repository: kubeapps-apis - tag: v0.16.2 + tag: v0.16.3 digest: "sha256:55bc8e2495933112c7cb4bb9e3b1fcb8df46aa14e27fa007f78388a9757e3238" diff --git a/packages/system/kamaji/values.yaml b/packages/system/kamaji/values.yaml index 76af5b67..bb0cfde4 100644 --- a/packages/system/kamaji/values.yaml +++ b/packages/system/kamaji/values.yaml @@ -3,7 +3,7 @@ kamaji: deploy: false image: pullPolicy: IfNotPresent - tag: v0.16.2@sha256:95a9658cbbe1cbfbc42b9ab1df4f2a39342d7a8f1ff10a10b81b8656f3744c39 + tag: 
v0.16.3@sha256:95a9658cbbe1cbfbc42b9ab1df4f2a39342d7a8f1ff10a10b81b8656f3744c39 repository: ghcr.io/aenix-io/cozystack/kamaji resources: limits: diff --git a/packages/system/kubeovn/values.yaml b/packages/system/kubeovn/values.yaml index 70816d53..ad937906 100644 --- a/packages/system/kubeovn/values.yaml +++ b/packages/system/kubeovn/values.yaml @@ -22,4 +22,4 @@ global: images: kubeovn: repository: kubeovn - tag: v1.13.0@sha256:d13ac4f916cd88d33d1d64c949978165272998d6594441a9dd4be5e6892caf4e + tag: v1.13.0@sha256:ba4e98866295db13d88b10984c230e1cb0db86782767c5b9aff452865cdd1012 From de70081821d2561ffd2769fd0a1c565f57be3115 Mon Sep 17 00:00:00 2001 From: Andrei Kvapil Date: Wed, 9 Oct 2024 21:29:50 +0200 Subject: [PATCH 41/41] Prepare release v0.16.3 (#412) Signed-off-by: Andrei Kvapil --- packages/apps/kubernetes/images/kubevirt-cloud-provider.tag | 2 +- packages/apps/kubernetes/images/kubevirt-csi-driver.tag | 2 +- packages/core/installer/values.yaml | 2 +- packages/system/kubeovn/values.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag b/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag index 43f8e2c6..1247d3f4 100644 --- a/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag +++ b/packages/apps/kubernetes/images/kubevirt-cloud-provider.tag @@ -1 +1 @@ -ghcr.io/aenix-io/cozystack/kubevirt-cloud-provider:0.12.0@sha256:b9dc8e5f0296146b37b332b07b8cd74d1b0308786160b161c670c55005d3dbe9 +ghcr.io/aenix-io/cozystack/kubevirt-cloud-provider:0.12.0@sha256:735aa8092501fc0f2904b685b15bc0137ea294cb08301ca1185d3dec5f467f0f diff --git a/packages/apps/kubernetes/images/kubevirt-csi-driver.tag b/packages/apps/kubernetes/images/kubevirt-csi-driver.tag index c105815f..118ba92b 100644 --- a/packages/apps/kubernetes/images/kubevirt-csi-driver.tag +++ b/packages/apps/kubernetes/images/kubevirt-csi-driver.tag @@ -1 +1 @@ 
-ghcr.io/aenix-io/cozystack/kubevirt-csi-driver:0.12.0@sha256:bd9175e1307c0afa828974df40edaa4ab905b869e1260a09675ceb1c1b248f1f +ghcr.io/aenix-io/cozystack/kubevirt-csi-driver:0.12.0@sha256:86029548078960feecca116087b2135230d676b83c503f292eb50e1199be2790 diff --git a/packages/core/installer/values.yaml b/packages/core/installer/values.yaml index 44f82771..68ef3293 100644 --- a/packages/core/installer/values.yaml +++ b/packages/core/installer/values.yaml @@ -1,2 +1,2 @@ cozystack: - image: ghcr.io/aenix-io/cozystack/cozystack:v0.16.3@sha256:7f281a046f648a53e9b957c6297e5e62c53b39a2816240720c142da0d1d79700 + image: ghcr.io/aenix-io/cozystack/cozystack:v0.16.3@sha256:d0a563047f86c89fb7831eb9fcf4a9032628f1afaa71f915502729710d43642c diff --git a/packages/system/kubeovn/values.yaml b/packages/system/kubeovn/values.yaml index ad937906..70816d53 100644 --- a/packages/system/kubeovn/values.yaml +++ b/packages/system/kubeovn/values.yaml @@ -22,4 +22,4 @@ global: images: kubeovn: repository: kubeovn - tag: v1.13.0@sha256:ba4e98866295db13d88b10984c230e1cb0db86782767c5b9aff452865cdd1012 + tag: v1.13.0@sha256:d13ac4f916cd88d33d1d64c949978165272998d6594441a9dd4be5e6892caf4e