# Mirror of https://github.com/outbackdingo/cozystack.git
# (synced 2026-02-05 08:17:59 +00:00; source file: 358 lines, 19 KiB, YAML)
# VMRule "alerts-node": alerts comparing node filesystem headroom with the
# kubelet eviction thresholds exported as kubelet_eviction_* series.
#
# Each of the four alert names below appears four times, one rule per stage:
#   severity_level "9": free percentage below the soft threshold for 10m
#   severity_level "7": free percentage below the hard threshold + 5 for 5m
#   severity_level "6": free percentage below the hard threshold (no `for`)
#   severity_level "5": nothing free at all (no `for`)
#
# NOTE(review): the plk_* annotations and the `prometheus=deckhouse` selector
# inside them are consumed by the alert receiver; their exact semantics are
# not visible in this file, so they are kept verbatim.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMRule
metadata:
  name: alerts-node
  namespace: cozy-monitoring
spec:
  groups:
    # Inode-based rules: node_filesystem_files_free / node_filesystem_files.
    - name: kubernetes.node.disk_inodes_usage
      rules:
        # nodefs inodes: free percentage below the soft threshold for 10m.
        - alert: KubeletNodeFSInodesUsage
          expr: |
            (
              max by (node, mountpoint) (node_filesystem_files_free / node_filesystem_files) * 100 <
              max by (node, mountpoint) (kubelet_eviction_nodefs_inodes{type="soft"})
            )
          for: 10m
          labels:
            severity_level: "9"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            description: |
              Soft eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

              Threshold at: {{ printf "kubelet_eviction_nodefs_inodes{type=\"soft\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}%

              Currently at: {{ .Value }}%
            summary: Soft eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

        # nodefs inodes: free percentage below (hard threshold + 5) for 5m.
        - alert: KubeletNodeFSInodesUsage
          expr: |
            (
              max by (node, mountpoint) (node_filesystem_files_free / node_filesystem_files) * 100 <
              max by (node, mountpoint) (kubelet_eviction_nodefs_inodes{type="hard"} + 5)
            )
          for: 5m
          labels:
            severity_level: "7"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            description: |
              Close to hard eviction threshold of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint.

              Threshold at: {{ printf "kubelet_eviction_nodefs_inodes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}%

              Currently at: {{ .Value }}%
            summary: Close to hard eviction threshold of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint.

        # nodefs inodes: free percentage below the hard threshold; no `for`.
        - alert: KubeletNodeFSInodesUsage
          expr: |
            (
              max by (node, mountpoint) (node_filesystem_files_free / node_filesystem_files) * 100 <
              max by (node, mountpoint) (kubelet_eviction_nodefs_inodes{type="hard"})
            )
          labels:
            severity_level: "6"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            description: |
              Hard eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

              Threshold at: {{ printf "kubelet_eviction_nodefs_inodes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}%

              Currently at: {{ .Value }}%
            summary: Hard eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

        # nodefs inodes: zero free inodes. Multiplying by the threshold series
        # keeps only (node, mountpoint) pairs that have such a series.
        - alert: KubeletNodeFSInodesUsage
          expr: |
            (
              (
                max by (node, mountpoint) (node_filesystem_files_free)
              ) == 0
            )
            * (max by (node, mountpoint) ({__name__=~"kubelet_eviction_nodefs_inodes"}))
          labels:
            severity_level: "5"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            summary: No more free inodes on nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint.

        # imagefs inodes: free percentage below the soft threshold for 10m.
        - alert: KubeletImageFSInodesUsage
          expr: |
            (
              max by (node, mountpoint) (node_filesystem_files_free / node_filesystem_files) * 100 <
              max by (node, mountpoint) (kubelet_eviction_imagefs_inodes{type="soft"})
            )
          for: 10m
          labels:
            severity_level: "9"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            description: |
              Soft eviction of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

              Threshold at: {{ printf "kubelet_eviction_imagefs_inodes{type=\"soft\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}%

              Currently at: {{ .Value }}%
            summary: Soft eviction of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

        # imagefs inodes: free percentage below (hard threshold + 5) for 5m.
        - alert: KubeletImageFSInodesUsage
          expr: |
            (
              max by (node, mountpoint) (node_filesystem_files_free / node_filesystem_files) * 100 <
              max by (node, mountpoint) (kubelet_eviction_imagefs_inodes{type="hard"} + 5)
            )
          for: 5m
          labels:
            severity_level: "7"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            description: |
              Close to hard eviction threshold of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint.

              Threshold at: {{ printf "kubelet_eviction_imagefs_inodes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}%

              Currently at: {{ .Value }}%
            summary: Close to hard eviction threshold of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint.

        # imagefs inodes: free percentage below the hard threshold; no `for`.
        - alert: KubeletImageFSInodesUsage
          expr: |
            (
              max by (node, mountpoint) (node_filesystem_files_free / node_filesystem_files) * 100 <
              max by (node, mountpoint) (kubelet_eviction_imagefs_inodes{type="hard"})
            )
          labels:
            severity_level: "6"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            description: |
              Hard eviction of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

              Threshold at: {{ printf "kubelet_eviction_imagefs_inodes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}%

              Currently at: {{ .Value }}%
            summary: Hard eviction of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

        # imagefs inodes: zero free inodes. Multiplying by the threshold series
        # keeps only (node, mountpoint) pairs that have such a series.
        - alert: KubeletImageFSInodesUsage
          expr: |
            (
              (
                max by (node, mountpoint) (node_filesystem_files_free)
              ) == 0
            )
            * (max by (node, mountpoint) ({__name__=~"kubelet_eviction_imagefs_inodes"}))
          labels:
            severity_level: "5"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            summary: No more free inodes on imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint.

    # Byte-based rules: node_filesystem_avail_bytes / node_filesystem_size_bytes.
    - name: kubernetes.node.disk_bytes_usage
      rules:
        # nodefs bytes: available percentage below the soft threshold for 10m.
        - alert: KubeletNodeFSBytesUsage
          expr: |
            (
              max by (node, mountpoint) (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 <
              max by (node, mountpoint) (kubelet_eviction_nodefs_bytes{type="soft"})
            )
          for: 10m
          labels:
            severity_level: "9"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            description: |
              Soft eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

              Threshold at: {{ printf "kubelet_eviction_nodefs_bytes{type=\"soft\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}%

              Currently at: {{ .Value }}%
            summary: Soft eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

        # nodefs bytes: available percentage below (hard threshold + 5) for 5m.
        - alert: KubeletNodeFSBytesUsage
          expr: |
            (
              max by (node, mountpoint) (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 <
              max by (node, mountpoint) (kubelet_eviction_nodefs_bytes{type="hard"} + 5)
            )
          for: 5m
          labels:
            severity_level: "7"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            description: |
              Close to hard eviction threshold of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint.

              Threshold at: {{ printf "kubelet_eviction_nodefs_bytes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}%

              Currently at: {{ .Value }}%
            summary: Close to hard eviction threshold of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint.

        # nodefs bytes: available percentage below the hard threshold; no `for`.
        - alert: KubeletNodeFSBytesUsage
          expr: |
            (
              max by (node, mountpoint) (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 <
              max by (node, mountpoint) (kubelet_eviction_nodefs_bytes{type="hard"})
            )
          labels:
            severity_level: "6"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            description: |
              Hard eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

              Threshold at: {{ printf "kubelet_eviction_nodefs_bytes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}%

              Currently at: {{ .Value }}%
            summary: Hard eviction of nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

        # nodefs bytes: zero available bytes. Multiplying by the threshold
        # series keeps only (node, mountpoint) pairs that have such a series.
        - alert: KubeletNodeFSBytesUsage
          expr: |
            (
              (
                max by (node, mountpoint) (node_filesystem_avail_bytes)
              ) == 0
            )
            * (max by (node, mountpoint) ({__name__=~"kubelet_eviction_nodefs_bytes"}))
          labels:
            severity_level: "5"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            summary: No more free space on nodefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint.

        # imagefs bytes: available percentage below the soft threshold for 10m.
        - alert: KubeletImageFSBytesUsage
          expr: |
            (
              max by (node, mountpoint) (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 <
              max by (node, mountpoint) (kubelet_eviction_imagefs_bytes{type="soft"})
            )
          for: 10m
          labels:
            severity_level: "9"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            description: |
              Soft eviction of imagefs (filesystem that the container runtime uses for storing images and container writable layers) on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

              Threshold at: {{ printf "kubelet_eviction_imagefs_bytes{type=\"soft\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}%

              Currently at: {{ .Value }}%
            summary: Soft eviction of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

        # imagefs bytes: available percentage below (hard threshold + 5) for 5m.
        - alert: KubeletImageFSBytesUsage
          expr: |
            (
              max by (node, mountpoint) (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 <
              max by (node, mountpoint) (kubelet_eviction_imagefs_bytes{type="hard"} + 5)
            )
          for: 5m
          labels:
            severity_level: "7"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            description: |
              Close to hard eviction threshold of imagefs (filesystem that the container runtime uses for storing images and container writable layers) on node {{$labels.node}} mountpoint {{$labels.mountpoint}}.

              Threshold at: {{ printf "kubelet_eviction_imagefs_bytes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}%

              Currently at: {{ .Value }}%
            summary: Close to hard eviction threshold of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint.

        # imagefs bytes: available percentage below the hard threshold; no `for`.
        - alert: KubeletImageFSBytesUsage
          expr: |
            (
              max by (node, mountpoint) (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 <
              max by (node, mountpoint) (kubelet_eviction_imagefs_bytes{type="hard"})
            )
          labels:
            severity_level: "6"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            description: |
              Hard eviction of imagefs (filesystem that the container runtime uses for storing images and container writable layers) on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

              Threshold at: {{ printf "kubelet_eviction_imagefs_bytes{type=\"hard\", node=\"%s\", mountpoint=\"%s\"}" $labels.node $labels.mountpoint | query | first | value }}%

              Currently at: {{ .Value }}%
            summary: Hard eviction of imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint is in progress.

        # imagefs bytes: zero available bytes. Multiplying by the threshold
        # series keeps only (node, mountpoint) pairs that have such a series.
        - alert: KubeletImageFSBytesUsage
          expr: |
            (
              (
                max by (node, mountpoint) (node_filesystem_avail_bytes)
              ) == 0
            )
            * (max by (node, mountpoint) ({__name__=~"kubelet_eviction_imagefs_bytes"}))
          labels:
            severity_level: "5"
            tier: cluster
          annotations:
            plk_protocol_version: "1"
            plk_markup_format: markdown
            plk_create_group_if_not_exists__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            plk_grouped_by__node_disk_usage: "NodeDiskUsage,tier=cluster,prometheus=deckhouse,node={{ $labels.node }},kubernetes=~kubernetes"
            description: |
              No more free bytes on imagefs (filesystem that the container runtime uses for storing images and container writable layers) on node {{$labels.node}} mountpoint {{$labels.mountpoint}}.
            summary: No more free bytes on imagefs on the {{$labels.node}} Node at the {{$labels.mountpoint}} mountpoint.