From 6d9df4453f0ca4ae5914603c10fa403e9458f09d Mon Sep 17 00:00:00 2001
From: Johann Hoffmann
Date: Fri, 4 Nov 2022 15:24:21 +0100
Subject: [PATCH] Adapt alert to use new exitcode metric

Signed-off-by: Johann Hoffmann
---
Reviewer notes (this area, between the separator above and the diff below,
is not part of the commit message and is skipped when the patch is applied):

* Layout: this patch had been flattened onto a single line. The line
  structure below is restored from the hunk headers (8 old / 8 new lines per
  hunk). The absolute YAML indentation of the hunk lines could not be
  recovered; only the relative nesting is reconstructed. Re-indent to match
  helmfile/cloud-sdk/helmfile.yaml, or apply with
  `git apply --ignore-whitespace` - TODO confirm against the target file.

* NOTE(review): the new expression keeps `min_over_time(...[10m]) == 139`.
  The replaced metric was selected with reason="Error" and compared to 1,
  whereas the exitcode series is compared to the exit code itself. Taking
  the window minimum presumably means a lower exit code from an earlier
  termination inside the 10m window hides a segfault (and a later, higher
  exit code would not clear one). Consider comparing the current value
  instead of the window minimum - verify against kube-state-metrics.

* NOTE(review): the unchanged context line `reason: ... $labels.reason ...`
  is kept, but the new expression no longer references any series selected
  by `reason`; presumably this label renders empty - confirm, and drop it in
  a follow-up if so.

* NOTE(review): the From and Signed-off-by lines carry no e-mail address
  (presumably stripped); `git am` may reject the mail without one - confirm.

 helmfile/cloud-sdk/helmfile.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/helmfile/cloud-sdk/helmfile.yaml b/helmfile/cloud-sdk/helmfile.yaml
index 7703b3d..7f6145c 100644
--- a/helmfile/cloud-sdk/helmfile.yaml
+++ b/helmfile/cloud-sdk/helmfile.yaml
@@ -254,8 +254,8 @@ releases:
   annotations:
     title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* has been OOM killed
     description: The pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* has been killed due to reaching its memory limit. Investigate the memory usage or increase the limit to prevent this.
-- alert: Pod terminated
-  expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="Error"}[10m]) == 1
+- alert: Pod exited with a segfault
+  expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and min_over_time(kube_pod_container_status_last_terminated_exitcode[10m]) == 139
   for: 0m
   labels:
     severity: error
@@ -264,8 +264,8 @@ releases:
     pod: "{{`{{ $labels.pod }}`}}"
     reason: "{{`{{ $labels.reason }}`}}"
   annotations:
-    title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* terminated due to an error
-    description: The pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* has terminated due to an error, please investigate the cause.
+    title: Pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* exited with a segfault
+    description: The pod *{{`{{ $labels.namespace }}/{{ $labels.pod }}`}}* exited with a segmentation fault, please examine the coredump.
 - alert: Node low on memory
   expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 5
   for: 1m