feat(alertmanager): don't group by namespace, healthchecks.io heartbeat

This commit is contained in:
JJGadgets
2024-12-19 01:33:14 +08:00
parent 1736e2f4cb
commit 2ab3e04d2d
2 changed files with 65 additions and 13 deletions

View File

@@ -211,6 +211,23 @@ spec:
# yaml-language-server: $schema=https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
# Opt-in egress policy: any endpoint labelled `egress.home.arpa/mdns: allow`
# may send multicast DNS queries.
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
  name: labelled-allow-egress-mdns
spec:
  # Applies only to endpoints that carry the opt-in label.
  endpointSelector:
    matchLabels:
      egress.home.arpa/mdns: allow
  egress:
    # 224.0.0.251 is the IPv4 mDNS multicast group; mDNS uses UDP port 5353.
    - toCIDR:
        - "224.0.0.251/32"
      toPorts:
        - ports:
            - port: "5353"
              protocol: "UDP"
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: labelled-allow-egress-cloudflare-r2
spec:
@@ -350,6 +367,39 @@ spec:
# yaml-language-server: $schema=https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
# Opt-in egress policy: any endpoint labelled `egress.home.arpa/heartbeat: allow`
# may reach hc-ping.com (and its subdomains) on port 443, plus kube-dns to
# resolve those names.
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
  name: labelled-allow-egress-heartbeat
spec:
  # Applies only to endpoints that carry the opt-in label.
  endpointSelector:
    matchLabels:
      egress.home.arpa/heartbeat: allow
  egress:
    # Rule 1: HTTPS to the heartbeat host. The pattern list is anchored as
    # `&dns` so the DNS rule below can reuse exactly the same names.
    - toFQDNs: &dns
        - matchPattern: "hc-ping.com"
        - matchPattern: "*.hc-ping.com"
      toPorts:
        - ports:
            - port: "443"
              protocol: "TCP"
            # NOTE(review): UDP/443 is presumably for HTTP/3 (QUIC) - confirm.
            - port: "443"
              protocol: "UDP"
    # Rule 2: DNS lookups via kube-dns in kube-system. The selector is listed
    # both with and without the `k8s:` label-source prefix.
    - toEndpoints:
        - matchLabels:
            "k8s:io.kubernetes.pod.namespace": kube-system
            "k8s:k8s-app": kube-dns
        - matchLabels:
            io.kubernetes.pod.namespace: kube-system
            k8s-app: kube-dns
      toPorts:
        - ports:
            - port: "53"
              protocol: "ANY"
          # Only queries matching the aliased patterns from rule 1 are allowed.
          rules:
            dns: *dns
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/cilium.io/ciliumclusterwidenetworkpolicy_v2.json
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
name: labelled-allow-egress-mullvad
spec:

View File

@@ -2,19 +2,19 @@
global:
resolve_timeout: 5m
route:
group_by: ["alertname", "cluster", "namespace", "job"]
group_by: ["alertname", "cluster", "job"]
group_interval: 10m
group_wait: 1m
receiver: discord
#repeat_interval: 1h # temporarily set 1m because testing notifs
repeat_interval: 24h
routes:
# - receiver: heartbeat
# group_interval: 5m
# group_wait: 0s
# matchers:
# - alertname =~ "Watchdog"
# repeat_interval: 5m
- receiver: heartbeat
group_interval: 1m
group_wait: 0s
repeat_interval: 1m
matchers:
- alertname =~ "Watchdog"
- receiver: "null"
matchers:
- alertname =~ "InfoInhibitor"
@@ -31,10 +31,10 @@ inhibit_rules:
target_matchers:
- severity = "warning"
receivers:
# - name: heartbeat
# webhook_configs:
# - send_resolved: true
# url: ""
- name: heartbeat
webhook_configs:
- send_resolved: true
url_file: "/secrets/healthchecks.io"
- name: "null"
- name: discord
discord_configs:
@@ -42,8 +42,10 @@ receivers:
webhook_url_file: "/secrets/discord"
# yoinked below from onedr0p, blame him if something doesn't work
title: >-
{{ .GroupLabels.alertname }} - {{ .GroupLabels.namespace }}
{{- if ne .CommonLabels.severity "" }} ({{ .CommonLabels.severity}}){{- end }}
{{ .GroupLabels.alertname }}
{{- if ne .GroupLabels.namespace "" }} - {{ .GroupLabels.namespace }}{{- end }}
{{- if ne .CommonLabels.namespace "" }} - {{ .CommonLabels.namespace }}{{- end }}
{{- if ne .CommonLabels.severity "" }} ({{ .CommonLabels.severity }}){{- end }}
({{ .GroupLabels.cluster }})
[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}]
message: |-