From 59dac085ce69232df0073eb856f1a24000095044 Mon Sep 17 00:00:00 2001 From: Steve Wilkerson Date: Wed, 13 Nov 2019 08:51:26 -0600 Subject: [PATCH] Nagios: Update ceph health check command This updates the ceph health check command in Nagios to use the updated plugin that determines the active ceph-mgr instance endpoint to use before querying for ceph's health. This results in more robust and reliable reporting of ceph's overall health. Depends-On: https://review.opendev.org/#/c/693900/ Change-Id: I5eeb076e5af3c820dbdcc3cc321cefcb5f85ef8d Signed-off-by: Steve Wilkerson --- nagios/values.yaml | 2 +- tools/deployment/multinode/110-nagios.sh | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/nagios/values.yaml b/nagios/values.yaml index 9d251985..5097bae0 100644 --- a/nagios/values.yaml +++ b/nagios/values.yaml @@ -588,7 +588,7 @@ conf: } define command { - command_line $USER1$/check_exporter_health_metric.py --exporter_api $USER10$ --health_metric ceph_health_status --critical 2 --warning 1 + command_line $USER1$/check_exporter_health_metric.py --exporter_namespace "ceph" --label_selector "application=ceph,component=manager" --health_metric ceph_health_status --critical 2 --warning 1 command_name check_ceph_health } diff --git a/tools/deployment/multinode/110-nagios.sh b/tools/deployment/multinode/110-nagios.sh index 359c93db..0d02d23c 100755 --- a/tools/deployment/multinode/110-nagios.sh +++ b/tools/deployment/multinode/110-nagios.sh @@ -43,3 +43,6 @@ helm status nagios #NOTE: Verify elasticsearch query clauses are functional by execing into pod NAGIOS_POD=$(kubectl -n osh-infra get pods -l='application=nagios,component=monitoring' --output=jsonpath='{.items[0].metadata.name}') kubectl exec $NAGIOS_POD -n osh-infra -c nagios -- cat /opt/nagios/etc/objects/query_es_clauses.json | python -m json.tool + +#NOTE: Verify plugin for checking ceph health directly via ceph-mgr is working as intended +kubectl exec $NAGIOS_POD -n osh-infra -c nagios -- python 
/usr/lib/nagios/plugins/check_exporter_health_metric.py --exporter_namespace "ceph" --label_selector "application=ceph,component=manager" --health_metric ceph_health_status --critical 2 --warning 1