From 4fc7ae5d0e6d17221e4f88607fb1c8f640ca5457 Mon Sep 17 00:00:00 2001
From: Andrew Dryga
Date: Thu, 19 Oct 2023 23:12:32 -0600
Subject: [PATCH] Alert when services are down

---
 terraform/modules/google-cloud-ops/main.tf | 96 ++++++++++++++++++++++
 1 file changed, 96 insertions(+)

diff --git a/terraform/modules/google-cloud-ops/main.tf b/terraform/modules/google-cloud-ops/main.tf
index 3da06bce5..3afeb1aee 100644
--- a/terraform/modules/google-cloud-ops/main.tf
+++ b/terraform/modules/google-cloud-ops/main.tf
@@ -96,6 +96,102 @@ resource "google_monitoring_uptime_check_config" "web-https" {
   checker_type = "STATIC_IP_CHECKERS"
 }
 
+# Alerts the `slack` notification channel when the api-https uptime check
+# reports failures: the condition counts failed (false) check results per
+# project/host group and fires once that count exceeds 1.
+resource "google_monitoring_alert_policy" "api-downtime" {
+  project = var.project_id
+
+  display_name = "API service is DOWN!"
+  combiner     = "OR"
+
+  notification_channels = [
+    google_monitoring_notification_channel.slack.name
+  ]
+
+  conditions {
+    display_name = "Uptime Health Check on api-https"
+
+    condition_threshold {
+      # The check_id metric label holds the short uptime check ID, whereas
+      # `.id` is the full "projects/<project>/uptimeCheckConfigs/<id>" resource
+      # name, so match on `uptime_check_id` for the filter to select any series.
+      filter     = "resource.type = \"uptime_url\" AND metric.type = \"monitoring.googleapis.com/uptime_check/check_passed\" AND metric.labels.check_id = \"${google_monitoring_uptime_check_config.api-https.uptime_check_id}\""
+      comparison = "COMPARISON_GT"
+
+      threshold_value = 1
+      duration        = "0s" # no minimum violation duration
+
+      trigger {
+        count = 1
+      }
+
+      aggregations {
+        alignment_period     = "60s"
+        cross_series_reducer = "REDUCE_COUNT_FALSE" # counts series whose aligned value is false (failed check)
+        per_series_aligner   = "ALIGN_NEXT_OLDER"
+
+        group_by_fields = [
+          "resource.label.project_id",
+          "resource.label.host"
+        ]
+      }
+    }
+  }
+
+  alert_strategy {
+    auto_close = "28800s" # 8 hours
+  }
+}
+
+# Alerts the `slack` notification channel when the web-https uptime check
+# reports failures: the condition counts failed (false) check results per
+# project/host group and fires once that count exceeds 1.
+resource "google_monitoring_alert_policy" "web-downtime" {
+  project = var.project_id
+
+  display_name = "Portal service is DOWN!"
+  combiner     = "OR"
+
+  notification_channels = [
+    google_monitoring_notification_channel.slack.name
+  ]
+
+  conditions {
+    display_name = "Uptime Health Check on web-https"
+
+    condition_threshold {
+      # The check_id metric label holds the short uptime check ID, whereas
+      # `.id` is the full "projects/<project>/uptimeCheckConfigs/<id>" resource
+      # name, so match on `uptime_check_id` for the filter to select any series.
+      filter     = "resource.type = \"uptime_url\" AND metric.type = \"monitoring.googleapis.com/uptime_check/check_passed\" AND metric.labels.check_id = \"${google_monitoring_uptime_check_config.web-https.uptime_check_id}\""
+      comparison = "COMPARISON_GT"
+
+      threshold_value = 1
+      duration        = "0s" # no minimum violation duration
+
+      trigger {
+        count = 1
+      }
+
+      aggregations {
+        alignment_period     = "60s"
+        cross_series_reducer = "REDUCE_COUNT_FALSE" # counts series whose aligned value is false (failed check)
+        per_series_aligner   = "ALIGN_NEXT_OLDER"
+
+        group_by_fields = [
+          "resource.label.project_id",
+          "resource.label.host"
+        ]
+      }
+    }
+  }
+
+  alert_strategy {
+    auto_close = "28800s" # 8 hours
+  }
+}
+
 resource "google_monitoring_alert_policy" "instances_high_cpu_policy" {
   project = var.project_id