From 8f1785f7c703e1bd3fc2abb9199f884ace278ce3 Mon Sep 17 00:00:00 2001 From: Andrew Dryga Date: Thu, 11 Apr 2024 23:33:46 -0600 Subject: [PATCH] Do not raise alerts on errors from auditlog --- terraform/environments/production/relays.tf | 6 +++--- terraform/modules/google-cloud/ops/main.tf | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/terraform/environments/production/relays.tf b/terraform/environments/production/relays.tf index 07039e07b..0292c5ba5 100644 --- a/terraform/environments/production/relays.tf +++ b/terraform/environments/production/relays.tf @@ -156,7 +156,7 @@ resource "google_compute_firewall" "relays-ssh-ipv4" { } # Trigger an alert when more than 20% of relays are down -resource "google_monitoring_alert_policy" "instances_high_cpu_policy" { +resource "google_monitoring_alert_policy" "connected_relays_count" { project = module.google-cloud-project.project.project_id display_name = "Relays are down" @@ -172,8 +172,8 @@ resource "google_monitoring_alert_policy" "instances_high_cpu_policy" { comparison = "COMPARISON_GT" # at least one relay per region must be always online - threshold_value = module.relays[0].instances - duration = "5s" + threshold_value = length(module.relays[0].instances) + duration = "0s" trigger { count = 1 diff --git a/terraform/modules/google-cloud/ops/main.tf b/terraform/modules/google-cloud/ops/main.tf index 5746ffd18..56a745d2f 100644 --- a/terraform/modules/google-cloud/ops/main.tf +++ b/terraform/modules/google-cloud/ops/main.tf @@ -318,6 +318,7 @@ resource "google_monitoring_alert_policy" "genservers_crash_policy" { resource.type="gce_instance" severity>=ERROR -"invalid ssh key entry - expired key" + -protoPayload.@type="type.googleapis.com/google.cloud.audit.AuditLog" EOT } }