From 37ae1a4e921012a28abc25a36c805af5ed9a2c23 Mon Sep 17 00:00:00 2001 From: Jamil Date: Sun, 1 Jun 2025 17:56:19 -0700 Subject: [PATCH] fix(portal): fix false-positive cluster errors (#9351) Fixes the following issues after learning they're still a problem: - We need to include our own node when checking for connected node count - Need to match against the `formatted` key inside message when filtering Sentry events --- .../lib/domain/cluster/google_compute_labels_strategy.ex | 2 +- elixir/apps/domain/lib/domain/telemetry/sentry.ex | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/elixir/apps/domain/lib/domain/cluster/google_compute_labels_strategy.ex b/elixir/apps/domain/lib/domain/cluster/google_compute_labels_strategy.ex index d6ed677f5..cc33cb04b 100644 --- a/elixir/apps/domain/lib/domain/cluster/google_compute_labels_strategy.ex +++ b/elixir/apps/domain/lib/domain/cluster/google_compute_labels_strategy.ex @@ -171,7 +171,7 @@ defmodule Domain.Cluster.GoogleComputeLabelsStrategy do end defp enough_nodes_connected?(state) do - connected_nodes = state.connected_nodes + connected_nodes = state.connected_nodes ++ [Node.self()] expected_api_node_count = Keyword.fetch!(state.config, :api_node_count) expected_domain_node_count = Keyword.fetch!(state.config, :domain_node_count) expected_web_node_count = Keyword.fetch!(state.config, :web_node_count) diff --git a/elixir/apps/domain/lib/domain/telemetry/sentry.ex b/elixir/apps/domain/lib/domain/telemetry/sentry.ex index b7b5eca46..baa513c43 100644 --- a/elixir/apps/domain/lib/domain/telemetry/sentry.ex +++ b/elixir/apps/domain/lib/domain/telemetry/sentry.ex @@ -3,9 +3,10 @@ defmodule Domain.Telemetry.Sentry do nil end - def before_send(%{message: message} = event) when is_binary(message) do + def before_send(%{message: %{formatted: formatted_message}} = event) + when is_binary(formatted_message) do if String.contains?( - message, + formatted_message, "Node ~p not responding **~n** Removing (timedout) connection" ) do # This happens when libcluster loses connection to a node, which is normal during deploys.