From f5b4736f12c8cc773420db0aa8a268f8e9509922 Mon Sep 17 00:00:00 2001 From: Andrew Dryga Date: Sat, 11 May 2024 09:37:28 -0600 Subject: [PATCH] fix(portal): Fix edge cases with OIDC discovered in logs (#4777) Can be reviewed commit by commit. --- elixir/apps/api/lib/api/client/channel.ex | 15 ++++++- .../apps/api/test/api/client/channel_test.exs | 19 ++++++++ .../{ => openid_connect}/directory_sync.ex | 2 +- .../adapter/openid_connect/refresh_tokens.ex | 44 +++++++++++++++++++ .../jobs/refresh_access_tokens.ex | 29 ++---------- .../google_workspace/jobs/sync_directory.ex | 2 +- .../jobs/refresh_access_tokens.ex | 29 ++---------- .../microsoft_entra/jobs/sync_directory.ex | 2 +- .../okta/jobs/refresh_access_tokens.ex | 29 ++---------- .../auth/adapters/okta/jobs/sync_directory.ex | 2 +- .../domain/auth/adapters/openid_connect.ex | 14 ++++-- elixir/apps/domain/mix.exs | 2 +- elixir/mix.lock | 2 +- terraform/environments/production/relays.tf | 2 +- terraform/modules/google-cloud/ops/main.tf | 2 +- 15 files changed, 107 insertions(+), 88 deletions(-) rename elixir/apps/domain/lib/domain/auth/adapter/{ => openid_connect}/directory_sync.ex (99%) create mode 100644 elixir/apps/domain/lib/domain/auth/adapter/openid_connect/refresh_tokens.ex diff --git a/elixir/apps/api/lib/api/client/channel.ex b/elixir/apps/api/lib/api/client/channel.ex index f2992b2b2..ec2f88fcd 100644 --- a/elixir/apps/api/lib/api/client/channel.ex +++ b/elixir/apps/api/lib/api/client/channel.ex @@ -349,8 +349,19 @@ defmodule API.Client.Channel do OpenTelemetry.Tracer.with_span "client.create_log_sink" do case Instrumentation.create_remote_log_sink(socket.assigns.client, actor_name, account_slug) do - {:ok, signed_url} -> {:reply, {:ok, signed_url}, socket} - {:error, :disabled} -> {:reply, {:error, %{reason: :disabled}}, socket} + {:ok, signed_url} -> + {:reply, {:ok, signed_url}, socket} + + {:error, :disabled} -> + {:reply, {:error, %{reason: :disabled}}, socket} + + {:error, reason} -> + Logger.error("Failed to create log sink for client", + client_id: socket.assigns.client.id, + reason: inspect(reason) + ) + + {:reply, {:error, %{reason: :retry_later}}, socket} end end end diff --git a/elixir/apps/api/test/api/client/channel_test.exs b/elixir/apps/api/test/api/client/channel_test.exs index e3ffd8ce9..4cf32523c 100644 --- a/elixir/apps/api/test/api/client/channel_test.exs +++ b/elixir/apps/api/test/api/client/channel_test.exs @@ -577,6 +577,25 @@ defmodule API.Client.ChannelTest do assert_reply ref, :error, %{reason: :disabled} end + test "returns error when google api is not available", %{socket: socket} do + bypass = Bypass.open() + + GoogleCloudPlatform.override_endpoint_url( + :metadata_endpoint_url, + "http://localhost:#{bypass.port}/" + ) + + GoogleCloudPlatform.override_endpoint_url( + :sign_endpoint_url, + "http://localhost:#{bypass.port}/service_accounts/" + ) + + Bypass.down(bypass) + + ref = push(socket, "create_log_sink", %{}) + assert_reply ref, :error, %{reason: :retry_later} + end + test "returns a signed URL which can be used to upload the logs", %{ account: account, socket: socket, diff --git a/elixir/apps/domain/lib/domain/auth/adapter/directory_sync.ex b/elixir/apps/domain/lib/domain/auth/adapter/openid_connect/directory_sync.ex similarity index 99% rename from elixir/apps/domain/lib/domain/auth/adapter/directory_sync.ex rename to elixir/apps/domain/lib/domain/auth/adapter/openid_connect/directory_sync.ex index 69de8e809..d4455cd59 100644 --- a/elixir/apps/domain/lib/domain/auth/adapter/directory_sync.ex +++ b/elixir/apps/domain/lib/domain/auth/adapter/openid_connect/directory_sync.ex @@ -1,4 +1,4 @@ -defmodule Domain.Auth.Adapter.DirectorySync do +defmodule Domain.Auth.Adapter.OpenIDConnect.DirectorySync do alias Domain.Repo alias Domain.Jobs.Executors.Concurrent alias Domain.{Auth, Actors} diff --git a/elixir/apps/domain/lib/domain/auth/adapter/openid_connect/refresh_tokens.ex b/elixir/apps/domain/lib/domain/auth/adapter/openid_connect/refresh_tokens.ex new file mode 100644 index 000000000..448ce74c0 --- /dev/null +++ b/elixir/apps/domain/lib/domain/auth/adapter/openid_connect/refresh_tokens.ex @@ -0,0 +1,44 @@ +defmodule Domain.Auth.Adapter.OpenIDConnect.RefreshTokens do + require Logger + + def refresh_access_tokens(name, adapter) do + providers = Domain.Auth.all_providers_pending_token_refresh_by_adapter!(name) + Logger.debug("Refreshing access tokens for #{length(providers)} #{name} providers") + + Enum.each(providers, fn provider -> + Logger.metadata( + provider_id: provider.id, + account_id: provider.account_id, + adapter: name + ) + + Logger.debug("Refreshing access token") + + fn -> adapter.refresh_access_token(provider) end + |> with_sync_retries() + |> case do + {:ok, _provider} -> + Logger.debug("Finished refreshing access token") + + {:error, reason} -> + Logger.error("Failed refreshing access token", + reason: inspect(reason) + ) + end + end) + end + + defp with_sync_retries(cb, retries_left \\ 3, retry_timeout \\ 100) do + case cb.() do + {:ok, result} -> + {:ok, result} + + {:error, _reason} when retries_left > 0 -> + Process.sleep(retry_timeout) + with_sync_retries(cb, retries_left - 1, retry_timeout) + + {:error, reason} -> + {:error, reason} + end + end +end diff --git a/elixir/apps/domain/lib/domain/auth/adapters/google_workspace/jobs/refresh_access_tokens.ex b/elixir/apps/domain/lib/domain/auth/adapters/google_workspace/jobs/refresh_access_tokens.ex index 0cad522c5..0689d5bad 100644 --- a/elixir/apps/domain/lib/domain/auth/adapters/google_workspace/jobs/refresh_access_tokens.ex +++ b/elixir/apps/domain/lib/domain/auth/adapters/google_workspace/jobs/refresh_access_tokens.ex @@ -4,32 +4,11 @@ defmodule Domain.Auth.Adapters.GoogleWorkspace.Jobs.RefreshAccessTokens do every: :timer.minutes(5), executor: Domain.Jobs.Executors.GloballyUnique - alias Domain.Auth.Adapters.GoogleWorkspace - require Logger - @impl true def execute(_config) do - providers = Domain.Auth.all_providers_pending_token_refresh_by_adapter!(:google_workspace) - Logger.debug("Refreshing access tokens for #{length(providers)} providers") - - Enum.each(providers, fn provider -> - Logger.metadata( - account_id: provider.account_id, - provider_id: provider.id, - provider_adapter: provider.adapter - ) - - Logger.debug("Refreshing access token") - - case GoogleWorkspace.refresh_access_token(provider) do - {:ok, _provider} -> - Logger.debug("Finished refreshing access token") - - {:error, reason} -> - Logger.error("Failed refreshing access token", - reason: inspect(reason) - ) - end - end) + Domain.Auth.Adapter.OpenIDConnect.RefreshTokens.refresh_access_tokens( + :google_workspace, + Domain.Auth.Adapters.GoogleWorkspace + ) end end diff --git a/elixir/apps/domain/lib/domain/auth/adapters/google_workspace/jobs/sync_directory.ex b/elixir/apps/domain/lib/domain/auth/adapters/google_workspace/jobs/sync_directory.ex index 7113c8f10..9d3638dd4 100644 --- a/elixir/apps/domain/lib/domain/auth/adapters/google_workspace/jobs/sync_directory.ex +++ b/elixir/apps/domain/lib/domain/auth/adapters/google_workspace/jobs/sync_directory.ex @@ -4,7 +4,7 @@ defmodule Domain.Auth.Adapters.GoogleWorkspace.Jobs.SyncDirectory do every: :timer.minutes(2), executor: Domain.Jobs.Executors.Concurrent - alias Domain.Auth.Adapter.DirectorySync + alias Domain.Auth.Adapter.OpenIDConnect.DirectorySync alias Domain.Auth.Adapters.GoogleWorkspace require Logger require OpenTelemetry.Tracer diff --git a/elixir/apps/domain/lib/domain/auth/adapters/microsoft_entra/jobs/refresh_access_tokens.ex b/elixir/apps/domain/lib/domain/auth/adapters/microsoft_entra/jobs/refresh_access_tokens.ex index 8433d36e6..3ebe3ab15 100644 --- a/elixir/apps/domain/lib/domain/auth/adapters/microsoft_entra/jobs/refresh_access_tokens.ex +++ b/elixir/apps/domain/lib/domain/auth/adapters/microsoft_entra/jobs/refresh_access_tokens.ex @@ -4,32 +4,11 @@ defmodule Domain.Auth.Adapters.MicrosoftEntra.Jobs.RefreshAccessTokens do every: :timer.minutes(5), executor: Domain.Jobs.Executors.GloballyUnique - alias Domain.Auth.Adapters.MicrosoftEntra - require Logger - @impl true def execute(_config) do - providers = Domain.Auth.all_providers_pending_token_refresh_by_adapter!(:microsoft_entra) - Logger.debug("Refreshing access tokens for #{length(providers)} Microsoft Entra providers") - - Enum.each(providers, fn provider -> - Logger.metadata( - account_id: provider.account_id, - provider_id: provider.id, - provider_adapter: provider.adapter - ) - - Logger.debug("Refreshing access token") - - case MicrosoftEntra.refresh_access_token(provider) do - {:ok, _provider} -> - Logger.debug("Finished refreshing access token") - - {:error, reason} -> - Logger.error("Failed refreshing access token", - reason: inspect(reason) - ) - end - end) + Domain.Auth.Adapter.OpenIDConnect.RefreshTokens.refresh_access_tokens( + :microsoft_entra, + Domain.Auth.Adapters.MicrosoftEntra + ) end end diff --git a/elixir/apps/domain/lib/domain/auth/adapters/microsoft_entra/jobs/sync_directory.ex b/elixir/apps/domain/lib/domain/auth/adapters/microsoft_entra/jobs/sync_directory.ex index f97072e1e..0d5328f90 100644 --- a/elixir/apps/domain/lib/domain/auth/adapters/microsoft_entra/jobs/sync_directory.ex +++ b/elixir/apps/domain/lib/domain/auth/adapters/microsoft_entra/jobs/sync_directory.ex @@ -4,7 +4,7 @@ defmodule Domain.Auth.Adapters.MicrosoftEntra.Jobs.SyncDirectory do every: :timer.minutes(5), executor: Domain.Jobs.Executors.Concurrent - alias Domain.Auth.Adapter.DirectorySync + alias Domain.Auth.Adapter.OpenIDConnect.DirectorySync alias Domain.Auth.Adapters.MicrosoftEntra require Logger require OpenTelemetry.Tracer diff --git a/elixir/apps/domain/lib/domain/auth/adapters/okta/jobs/refresh_access_tokens.ex b/elixir/apps/domain/lib/domain/auth/adapters/okta/jobs/refresh_access_tokens.ex index 648722daf..ebf70ede2 100644 --- a/elixir/apps/domain/lib/domain/auth/adapters/okta/jobs/refresh_access_tokens.ex +++ b/elixir/apps/domain/lib/domain/auth/adapters/okta/jobs/refresh_access_tokens.ex @@ -4,32 +4,11 @@ defmodule Domain.Auth.Adapters.Okta.Jobs.RefreshAccessTokens do every: :timer.minutes(5), executor: Domain.Jobs.Executors.GloballyUnique - alias Domain.Auth.Adapters.Okta - require Logger - @impl true def execute(_config) do - providers = Domain.Auth.all_providers_pending_token_refresh_by_adapter!(:okta) - Logger.debug("Refreshing access tokens for #{length(providers)} Okta providers") - - Enum.each(providers, fn provider -> - Logger.metadata( - account_id: provider.account_id, - provider_id: provider.id, - provider_adapter: provider.adapter - ) - - Logger.debug("Refreshing access token") - - case Okta.refresh_access_token(provider) do - {:ok, _provider} -> - Logger.debug("Finished refreshing access token") - - {:error, reason} -> - Logger.error("Failed refreshing access token", - reason: inspect(reason) - ) - end - end) + Domain.Auth.Adapter.OpenIDConnect.RefreshTokens.refresh_access_tokens( + :okta, + Domain.Auth.Adapters.Okta + ) end end diff --git a/elixir/apps/domain/lib/domain/auth/adapters/okta/jobs/sync_directory.ex b/elixir/apps/domain/lib/domain/auth/adapters/okta/jobs/sync_directory.ex index 391aac148..ba5c9fa0a 100644 --- a/elixir/apps/domain/lib/domain/auth/adapters/okta/jobs/sync_directory.ex +++ b/elixir/apps/domain/lib/domain/auth/adapters/okta/jobs/sync_directory.ex @@ -4,7 +4,7 @@ defmodule Domain.Auth.Adapters.Okta.Jobs.SyncDirectory do every: :timer.minutes(5), executor: Domain.Jobs.Executors.Concurrent - alias Domain.Auth.Adapter.DirectorySync + alias Domain.Auth.Adapter.OpenIDConnect.DirectorySync alias Domain.Auth.Adapters.Okta require Logger require OpenTelemetry.Tracer diff --git a/elixir/apps/domain/lib/domain/auth/adapters/openid_connect.ex b/elixir/apps/domain/lib/domain/auth/adapters/openid_connect.ex index 9d5741fb4..ec15f4e96 100644 --- a/elixir/apps/domain/lib/domain/auth/adapters/openid_connect.ex +++ b/elixir/apps/domain/lib/domain/auth/adapters/openid_connect.ex @@ -243,7 +243,7 @@ defmodule Domain.Auth.Adapters.OpenIDConnect do with {:ok, tokens} <- OpenIDConnect.fetch_tokens(config, token_params), {:ok, claims} <- OpenIDConnect.verify(config, tokens["id_token"]), - {:ok, userinfo} <- OpenIDConnect.fetch_userinfo(config, tokens["access_token"]) do + {:ok, userinfo} <- fetch_userinfo(config, tokens) do expires_at = cond do not is_nil(tokens["expires_in"]) -> @@ -274,7 +274,7 @@ defmodule Domain.Auth.Adapters.OpenIDConnect do {:error, :invalid_token} {:error, {status, _reason} = other} when status in 400..499 -> - Logger.info("Failed to connect OpenID Connect provider", + Logger.info("Failed to fetch OpenID Connect state", provider_id: provider.id, reason: inspect(other) ) @@ -282,7 +282,7 @@ defmodule Domain.Auth.Adapters.OpenIDConnect do {:error, :invalid_token} {:error, other} -> - Logger.error("Failed to connect OpenID Connect provider", + Logger.error("Failed to fetch OpenID Connect state", provider_id: provider.id, account_id: provider.account_id, reason: inspect(other) @@ -292,6 +292,14 @@ defmodule Domain.Auth.Adapters.OpenIDConnect do end end + defp fetch_userinfo(config, tokens) do + case OpenIDConnect.fetch_userinfo(config, tokens["access_token"]) do + {:ok, userinfo} -> {:ok, userinfo} + {:error, :userinfo_endpoint_is_not_implemented} -> {:ok, %{}} + {:error, _reason} -> {:error, :invalid_token} + end + end + defp config_for_provider(%Provider{} = provider) do Ecto.embedded_load(Settings, provider.adapter_config, :json) |> Map.from_struct() diff --git a/elixir/apps/domain/mix.exs b/elixir/apps/domain/mix.exs index dd450014b..ff93d5903 100644 --- a/elixir/apps/domain/mix.exs +++ b/elixir/apps/domain/mix.exs @@ -54,7 +54,7 @@ defmodule Domain.MixProject do # Auth-related deps {:plug_crypto, "~> 2.0"}, {:openid_connect, - github: "firezone/openid_connect", ref: "76c1a1c9a9da3b8be7b8270306a9240d80d7696f"}, + github: "firezone/openid_connect", ref: "dee689382699fce7a6ca70084ccbc8bc351d3246"}, {:argon2_elixir, "~> 4.0"}, # Erlang Clustering diff --git a/elixir/mix.lock b/elixir/mix.lock index ec9dcafb8..6be49b74d 100644 --- a/elixir/mix.lock +++ b/elixir/mix.lock @@ -61,7 +61,7 @@ "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, "number": {:hex, :number, "1.0.4", "3e6e6032a3c1d4c3760e77a42c580a57a15545dd993af380809da30fe51a032c", [:mix], [{:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}], "hexpm", "16f7516584ef2be812af4f33f2eaf3f9b9f6ed8892f45853eb93113f83721e42"}, "observer_cli": {:hex, :observer_cli, "1.7.4", "3c1bfb6d91bf68f6a3d15f46ae20da0f7740d363ee5bc041191ce8722a6c4fae", [:mix, :rebar3], [{:recon, "~> 2.5.1", [hex: :recon, repo: "hexpm", optional: false]}], "hexpm", "50de6d95d814f447458bd5d72666a74624eddb0ef98bdcee61a0153aae0865ff"}, - "openid_connect": {:git, "https://github.com/firezone/openid_connect.git", "76c1a1c9a9da3b8be7b8270306a9240d80d7696f", [ref: "76c1a1c9a9da3b8be7b8270306a9240d80d7696f"]}, + "openid_connect": {:git, "https://github.com/firezone/openid_connect.git", "dee689382699fce7a6ca70084ccbc8bc351d3246", [ref: "dee689382699fce7a6ca70084ccbc8bc351d3246"]}, "opentelemetry": {:hex, :opentelemetry, "1.4.0", "f928923ed80adb5eb7894bac22e9a198478e6a8f04020ae1d6f289fdcad0b498", [:rebar3], [{:opentelemetry_api, "~> 1.3.0", [hex: :opentelemetry_api, repo: "hexpm", optional: false]}, {:opentelemetry_semantic_conventions, "~> 0.2", [hex: :opentelemetry_semantic_conventions, repo: "hexpm", optional: false]}], "hexpm", "50b32ce127413e5d87b092b4d210a3449ea80cd8224090fe68d73d576a3faa15"}, "opentelemetry_api": {:hex, :opentelemetry_api, "1.3.0", "03e2177f28dd8d11aaa88e8522c81c2f6a788170fe52f7a65262340961e663f9", [:mix, :rebar3], [{:opentelemetry_semantic_conventions, "~> 0.2", [hex: :opentelemetry_semantic_conventions, repo: "hexpm", optional: false]}], "hexpm", "b9e5ff775fd064fa098dba3c398490b77649a352b40b0b730a6b7dc0bdd68858"}, "opentelemetry_cowboy": {:hex, :opentelemetry_cowboy, "0.3.0", "0144b211fa6cda0e6211c340cebd1bbd9158e350099ea3bf3d838f993cb4b90e", [:rebar3], [{:cowboy_telemetry, "~> 0.4", [hex: :cowboy_telemetry, repo: "hexpm", optional: false]}, {:opentelemetry_api, "~> 1.2", [hex: :opentelemetry_api, repo: "hexpm", optional: false]}, {:opentelemetry_telemetry, "~> 1.0", [hex: :opentelemetry_telemetry, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "4f44537b4c7430018198d480f55bc88a40f7d0582c3ad927a5bab4ceb39e80ea"}, diff --git a/terraform/environments/production/relays.tf b/terraform/environments/production/relays.tf index af1bea68b..9f9f5b56a 100644 --- a/terraform/environments/production/relays.tf +++ b/terraform/environments/production/relays.tf @@ -172,7 +172,7 @@ resource "google_monitoring_alert_policy" "connected_relays_count" { comparison = "COMPARISON_GT" # at least one relay per region must be always online - threshold_value = length(module.relays[0].instances) + threshold_value = length(module.relays[0].instances) - 1 duration = "0s" trigger { diff --git a/terraform/modules/google-cloud/ops/main.tf b/terraform/modules/google-cloud/ops/main.tf index 2c10f791c..358b130f6 100644 --- a/terraform/modules/google-cloud/ops/main.tf +++ b/terraform/modules/google-cloud/ops/main.tf @@ -302,7 +302,7 @@ resource "google_monitoring_alert_policy" "sql_disk_utiliziation_policy" { } } -resource "google_monitoring_alert_policy" "genservers_crash_policy" { +resource "google_monitoring_alert_policy" "errors" { project = var.project_id display_name = "Errors in logs"